UVa 1351 - String Compression

Published on 2016-02-08

题目地址

描述

有一个长度为 n(n200)n(n\le200) 的字符串,我们现在要尽可能的压缩这个字符串,使这个字符串的长度尽可能的短。如果一个子串 SS 连续重复 kk 次,我们就可以把它压缩为 k(S)k(S),压缩后的长度为 digit(k)+2+length(S)\mathrm{digit}(k) + 2 + \mathrm{length}(S)。允许嵌套压缩,例如:nowletsgogogoletsgogogo 可压缩为 now2(lets3(go))

样例输入

4 
ababcd 
letsgogogo 
nowletsgogogoletsgogogo 
nowletsgogogoletsgogogoandrunrunrun

样例输出

6 
9 
15 
24

分析

很棘手的一道题目,首先我们对于一段字符串,可以利用 KMP 的失配表得到循环节的长度,如果不了解 KMP 算法,可以先看看我写的:KMP 算法详细解析,复杂度是 O(n)O(n) 的。
如果不能压缩嵌套,直接上一个一维 dp[i]dp[i] 表示前 ii 个字符能压缩到的最短长度,然后决策就是枚举压缩部分的长度即可。
现在可以嵌套了,可以考虑写成区间形式,因为一维的状态是绝对无法处理嵌套的情况的。如果 dp[i][j]dp[i][j] 表示区间 [i,j)[i, j) 能压缩到的最短长度,那么决策时较长的区间就可以利用到较短区间的压缩成果,从而达到嵌套的目的!给出方程:

代码

//  Created by Sengxian on 2/8/16.
//  Copyright (c) 2015年 Sengxian. All rights reserved.
//  UVa 1351 字符串区间 DP
#include <algorithm>
#include <iostream>
#include <cctype>
#include <climits>
#include <cstring>
#include <cstdio>
#include <cmath>
#include <vector>
#include <stack>
#include <queue>
#define br putchar('\n');
using namespace std;

inline int ReadInt() {
    int n = 0, ch = getchar();
    while(!isdigit(ch)) ch = getchar();
    while(isdigit(ch)) n = (n << 3) + (n << 1) + ch - '0', ch = getchar();
    return n;
}

inline int digitNum(int k) {
    int a = log10(k);
    if(fabs(a - (int)a) < 1e-6) return a + 1;
    return ceil(log10(k));
}

const int maxn = 200 + 3;
int dp[maxn][maxn], f[maxn], n;
char s[maxn];

//利用 KMP 的失配表得到循环节的长度
int getCircle(int first, int end) {
    f[first] = f[first + 1] = first;
    for(int i = first + 1; i < end; ++i) {
        int j = f[i];
        while(j != first && s[i] != s[j]) j = f[j];
        f[i + 1] = s[i] == s[j] ? j + 1 : first;
    }
    int t = end - f[end];
    if(!((end - first) % t)) return t;
    else return 0;
}

int Solve() {
    for(int i = 0; i < n; ++i) dp[i][i + 1] = 1;
    for(int w = 2; w <= n; ++w)
        for(int i = 0; i + w <= n; ++i) {
            int j = i + w, k = 0;
            dp[i][j] = w; //初始值是原本的长度!
            for(int k = i + 1; k < j; ++k) dp[i][j] = min(dp[i][j], dp[i][k] + dp[k][j]);
            if(k = getCircle(i, j), k && k != w) dp[i][j] = min(dp[i][j], dp[i][i + k] + 2 + digitNum((j - i) / k));
        }
    return dp[0][n];
}

int main() {
    int caseNum = ReadInt();
    while(caseNum--) {
        scanf("%s", s);
        n = strlen(s);
        printf("%d\n", Solve());
    }
    return 0;
}

附加输入

20
kjwlwhtozrhceeeprmitiqotgeejklkuuh
sqbipspuywaqnmahuqxnybtjlfthzapusqcklsejoebdsbkmtjbtkucxbwfdyuxrnzbythjjn
nfoztijwbvsgvucaxawwtlvwlrfxaukpzyruicqmxjssfwueytasgx
sowprqziszzmkbfybqsvxosbmnbgtyzlpvcindqffsurvbpwthuqxmrlbssusrijomrbqijvcdmxgdwzkqpjeguhaobuflevyvzofiljnzhvefuoxjxbrtktilnpztlxrmnyuaijbpefwavtluw
ngxvtklseyrvkevggdrhuvoqxljiffkvlhsftfzzdrwpvsvevmnqjcigpsqvzasljksfqrevidlgviksvzkgdtptnfqmiixturykjfgtitzfblxylkfqdwjqbzcljcfevfogmwawpbbtpbrcl
squcg
dkjnoqrmyhskgsjxwkqlnknaifqdjydplocbhvnfegrmaakwncjamwdvetapudeftigafwfkczyezidonmpzluwpnyghbloxvxxbtdnxclbdwesjrjkcdjtshzclmqiipfjikyfojjuhpmsixfmcofwxh
itrsdgyorkmwbyhvfxjyfgdsktzgqgfbcythetwvekufidcqalogrtabozjeiofmmbttupqaamhkqjaqwqynjyqxaceiqlufmpajfqlhdtrteulbkkqwkjvklzudnqkzgkkndwwgppbvjoyuypqlanxlnsqakbbslmfoibwzrzwco
womozmczzrtptdquweibssfrtyqqaglzvxpwjrvikozduqaquitodahwyambgzadypajjxttotxilxafhtvkwfjufxxnxzrvrtfaqavetsoerolbijngowcvuaitzzqqsxslyorrigyaujdesskipndlpmgonwhiwztupmnyslapxdtrvgamvfzktfah
jpalkwawjarxagufbndjnpevrqoytvccmepyaqxmqqjtyfyzsblitrdkiridpnhdrxeupbihrraryzqqcdyvvbhfvqikfppyotsdualntneroxkraiovmwchokttcirqemwyojoiw
bntleuwuriqwregmxiwrbadxbrirbenfrhqxdoswwisppzdojcfmcilfatwdamittbrwpluovndkojzxlemnpxvrttutfdoyehwtsrhqencuwdtjkfyzftqynmtupjvvrrrjk
bqpeknhewrlvsqoiqdvlymvvkmmbxzdzptdcimgfgtaykqj
wgmwshrcvffselrwgwyolgtrzvrloamkgagbjxdeclyjwqfcodszlms
jkwxkkhtmpuvmzbrkzairfmikfhxtcjcmhayrjrfzncnndexfgiwnuezbowuqgxenzehiwojlrxyudybmgyzcezesxakdzotyvcjtqsejrdevbhhjfinkjtehurmthfucjdvcvalpdrkgztpgbeqlyxssoglvohaxkvbivn
aejgdcxkfccsabmurthphppgzkijivgibrrhtqtbuxtwzgqszahgpymqluatridtawcvnvwjsqgtwymvyvdnurgh
gafqdasccopzmyscgnafzxfxattuzaihcpzhqsjugyuszoxhbxodxuczpvtqxbzztyilqsfysbttrqbvppymlcmazhqwjpxeqfpgzwhsyalrsomhgkwroksnrkjccigsqvapshhshvjzjwjpihjxrbkknwprgvjwtkm
rvhzsqacoltzscylfkyvgnmpkwlfizuzvbanudrkoljjpjuvuuqchcruadzicukzxkprnicevnnlxjirgatnenjfqlnuhxweklyztadrpscmekfkmbzqojvguladkyhulhwhjzyzuanymtkywjokumsoxutjtdeekalubjvxlixxdhxzrollcdcbyv
t
oxlqjhuucrgmpfqyfrpvfbxkfyk
iddzbormxlibfonwwfubzmweptourzwbebafputohcpmsdkokhpjtnnigeezfcdjefrvzkkhobvjegxrppakcquluymzcpigvzcumobcpzlwhjnwapgcfapbacactllokplydobunmquxfqxvyccarecveh

附加输出

34
73
54
147
145
5
153
173
188
137
133
47
55
167
88
163
186
1
27
155