Problem

现在给定一个母串,询问多个子串

对于每个子串,求母串中最短的子串使得子串在其中出现至少$k$次

子串长度和 $\le 10^5$,母串长度 $\le 10^5$

Solution

我们将询问串按照长度分组,对于每种分组,我们计算母串中对应长度的哈希值

将每个哈希值在分组中找对应的询问子串,把下标保存在对应询问上,用保存的下标计算答案

考虑子串长度和限制,复杂度均摊,最坏情况$O(n\sqrt{n})$

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#include <bits/stdc++.h>
using namespace std;
typedef unsigned long long ull;
using PI = pair<ull, int>;
const ull base = 131;
const int N = 100000 + 10;
ull p[N], Hash[N], h[N];
vector<PI> v[N];
vector<int> ID[N];
int n, k[N], L[N], L1[N];
char s[N], t[N];
ull get_hash(int L, int R) { return h[R] - h[L - 1] * p[R - L + 1]; }
int main() {
scanf("%s", s);
scanf("%d", &n);
int len = strlen(s);
for (int i = p[0] = 1; i < N; i++) p[i] = p[i - 1] * base;
for (int i = 0; i < len; i++) h[i + 1] = h[i] * base + s[i];
for (int i = 1; i <= n; i++) {
scanf("%d%s", &k[i], t);
Hash[i] = 0;
L1[i] = L[i] = strlen(t);
for (int j = 0; j < L[i]; j++) Hash[i] = Hash[i] * base + t[j];
v[L[i]].emplace_back(Hash[i], i);
}
sort(L + 1, L + n + 1);
int cnt = unique(L + 1, L + n + 1) - (L + 1);
for (int i = 1; i <= cnt; i++) {
sort(v[L[i]].begin(), v[L[i]].end());
for (int j = 1; j <= len - L[i] + 1; j++) {
ull H = get_hash(j, j + L[i] - 1);
auto it = lower_bound(v[L[i]].begin(), v[L[i]].end(), PI{H, 0});
if (it->first == H) ID[it->second].push_back(j);
}
}
for (int i = 1; i <= n; i++) {
if (ID[i].size() < k[i])
puts("-1");
else {
int ans = len;
for (int j = 0; j + k[i] - 1 < ID[i].size(); j++)
ans = min(ans, ID[i][j + k[i] - 1] - ID[i][j]);
printf("%d\n", ans + L1[i]);
}
}
return 0;
}