Codeforces 666E Forensic Examination
- 問題概要:
文字列Sと、文字列X_1,...,X_Nが与えられる。次のクエリをQ個処理せよ。
クエリ: l,r,p,qが与えられるので、X_lからX_rのなかで文字列[s_p...s_q]が部分文字列として一番多く現れるものとその回数を求めよ。
- 解法:
suffix array+LCP+sparse tableでクエリを区間の最頻値を求める形にしておく。この区間たちはラミナー(交わるなら一方が他方に含まれる)なので、マージテクをしながらクエリを区間が小さい順に見ていくとsegtreeの操作で解ける。
サイズがでかいので前半パートにlogを2つつけると死にます。後半パートは50000につくので大丈夫。
こういう実装遅すぎるし演習量が足りません。精進します
#include<stdio.h> #include<vector> #include<algorithm> #include<string> #include<iostream> #include<set> #include<time.h> using namespace std; vector<int>sais(vector<int>vec)//1-originにして末尾に0を加えてから呼ぶのを忘れないこと { if (vec.empty()) { vector<int>v; return v; } vector<int>dat; dat.resize(vec.size()); dat[vec.size() - 1] = 0; for (int i = vec.size() - 2; i >= 0; i--) { if (vec[i]>vec[i + 1])dat[i] = 1; else if (vec[i]<vec[i + 1])dat[i] = 0; else dat[i] = dat[i + 1]; } vector<vector<int> >sa; int maxi = 0; for (int i = 0; i<vec.size(); i++)maxi = max(maxi, vec[i]); sa.resize(maxi + 1); vector<int>vnum; vnum.resize(maxi + 1); for (int i = 0; i<vec.size(); i++)vnum[vec[i]]++; vector<bool>islms; islms.resize(vec.size()); fill(islms.begin(), islms.end(), false); for (int i = 0; i <= maxi; i++) { sa[i].resize(vnum[i]); fill(sa[i].begin(), sa[i].end(), -1); } vector<int>pt1, pt2; pt1.resize(maxi + 1); pt2.resize(maxi + 1); for (int i = 0; i <= maxi; i++) { pt2[i] = vnum[i] - 1; } for (int i = vec.size() - 1; i >= 1; i--) { if ((dat[i - 1] == 1 && dat[i] == 0) || i == vec.size() - 1) { sa[vec[i]][pt2[vec[i]]--] = i; islms[i] = true; } } for (int i = 0; i <= maxi; i++) { for (int j = 0; j<vnum[i]; j++) { if (sa[i][j]>0) { if (dat[sa[i][j] - 1] == 1) { sa[vec[sa[i][j] - 1]][pt1[vec[sa[i][j] - 1]]++] = sa[i][j] - 1; } } } } for (int i = 1; i <= maxi; i++) { for (int j = pt2[i] + 1; j<vnum[i]; j++)sa[i][j] = -1; pt2[i] = vnum[i] - 1; } for (int i = maxi; i >= 0; i--) { for (int j = vnum[i] - 1; j >= 0; j--) { if (sa[i][j]>0) { if (dat[sa[i][j] - 1] == 0) { sa[vec[sa[i][j] - 1]][pt2[vec[sa[i][j] - 1]]--] = sa[i][j] - 1; } } } } vector<int>d; d.resize(vec.size()); int cnt = 0; vector<int>bef; bef.push_back(0); bool fl = false; for (int i = 0; i <= maxi; i++) { for (int j = 0; j<vnum[i]; j++) { if (islms[sa[i][j]]) { vector<int>zk; zk.push_back(vec[sa[i][j]]); for (int k = sa[i][j] + 1; k<vec.size(); k++) { zk.push_back(vec[k]); if (islms[k])break; } if (zk != bef)cnt++; else if (vec[sa[i][j]] != 0)fl = true; d[sa[i][j]] = cnt; bef = zk; } } } vector<int>vt; for (int i = 0; i<vec.size(); i++)if (islms[i])vt.push_back(d[i]); vector<int>gv; vector<int>nv; if (fl) { gv = sais(vt); vector<int>v; for (int i = 0; i<vec.size(); i++) { if (islms[i])v.push_back(i); } for (int i = 0; i<gv.size(); i++) { nv.push_back(v[gv[i]]); } } else { gv = vt; nv.resize(gv.size()); int pt = 0; for (int i = 0; i<gv.size(); i++) { for (;;) { if (islms[pt]) { nv[gv[i]] = pt; pt++; break; } pt++; } } } for (int i = 0; i <= maxi; i++) { fill(sa[i].begin(), sa[i].end(), -1); pt1[i] = 0; pt2[i] = vnum[i] - 1; } for (int i = nv.size() - 1; i >= 0; i--) { sa[vec[nv[i]]][pt2[vec[nv[i]]]--] = nv[i]; } for (int i = 0; i <= maxi; i++) { for (int j = 0; j<vnum[i]; j++) { if (sa[i][j]>0) { if (dat[sa[i][j] - 1] == 1) { sa[vec[sa[i][j] - 1]][pt1[vec[sa[i][j] - 1]]++] = sa[i][j] - 1; } } } } for (int i = 1; i <= maxi; i++) { for (int j = pt2[i] + 1; j<vnum[i]; j++)sa[i][j] = -1; pt2[i] = vnum[i] - 1; } for (int i = maxi; i >= 0; i--) { for (int j = vnum[i] - 1; j >= 0; j--) { if (sa[i][j]>0) { if (dat[sa[i][j] - 1] == 0) { sa[vec[sa[i][j] - 1]][pt2[vec[sa[i][j] - 1]]--] = sa[i][j] - 1; } } } } vector<int>ret; for (int i = 0; i <= maxi; i++) { for (int j = 0; j<vnum[i]; j++) { ret.push_back(sa[i][j]); } } return ret; } vector<int>calclcp(vector<int>str, vector<int>sa)//lcp: SAのi-1番目とi番目のlcp { vector<int>rsa; rsa.resize(sa.size()); for (int i = 0; i < sa.size(); i++)rsa[sa[i]] = i; vector<int>lcp; lcp.resize(sa.size()); int now = 1; for (int i = 0; i < str.size() - 1; i++) { if (now != 0)now--; for (;;) { if (str[i + now] == str[sa[rsa[i] - 1] + now])now++; else { lcp[rsa[i]] = now; break; } } } return lcp; } #define SIZE 1048576 class segtree { public: int seg[SIZE * 2]; void init() { for (int i = 1; i < SIZE * 2; i++)seg[i] = 1000000000; } void update(int a, int b) { a += SIZE; seg[a] = min(seg[a], b); for (;;) { a /= 2; if (a == 0)break; seg[a] = min(seg[a * 2], seg[a * 2 + 1]); } } int get(int beg, int end, int node, int lb, int ub) { if (ub < beg || end < lb)return 1000000000; if (beg <= lb&&ub <= end)return seg[node]; return min(get(beg, end, node * 2, lb, (lb + ub) / 2), get(beg, end, node * 2 + 1, (lb + ub) / 2 + 1, ub)); } }; class sparsetable { public: int seg[20][SIZE]; int dat[SIZE]; void init(vector<int>vec) { for (int i = 1; i < SIZE; i++) { int c = 0, now = i; for (;;) { if (now == 0)break; now /= 2; c++; } dat[i] = c - 1; } for (int i = 0; i < vec.size(); i++)seg[0][i] = vec[i]; for (int i = vec.size(); i < SIZE; i++)seg[0][i] = 1000000000; for (int i = 1; i < 20; i++) { for (int j = 0; j < SIZE; j++) { seg[i][j] = min(seg[i - 1][j], seg[i - 1][j + (1 << (i - 1))]); } } } int get(int lb, int ub) { int t = ub - lb + 1; if (t <= 0)return 0; return min(seg[dat[t]][lb], seg[dat[t]][ub - (1 << dat[t]) + 1]); } }; sparsetable sp; //segtree tree; vector<int>str, sa, lcp, rsa; void init(string s) { for (int i = 0; i < s.size(); i++)str.push_back(s[i] - 'a' + 1); str.push_back(0); sa = sais(str); lcp = calclcp(str, sa); //tree.init(); sp.init(lcp); rsa.resize(sa.size()); for (int i = 0; i < sa.size(); i++)rsa[sa[i]] = i; //for (int i = 0; i < str.size(); i++)tree.update(i, lcp[i]); } int getlcp(int a, int b)//a文字目からとb文字目からのlcpの長さ { return sp.get(min(rsa[a], rsa[b]) + 1, max(rsa[a], rsa[b])); } typedef pair<int, int>pii; typedef pair<pii, int>pi3; typedef pair<pi3, pii>pi5; pii ans[500000]; class segtreedy { public: vector<int>seg; vector<int>idx; vector<int>lko; vector<int>rko; vector<pii>vec; void init() { seg.push_back(0); lko.push_back(-1); rko.push_back(-1); idx.push_back(-1); } void add(int pl, int node, int lb, int ub, int num) { if (ub < pl || pl < lb)return; if (lb == ub) { seg[node] += num; idx[node] = -pl; vec.push_back(make_pair(pl, num)); return; } if (lko[node] == -1) { lko[node] = seg.size(); seg.push_back(0); lko.push_back(-1); rko.push_back(-1); idx.push_back(-1); } if (rko[node] == -1) { rko[node] = seg.size(); seg.push_back(0); lko.push_back(-1); rko.push_back(-1); idx.push_back(-1); } add(pl, lko[node], lb, (lb + ub) / 2, num); add(pl, rko[node], (lb + ub) / 2 + 1, ub, num); seg[node] = max(seg[lko[node]], seg[rko[node]]); if (seg[node] == seg[lko[node]])idx[node] = idx[lko[node]]; else idx[node] = idx[rko[node]]; } pii get(int beg, int end, int node, int lb, int ub) { if (ub < beg || end < lb)return make_pair(0, -1); if (beg <= lb&&ub <= end) { //if (idx[node] == -1)return make_pair(seg[node], -lb); return make_pair(seg[node], idx[node]); } if (lko[node] == -1) { lko[node] = seg.size(); seg.push_back(0); lko.push_back(-1); rko.push_back(-1); idx.push_back(-1); } if (rko[node] == -1) { rko[node] = seg.size(); seg.push_back(0); lko.push_back(-1); rko.push_back(-1); idx.push_back(-1); } return max(get(beg, end, lko[node], lb, (lb + ub) / 2), get(beg, end, rko[node], (lb + ub) / 2 + 1, ub)); } void destroy() { seg.clear(); lko.clear(); rko.clear(); vec.clear(); } }; segtreedy dat[600000]; int main() { time_t clll = clock(); string s; cin >> s; vector<string>vec; string conc; int num; scanf("%d", &num); vector<int>toidx; for (int i = 0; i < num; i++) { string z; cin >> z; vec.push_back(z); conc += z; conc.push_back('z' + 1); for (int j = 0; j < z.size(); j++)toidx.push_back(i + 1); toidx.push_back(0); } int spt = conc.size(); conc += s; for (int i = 0; i < s.size(); i++)toidx.push_back(0); init(conc); int query; scanf("%d", &query); vector<pi5>que; for (int i = 0; i < query; i++) { int za, zb, zc, zd; scanf("%d%d%d%d", &za, &zb, &zc, &zd); zc--, zd--; int lb, ub; int beg = 0, end = rsa[spt + zc]; for (;;) { if (beg == end)break; int med = (beg + end) / 2; if (getlcp(sa[med], spt + zc) >= zd - zc + 1)end = med; else beg = med+1; } lb = beg; beg = rsa[spt + zc], end = conc.size() - 1; for (;;) { if (beg == end)break; int med = (beg + end+1) / 2; if (getlcp(sa[med], spt + zc) >= zd - zc + 1)beg = med; else end = med-1; } ub = beg; que.push_back(make_pair(make_pair(make_pair(ub - lb + 1, lb), i), make_pair(za, zb))); } //for (int i = 0; i < conc.size(); i++)printf("%d", i % 10); printf("\n"); //cout << conc << endl; //for (int i = 0; i < conc.size(); i++)printf("%d %c %d %d\n", i, conc[sa[i]], sa[i], lcp[i]); sort(que.begin(), que.end()); for (int i = 0; i < conc.size(); i++) { dat[i].init(); if (toidx[sa[i]] != 0)dat[i].add(toidx[sa[i]], 0, 0, SIZE - 1, 1); } //if (double(clock() - clll) / CLOCKS_PER_SEC>2.9)return 0; //for (int i = 0; i < conc.size(); i++)printf("%d ", toidx[sa[i]]); //printf("\n"); set<pii>se; for (int i = 0; i < conc.size(); i++)if (toidx[sa[i]] != 0)se.insert(make_pair(i, i)); se.insert(make_pair(conc.size(), conc.size())); se.insert(make_pair(conc.size() + 1, conc.size())); for (int i = 0; i < que.size(); i++) { int now = que[i].first.first.second; int end = que[i].first.first.second + que[i].first.first.first - 1; //printf("%d %d\n", now, end); set<pii>::iterator it = se.lower_bound(make_pair(now,-1)); for (;;) { int pt1 = (*it).second; it++; if ((*it).first >= end + 1)break; int pt2 = (*it).second; se.erase(it++); it--; se.erase(it++); if (dat[pt1].vec.size() > dat[pt2].vec.size())swap(pt1, pt2); for (int j = 0; j < dat[pt1].vec.size(); j++) { //printf(" %d %d\n", dat[pt1].vec[j].first, dat[pt1].vec[j].second); dat[pt2].add(dat[pt1].vec[j].first, 0, 0, SIZE - 1, dat[pt1].vec[j].second); } dat[pt1].destroy(); se.insert(make_pair(now, pt2)); it--; } it = se.lower_bound(make_pair(now, -1)); if ((*it).first > end) { ans[que[i].first.second] = make_pair(0, -que[i].second.first); continue; } pii t = dat[(*it).second].get(que[i].second.first, que[i].second.second, 0, 0, SIZE - 1); if (t.first == 0)t.second = -que[i].second.first; ans[que[i].first.second] = t; //for (int i = 0; i < 5; i++)printf("%d %d ", dat[(*it).second].get(i, i, 0, 0, SIZE - 1).first, dat[(*it).second].get(i, i, 0, 0, SIZE - 1).second); printf("\n"); } for (int i = 0; i < query; i++) { printf("%d %d\n", -ans[i].second, ans[i].first); } }