字符串~~
- KMP
- 例题
- 1.无线传输
- 2.删除字符串
- 3.二叉树中的链表
- AC自动机
- Manacher
- 例题
- 扩展KMP
- 字符串哈希
KMP
-
(1)
-
(2)
-
(3)
-
经典例题
https://leetcode.cn/problems/find-the-index-of-the-first-occurrence-in-a-string/description/
class Solution {
public:int n,m;int next[10002];//生成next数组void nextarr(string s2){next[0]=-1,next[1]=0;int i=2,cn=0;while(i<m){if(s2[i-1]==s2[cn])next[i++]=++cn;else if(cn>0)cn=next[cn];else next[i++]=0;}}//返回第一个匹配的下标int kmp(string s1,string s2){int x=0,y=0;while(x<n && y<m){if(s1[x]==s2[y]){x++,y++;}else if(y==0){x++;}else{y=next[y];}}return y==m ? x-y : -1;}int strStr(string s1, string s2) {n=s1.length(),m=s2.length();nextarr(s2);return kmp(s1,s2);}
};
若想求出所有匹配的位置,或数量,可以在s2后面加一个不可能匹配上的字符,然后当y==m时,说明匹配到了一个位置
例题
1.无线传输
https://www.luogu.com.cn/problem/P4391
一个神奇的结论,答案等于n-next[n],n为字符串长度
2.删除字符串
https://www.luogu.com.cn/problem/P4824
字符串拼接很费时间
用栈
#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
#define N (1000000+2)
struct node{int x;//s1中下标int y;//s2中与x相匹配的下标
}stack1[N];
int n,m;
string s,t;
int next1[N];
int top=-1;
//生成next数组
void nextarr(string s2){next1[0]=-1,next1[1]=0;int i=2,cn=0;while(i<m){if(s2[i-1]==s2[cn])next1[i++]=++cn;else if(cn>0)cn=next1[cn];else next1[i++]=0;}
}
//返回第一个匹配的下标
void kmp(string s1,string s2){int x=0,y=0;while(x<n){if(s1[x]==s2[y]){stack1[++top]={x,y};x++,y++;}else if(y==0){stack1[++top]={x,-1};x++;}else{y=next1[y];}if(y==m){top-=m;y= top!=-1 ? stack1[top].y + 1 : 0;}}
}
int main(){ cin>>s>>t;n=s.length();m=t.length();nextarr(t);kmp(s,t);//直接输出,不要用字符串拼接好再输出,会TLE!!!for(int i=0;i<=top;i++){cout<<s[stack1[i].x];}return 0;
}
3.二叉树中的链表
https://leetcode.cn/problems/linked-list-in-binary-tree/description/
用next数组加速
class Solution {
public:vector<int> s2;int next1[103];int m;void nextarr(vector<int> s2){next1[0]=-1,next1[1]=0;int i=2,cn=0;while(i<m){if(s2[i-1]==s2[cn])next1[i++]=++cn;else if(cn>0)cn=next1[cn];else next1[i++]=0;}}bool dfs(TreeNode* root,int y){if(y==m)return 1;1if(root==nullptr)return 0;while(y>=0 && root->val!=s2[y])y=next1[y];return dfs(root->left,y+1) || dfs(root->right,y+1);}bool isSubPath(ListNode* head, TreeNode* root) {while(head!=nullptr){s2.push_back(head->val);head=head->next;}m=s2.size();nextarr(s2);return dfs(root,0);}
};
AC自动机
作用:求出文章中指定字符串(多个)的词频
fail数组:当前路径的字符串s,与目标字符串们中的前缀匹配的最长后缀设为x,x的路径终点;
1.构造虚拟节点,直通表、
洛谷5357
https://www.luogu.com.cn/problem/P5357
#include <bits/stdc++.h>
#define ll long long
#define N 200005
using namespace std;
int n;
int end1[N];//记录每个字符串终止的位置
int tree[N][30];//前缀树
int fail[N];
int ig[N];//出度
int times[N];
int cnt=0;
void insert(int x,string str){int cur=0;for(auto c:str){int d=c-'a';if(!tree[cur][d])tree[cur][d]=++cnt;cur=tree[cur][d];} end1[x]=cur;
}
//设置fail指针以及设置直通表
void setFail(){int q[N],l=0,r=0;for(int i=0;i<=25;i++){if(tree[0][i]>0)q[r++]=tree[0][i];}while(l<r){int u=q[l++];for(int i=0;i<=25;i++){if(tree[u][i]==0)tree[u][i]=tree[fail[u]][i];else{fail[tree[u][i]]=tree[fail[u]][i];q[r++]=tree[u][i];ig[tree[fail[u]][i]]++;//出度++}}}
}
//建反图统计实际词频
void cp(){int q[N];int l=0,r=0;for(int i=1;i<=cnt;i++)if(ig[i]==0)q[r++]=i;while(l<r){int now=q[l++];times[fail[now]]+=times[now];if((--ig[fail[now]])==0)q[r++]=fail[now];}
}
void solve(){string s;cin>>n;for(int i=1;i<=n;i++){cin>>s;insert(i,s);}setFail();cin>>s;int cur=0;for(auto c:s){int d=c-'a';cur=tree[cur][d];times[cur]++;}cp();for(int i=1;i<=n;i++){cout<<times[end1[i]]<<'\n';}
}
int main(){ios::sync_with_stdio(0);cin.tie(0);solve();return 0;
}
如果不统计词频,而是判断是否存在敏感词,发现了马上停止
添加alert数组
void insert(int x,string str){int cur=0;for(auto c:str){int d=c-'0';if(!tree[cur][d])tree[cur][d]=++cnt;cur=tree[cur][d];} alert[cur]=1;
}
//设置fail指针以及设置直通表
void setFail(){int q[N],l=0,r=0;for(int i=0;i<=9;i++){if(tree[0][i]>0)q[r++]=tree[0][i];}while(l<r){int u=q[l++];for(int i=0;i<=9;i++){if(tree[u][i]==0)tree[u][i]=tree[fail[u]][i];else{fail[tree[u][i]]=tree[fail[u]][i];q[r++]=tree[u][i];}}alert[u] |= alert[fail[u]];}
}
例题:洛谷3311
https://www.luogu.com.cn/problem/P3311
数位dp+AC自动机
#include <bits/stdc++.h>
#define ll long long
#define N 2005
using namespace std;
const ll mod=1e9+7;
int t;
int m;
int n;
// int end1[N];//记录每个字符串终止的位置
int tree[N][30];//前缀树
int fail[N];
int alert[N];
// int ig[N];//出度
// int times[N];
int cnt=0;
ll mem[N][N][2][2];
string s,s1;
void insert(int x,string str){int cur=0;for(auto c:str){int d=c-'0';if(!tree[cur][d])tree[cur][d]=++cnt;cur=tree[cur][d];} alert[cur]=1;
}
//设置fail指针以及设置直通表
void setFail(){int q[N],l=0,r=0;for(int i=0;i<=9;i++){if(tree[0][i]>0)q[r++]=tree[0][i];}while(l<r){int u=q[l++];for(int i=0;i<=9;i++){if(tree[u][i]==0)tree[u][i]=tree[fail[u]][i];else{fail[tree[u][i]]=tree[fail[u]][i];q[r++]=tree[u][i];}}alert[u] |= alert[fail[u]];}
}
//f1:不用考虑大小f2:无前缀0
ll dfs(int x,int cur,int f1,int f2){if(alert[cur])return 0;if(x==n)return f2;if(~mem[x][cur][f1][f2])return mem[x][cur][f1][f2];ll ans=0;int mm=s1[x]-'0';if(f1){if(f2){for(int i=0;i<=9;i++){int next=tree[cur][i];ans=(ans+dfs(x+1,next,1,1))%mod;}}else{for(int i=1;i<=9;i++){int next=tree[cur][i];ans=(ans+dfs(x+1,next,1,1))%mod;}ans=(ans+dfs(x+1,0,1,0))%mod;}}else{if(f2){for(int i=0;i<mm;i++){int next=tree[cur][i];ans=(ans+dfs(x+1,next,1,1))%mod;}int next=tree[cur][mm];ans=(ans+dfs(x+1,next,0,1))%mod;}else{for(int i=1;i<mm;i++){int next=tree[cur][i];ans=(ans+dfs(x+1,next,1,1))%mod;}ans=(ans+dfs(x+1,0,1,0))%mod;int next=tree[cur][mm];ans=(ans+dfs(x+1,next,0,1))%mod;}}mem[x][cur][f1][f2]=ans;return ans;
}
void solve(){cin>>s1;cin>>m;memset(mem,-1,sizeof mem);n=s1.length();for(int i=1;i<=m;i++){cin>>s;insert(i,s);}setFail();cout<<dfs(0,0,0,0)%mod<<'\n';
}
int main(){ios::sync_with_stdio(0);cin.tie(0);solve();return 0;
}
Manacher
作用:求解最长回文子串
https://www.luogu.com.cn/problem/P3805
#include <bits/stdc++.h>
#define ll long long
#define N 23000000
using namespace std;
const ll mod=1e9+7;int p[N];char ss[N];int f(string s){int n=0;ss[n++]='#';for(auto &c:s){ss[n++]=c;ss[n++]='#';}return n;}int manacher(int n){int mx=0;for(int i=0,c=0,r=0,len;i<n;i++){len = r>i ? min(p[2*c-i],r-i) : 1;while(i+len<n && i-len>=0 && ss[i+len]==ss[i-len])len++;if(i+len>r)r=i+len,c=i;mx=max(mx,len);p[i]=len;}return mx-1;}
void solve(){string s;cin>>s;int n=f(s);cout<<manacher(n)<<'\n';
}
int main(){ios::sync_with_stdio(0);cin.tie(0);solve();return 0;
}
例题
https://www.luogu.com.cn/problem/P4555
#include <bits/stdc++.h>
#define ll long long
#define N 200004
using namespace std;
const ll mod=19930726;
int p[N];
char ss[N];
int left1[N];
int right1[N];
int f(string s){int n=0;ss[n++]='#';for(auto &c:s){ss[n++]=c;ss[n++]='#';}return n;
}
void manacher(int n){for(int i=0,c=0,r=0,len;i<n;i++){len = r>i ? min(p[2*c-i],r-i) : 1;while(i+len<n && i-len>=0 && ss[i+len]==ss[i-len]){len++;}if(i+len>r)r=i+len,c=i;p[i]=len;}
}
void solve() {string s;cin>>s;int n=f(s);manacher(n);for(int i=0,j=0;i<n;i++){while(i+p[i]>j){left1[j]=j-i;j+=2;}}for(int i=n-1,j=n-1;i>=0;i--){while(i-p[i]<j){right1[j]=i-j;j-=2;}}int ans=0;for(int i=2;i<n-2;i+=2){ans=max(ans,left1[i]+right1[i]);}cout<<ans<<'\n';
}
int main(){ios::sync_with_stdio(0);cin.tie(0);solve();return 0;
}
扩展KMP
求每个位置出发的字符串和原字符串(或另一个字符串)最大匹配前缀
z[N]
https://www.luogu.com.cn/problem/P5410
#include <bits/stdc++.h>
#define ll long long
using namespace std;
const ll mod=1e9+7;
const int N=2e7+2;
int z[N];
int e[N];
string a,b;
void zkmp(string s){int n=s.length();z[0]=n;for(int i=1,c=0,r=0,len;i<n;i++){len = r>i ? min(z[i-c],r-i) : 0;while(i+len<n && s[i+len]==s[len])len++;if(i+len>r)r=i+len,c=i;z[i]=len;}
}
void ekmp(string s,string s2){int n=s.length(),n2=s2.length();for(int i=0,c=0,r=0,len;i<n;i++){len = r>i ? min(z[i-c],r-i) : 0;while(i+len<n && len<n2 && s[i+len]==s2[len])len++;if(i+len>r)r=i+len,c=i;e[i]=len;}
}
void solve(){cin>>a>>b;zkmp(b);ekmp(a,b);int an=a.length(),bn=b.length();ll ansz=0,anse=0;for(int i=0;i<bn;i++)ansz^=(ll)(i+1)*(z[i]+1);for(int i=0;i<an;i++)anse^=(ll)(i+1)*(e[i]+1);cout<<ansz<<'\n'<<anse<<'\n';
}
int main(){ios::sync_with_stdio(0);cin.tie(0);solve();return 0;
}
字符串哈希
https://www.luogu.com.cn/problem/P3370
#include <bits/stdc++.h>
#define ll unsigned long long
#define N 200004
using namespace std;
const ll mod=19930726;
const ll base=499;
int n;
//0->1,A->10,a->37
ll num[N];
ll f1(char x){if(x>='0' && x<='9')return x-'0'+1;if(x>='A' && x<='Z')return x-'A'+10;if(x>='a' && x<='z')return x-'a'+37;return 0;
}
ll f2(string s){ll ans=f1(s[0]);int n=s.length();for(int i=1;i<n;i++){ans=ans*base+f1(s[i]);}return ans;
}void solve() {cin>>n;string s;for(int i=1;i<=n;i++){cin>>s;num[i]=f2(s);}sort(num+1,num+n+1);int ans=1;for(int i=1;i<n;i++){if(num[i]!=num[i+1])ans++;}cout<<ans<<'\n';
}
int main(){ios::sync_with_stdio(0);cin.tie(0);solve();return 0;
}
例题:
https://leetcode.cn/problems/substring-with-concatenation-of-all-words/description/
class Solution {
public:int base=433;map<unsigned long long,int>my_map;map<unsigned long long,int>my_map1;unsigned long long pow[22000],hash[22000];void build(string &s,int n){pow[0]=1;for(int i=1;i<n;i++)pow[i]=pow[i-1]*base;hash[0]=s[0]-'a'+1;for(int i=1;i<n;i++)hash[i]=hash[i-1]*base+s[i]-'a'+1;}unsigned long long hs(int l,int r){unsigned long long ans=hash[r];ans-=(l==0 ? 0:(hash[l-1]*pow[r-l+1]));return ans;}unsigned long long f2(string &s){unsigned long long ans=s[0]-'a'+1;int n=s.length();for(int i=1;i<n;i++){ans=ans*base+s[i]-'a'+1;}return ans;}vector<int> findSubstring(string s, vector<string>& words) {vector<int> ans;int n=s.length();int len1=words[0].length();int len2=words.size();build(s,n);int cnt1=0,cnt;for(auto &ss:words){unsigned long long tmp=f2(ss);if(my_map1[tmp])my_map1[tmp]++;else my_map1[tmp]=1,cnt1++;}for(int k=0;k<len1;k++){cnt=cnt1;my_map=my_map1;for(int l=k,r=k;r<=n && l+len1<=n;){while(r+len1<=n && (r-l)/len1<len2){unsigned long long tmp=hs(r,r+len1-1);if(my_map.count(tmp)){if((--my_map[tmp])==0)cnt--;}r+=len1;}if(((r-l)/len1)==len2){if(cnt==0)ans.push_back(l);}unsigned long long tmp=hs(l,l+len1-1);if(my_map.count(tmp)){if((++my_map[tmp])==1)cnt++;}l+=len1;}}return ans;}
};
https://www.luogu.com.cn/problem/P3763
二分加字符串哈希;最差复杂度 n log 4 n n{\log ^4}n nlog4n
#include <bits/stdc++.h>
#define ll long long
#define ull unsigned long long
#define N 100004
using namespace std;
const ll mod=19930726;
const ll base=499;
int t;
string s1,s2;
int n1,n2;
ull pow1[N],hash1[N],hash2[N];
inline ull zh(char c){return c-'A'+1;
}
inline void build(){pow1[0]=1;for(int i=1;i<=max(n1,n2);i++)pow1[i]=pow1[i-1]*base;hash1[0]=zh(s1[0]);for(int i=1;i<n1;i++)hash1[i]=hash1[i-1]*base+zh(s1[i]);hash2[0]=zh(s2[0]);for(int i=1;i<n2;i++)hash2[i]=hash2[i-1]*base+zh(s2[i]);
}
inline ull hs1(int l,int r){ull ans=hash1[r];ans-= l==0 ? 0 : (hash1[l-1]*pow1[r-l+1]);return ans;
}
inline ull hs2(int l,int r){ull ans=hash2[r];ans-= l==0 ? 0 : (hash2[l-1]*pow1[r-l+1]);return ans;
}
void solve(){cin>>s1>>s2;n1=s1.length();n2=s2.length();if(n1<n2){cout<<"0\n";return;}build();int ans=0;for(int l=0,r;l+n2-1<n1;l++){r=l+n2-1;int cnt=0;int now=l;int l1=now-l-1,r1=n2;while(cnt<=3 && now-l<n2){l1=now-l-1,r1=n2;while(l1+1<r1){int mid=(l1+r1)>>1;if(hs1(now,l+mid)==hs2(now-l,mid))l1=mid;else r1=mid;}if(l1==n2-1)break;cnt++;now=l+r1+1;}if(cnt<=3)ans++;}cout<<ans<<'\n';
}
int main(){ios::sync_with_stdio(0);cin.tie(0);cout.tie(0);cin>>t;while(t--){solve();}return 0;
}