对数据进行处理,方便匹配

整数

原理

用hash操作,将数作为数组下标存储
照搬原话:大数化小,小数化了(不变)
hash冲突:1.线性 2.链式结构

线性伪代码:

int k = x/m;
int p = flag[k];
while(1)
{
if(hash[p]==x)
{
printf("%d",hash[p]);
break;
}
if(hash[p]==0)
{
hash[p]=x;
break;
}
if(hash[p]!=x&&hash[p]!=0)
++p;
}

字符串(重点)

原理

HASHC=(C1bm1+C2bm2++Cmb0)modhHASH_C = (C_1*b^{m-1}+C_2*b^{m-2}+ …… +C_m*b^0) mod h b,h互质

字符处理:映射:

A->1,B->2,……,a->27,b->28

匹配一段字符串:HASHD=HASHC,k+lenDHASHC,kblenDHASH_D = HASH_{C,k+lenD}-HASH_{C,k}*b^{lenD}

HASHC,kHASH_{C,k}表示字符串C前k个字符的哈希值

乘以lenD原因如图,自行理解:
lenD

同余(当 blenb^{len}过大时)

(a+b) mod c = (a mod c + b mod c) mod c
ab mod c = (a mod c * b mod c) mod c

模版

求T组大写字符串中s1在s2中出现了几次:

#include<cstdio>
#include<cstring>
using namespace std;
typedef unsigned long long ULL;
ULL power[1000002];
ULL T, b = 2;
char s1[10002], s2[1000002];
ULL sum[1000002], s;
int main()
{
	power[0] = 1;
	for(int i=1; i<1000000; ++i)
		power[i] = power[i-1]*b;
	scanf("%lld", &T);
	while(T--)
	{
		scanf("%s%s", s1+1, s2+1);
		int n = strlen(s1+1), m = strlen(s2+1);
		sum[0] = 0;
		for(int i=1; i<=m; ++i)
			sum[i] = sum[i-1]*b+(ULL)(s2[i]-'A'+1);
		s = 0;
		for(int i=1; i<=n; ++i)
			s = s*b+(ULL)(s1[i]-'A'+1);
		int ans = 0;
		for(int i=0; i<=m-n; ++i)
			if(s==sum[i+n]-sum[i]*power[n])
				++ans;
		printf("%d\n", ans);
		
	}
	return 0;
}