go源码解读-strings.stringFinder

stringFinder

1
2
3
4
5
6
7
8
// stringFinder bundles a search pattern with the two skip tables that
// makeStringFinder precomputes for it and that next consults while scanning.
type stringFinder struct {
	// pattern is the string being searched for.
	pattern string

	// badCharSkip is indexed by byte value. next reads the entry for the
	// text byte at which a backwards comparison stopped and uses it as a
	// candidate amount to advance the text index.
	badCharSkip [256]int

	// goodSuffixSkip has one entry per pattern position. next reads the
	// entry for the pattern index at which the backwards comparison
	// stopped and uses it as the other candidate amount to advance.
	goodSuffixSkip []int
}

makeStringFinder函数

  • 根据 pattern 预先计算并填充 badCharSkip 和 goodSuffixSkip 两张表
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
// makeStringFinder allocates a stringFinder for pattern and fills in both of
// its skip tables: badCharSkip (indexed by byte value) and goodSuffixSkip
// (indexed by pattern position).
func makeStringFinder(pattern string) *stringFinder {
	size := len(pattern)
	// tail is the index of the pattern's final byte (-1 for an empty pattern).
	tail := size - 1

	finder := &stringFinder{
		pattern:        pattern,
		goodSuffixSkip: make([]int, size),
	}

	// Bad character table.
	// Start from the default: every byte value skips a full pattern length.
	for b := range finder.badCharSkip {
		finder.badCharSkip[b] = size
	}
	// Then record, for each byte in the pattern except the one at tail, its
	// distance to tail; later positions overwrite earlier ones. Stopping
	// before tail keeps the final byte from getting a skip of zero.
	for pos := 0; pos < tail; pos++ {
		finder.badCharSkip[pattern[pos]] = tail - pos
	}

	// Good suffix table, pass one: walk from the back, remembering the most
	// recently seen index at which the remaining tail of the pattern is also
	// a prefix of the pattern.
	prefixStart := tail
	for pos := tail; pos >= 0; pos-- {
		if rest := pattern[pos+1:]; HasPrefix(pattern, rest) {
			prefixStart = pos + 1
		}
		// prefixStart is the shift; tail-pos is the length of the suffix.
		finder.goodSuffixSkip[pos] = prefixStart + (tail - pos)
	}

	// Good suffix table, pass two: walk from the front looking for earlier
	// repeats of the pattern's suffix.
	for pos := 0; pos < tail; pos++ {
		matched := longestCommonSuffix(pattern, pattern[1:pos+1])
		if pattern[pos-matched] == pattern[tail-matched] {
			continue
		}
		// tail-pos is the shift; matched is the length of the suffix.
		finder.goodSuffixSkip[tail-matched] = matched + tail - pos
	}

	return finder
}
// longestCommonSuffix reports how many trailing bytes a and b have in common,
// comparing from the last byte of each string toward the front.
func longestCommonSuffix(a, b string) (i int) {
	// The count can never exceed the shorter string's length.
	limit := len(a)
	if len(b) < limit {
		limit = len(b)
	}
	for i < limit && a[len(a)-1-i] == b[len(b)-1-i] {
		i++
	}
	return i
}
  • 实际移动的位数取坏字符偏移量(badCharSkip)和好后缀偏移量(goodSuffixSkip)两者中的最大值,next 方法如下
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
// next scans text from left to right and returns the byte index at which a
// match of f.pattern begins, or -1 if the scan runs off the end of text
// without finding one.
func (f *stringFinder) next(text string) int {
	tail := len(f.pattern) - 1
	pos := tail
	for pos < len(text) {
		// Walk backwards from the pattern's last byte while bytes agree.
		k := tail
		for k >= 0 {
			if text[pos] != f.pattern[k] {
				break
			}
			pos--
			k--
		}
		if k < 0 {
			// The whole pattern matched; pos sits one byte before the match.
			return pos + 1
		}
		// Advance by whichever table proposes the larger skip.
		byBadChar := f.badCharSkip[text[pos]]
		byGoodSuffix := f.goodSuffixSkip[k]
		pos += max(byBadChar, byGoodSuffix)
	}
	return -1
}
// max returns the larger of a and b.
func max(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
Donate comment here