go源码解读-strings.Compare、strings.Replace

Compare

  • Compare只是提供一个方法,考虑性能的时候不建议调用
  • 建议使用内建操作符== > <进行字符串的比较,效率更高,性能更好
1
2
3
4
5
6
7
8
9
func Compare(a, b string) int {
if a == b {
return 0
}
if a < b {
return -1
}
return +1
}

Replace

replacer接口

  • 抽象出来的所有replace方法都需要实现的接口
  • 包含Replace方法和WriteString方法
  • Replace方法完成s中字符串的替换
  • WriteString方法在将s写入w中完成s字符串的替换操作
1
2
3
4
type replacer interface {
Replace(s string) string
WriteString(w io.Writer, s string) (n int, err error)
}

Replace结构体

  • Replacer结构体完成字符串切片的替换,第一个字符串是旧的,第二个是匹配到替换为新的字符串
  • Replacer是线程安全的,可以用于并发编程中,通过once参数来实现
  • NewReplacer完成Replacer结构体的初始化
1
2
3
4
5
6
7
8
9
10
11
12
type Replacer struct {
once sync.Once // guards buildOnce method
r replacer
oldnew []string
}
func NewReplacer(oldnew ...string) *Replacer {
if len(oldnew)%2 == 1 {
panic("strings.NewReplacer: odd argument count")
}
return &Replacer{oldnew: append([]string(nil), oldnew...)}
}
  • Replace本身实现了replacer接口
  • 实际上的替换操作是通过其参数r来完成的
  • r.once.Do保证参数函数只被执行一次
  • buildOnce函数完成对参数r的赋值,以及参数oldNew的置空操作
1
2
3
4
5
6
7
8
9
10
11
12
13
14
func (r *Replacer) Replace(s string) string {
r.once.Do(r.buildOnce)
return r.r.Replace(s)
}
func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) {
r.once.Do(r.buildOnce)
return r.r.WriteString(w, s)
}
func (r *Replacer) buildOnce() {
r.r = r.build()
r.oldnew = nil
}

build方法

  • build函数完成Replace中r的构造,根据oldnew内容的不通进行筛选
    • 如果替换的新旧字符串长度为2,且被替换的字符串的长度>1,makeSingleStringReplacer完成替换
    • 如果被替换的字符串不是单字节数据,则调用函数makeGenericReplacer构建通用的替换器
    • 如果只是字节之间的替换,构造器为byteReplacer即可
    • 如果是将单个字节替换为string字符串的情况,则byteStringReplacer可以完成替换能力
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
func (b *Replacer) build() replacer {
oldnew := b.oldnew
// 如果替换的新旧字符串长度为2,且被替换的字符串的长度>1,makeSingleStringReplacer完成替换
if len(oldnew) == 2 && len(oldnew[0]) > 1 {
return makeSingleStringReplacer(oldnew[0], oldnew[1])
}
allNewBytes := true
for i := 0; i < len(oldnew); i += 2 {
// 如果被替换的字符串不是单字节数据,则调用函数makeGenericReplacer构建通用的替换器
if len(oldnew[i]) != 1 {
return makeGenericReplacer(oldnew)
}
if len(oldnew[i+1]) != 1 {
allNewBytes = false
}
}
// 如果只是字节之间的替换,构造器为byteReplacer即可
if allNewBytes {
r := byteReplacer{}
for i := range r {
r[i] = byte(i)
}
for i := len(oldnew) - 2; i >= 0; i -= 2 {
o := oldnew[i][0]
n := oldnew[i+1][0]
r[o] = n
}
return &r
}
// 如果是将单个字节替换为string字符串的情况,则byteStringReplacer可以完成替换能力
r := byteStringReplacer{toReplace: make([]string, 0, len(oldnew)/2)}
for i := len(oldnew) - 2; i >= 0; i -= 2 {
o := oldnew[i][0]
n := oldnew[i+1]
if r.replacements[o] == nil {
// 这个地方用string([]byte{o})是为了处理o为utf8编码的情况
r.toReplace = append(r.toReplace, string([]byte{o}))
}
r.replacements[o] = []byte(n)
}
return &r
}

byteReplacer结构体

  • byteReplacer结构体完成单个ASCII编码字节的替换
  • Replace方法利用一个256大小的字节数组来完成字节的替换工作
  • WriteString完成替换,并将替换之后的数据写入到w中
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
type byteReplacer [256]byte
func (r *byteReplacer) Replace(s string) string {
var buf []byte // 懒初始化
for i := 0; i < len(s); i++ {
b := s[i]
if r[b] != b {
if buf == nil {
buf = []byte(s)
}
buf[i] = r[b]
}
}
if buf == nil {
return s
}
return string(buf)
}
func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
// TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation.
bufsize := 32 << 10
if len(s) < bufsize {
bufsize = len(s)
}
buf := make([]byte, bufsize)
for len(s) > 0 {
ncopy := copy(buf, s)
s = s[ncopy:]
// 替换操作
for i, b := range buf[:ncopy] {
buf[i] = r[b]
}
// 写入操作
wn, err := w.Write(buf[:ncopy])
n += wn
if err != nil {
return n, err
}
}
return n, nil
}

singleStringReplacer结构体

  • singleStringReplacer结构体包含两个参数
    • finder结构体用于进行字符串的查找
    • value是查找到的字符串替换为的值
  • makeSingleStringReplacer完成singleStringReplacer的构建
1
2
3
4
5
6
7
8
9
type singleStringReplacer struct {
finder *stringFinder
// value is the new string that replaces that pattern when it's found.
value string
}
func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer {
return &singleStringReplacer{finder: makeStringFinder(pattern), value: value}
}
  • Replace方法完成字符串的匹配替换
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
func (r *singleStringReplacer) Replace(s string) string {
var buf []byte
i, matched := 0, false
for {
// 返回匹配到的起始位置
match := r.finder.next(s[i:])
if match == -1 {
break
}
matched = true
// 写入buf
buf = append(buf, s[i:i+match]...)
buf = append(buf, r.value...)
i += match + len(r.finder.pattern)
}
if !matched {
return s
}
buf = append(buf, s[i:]...)
return string(buf)
}
  • WriteString将匹配之后的结果写入w中
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
sw := getStringWriter(w)
var i, wn int
for {
match := r.finder.next(s[i:])
if match == -1 {
break
}
wn, err = sw.WriteString(s[i : i+match])
n += wn
if err != nil {
return
}
wn, err = sw.WriteString(r.value)
n += wn
if err != nil {
return
}
i += match + len(r.finder.pattern)
}
wn, err = sw.WriteString(s[i:])
n += wn
return
}
func getStringWriter(w io.Writer) io.StringWriter {
sw, ok := w.(io.StringWriter)
if !ok {
sw = stringWriter{w}
}
return sw
}
// stringWriter实现了io.Writer接口
type stringWriter struct {
w io.Writer
}
func (w stringWriter) WriteString(s string) (int, error) {
return w.w.Write([]byte(s))
}

byteStringReplacer 结构体

  • byteStringReplacer完成字节替换成字符串的操作的结构体
    • replacements记录字符和字符串之间的替换映射关系
    • toReplace记录待替换的字符数组,以[]string格式记录
1
2
3
4
5
6
7
8
type byteStringReplacer struct {
// 替换规则
replacements [256][]byte
// 待替换的字符串数组,实际上记录的是单个字符的字符串数组
toReplace []string
}
const countCutOff = 8
  • Replace方法替换中会根据字符串的长度采用效率更高的方法来确认替换后的字符串需要的空间
  • 然后遍历完成替换操作
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
func (r *byteStringReplacer) Replace(s string) string {
newSize := len(s)
anyChanges := false
// Is it faster to use Count?
if len(r.toReplace)*countCutOff <= len(s) {
for _, x := range r.toReplace {
if c := Count(s, x); c != 0 {
// The -1 is because we are replacing 1 byte with len(replacements[b]) bytes.
newSize += c * (len(r.replacements[x[0]]) - 1)
anyChanges = true
}
}
} else {
for i := 0; i < len(s); i++ {
b := s[i]
if r.replacements[b] != nil {
// See above for explanation of -1
newSize += len(r.replacements[b]) - 1
anyChanges = true
}
}
}
if !anyChanges {
return s
}
buf := make([]byte, newSize)
j := 0
for i := 0; i < len(s); i++ {
b := s[i]
if r.replacements[b] != nil {
j += copy(buf[j:], r.replacements[b])
} else {
buf[j] = b
j++
}
}
return string(buf)
}
  • WriteString则直接进行循环替换并写入w中
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
sw := getStringWriter(w)
last := 0
for i := 0; i < len(s); i++ {
b := s[i]
if r.replacements[b] == nil {
continue
}
if last != i {
nw, err := sw.WriteString(s[last:i])
n += nw
if err != nil {
return n, err
}
}
last = i + 1
nw, err := w.Write(r.replacements[b])
n += nw
if err != nil {
return n, err
}
}
if last != len(s) {
var nw int
nw, err = sw.WriteString(s[last:])
n += nw
}
return
}

genericReplacer结构体

  • genericReplacer实际上是完整的通用算法,其余的各种分支是效率更高的快捷替换方法
  • genericReplacer包含三个参数
    • root节点,,类型时trieNode,Trie树的根节点
    • tableSize int类型 trie树的查找表大小
    • mapping 完成字节到trie树索引的映射
1
2
3
4
5
6
7
8
type genericReplacer struct {
root trieNode
// tableSize is the size of a trie node's lookup table. It is the number
// of unique key bytes.
tableSize int
// mapping maps from key bytes to a dense index for trieNode.table.
mapping [256]byte
}

trieNode结构体

  • trieNode是trie树的一个节点,含有5个参数
    • value为节点key/value对中的值,如果当前节点不是一个完整的key的话,value为空
    • priority 优先值,当前节点为完整路径时大于0,否则优先级为0
    • prefix,next,如果这两个非空,则当前节点有一个子节点
    • table,如果table非空,则其中记录了所有的子节点
1
2
3
4
5
6
7
type trieNode struct {
value string
priority int
prefix string
next *trieNode
table []*trieNode
}
  • trieNode的add方法
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
func (t *trieNode) add(key, val string, priority int, r *genericReplacer) {
if key == "" {
if t.priority == 0 {
t.value = val
t.priority = priority
}
return
}
if t.prefix != "" {
// n记录key和t.prefix的最大相同的前缀,也就是n之前是匹配的,n之后是不匹配的
var n int
for ; n < len(t.prefix) && n < len(key); n++ {
if t.prefix[n] != key[n] {
break
}
}
if n == len(t.prefix) {
// 如果n和t.prefix的长度相等,也就是说key之后的部分都是没有的索引值
t.next.add(key[n:], val, priority, r)
} else if n == 0 {
// 首字母就不同,这时候需要在root节点创建个新的查找表
var prefixNode *trieNode
if len(t.prefix) == 1 {
prefixNode = t.next
} else {
prefixNode = &trieNode{
prefix: t.prefix[1:],
next: t.next,
}
}
keyNode := new(trieNode)
t.table = make([]*trieNode, r.tableSize)
t.table[r.mapping[t.prefix[0]]] = prefixNode
t.table[r.mapping[key[0]]] = keyNode
t.prefix = ""
t.next = nil
keyNode.add(key[1:], val, priority, r)
} else {
// 插入节点数据
next := &trieNode{
prefix: t.prefix[n:],
next: t.next,
}
t.prefix = t.prefix[:n]
t.next = next
next.add(key[n:], val, priority, r)
}
} else if t.table != nil {
// 插入已经存在table列表中心的节点
m := r.mapping[key[0]]
if t.table[m] == nil {
t.table[m] = new(trieNode)
}
t.table[m].add(key[1:], val, priority, r)
} else {
// 创建根节点
t.prefix = key
t.next = new(trieNode)
t.next.add("", val, priority, r)
}
}

Replace方法

  • Replace方法实际上是对WriteString方法的调用,将s替换之后的结果写入到buf中
  • buf实际上是对io.Writer接口的实现
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
func (r *genericReplacer) Replace(s string) string {
buf := make(appendSliceWriter, 0, len(s))
r.WriteString(&buf, s)
return string(buf)
}
type appendSliceWriter []byte
// Write writes to the buffer to satisfy io.Writer.
func (w *appendSliceWriter) Write(p []byte) (int, error) {
*w = append(*w, p...)
return len(p), nil
}
// WriteString writes to the buffer without string->[]byte->string allocations.
func (w *appendSliceWriter) WriteString(s string) (int, error) {
*w = append(*w, s...)
return len(s), nil
}

WriteString方法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
sw := getStringWriter(w)
var last, wn int
var prevMatchEmpty bool
for i := 0; i <= len(s); {
if i != len(s) && r.root.priority == 0 {
index := int(r.mapping[s[i]])
if index == r.tableSize || r.root.table[index] == nil {
i++
continue
}
}
val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
prevMatchEmpty = match && keylen == 0
if match {
wn, err = sw.WriteString(s[last:i])
n += wn
if err != nil {
return
}
wn, err = sw.WriteString(val)
n += wn
if err != nil {
return
}
i += keylen
last = i
continue
}
i++
}
if last != len(s) {
wn, err = sw.WriteString(s[last:])
n += wn
}
return
}
// 查找方法
func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {
bestPriority := 0
node := &r.root
n := 0
for node != nil {
if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
bestPriority = node.priority
val = node.value
keylen = n
found = true
}
if s == "" {
break
}
if node.table != nil {
index := r.mapping[s[0]]
if int(index) == r.tableSize {
break
}
node = node.table[index]
s = s[1:]
n++
} else if node.prefix != "" && HasPrefix(s, node.prefix) {
n += len(node.prefix)
s = s[len(node.prefix):]
node = node.next
} else {
break
}
}
return
}
// 创建通用的替换器,实际上是完成trie树的构建
func makeGenericReplacer(oldnew []string) *genericReplacer {
r := new(genericReplacer)
// Find each byte used, then assign them each an index.
for i := 0; i < len(oldnew); i += 2 {
key := oldnew[i]
for j := 0; j < len(key); j++ {
r.mapping[key[j]] = 1
}
}
for _, b := range r.mapping {
r.tableSize += int(b)
}
var index byte
for i, b := range r.mapping {
if b == 0 {
r.mapping[i] = byte(r.tableSize)
} else {
r.mapping[i] = index
index++
}
}
// Ensure root node uses a lookup table (for performance).
r.root.table = make([]*trieNode, r.tableSize)
for i := 0; i < len(oldnew); i += 2 {
r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r)
}
return r
}
Donate comment here