这个函数的作用

Go 开发离不开正则表达式和字符串处理。要把 HTML 的 table 转换成切片,首先要解决如何删除 HTML 标记:这里先用正则表达式去掉带属性的复杂标记,再通过字符串替换把剩余的标记简化成分隔符。

转换函数定义

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
// tableArrOpenTagRes match the opening <table>, <tbody>, <tr> and <td> tags,
// attributes included. Like the other patterns below, they are compiled once
// at package initialisation rather than on every TableArr call.
var tableArrOpenTagRes = []*regexp.Regexp{
	regexp.MustCompile("<table[^>]*?>"),
	regexp.MustCompile("<tbody[^>]*?>"),
	regexp.MustCompile("<tr[^>]*?>"),
	regexp.MustCompile("<td[^>]*?>"),
}

// tableArrAnyTagRe matches any "<...>" sequence that has no angle bracket
// inside it, i.e. every tag left over once the row and cell terminators have
// been turned into markers.
var tableArrAnyTagRe = regexp.MustCompile("<[/!]*?[^<>]*?>")

// tableArrSpaceRunRe matches a run of two or more whitespace characters.
var tableArrSpaceRunRe = regexp.MustCompile(`\s{2,}`)

// tableArrBlankStripper deletes every space, tab, carriage return and line
// feed.
var tableArrBlankStripper = strings.NewReplacer(" ", "", "\t", "", "\r", "", "\n", "")

// TableArr converts the markup of an HTML table into a slice of rows, each row
// being the slice of its cell texts.
//
// Every "</td>" terminates a cell and every "</tr>" terminates a row; both are
// matched literally (lower case, no embedded whitespace). All other tags,
// "&nbsp;" entities and all spaces, tabs, carriage returns and line feeds are
// stripped, so cell text is returned with no blanks in it. Text that follows
// the last "</td>" of a row, or the last "</tr>" of the input, is discarded;
// consequently a row made only of <th> cells yields an empty row.
//
// The result is nil when s contains no "</tr>".
func TableArr(s string) [][]string {
	for _, re := range tableArrOpenTagRes {
		s = re.ReplaceAllString(s, "")
	}

	// Turn the terminators into markers before the generic tag strip below
	// would remove them.
	s = strings.Replace(s, "</tr>", "{tr}", -1)
	s = strings.Replace(s, "</td>", "{td}", -1)
	s = tableArrAnyTagRe.ReplaceAllString(s, "")

	// An earlier version applied the pattern "([rn])[s]+" here, a port of
	// "[\r\n]\s+" that had lost its backslashes: it deleted the letters "rs"
	// and "ns" from cell text ("answers" became "awe"). The whitespace steps
	// below already remove line breaks and the blanks after them, so the step
	// is dropped rather than repaired.
	s = strings.Replace(s, "&nbsp;", "", -1)

	// Normally no-ops after the generic strip; they only fire on malformed,
	// nested markup such as "<<b>/table>".
	s = strings.Replace(s, "</tbody>", "", -1)
	s = strings.Replace(s, "</table>", "", -1)

	s = tableArrSpaceRunRe.ReplaceAllString(s, "")
	s = tableArrBlankStripper.Replace(s)

	// Split always yields one trailing element after the last marker (the
	// leftover text, usually empty); it is not a row, so drop it.
	rows := strings.Split(s, "{tr}")
	rows = rows[:len(rows)-1]
	if len(rows) == 0 {
		return nil
	}

	table := make([][]string, 0, len(rows))
	for _, row := range rows {
		cells := strings.Split(row, "{td}")
		// Same trailing-element rule as for rows.
		table = append(table, cells[:len(cells)-1])
	}
	return table
}

代码不做过多介绍,主要用到的是正则表达式。此函数是参照 PHP 中的类似函数编写的,目前应该是网上唯一的一个。