Hi guys,
this is day 5 out of 100 days of code!
Today I coded an HTML href link parser. It is part of a web crawler project that I will post about in the following days.
package main
import (
"io"
"log"
"net/http"
"fmt"
"golang.org/x/net/html"
)
// main fetches the Go homepage and prints every href link found in it.
func main() {
	resp, err := http.Get("https://golang.org/")
	if err != nil {
		log.Fatal(err)
	}
	// The response body must be closed so the underlying connection
	// can be released/reused; the original leaked it.
	defer resp.Body.Close()

	for _, link := range getLinks(resp.Body) {
		fmt.Println(link)
	}
}
// getLinks collects the values of all href attributes on <a> start tags
// read from body and returns them in document order, with duplicates
// removed (resolves the former TODO). Tokenization stops at the first
// ErrorToken, which covers both io.EOF and malformed input; whatever
// was collected up to that point is returned.
func getLinks(body io.Reader) []string {
	var links []string
	seen := make(map[string]bool) // hrefs already appended to links

	z := html.NewTokenizer(body)
	for {
		switch z.Next() {
		case html.ErrorToken:
			return links
		case html.StartTagToken:
			// End tags cannot carry attributes, so only start tags
			// need to be inspected (the old EndTagToken case never
			// produced a link).
			token := z.Token()
			if token.Data != "a" {
				continue
			}
			for _, attr := range token.Attr {
				if attr.Key == "href" && !seen[attr.Val] {
					seen[attr.Val] = true
					links = append(links, attr.Val)
				}
			}
		}
	}
}