0
Retrieve the comments from a Hacker News post
One script reply has been approved by the moderators Verified

Retrieve all the comments from a Hacker News post

Created by fernando mumbach675 314 days ago Viewed 2538 times
0
Submitted by fernando mumbach675 Go
Verified 314 days ago
1
package inner
2

3
import (
4
	"strconv"
5
	"strings"
6

7
	"github.com/gocolly/colly/v2"
8
)
9

10
// comment is a single Hacker News comment together with its nested replies.
// The selector/attr tags describe where each value is read from in the
// comment row's HTML; the json tags name the fields in the serialized result.
type comment struct {
	// Author is the text of the a.hnuser element.
	Author string `selector:"a.hnuser" json:"author"`
	// URL is the href attribute of the link inside the .age element.
	URL string `selector:".age a[href]" attr:"href" json:"url"`
	// Comment is the text of the .comment element.
	Comment string `selector:".comment" json:"comment"`
	// Replies holds the direct children of this comment.
	Replies []*comment `json:"replies"`
	// depth is the nesting level of the comment (0 for top-level). It is
	// unexported, so it is not part of the JSON output.
	depth int
}
17

18
func main(itemID string) (interface{}, error) {
19
	comments := make([]*comment, 0)
20

21
	// Instantiate default collector
22
	c := colly.NewCollector()
23

24
	// Extract comment
25
	c.OnHTML(".comment-tree tr.athing", func(e *colly.HTMLElement) {
26
		width, err := strconv.Atoi(e.ChildAttr("td.ind img", "width"))
27
		if err != nil {
28
			return
29
		}
30
		// hackernews uses 40px spacers to indent comment replies,
31
		// so we have to divide the width with it to get the depth
32
		// of the comment
33
		depth := width / 40
34
		c := &comment{
35
			Replies: make([]*comment, 0),
36
			depth:   depth,
37
		}
38
		e.Unmarshal(c)
39
		c.Comment = strings.TrimSpace(c.Comment[:len(c.Comment)-5])
40
		if depth == 0 {
41
			comments = append(comments, c)
42
			return
43
		}
44
		parent := comments[len(comments)-1]
45
		// append comment to its parent
46
		for i := 0; i < depth-1; i++ {
47
			parent = parent.Replies[len(parent.Replies)-1]
48
		}
49
		parent.Replies = append(parent.Replies, c)
50
	})
51

52
	c.Visit("https://news.ycombinator.com/item?id=" + itemID)
53
	return comments, nil
54
}