0
Retrieve the comments from a Hacker News post
One script reply has been approved by the moderators Verified

Retrieve all the comments from a Hacker News post

Created by fernando mumbach675 614 days ago Viewed 12598 times
0
Submitted by fernando mumbach675 Go
Verified 614 days ago
1
package inner
2

3
import (
4
	"strconv"
5
	"strings"
6

7
	"github.com/gocolly/colly/v2"
8
)
9

10
// comment is one node of a scraped comment tree.
//
// The `selector` and `attr` struct tags name the CSS selector (and, for URL,
// the attribute) each field is filled from when an HTML element is
// unmarshalled into the struct; the `json` tags name the keys used when the
// tree is serialised.
type comment struct {
	// Author is the text of the element matching "a.hnuser".
	Author string `selector:"a.hnuser" json:"author"`
	// URL is the href attribute of the link matching ".age a[href]".
	URL string `selector:".age a[href]" attr:"href" json:"url"`
	// Comment is the text of the element matching ".comment".
	Comment string `selector:".comment" json:"comment"`
	// Replies holds the direct children of this comment.
	Replies []*comment `json:"replies"`
	// depth is the nesting level of the comment (0 for a top-level comment).
	// It is unexported and carries no json tag, so it is bookkeeping only.
	depth int
}
17

18
func main(itemID string) (interface{}, error) {
19
	comments := make([]*comment, 0)
20

21
	// Instantiate default collector
22
	c := colly.NewCollector()
23

24
	// Extract comment
25
	c.OnHTML(".comment-tree tr.athing", func(e *colly.HTMLElement) {
26
		width, err := strconv.Atoi(e.ChildAttr("td.ind img", "width"))
27
		if err != nil {
28
			return
29
		}
30
		// hackernews uses 40px spacers to indent comment replies,
31
		// so we have to divide the width with it to get the depth
32
		// of the comment
33
		depth := width / 40
34
		c := &comment{
35
			Replies: make([]*comment, 0),
36
			depth:   depth,
37
		}
38
		e.Unmarshal(c)
39
		c.Comment = strings.TrimSpace(c.Comment[:len(c.Comment)-5])
40
		if depth == 0 {
41
			comments = append(comments, c)
42
			return
43
		}
44
		parent := comments[len(comments)-1]
45
		// append comment to its parent
46
		for i := 0; i < depth-1; i++ {
47
			parent = parent.Replies[len(parent.Replies)-1]
48
		}
49
		parent.Replies = append(parent.Replies, c)
50
	})
51

52
	c.Visit("https://news.ycombinator.com/item?id=" + itemID)
53
	return comments, nil
54
}