Retrieve the comments from an hacker news post

Retrieve all the comments from an hacker news post

Script hackernews Verified

by fernando mumbach675 ยท 5/19/2023

The script

Submitted by fernando mumbach675 Go
Verified 1096 days ago
1
package inner
2

3
import (
4
	"strconv"
5
	"strings"
6

7
	"github.com/gocolly/colly/v2"
8
)
9

10
type comment struct {
11
	Author  string `selector:"a.hnuser" json:"author"`
12
	URL     string `selector:".age a[href]" attr:"href" json:"url"`
13
	Comment string `selector:".comment" json:"comment"`
14
	Replies []*comment `json:"replies"`
15
	depth   int
16
}
17

18
func main(itemID string) (interface{}, error) {
19
	comments := make([]*comment, 0)
20

21
	// Instantiate default collector
22
	c := colly.NewCollector()
23

24
	// Extract comment
25
	c.OnHTML(".comment-tree tr.athing", func(e *colly.HTMLElement) {
26
		width, err := strconv.Atoi(e.ChildAttr("td.ind img", "width"))
27
		if err != nil {
28
			return
29
		}
30
		// hackernews uses 40px spacers to indent comment replies,
31
		// so we have to divide the width with it to get the depth
32
		// of the comment
33
		depth := width / 40
34
		c := &comment{
35
			Replies: make([]*comment, 0),
36
			depth:   depth,
37
		}
38
		e.Unmarshal(c)
39
		c.Comment = strings.TrimSpace(c.Comment[:len(c.Comment)-5])
40
		if depth == 0 {
41
			comments = append(comments, c)
42
			return
43
		}
44
		parent := comments[len(comments)-1]
45
		// append comment to its parent
46
		for i := 0; i < depth-1; i++ {
47
			parent = parent.Replies[len(parent.Replies)-1]
48
		}
49
		parent.Replies = append(parent.Replies, c)
50
	})
51

52
	c.Visit("https://news.ycombinator.com/item?id=" + itemID)
53
	return comments, nil
54
}