Edit history of script submission #368 for 'Retrieve the comments from a Hacker News post (hackernews)'

  • go
    One script reply has been approved by the moderators
    Approved
    package inner
    
    import (
    	"strconv"
    	"strings"
    
    	"github.com/gocolly/colly/v2"
    )
    
    // comment is a single Hacker News comment together with the replies nested
    // beneath it. The selector/attr tags are read by colly when a table row is
    // unmarshalled into the struct in main; the json tags give the field names
    // used if the result is JSON-encoded (presumably by the caller — confirm).
    type comment struct {
    	// Author is the text of the a.hnuser element.
    	Author  string `selector:"a.hnuser" json:"author"`
    	// URL is the href attribute of the link inside the .age element.
    	URL     string `selector:".age a[href]" attr:"href" json:"url"`
    	// Comment is the text of the .comment element; main post-processes it
    	// after unmarshalling.
    	Comment string `selector:".comment" json:"comment"`
    	// Replies holds the direct child comments, in the order they were seen.
    	Replies []*comment `json:"replies"`
    	// depth is the nesting level computed in main from the indent spacer
    	// width (0 for a top-level comment). Unexported, so encoding/json skips it.
    	depth   int
    }
    
    func main(itemID string) (interface{}, error) {
    	comments := make([]*comment, 0)
    
    	// Instantiate default collector
    	c := colly.NewCollector()
    
    	// Extract comment
    	c.OnHTML(".comment-tree tr.athing", func(e *colly.HTMLElement) {
    		width, err := strconv.Atoi(e.ChildAttr("td.ind img", "width"))
    		if err != nil {
    			return
    		}
    		// hackernews uses 40px spacers to indent comment replies,
    		// so we have to divide the width with it to get the depth
    		// of the comment
    		depth := width / 40
    		c := &comment{
    			Replies: make([]*comment, 0),
    			depth:   depth,
    		}
    		e.Unmarshal(c)
    		c.Comment = strings.TrimSpace(c.Comment[:len(c.Comment)-5])
    		if depth == 0 {
    			comments = append(comments, c)
    			return
    		}
    		parent := comments[len(comments)-1]
    		// append comment to its parent
    		for i := 0; i < depth-1; i++ {
    			parent = parent.Replies[len(parent.Replies)-1]
    		}
    		parent.Replies = append(parent.Replies, c)
    	})
    
    	c.Visit("https://news.ycombinator.com/item?id=" + itemID)
    	return comments, nil
    }

    Submitted by fernando mumbach675 1096 days ago

  • go
    package inner
    
    import (
    	"strconv"
    	"strings"
    
    	"github.com/gocolly/colly/v2"
    )
    
    // comment is a single Hacker News comment together with the replies nested
    // beneath it. The selector/attr tags are read by colly when a table row is
    // unmarshalled into the struct in main; the json tags give the field names
    // used if the result is JSON-encoded (presumably by the caller — confirm).
    type comment struct {
    	// Author is the text of the a.hnuser element.
    	Author  string `selector:"a.hnuser" json:"author"`
    	// URL is the href attribute of the link inside the .age element.
    	URL     string `selector:".age a[href]" attr:"href" json:"url"`
    	// Comment is the text of the .comment element; main post-processes it
    	// after unmarshalling.
    	Comment string `selector:".comment" json:"comment"`
    	// Replies holds the direct child comments, in the order they were seen.
    	Replies []*comment `json:"replies"`
    	// depth is the nesting level computed in main from the indent spacer
    	// width (0 for a top-level comment). Unexported, so encoding/json skips it.
    	depth   int
    }
    
    func main(itemID string) (interface{}, error) {
    	comments := make([]*comment, 0)
    
    	// Instantiate default collector
    	c := colly.NewCollector()
    
    	// Extract comment
    	c.OnHTML(".comment-tree tr.athing", func(e *colly.HTMLElement) {
    		width, err := strconv.Atoi(e.ChildAttr("td.ind img", "width"))
    		if err != nil {
    			return
    		}
    		// hackernews uses 40px spacers to indent comment replies,
    		// so we have to divide the width with it to get the depth
    		// of the comment
    		depth := width / 40
    		c := &comment{
    			Replies: make([]*comment, 0),
    			depth:   depth,
    		}
    		e.Unmarshal(c)
    		c.Comment = strings.TrimSpace(c.Comment[:len(c.Comment)-5])
    		if depth == 0 {
    			comments = append(comments, c)
    			return
    		}
    		parent := comments[len(comments)-1]
    		// append comment to its parent
    		for i := 0; i < depth-1; i++ {
    			parent = parent.Replies[len(parent.Replies)-1]
    		}
    		parent.Replies = append(parent.Replies, c)
    	})
    
    	c.Visit("https://news.ycombinator.com/item?id=" + itemID)
    	return comments, nil
    }

    Submitted by fernando mumbach675 1096 days ago