Skip to content
This repository has been archived by the owner on May 11, 2022. It is now read-only.

Commit

Permalink
issue #14: add X-Click-Options header
Browse files Browse the repository at this point in the history
  • Loading branch information
kamilsk committed Apr 29, 2018
1 parent 4821473 commit 6e599cb
Showing 1 changed file with 18 additions and 1 deletion.
19 changes: 18 additions & 1 deletion http/availability/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (

// Names of HTTP headers referenced in this file.
const (
// locationHeader is the name of the HTTP "Location" header.
locationHeader = "Location"
// clickOptHeader is the name of the custom "X-Click-Options" header that
// the OnRequest option sets on each request passed to its callback.
clickOptHeader = "X-Click-Options"
)

type Crawler interface {
Expand All @@ -37,15 +38,17 @@ func CrawlerColly(config CrawlerConfig) Crawler {
if err != nil {
return errors.WithMessage(err, fmt.Sprintf("parse entry point URL %q", entry))
}
options := make([]func(*colly.Collector), 0, 7)
options := make([]func(*colly.Collector), 0, 9)
if config.UserAgent != "" {
options = append(options, colly.UserAgent(config.UserAgent))
}
options = append(options, colly.IgnoreRobotsTxt())
if config.Verbose {
options = append(options, colly.Debugger(&debug.LogDebugger{Output: config.Output}))
}
options = append(options, NoCookie())
options = append(options, NoRedirect())
options = append(options, OnRequest())
options = append(options, OnError(bus))
options = append(options, OnResponse(bus))
options = append(options, OnHTML(base, bus))
Expand All @@ -61,6 +64,20 @@ func NoRedirect() func(*colly.Collector) {
}
}

// NoCookie returns a collector option which, when applied, calls
// DisableCookies on the collector it receives.
func NoCookie() func(*colly.Collector) {
	return func(collector *colly.Collector) { collector.DisableCookies() }
}

// OnRequest returns a collector option which, when applied, registers a
// request callback on the collector. The callback sets the X-Click-Options
// header (clickOptHeader) to "anonymously" on every request it is given.
func OnRequest() func(*colly.Collector) {
	// The callback captures no state, so a single instance can be shared by
	// every collector the option is applied to.
	markAnonymous := func(r *colly.Request) {
		r.Headers.Set(clickOptHeader, "anonymously")
	}
	return func(collector *colly.Collector) {
		collector.OnRequest(markAnonymous)
	}
}

func OnError(bus EventBus) func(*colly.Collector) {
return func(c *colly.Collector) {
c.OnError(func(resp *colly.Response, err error) {
Expand Down

0 comments on commit 6e599cb

Please sign in to comment.