Skip to content

Commit

Permalink
Merge branch 'anwam/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
ByteSizedMarius committed Feb 16, 2024
2 parents 5491b93 + 92d5872 commit 7808957
Showing 1 changed file with 40 additions and 0 deletions.
40 changes: 40 additions & 0 deletions examples/ecommerce/ecommerce.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// E-Commerce example: extract product descriptions from a test site and handle the errors raised when expected elements are not found.
package main

import (
"log"
"os"

"github.com/anaskhan96/soup"
)

// main fetches the webscraper.io e-commerce test page, walks down to the
// product grid and logs the description of every product it finds.
//
// Every lookup result is checked before it is used: a soup.Root whose Error
// field is set has no usable node behind it, so continuing to query it would
// crash instead of producing a readable diagnostic. Structural failures
// (page not fetched, layout containers missing) terminate the process with
// exit code 1; a single product without a description is logged and skipped.
func main() {
	resp, err := soup.Get("https://webscraper.io/test-sites/e-commerce/allinone")
	if err != nil {
		log.Printf("Fetching page failed: %v\n", err)
		os.Exit(1)
	}

	doc := soup.HTMLParse(resp)
	if doc.Error != nil {
		log.Printf("Parsing page failed: %v\n", doc.Error)
		os.Exit(1)
	}

	wrapper := doc.Find("div", "class", "wrapper")
	if wrapper.Error != nil {
		// Use the comma-ok form so an unexpected error type cannot panic;
		// soup.Error carries a Type that tells us why the lookup failed.
		if se, ok := wrapper.Error.(soup.Error); ok && se.Type == soup.ErrElementNotFound {
			log.Printf("Wrapper element not found: %v\n", wrapper.Error)
		} else {
			log.Printf("Wrapper lookup failed: %v\n", wrapper.Error)
		}
		os.Exit(1) // terminate process
	}

	// Use FindStrict when the element has more than one class.
	container := wrapper.FindStrict("div", "class", "container test-site")
	if container.Error != nil {
		// Pass the message as an argument, never as part of the format string.
		log.Printf("Container element not found: %s\n", container.Error.Error())
		os.Exit(1)
	}

	mainRow := container.Find("div", "class", "row")
	if mainRow.Error != nil {
		log.Printf("Main row element not found: %v\n", mainRow.Error)
		os.Exit(1)
	}

	contentCol := mainRow.Find("div", "class", "col-md-9")
	if contentCol.Error != nil {
		log.Printf("Content column element not found: %v\n", contentCol.Error)
		os.Exit(1)
	}

	mainCol := contentCol.Find("div", "class", "row")
	if mainCol.Error != nil {
		log.Printf("Product row element not found: %v\n", mainCol.Error)
		os.Exit(1)
	}

	// Same as FindStrict but returns []soup.Root.
	products := mainCol.FindAllStrict("div", "class", "col-sm-4 col-lg-4 col-md-4")
	if len(products) == 0 {
		log.Println("No products found")
		return
	}

	for i, product := range products {
		caption := product.Find("div", "class", "caption")
		if caption.Error != nil {
			log.Println(caption.Error.Error())
			os.Exit(1)
		}

		description := caption.Find("p", "class", "description")
		if description.Error != nil {
			// A product without a description should not abort the whole run.
			log.Printf("Product #%d \n description not found: %v\n", i, description.Error)
			continue
		}
		log.Printf("Product #%d \n description: %s\n", i, description.Text())
	}
}

0 comments on commit 7808957

Please sign in to comment.