From 313e6cf8569897fbf4f03fd0b46796449adbd551 Mon Sep 17 00:00:00 2001
From: Valdir Stumm Junior <stummjr@gmail.com>
Date: Fri, 31 Mar 2017 16:53:52 -0300
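Subject: [PATCH] add image_url field

Extract the first image inside div.carousel-inner on each book page and
store its absolute URL (resolved with response.urljoin) in
item['image_url'].

Also switch the spider's string literals from double to single quotes;
the selectors and XPath expressions themselves are unchanged.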

---
 books/spiders/books.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/books/spiders/books.py b/books/spiders/books.py
index 99ab4346..264c77df 100644
--- a/books/spiders/books.py
+++ b/books/spiders/books.py
@@ -3,28 +3,30 @@
 
 
 class BooksSpider(scrapy.Spider):
-    name = "books"
-    allowed_domains = ["books.toscrape.com"]
+    name = 'books'
+    allowed_domains = ['books.toscrape.com']
     start_urls = [
         'http://books.toscrape.com/',
     ]
 
     def parse(self, response):
-        for book_url in response.css("article.product_pod > h3 > a ::attr(href)").extract():
+        for book_url in response.css('article.product_pod > h3 > a ::attr(href)').extract():
             yield scrapy.Request(response.urljoin(book_url), callback=self.parse_book_page)
-        next_page = response.css("li.next > a ::attr(href)").extract_first()
+        next_page = response.css('li.next > a ::attr(href)').extract_first()
         if next_page:
             yield scrapy.Request(response.urljoin(next_page), callback=self.parse)
 
     def parse_book_page(self, response):
         item = {}
-        product = response.css("div.product_main")
-        item["title"] = product.css("h1 ::text").extract_first()
+        product = response.css('div.product_main')
+        item['title'] = product.css('h1 ::text').extract_first()
         item['category'] = response.xpath(
-            "//ul[@class='breadcrumb']/li[@class='active']/preceding-sibling::li[1]/a/text()"
+            '//ul[@class="breadcrumb"]/li[@class="active"]/preceding-sibling::li[1]/a/text()'
         ).extract_first()
         item['description'] = response.xpath(
-            "//div[@id='product_description']/following-sibling::p/text()"
+            '//div[@id="product_description"]/following-sibling::p/text()'
         ).extract_first()
         item['price'] = response.css('p.price_color ::text').extract_first()
+        image_url = response.css('div.carousel-inner img ::attr(src)').extract_first()
+        item['image_url'] = response.urljoin(image_url)
         yield item