scrapy · paulo-raca · Mar 2, 2016
diff --git a/parsel/selector.py b/parsel/selector.py
@@ -98,6 +98,18 @@ def extract_first(self, default=None):
         else:
             return default
 
+    def text_content(self):
+        """
+        Call the ``.text_content()`` method for each element in this list and
+        return the results as a list of unicode strings, one per element.
+        """
+        return [x.text_content() for x in self]
+
+    def text_content_first(self, default=None):
+        for x in self:
+            return x.text_content()
+        else:
+            return default
 
 class Selector(object):
     """
@@ -222,6 +234,13 @@ def extract(self):
             else:
                 return six.text_type(self.root)
 
+    def text_content(self):
+        """
+        Returns the text content of the element, including the text content of
+        its children, with no markup and whitespace normalized by XPath ``normalize-space()``.
+        """
+        return six.text_type(self.root.xpath("normalize-space()"))
+
     def register_namespace(self, prefix, uri):
         """
         Register the given namespace to be used in this :class:`Selector`.

diff --git a/tests/test_selector.py b/tests/test_selector.py
@@ -86,6 +86,61 @@ def test_extract_first_default(self):
 
         self.assertEqual(sel.xpath('//div/text()').extract_first(default='missing'), 'missing')
 
+    def test_text_content_first(self):
+        """Test if text_content_first() returns first element"""
+        body = u'<ul><li id="1">1</li><li id="2">2</li></ul>'
+        sel = self.sscls(text=body)
+
+        self.assertEqual(sel.xpath('//ul/li').text_content_first(),
+                         sel.xpath('//ul/li').text_content()[0])
+
+        self.assertEqual(sel.xpath('//ul/li[@id="1"]').text_content_first(),
+                         sel.xpath('//ul/li[@id="1"]').text_content()[0])
+
+        self.assertEqual(sel.xpath('//ul/li[2]').text_content_first(),
+                         sel.xpath('//ul/li').text_content()[1])
+
+        self.assertEqual(sel.xpath('//ul/li[@id="doesnt-exist"]').text_content_first(), None)
+
+        self.assertEqual(sel.xpath('//ul/li').text_content_first(), '1')
+
+        self.assertEqual(sel.xpath('//ul/li[2]').text_content_first(), '2'),
+
+        self.assertEqual(sel.xpath('//ul').text_content_first(), '12'),
+
+    def test_text_content_first_default(self):
+        """Test if text_content_first() returns default value when no results found"""
+        body = u'<ul><li id="1">1</li><li id="2">2</li></ul>'
+        sel = self.sscls(text=body)
+
+        self.assertEqual(sel.xpath('//div').text_content_first(default='missing'), 'missing')
+
+    def test_text_content(self):
+        """Test if text_content() returns the text content of each matched element"""
+        body = u'<ul><li id="1">1</li><li id="2">2</li></ul>'
+        sel = self.sscls(text=body)
+
+        self.assertEqual(sel.xpath('//ul').text_content(), [u'12'])
+        self.assertEqual(sel.xpath('//ul/li').text_content(), [u'1', u'2'])
+
+    def test_text_content_with_spaces(self):
+        """Test if text_content() drops markup and normalizes whitespace"""
+        body = u"""
+            <p>
+              Mary <b>had    </b>   a little   <i> <br/>
+              lamb  </i>   
+            </p> 
+            <div>meh meh</div>
+            <p>    
+              It's
+              <txd>fleece</txd>
+              was w<em>hi</em>te as s<span>no</span>w.
+            </p> 
+        """
+        sel = self.sscls(text=body)
+
+        self.assertEqual(sel.xpath('//p').text_content(),  [u'Mary had a little lamb', u'It\'s fleece was white as snow.'])
+
     def test_re_first(self):
         """Test if re_first() returns first matched element"""
         body = u'<ul><li id="1">1</li><li id="2">2</li></ul>'