From 6bd0054d6953956fbf7358a514d37490393c714b Mon Sep 17 00:00:00 2001 From: Elias Dorneles Date: Fri, 5 Jan 2018 11:29:55 +0100 Subject: [PATCH 1/5] add attrs() and attrs_all() methods --- parsel/selector.py | 26 ++++++++++++++++++++++++++ tests/test_selector.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/parsel/selector.py b/parsel/selector.py index 7b9bdc51..4c2674e0 100644 --- a/parsel/selector.py +++ b/parsel/selector.py @@ -137,6 +137,21 @@ def extract_first(self, default=None): return default get = extract_first + def attrs(self): + """Return the attributes dictionary for the first element. + If the list is empty, return an empty dict. + """ + for x in self: + return x.attrs() + else: + return {} + + def attrs_all(self): + """Return a list that contains the attributes dictionary for + each underlying element. + """ + return [x.attrs() for x in self] + class Selector(object): """ @@ -324,6 +339,17 @@ def remove_namespaces(self): # remove namespace declarations etree.cleanup_namespaces(self.root) + def attrs(self): + """Return the attributes dictionary for underlying element. + """ + return self.root.attrib + + def attrs_all(self): + """Return a list containing the attributes dictionary for underlying element. 
+ For consistency with :meth:`SelectorList.attrs_all` + """ + return [self.attrs()] + def __bool__(self): """ Return ``True`` if there is any real content selected or ``False`` diff --git a/tests/test_selector.py b/tests/test_selector.py index dcac22b9..1365efe1 100644 --- a/tests/test_selector.py +++ b/tests/test_selector.py @@ -86,6 +86,35 @@ def test_simple_selection_with_variables_escape_friendly(self): lng=lt)], [u'a']) + def test_accessing_attributes(self): + body = u""" + + + + + + """ + sel = self.sscls(text=body) + self.assertEquals({'lang': 'en', 'version': '1.0'}, sel.attrs()) + + # .attrs on a SelectorList, brings the attributes of first-element only + self.assertEquals({'id': 'some-list', 'class': 'list-cls'}, sel.css('ul').attrs()) + self.assertEquals({'class': 'item-cls', 'id': 'list-item-1'}, sel.css('li').attrs()) + + # for the attributes for all children, use attrs_all + self.assertEquals( + [{'class': 'item-cls', 'id': 'list-item-1'}, + {'class': 'item-cls active', 'id': 'list-item-2'}, + {'class': 'item-cls', 'id': 'list-item-3'}], + sel.css('li').attrs_all()) + + # for consistency, .attrs_all is also in Selector + self.assertEquals([{'class': 'item-cls', 'id': 'list-item-1'}], sel.css('li')[0].attrs_all()) + def test_representation_slice(self): body = u"

".format(50 * 'b') sel = self.sscls(text=body) From c39553b0d0b988972f4abbae9b50ce7751324f3c Mon Sep 17 00:00:00 2001 From: Elias Dorneles Date: Sun, 21 Jan 2018 02:30:18 +0100 Subject: [PATCH 2/5] change attribute accessor approach to use .attrib instead See discussion on: https://github.com/scrapy/parsel/pull/107 --- parsel/selector.py | 24 ++---------------------- tests/test_selector.py | 13 +++---------- 2 files changed, 5 insertions(+), 32 deletions(-) diff --git a/parsel/selector.py b/parsel/selector.py index 4c2674e0..cc95d4e7 100644 --- a/parsel/selector.py +++ b/parsel/selector.py @@ -137,21 +137,6 @@ def extract_first(self, default=None): return default get = extract_first - def attrs(self): - """Return the attributes dictionary for the first element. - If the list is empty, return an empty dict. - """ - for x in self: - return x.attrs() - else: - return {} - - def attrs_all(self): - """Return a list that contains the attributes dictionary for - each underlying element. - """ - return [x.attrs() for x in self] - class Selector(object): """ @@ -339,17 +324,12 @@ def remove_namespaces(self): # remove namespace declarations etree.cleanup_namespaces(self.root) - def attrs(self): + @property + def attrib(self): """Return the attributes dictionary for underlying element. """ return self.root.attrib - def attrs_all(self): - """Return a list containing the attributes dictionary for underlying element. 
- For consistency with :meth:`SelectorList.attrs_all` - """ - return [self.attrs()] - def __bool__(self): """ Return ``True`` if there is any real content selected or ``False`` diff --git a/tests/test_selector.py b/tests/test_selector.py index 1365efe1..9890fc5b 100644 --- a/tests/test_selector.py +++ b/tests/test_selector.py @@ -99,21 +99,14 @@ def test_accessing_attributes(self): """ sel = self.sscls(text=body) - self.assertEquals({'lang': 'en', 'version': '1.0'}, sel.attrs()) + self.assertEquals({'lang': 'en', 'version': '1.0'}, sel.attrib) + self.assertEquals({'id': 'some-list', 'class': 'list-cls'}, sel.css('ul')[0].attrib) - # .attrs on a SelectorList, brings the attributes of first-element only - self.assertEquals({'id': 'some-list', 'class': 'list-cls'}, sel.css('ul').attrs()) - self.assertEquals({'class': 'item-cls', 'id': 'list-item-1'}, sel.css('li').attrs()) - - # for the attributes for all children, use attrs_all self.assertEquals( [{'class': 'item-cls', 'id': 'list-item-1'}, {'class': 'item-cls active', 'id': 'list-item-2'}, {'class': 'item-cls', 'id': 'list-item-3'}], - sel.css('li').attrs_all()) - - # for consistency, .attrs_all is also in Selector - self.assertEquals([{'class': 'item-cls', 'id': 'list-item-1'}], sel.css('li')[0].attrs_all()) + [e.attrib for e in sel.css('li')]) def test_representation_slice(self): body = u"

".format(50 * 'b') From 0eb7a45fc4dbd9d983577adb533f0f3ff2038896 Mon Sep 17 00:00:00 2001 From: Elias Dorneles Date: Mon, 26 Feb 2018 02:15:06 +0100 Subject: [PATCH 3/5] add .attrib for SelectorList --- parsel/selector.py | 10 ++++++++++ tests/test_selector.py | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/parsel/selector.py b/parsel/selector.py index cc95d4e7..2d45360b 100644 --- a/parsel/selector.py +++ b/parsel/selector.py @@ -137,6 +137,16 @@ def extract_first(self, default=None): return default get = extract_first + @property + def attrib(self): + """Return the attributes dictionary for the first element. + If the list is empty, return an empty dict. + """ + for x in self: + return x.attrib + else: + return {} + class Selector(object): """ diff --git a/tests/test_selector.py b/tests/test_selector.py index 9890fc5b..ab9d1ba2 100644 --- a/tests/test_selector.py +++ b/tests/test_selector.py @@ -102,6 +102,10 @@ def test_accessing_attributes(self): self.assertEquals({'lang': 'en', 'version': '1.0'}, sel.attrib) self.assertEquals({'id': 'some-list', 'class': 'list-cls'}, sel.css('ul')[0].attrib) + # for a SelectorList, bring the attributes of first-element only + self.assertEquals({'id': 'some-list', 'class': 'list-cls'}, sel.css('ul').attrib) + self.assertEquals({'class': 'item-cls', 'id': 'list-item-1'}, sel.css('li').attrib) + self.assertEquals( [{'class': 'item-cls', 'id': 'list-item-1'}, {'class': 'item-cls active', 'id': 'list-item-2'}, From ec6d3cb4f420ffb8238a205e0afe1d7b42ac3aeb Mon Sep 17 00:00:00 2001 From: Elias Dorneles Date: Mon, 26 Feb 2018 02:18:30 +0100 Subject: [PATCH 4/5] add test for empty attributes cases --- tests/test_selector.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_selector.py b/tests/test_selector.py index ab9d1ba2..1d3250a9 100644 --- a/tests/test_selector.py +++ b/tests/test_selector.py @@ -105,6 +105,8 @@ def test_accessing_attributes(self): # for a SelectorList, bring the attributes of first-element 
only self.assertEquals({'id': 'some-list', 'class': 'list-cls'}, sel.css('ul').attrib) self.assertEquals({'class': 'item-cls', 'id': 'list-item-1'}, sel.css('li').attrib) + self.assertEquals({}, sel.css('body').attrib) + self.assertEquals({}, sel.css('non-existing-element').attrib) self.assertEquals( [{'class': 'item-cls', 'id': 'list-item-1'}, From ee042d18b3c134cf2f7b16d19b450dff0a19fba9 Mon Sep 17 00:00:00 2001 From: Elias Dorneles Date: Fri, 8 Jun 2018 09:59:23 +0200 Subject: [PATCH 5/5] return dict copy on sel.attrib This is done to prevent: - changes to the returned object from being reflected in the tree; - keeping a reference to the tree, which prevents the response from being garbage-collected; - isinstance(sel.attrib, dict) from being False. Thanks @kmike ! --- parsel/selector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsel/selector.py b/parsel/selector.py index 2d45360b..1180cabe 100644 --- a/parsel/selector.py +++ b/parsel/selector.py @@ -338,7 +338,7 @@ def remove_namespaces(self): def attrib(self): """Return the attributes dictionary for underlying element. """ - return self.root.attrib + return dict(self.root.attrib) def __bool__(self): """