From 907f495d1439aaeaf036d7af21ffdf6191025258 Mon Sep 17 00:00:00 2001
From: dream2333 <vincentqng@gmail.com>
Date: Mon, 10 Jun 2024 02:47:31 +0800
Subject: [PATCH] fix: drop html elements from a text selector correctly

---
 parsel/selector.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/parsel/selector.py b/parsel/selector.py
index 2027599..7e122c7 100644
--- a/parsel/selector.py
+++ b/parsel/selector.py
@@ -423,6 +423,7 @@ class Selector:
         "_huge_tree",
         "root",
         "_text",
+        "_text_lazy_html_root",
         "body",
         "__weakref__",
     ]
@@ -507,6 +508,10 @@ def __init__(
         self._expr = _expr
         self._huge_tree = huge_tree
         self._text = text
+        # self._text_lazy_html_root caches the root node that is lazily built
+        # when converting text to html for xpath queries. This is needed
+        # because the text may not be valid html and must be parsed as html.
+        self._text_lazy_html_root =None
 
     def __getstate__(self) -> Any:
         raise TypeError("can't pickle Selector objects")
@@ -606,7 +611,9 @@ def xpath(
                 )
         else:
             try:
-                xpathev = self._get_root(self._text or "", type="html").xpath
+                if self._text_lazy_html_root is None:
+                    self._text_lazy_html_root = self._get_root(self._text or "", type="html")
+                xpathev = self._text_lazy_html_root.xpath
             except AttributeError:
                 return typing.cast(
                     SelectorList[_SelectorType], self.selectorlist_cls([])
@@ -625,7 +632,7 @@ def xpath(
         except etree.XPathError as exc:
             raise ValueError(f"XPath error: {exc} in {query}")
 
-        if type(result) is not list:
+        if not isinstance(result, list):
             result = [result]
 
         result = [