daneads · ajwahab · Jan 8, 2020 · Jan 8, 2020 · Jan 8, 2020 · Jan 9, 2020
diff --git a/README.md b/README.md
@@ -58,15 +58,15 @@ pypatent.Search('TTL/(tennis AND (racquet OR racket))')
 
 Alternatively, you can specify one or more Field Code arguments to search within the specified fields. Multiple Field Code arguments will create a search with AND logic. OR logic can be used within a single argument. For more complex logic, use a custom string.
 ```python
-pypatent.Search(pn='adobe', ttl='software') # Equivalent to search('PN/adobe AND TTL/software')
-pypatent.Search(pn=('adobe or macromedia'), ttl='software') # Equivalent to search('PN/(adobe or macromedia) AND TTL/software')
+pypatent.Search(an='adobe', ttl='software') # Equivalent to pypatent.Search('AN/adobe AND TTL/software')
+pypatent.Search(an=('adobe or macromedia'), ttl='software') # Equivalent to pypatent.Search('AN/(adobe or macromedia) AND TTL/software')
 ```
 
 #### Combining search methods 1 and 2
 
 String criteria can be used in conjunction with Field Code arguments:
 ```python
-pypatent.Search('acrobat', pn='adobe', ttl='software') # Equivalent to search('acrobat AND PN/adobe AND TTL/software')
+pypatent.Search('acrobat', an='adobe', ttl='software') # Equivalent to pypatent.Search('acrobat AND AN/adobe AND TTL/software')
 ```
 
 The Field Code arguments have the same meaning as on the [USPTO site](http://patft.uspto.gov/netahtml/PTO/search-adv.htm).
@@ -259,4 +259,4 @@ print(res)
 This version makes searching and storing patent data easier:
 * Simplified to 2 objects: `Search` and `Patent`
 * A `Search` object searches the USPTO site and can output the results as a DataFrame or list. It can scrape the details of each patent, or just get the patent title and URL. Most users will only need to use this object.
-* A `Patent` object fetches and holds a single patent's info. Fetching the patent's details is now optional. This object should only be used when you already have the patent URL and aren't conducting a search.
+* A `Patent` object fetches and holds a single patent's info. Fetching the patent's details is now optional. This object should only be used when you already have the patent URL and aren't conducting a search.
diff --git a/pypatent/__init__.py b/pypatent/__init__.py
@@ -6,7 +6,6 @@
 import pandas as pd
 from selenium import webdriver
 
-
 class WebConnection:
     def __init__(self,
                  use_selenium: bool = False,
@@ -198,6 +197,7 @@ def __repr__(self):
 
 
 class Search:
+
     def __init__(self,
                  string: str = None,
                  results_limit: int = 50,
@@ -263,46 +263,103 @@ def __init__(self,
             self.web_connection = web_connection
         else:
             self.web_connection = WebConnection()
-        args = {k: str(v).replace(' ', '-') for k, v in locals().items() if v and v is not self and v not in [get_patent_details, results_limit, web_connection]}
-        searchstring = ' AND '.join(['%s/%s' % (key, value) for (key, value) in args.items() if key not in ['results_limit']])
+
+        args = {k: v.lower() for k, v in locals().items() if v and v is not self and v not in [get_patent_details, results_limit, web_connection]}
+
+        search_codes = dict({
+          'PN':  'Patent Number',
+          'ISD': 'Issue Date',
+          'TTL': 'Title',
+          'ABST': 'Abstract',
+          'ACLM': 'Claim(s)',
+          'SPEC': 'Description/Specification',
+          'CCL': 'Current US Classification',
+          'CPC': 'Current CPC Classification',
+          'CPCL': 'Current CPC Classification Class',
+          'ICL': 'International Classification',
+          'APN': 'Application Serial Number',
+          'APD': 'Application Date',
+          'APT': 'Application Type',
+          'GOVT': 'Government Interest',
+          'FMID': 'Patent Family ID',
+          'PARN': 'Parent Case Information',
+          'RLAP': 'Related US App. Data',
+          'RLFD': 'Related Application Filing Date',
+          'PRIR': 'Foreign Priority',
+          'PRAD': 'Priority Filing Date',
+          'PCT': 'PCT Information',
+          'PTAD': 'PCT Filing Date',
+          'PT3D': 'PCT 371c124 Date',
+          'PPPD': 'Prior Published Document Date',
+          'REIS': 'Reissue Data',
+          'RPAF': 'Reissued Patent Application Filing Date',
+          'AFFF': '130(b) Affirmation Flag',
+          'AFFT': '130(b) Affirmation Statement',
+          'IN': 'Inventor Name',
+          'IC': 'Inventor City',
+          'IS': 'Inventor State',
+          'ICN': 'Inventor Country',
+          'AANM': 'Applicant Name',
+          'AACI': 'Applicant City',
+          'AAST': 'Applicant State',
+          'AACO': 'Applicant Country',
+          'AAAT': 'Applicant Type',
+          'LREP': 'Attorney or Agent',
+          'AN': 'Assignee Name',
+          'AC': 'Assignee City',
+          'AS': 'Assignee State',
+          'ACN': 'Assignee Country',
+          'EXP': 'Primary Examiner',
+          'EXA': 'Assistant Examiner',
+          'REF': 'Referenced By',
+          'FREF': 'Foreign References',
+          'OREF': 'Other References',
+          'COFC': 'Certificate of Correction',
+          'REEX': 'Re-Examination Certificate',
+          'PTAB': 'PTAB Trial Certificate',
+          'SEC': 'Supplemental Exam Certificate',
+          'ILRN': 'International Registration Number',
+          'ILRD': 'International Registration Date',
+          'ILPD': 'International Registration Publication Date',
+          'ILFD': 'Hague International Filing Date'
+        })
+        for k, v in args.items():
+            if k == 'string' and '/' in v:
+                (kk, p, v) = v.partition('/')
+                if v and kk.upper() in search_codes:
+                    args[k] = '"{}"'.format(v)
+        searchstring = ' and '.join(['%s/%s' % (key, value) for (key, value) in args.items()])
         searchstring = searchstring.replace('string/', '')
         searchstring = searchstring.replace(' ', '+')
+        searchstring = searchstring.replace('-and-', '+and+')
 
         replace_dict = {'/': '%2F'}
-
         for k, v in replace_dict.items():
             searchstring = searchstring.replace(k, v)
 
         base_url = 'http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&u=%2Fnetahtml%2FPTO%2Fsearch-adv.htm&r=0&p=1&f=S&l=50&Query='
-
         url = base_url + searchstring + '&d=PTXT'
         r = self.web_connection.get(url)
         s = BeautifulSoup(r, 'html.parser')
-        total_results = int(s.find(string=re.compile('out of')).find_next().text.strip())
-
-        patents = self.get_patents_from_results_url(url, limit=results_limit)
-
-        num_results_fetched = len(patents)
-
-        list_num = 2
-
-        base_url_nextpgs = 'http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&u=%2Fnetahtml%2FPTO%2Fsearch-adv.htm&r=0&f=S&l=50&d=PTXT'
-
-        url_pre = base_url_nextpgs + '&OS=' + searchstring + '&RS=' + searchstring + '&Query=' + searchstring + '&TD=' + str(total_results) + '&Srch1=' + searchstring + '&NextList'
-        url_post = '=Next+50+Hits'
-
-        while (num_results_fetched < total_results) and (num_results_fetched < results_limit):
-            this_url = url_pre + str(list_num) + url_post
-            thispatents = self.get_patents_from_results_url(this_url)
-            patents.extend(thispatents)
-
+        if s.find(string=re.compile('out of')): #only proceed with function if search produces results
+            total_results = int(s.find(string=re.compile('out of')).find_next().text.strip())
+            patents = self.get_patents_from_results_url(url, limit=results_limit)
             num_results_fetched = len(patents)
+            list_num = 2
+            while (num_results_fetched < total_results) and (num_results_fetched < results_limit):
+                url_nextpg = 'http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&u=%2Fnetahtml%2FPTO%2Fsearch-adv.htm&r=0&p={0}&f=S&l=50&Query={1}&d=PTXT'.format(list_num, searchstring)
+
+                thispatents = self.get_patents_from_results_url(url_nextpg, limit=(results_limit - num_results_fetched))
+                patents.extend(thispatents)
 
-            if num_results_fetched >= results_limit:
-                patents = patents[:results_limit]
+                num_results_fetched = len(patents)
 
-            list_num += 1
+                if num_results_fetched >= results_limit:
+                    patents = patents[:results_limit]
 
+                list_num += 1
+        else:
+            patents = []
         self.patents = patents
 
     def get_patents_from_results_url(self, url: str, limit: int = None) -> list:
@@ -314,18 +371,17 @@ def get_patents_from_results_url(self, url: str, limit: int = None) -> list:
                             i.text.replace('\n', '').strip() != '']
 
         patents = []
-
         for patent_num_idx in range(0, len(patents_raw_list), 2):
-            if limit and (patent_num_idx + 1) > limit:
+            if limit and (patent_num_idx/2 + 1) > limit:
                 break
             patent_title = patents_raw_list[patent_num_idx + 1][0]
             patent_title = re.sub(' +', ' ', patent_title)
             patent_link = patents_raw_list[patent_num_idx][1]
+
             p = Patent(patent_title, patent_link, self.web_connection)
             if self.get_patent_details:
                 p.fetch_details()
             patents.append(p)
-
         return patents
 
     def as_dataframe(self) -> pd.DataFrame: