Skip to content

Commit

Permalink
added verify_ssl to config parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinoMensio committed Mar 8, 2023
1 parent 5c5802c commit 7d598b7
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 5 deletions.
26 changes: 26 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,32 @@ nlp.get_pipe('dbpedia_spotlight').raise_http_errors = False
doc = nlp('')
```

## Ignore SSL verification

In case you need to disable SSL verification (e.g. you are getting `SSLCertVerificationError` and you are certain that you know what you are doing), you can use the parameter `verify_ssl` to do it:

- `True`: the server's SSL certificate is verified on HTTPS requests. This is the default.
- `False`: HTTPS requests are sent without verifying the server's SSL certificate. Use carefully.

```python
import spacy
nlp = spacy.blank('en')
# during the pipeline instantiation (e.g. custom dbpedia_rest_endpoint with HTTPS but self-signed certificate)
nlp.add_pipe('dbpedia_spotlight', config={'verify_ssl': False})
# or afterwards
nlp.get_pipe('dbpedia_spotlight').verify_ssl = False
# this will generate a warning, but will not break your processing (e.g. in a loop)
doc = nlp('Google LLC is an American multinational technology company.')
print(doc.ents)

# you can suppress warnings with this
import requests
from urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
# and now no warnings
doc = nlp('Google LLC is an American multinational technology company.')
print(doc.ents)
```
## Using this when training your pipeline

If you are [training a pipeline](https://spacy.io/usage/training#quickstart) and you want to include the component in it, you can add to your `config.cfg`:
Expand Down
13 changes: 8 additions & 5 deletions spacy_dbpedia_spotlight/entity_linker.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,10 @@
'span_group': 'dbpedia_spotlight',
'overwrite_ents': True,
'raise_http_errors': True,
'verify_ssl': True,
'debug': False
})
def dbpedia_spotlight_factory(nlp, name, language_code, dbpedia_rest_endpoint, process, confidence, support, types, sparql, policy, span_group, overwrite_ents, raise_http_errors, debug):
def dbpedia_spotlight_factory(nlp, name, language_code, dbpedia_rest_endpoint, process, confidence, support, types, sparql, policy, span_group, overwrite_ents, raise_http_errors, verify_ssl, debug):
'''Factory of the pipeline stage `dbpedia_spotlight`.
Parameters:
- `language_code`: which language to use for entity linking. Possible values are listed in EntityLinker.supported_languages. If the parameter is left as None, the language code is matched with the nlp object currently used.
Expand All @@ -45,6 +46,7 @@ def dbpedia_spotlight_factory(nlp, name, language_code, dbpedia_rest_endpoint, p
- `span_group`: which span group to write the entities to. By default the value is `dbpedia_spotlight` which writes to `doc.spans['dbpedia_spotlight']`
- `overwrite_ents`: if set to False, it won't overwrite `doc.ents` in cases of overlapping spans with current entities, and only produce the results in `doc.spans[span_group]. If it is True, it will move the entities from doc.ents into `doc.spans['ents_original']`
- `raise_http_errors`: if set to True, it will raise the HTTPErrors generated by the dbpedia REST API. If False instead, HTTPErrors will be ignored. Default to True.
- `verify_ssl`: if set to False, it will not verify SSL certificates (strongly discouraged). Default to True for verification.
- `debug`: prints several debug information to stdout
'''
logger.remove()
Expand All @@ -54,14 +56,14 @@ def dbpedia_spotlight_factory(nlp, name, language_code, dbpedia_rest_endpoint, p
logger.add(sys.stdout, level="INFO")
logger.debug(f'dbpedia_spotlight_factory: {nlp}, language_code: {language_code}, dbpedia_rest_endpoint: {dbpedia_rest_endpoint}, '
f'process: {process}, confidence: {confidence}, support: {support}, types: {types}, '
f'sparql: {sparql}, policy: {policy}, overwrite_ents: {overwrite_ents}')
f'sparql: {sparql}, policy: {policy}, overwrite_ents: {overwrite_ents}, raise_http_errors: {raise_http_errors}, verify_ssl: {verify_ssl}')
# take the language code from the nlp object
nlp_lang_code = nlp.meta['lang']
logger.debug(f'nlp.meta["lang"]={nlp_lang_code}')
# language_code can override the language code from the nlp object
if not language_code:
language_code = nlp_lang_code
return EntityLinker(language_code, dbpedia_rest_endpoint, process, confidence, support, types, sparql, policy, span_group, overwrite_ents, raise_http_errors, debug)
return EntityLinker(language_code, dbpedia_rest_endpoint, process, confidence, support, types, sparql, policy, span_group, overwrite_ents, raise_http_errors, verify_ssl, debug)


class EntityLinker(object):
Expand All @@ -74,7 +76,7 @@ class EntityLinker(object):
supported_processes = ['annotate', 'spot', 'candidates']

def __init__(self, language_code='en', dbpedia_rest_endpoint=None, process='annotate', confidence=None, support=None,
types=None, sparql=None, policy=None, span_group='dbpedia_spotlight', overwrite_ents=True, raise_http_errors=True, debug=False):
types=None, sparql=None, policy=None, span_group='dbpedia_spotlight', overwrite_ents=True, raise_http_errors=True, verify_ssl=True, debug=False):
# constructor of the pipeline stage
if dbpedia_rest_endpoint is None and language_code not in self.supported_languages:
raise ValueError(
Expand All @@ -92,6 +94,7 @@ def __init__(self, language_code='en', dbpedia_rest_endpoint=None, process='anno
self.span_group = span_group
self.overwrite_ents = overwrite_ents
self.raise_http_errors = raise_http_errors
self.verify_ssl = verify_ssl
self.debug = debug
self.dbpedia_rest_endpoint = dbpedia_rest_endpoint

Expand Down Expand Up @@ -216,7 +219,7 @@ def make_request(self, doc: Doc):

# TODO: application/ld+json would be more detailed? https://github.com/digitalbazaar/pyld
return requests.post(
f'{endpoint}/{self.process}', headers={'accept': 'application/json'}, data=params)
f'{endpoint}/{self.process}', headers={'accept': 'application/json'}, verify=self.verify_ssl, data=params)

def get_remote_response(self, doc: Doc):
"""
Expand Down

0 comments on commit 7d598b7

Please sign in to comment.