From 7d598b7ed9f98878faa9edc4ebd96845bf1550b1 Mon Sep 17 00:00:00 2001 From: Martino Mensio Date: Wed, 8 Mar 2023 11:22:19 +0100 Subject: [PATCH] addedd verify_ssl to config parameters --- README.md | 26 ++++++++++++++++++++++++ spacy_dbpedia_spotlight/entity_linker.py | 13 +++++++----- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index aec107e..d3b6fd9 100644 --- a/README.md +++ b/README.md @@ -335,6 +335,32 @@ nlp.get_pipe('dbpedia_spotlight').raise_http_errors = False doc = nlp('') ``` +## Ignore SSL verification + +In case you need to disable SSL verification (e.g. you are getting `SSLCertVerificationError` and you are certain that you know what you are doing), you can use the parameter `verify_ssl` to do it: + +- `True`: HTTPS requests are verified with SSL verification. This is the default. +- `False`: HTTPS requests will not trigger a certificate verification. Use carefully. + +```python +import spacy +nlp = spacy.blank('en') +# during the pipeline instantiation (e.g. custom dbpedia_rest_endpoint with HTTPS but self-signed certificate) +nlp.add_pipe('dbpedia_spotlight', config={'verify_ssl': False}) +# or afterwards +nlp.get_pipe('dbpedia_spotlight').verify_ssl = False +# this will generate a warning, but will not break your processing (e.g. 
in a loop) +doc = nlp('Google LLC is an American multinational technology company.') +print(doc.ents) + +# you can suppress warnings with this +import requests +from urllib3.exceptions import InsecureRequestWarning +requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning) +# and now no warnings +doc = nlp('Google LLC is an American multinational technology company.') +print(doc.ents) +``` ## Using this when training your pipeline If you are [training a pipeline](https://spacy.io/usage/training#quickstart) and you want to include the component in it, you can add to your `config.cfg`: diff --git a/spacy_dbpedia_spotlight/entity_linker.py b/spacy_dbpedia_spotlight/entity_linker.py index d3ebd24..a5b49b0 100644 --- a/spacy_dbpedia_spotlight/entity_linker.py +++ b/spacy_dbpedia_spotlight/entity_linker.py @@ -29,9 +29,10 @@ 'span_group': 'dbpedia_spotlight', 'overwrite_ents': True, 'raise_http_errors': True, + 'verify_ssl': True, 'debug': False }) -def dbpedia_spotlight_factory(nlp, name, language_code, dbpedia_rest_endpoint, process, confidence, support, types, sparql, policy, span_group, overwrite_ents, raise_http_errors, debug): +def dbpedia_spotlight_factory(nlp, name, language_code, dbpedia_rest_endpoint, process, confidence, support, types, sparql, policy, span_group, overwrite_ents, raise_http_errors, verify_ssl, debug): '''Factory of the pipeline stage `dbpedia_spotlight`. Parameters: - `language_code`: which language to use for entity linking. Possible values are listed in EntityLinker.supported_languages. If the parameter is left as None, the language code is matched with the nlp object currently used. @@ -45,6 +46,7 @@ def dbpedia_spotlight_factory(nlp, name, language_code, dbpedia_rest_endpoint, p - `span_group`: which span group to write the entities to. 
By default the value is `dbpedia_spotlight` which writes to `doc.spans['dbpedia_spotlight']` - `overwrite_ents`: if set to False, it won't overwrite `doc.ents` in cases of overlapping spans with current entities, and only produce the results in `doc.spans[span_group]. If it is True, it will move the entities from doc.ents into `doc.spans['ents_original']` - `raise_http_errors`: if set to True, it will raise the HTTPErrors generated by the dbpedia REST API. If False instead, HTTPErrors will be ignored. Default to True. + - `verify_ssl`: if set to False, it will not verify SSL certificates (strongly discouraged). Default to True for verification. - `debug`: prints several debug information to stdout ''' logger.remove() @@ -54,14 +56,14 @@ def dbpedia_spotlight_factory(nlp, name, language_code, dbpedia_rest_endpoint, p logger.add(sys.stdout, level="INFO") logger.debug(f'dbpedia_spotlight_factory: {nlp}, language_code: {language_code}, dbpedia_rest_endpoint: {dbpedia_rest_endpoint}, ' f'process: {process}, confidence: {confidence}, support: {support}, types: {types}, ' - f'sparql: {sparql}, policy: {policy}, overwrite_ents: {overwrite_ents}') + f'sparql: {sparql}, policy: {policy}, overwrite_ents: {overwrite_ents}, raise_http_errors: {raise_http_errors}, verify_ssl: {verify_ssl}') # take the language code from the nlp object nlp_lang_code = nlp.meta['lang'] logger.debug(f'nlp.meta["lang"]={nlp_lang_code}') # language_code can override the language code from the nlp object if not language_code: language_code = nlp_lang_code - return EntityLinker(language_code, dbpedia_rest_endpoint, process, confidence, support, types, sparql, policy, span_group, overwrite_ents, raise_http_errors, debug) + return EntityLinker(language_code, dbpedia_rest_endpoint, process, confidence, support, types, sparql, policy, span_group, overwrite_ents, raise_http_errors, verify_ssl, debug) class EntityLinker(object): @@ -74,7 +76,7 @@ class EntityLinker(object): supported_processes = ['annotate', 
'spot', 'candidates'] def __init__(self, language_code='en', dbpedia_rest_endpoint=None, process='annotate', confidence=None, support=None, - types=None, sparql=None, policy=None, span_group='dbpedia_spotlight', overwrite_ents=True, raise_http_errors=True, debug=False): + types=None, sparql=None, policy=None, span_group='dbpedia_spotlight', overwrite_ents=True, raise_http_errors=True, verify_ssl=True, debug=False): # constructor of the pipeline stage if dbpedia_rest_endpoint is None and language_code not in self.supported_languages: raise ValueError( @@ -92,6 +94,7 @@ def __init__(self, language_code='en', dbpedia_rest_endpoint=None, process='anno self.span_group = span_group self.overwrite_ents = overwrite_ents self.raise_http_errors = raise_http_errors + self.verify_ssl = verify_ssl self.debug = debug self.dbpedia_rest_endpoint = dbpedia_rest_endpoint @@ -216,7 +219,7 @@ def make_request(self, doc: Doc): # TODO: application/ld+json would be more detailed? https://github.com/digitalbazaar/pyld return requests.post( - f'{endpoint}/{self.process}', headers={'accept': 'application/json'}, data=params) + f'{endpoint}/{self.process}', headers={'accept': 'application/json'}, verify=self.verify_ssl, data=params) def get_remote_response(self, doc: Doc): """