Skip to content

Commit

Permalink
v1.2 (#24)
Browse files Browse the repository at this point in the history
* Flake8 changes.

* Codefactor changes.

* Codefactor changes.

* Codefactor changes. Refactor on data_validations.

* Codefactor changes. Refactor on data_validations.

* Fixed small bugs and checked writing in metadata.

* Fixed bug on empty data.

* Changed setup for new version.

* Fixed small bugs on reading generic and added read from string.

* Version 1.1.1

* Version 1.1.2. Fixed setup.py

* Version 1.1.3. Improved performance on duplicates search.

* Version 1.2. Improved duplicates performance and added support for URN as structure.

* Version 1.2. Improved duplicates performance and added support for URN as structure.

* Version 1.2. Lint changes.
  • Loading branch information
javihern98 committed Dec 1, 2021
1 parent 6681021 commit d6c187b
Show file tree
Hide file tree
Showing 11 changed files with 60 additions and 36 deletions.
2 changes: 1 addition & 1 deletion SDMXthon.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: SDMXthon
Version: 1.0.3
Version: 1.2
Summary: Library with SDMX to Pandas, Pandas to SDMX, SDMX validation and SDMX metadata validation
Home-page: UNKNOWN
Author: MeaningfulData
Expand Down
14 changes: 3 additions & 11 deletions SDMXthon.egg-info/SOURCES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ sdmxthon/api/api.py
sdmxthon/model/__init__.py
sdmxthon/model/base.py
sdmxthon/model/component.py
sdmxthon/model/component_list.py
sdmxthon/model/dataset.py
sdmxthon/model/definitions.py
sdmxthon/model/descriptors.py
Expand All @@ -24,18 +23,10 @@ sdmxthon/model/message.py
sdmxthon/model/representation.py
sdmxthon/model/utils.py
sdmxthon/parsers/__init__.py
sdmxthon/parsers/data_generic.py
sdmxthon/parsers/data_parser.py
sdmxthon/parsers/data_structure.py
sdmxthon/parsers/data_read.py
sdmxthon/parsers/data_validations.py
sdmxthon/parsers/footer_parser.py
sdmxthon/parsers/gdscollector.py
sdmxthon/parsers/message_parsers.py
sdmxthon/parsers/metadata_validations.py
sdmxthon/parsers/payload_parser.py
sdmxthon/parsers/metadata_read.py
sdmxthon/parsers/read.py
sdmxthon/parsers/references.py
sdmxthon/parsers/status_message.py
sdmxthon/parsers/write.py
sdmxthon/schemas/SDMXCommon.xsd
sdmxthon/schemas/SDMXCommonReferences.xsd
Expand Down Expand Up @@ -97,4 +88,5 @@ sdmxthon/utils/__init__.py
sdmxthon/utils/enums.py
sdmxthon/utils/handlers.py
sdmxthon/utils/mappings.py
sdmxthon/utils/parsing_words.py
sdmxthon/utils/xml_base.py
1 change: 1 addition & 0 deletions SDMXthon.egg-info/requires.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ pandas
numpy
validators
requests
xmltodict
4 changes: 2 additions & 2 deletions sdmxthon/parsers/data_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@
def get_element_to_list(data, mode):
    """Collect the ``VALUE`` entries found under ``data[mode]`` into a dict.

    The entries are normalised with ``add_list`` before iterating
    (presumably it wraps a lone entry in a list -- confirm against
    ``add_list``). The normalised value is written back, so
    ``data[mode][VALUE]`` is overwritten as a side effect.

    :param data: Mapping holding the parsed element; ``data[mode]`` must
        exist.
    :param mode: Key of ``data`` whose entries are collected.
    :return: Dict mapping each entry's ``ID`` field to its
        ``VALUE.lower()`` field; empty when ``data[mode]`` has no
        ``VALUE`` key.
    """
    obs = {}
    if VALUE in data[mode]:
        # add_list supersedes the manual isinstance/list wrapping that
        # used to live here; keeping both would normalise twice.
        data[mode][VALUE] = add_list(data[mode][VALUE])
        for k in data[mode][VALUE]:
            obs[k[ID]] = k[VALUE.lower()]
    return obs
Expand Down Expand Up @@ -116,6 +115,7 @@ def get_at_att_str(dataset):
def get_at_att_gen(dataset):
    """Build an ``{id: value}`` dict from ``dataset[ATTRIBUTES]``.

    The ``VALUE`` entry is normalised with ``add_list`` and written back,
    so ``dataset[ATTRIBUTES][VALUE]`` is overwritten as a side effect.

    :param dataset: Mapping that contains an ``ATTRIBUTES`` entry.
    :return: Dict mapping each entry's ``ID`` field to its
        ``VALUE.lower()`` field; empty when there is no ``VALUE`` key.
    """
    if VALUE not in dataset[ATTRIBUTES]:
        return {}

    dataset[ATTRIBUTES][VALUE] = add_list(dataset[ATTRIBUTES][VALUE])
    return {item[ID]: item[VALUE.lower()]
            for item in dataset[ATTRIBUTES][VALUE]}
Expand Down
22 changes: 15 additions & 7 deletions sdmxthon/parsers/data_validations.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,15 @@ def validate_data(data: DataFrame, dsd: DataStructureDefinition):
if len(grouping_keys) > 0:
duplicated = data[data.duplicated(subset=grouping_keys, keep=False)]
if len(duplicated) > 0:
indexes = list(duplicated.index)
duplicated_dict = duplicated.to_dict(orient="records")
rows = dict(zip(indexes, duplicated_dict))
duplicated.groupby(by=grouping_keys).apply(
lambda x: create_error_SS07(x, errors, grouping_keys)
lambda x: create_error_SS07(x, rows, errors, grouping_keys)
)
del indexes
del duplicated_dict
del rows
del duplicated

return errors
Expand Down Expand Up @@ -631,21 +637,23 @@ def create_error_SS10_SS04(values, code, role, k, errors):
f'{role.lower()} {k}'})


def format_row(row, grouping_keys):
    """Render the grouping-key values of a row as text.

    Each key contributes a `` ( key : value ) `` fragment; a value whose
    string form is ``"nan"`` is rendered as an empty string.

    :param row: Mapping (or any object indexable by key) holding the row.
    :param grouping_keys: Iterable of keys to include, in output order.
    :return: The concatenated fragments, or ``''`` when there are no keys.
    :raises KeyError: If ``row`` is a dict lacking one of the keys.
    """
    parts = []
    for k in grouping_keys:
        value = str(row[k])
        parts.append(f' ( {str(k)} : {value if value != "nan" else ""} ) ')
    return ''.join(parts)


def create_error_SS07(x, rows, errors, grouping_keys):
    """Append one SS07 "Duplicated datapoint" warning to ``errors``.

    :param x: Group of duplicated rows; only its ``index`` is read.
    :param rows: Mapping from index label to that row's record dict.
    :param errors: List that receives the new error dict (mutated in
        place).
    :param grouping_keys: Keys whose values are shown in the message.
    :raises KeyError: If an index label of ``x`` is missing from ``rows``.
    """
    # Look the records up in the prebuilt mapping instead of converting
    # the group to dicts again.
    elems = [rows[k] for k in x.index]
    errors.append({'Code': 'SS07',
                   'ErrorLevel': 'WARNING',
                   'Component': 'Duplicated',
                   'Type': 'Datapoint',
                   'Rows': elems,
                   'Message': f'Duplicated datapoint '
                              f'{format_row(elems[0], grouping_keys)}'
                   })


Expand Down
36 changes: 25 additions & 11 deletions sdmxthon/parsers/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@

from sdmxthon.parsers.data_read import create_dataset
from sdmxthon.parsers.metadata_read import create_metadata
from sdmxthon.utils.handlers import split_from_urn
from sdmxthon.utils.parsing_words import SERIES, OBS, STRSPE, GENERIC, \
STRREF, STRUCTURE, STRID, namespaces, HEADER, DATASET, REF, AGENCY_ID, \
ID, VERSION, DIM_OBS, ALL_DIM, STRUCTURES, STR_USAGE
ID, VERSION, DIM_OBS, ALL_DIM, STRUCTURES, STR_USAGE, URN
from sdmxthon.utils.xml_base import validate_doc, \
process_string_to_read

Expand Down Expand Up @@ -98,16 +99,25 @@ def read_xml(infile, mode=None, validate=True):
return datasets


def get_ids_from_structure(element: dict):
    """Extract ``(agency_id, id, version)`` from a structure element.

    A ``REF`` entry takes precedence; otherwise a ``URN`` entry is split
    with ``split_from_urn``.

    :param element: Mapping that may contain a ``REF`` or a ``URN`` entry.
    :return: Tuple ``(agency_id, id, version)``, or
        ``(None, None, None)`` when neither entry is present.
    """
    if REF in element:
        ref = element[REF]
        return ref[AGENCY_ID], ref[ID], ref[VERSION]

    if URN in element:
        return split_from_urn(element[URN])

    return None, None, None


def get_elements_from_structure(structure):
    """Return ``(agency_id, id, version)`` for a header structure entry.

    ``STRUCTURE`` is checked before ``STR_USAGE``; whichever is present
    is handed to ``get_ids_from_structure``.

    :param structure: Mapping that may contain a ``STRUCTURE`` or a
        ``STR_USAGE`` entry.
    :return: The tuple produced by ``get_ids_from_structure``, or
        ``(None, None, None)`` when neither key is present.
    """
    # Both keys are handled identically, so one loop replaces the
    # duplicated branches; tuple order preserves the original precedence.
    for key in (STRUCTURE, STR_USAGE):
        if key in structure:
            return get_ids_from_structure(structure[key])

    return None, None, None


def get_dataset_metadata(structure, dataset_ref, mode):
Expand All @@ -118,7 +128,11 @@ def get_dataset_metadata(structure, dataset_ref, mode):

if dataset_ref == structure[STRID]:
agency_id, id_, version = get_elements_from_structure(structure)
return {DIM_OBS: structure[DIM_OBS],
STRID: f"{agency_id}:{id_}({version})"}
if agency_id is not None:
return {DIM_OBS: structure[DIM_OBS],
STRID: f"{agency_id}:{id_}({version})"}
else:
return {DIM_OBS: structure[DIM_OBS],
STRID: f"{id_}({version})"}
else:
raise Exception
5 changes: 3 additions & 2 deletions sdmxthon/testSuite/APImethods/data/data_sample/str_all.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@
namespace="urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=BIS:BIS_DER(1.0)"
dimensionAtObservation="AllDimensions">
<common:Structure>
<Ref agencyID="BIS" id="BIS_DER" version="1.0"
class="DataStructure"/>
<URN>
urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=BIS:BIS_DER(1.0)
</URN>
</common:Structure>
</message:Structure>
</message:Header>
Expand Down
4 changes: 4 additions & 0 deletions sdmxthon/utils/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ def split_unique_id(obj_: str):
return agencyID, id, version


def split_from_urn(obj_: str):
    """Split a URN into the parts returned by ``split_unique_id``.

    Everything after the first ``=`` is passed to ``split_unique_id``,
    e.g. ``BIS:BIS_DER(1.0)`` for
    ``urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=BIS:BIS_DER(1.0)``.

    :param obj_: URN string containing at least one ``=``.
    :return: Whatever ``split_unique_id`` returns for the trailing part.
    :raises IndexError: If ``obj_`` contains no ``=``.
    """
    pieces = obj_.split("=", 1)
    unique_id = pieces[1]
    return split_unique_id(unique_id)


def get_outfile(obj_: dict, key='', indent=''):
element = obj_.get(key) or []

Expand Down
1 change: 1 addition & 0 deletions sdmxthon/utils/parsing_words.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@
TYPE = 'type'
TEXT = 'text'
URL = 'url'
URN = "URN"

# Representation
CORE_REP = 'CoreRepresentation'
Expand Down
2 changes: 2 additions & 0 deletions sdmxthon/utils/xml_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ def process_string_to_read(infile: str):
except requests.ConnectionError:
raise requests.ConnectionError('Invalid URL. '
'No response from server')
elif len(infile) > 10 and "<?" in infile[:10] and "xml" in infile[:10]:
pass
elif '/' in infile or '\\' in infile:
try:
infile = os.path.join(infile)
Expand Down
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
long_description_content_type='text/x-rst',
packages=setuptools.find_packages(),
include_package_data=True,
version='1.0.3',
version='1.2',
license='Apache 2.0',
license_files='license.txt',
author='MeaningfulData',
Expand All @@ -29,7 +29,8 @@
'pandas',
'numpy',
'validators',
'requests'
'requests',
'xmltodict'
],
classifiers=[
'Development Status :: 4 - Beta',
Expand Down

0 comments on commit d6c187b

Please sign in to comment.