Skip to content

Commit

Permalink
Merge pull request #15 from brightway-lca/better-cas
Browse files Browse the repository at this point in the history
More robust CAS validation
  • Loading branch information
cmutel authored Dec 11, 2024
2 parents bf8aec2 + 3171387 commit 77f0256
Show file tree
Hide file tree
Showing 5 changed files with 372 additions and 15 deletions.
4 changes: 2 additions & 2 deletions bw_simapro_csv/blocks/generic_biosphere.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Any, List

from ..cas import validate_cas
from ..cas import validate_cas_string
from ..utils import add_amount_or_formula, skip_empty
from .base import SimaProCSVBlock

Expand Down Expand Up @@ -44,7 +44,7 @@ def __init__(self, block: List[tuple], header: dict, category: str):
{
"name": line[0],
"unit": line[1],
"cas_number": validate_cas(line[2]),
"cas_number": validate_cas_string(line[2]),
"comment": line[3],
"line_no": line_no,
}
Expand Down
4 changes: 2 additions & 2 deletions bw_simapro_csv/blocks/impact_category.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import List

from ..cas import validate_cas
from ..cas import validate_cas_string
from ..utils import asnumber, jump_to_nonempty, skip_empty
from .base import SimaProCSVBlock

Expand Down Expand Up @@ -43,7 +43,7 @@ def __init__(self, block: List[tuple], header: dict):
{
"context": (line[0], line[1]),
"name": line[2],
"cas_number": validate_cas(line[3]),
"cas_number": validate_cas_string(line[3]),
"factor": asnumber(line[4]),
"unit": line[5],
"line_no": line_no,
Expand Down
54 changes: 43 additions & 11 deletions bw_simapro_csv/cas.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,54 @@
from numbers import Number
from typing import Any, Optional
from typing import Optional

import numpy as np
from loguru import logger


def validate_cas(s: Any) -> Optional[str]:
ERROR = "CAS Check Digit error: CAS '{}' has check digit of {}, but it should be {}"
def calculate_check_digit(cas: str) -> int:
    """Return the check digit for the digits of a CAS number.

    ``cas`` is the CAS number *without* its check digit and without
    hyphens; every character is passed to ``int()``, so any non-digit
    character raises ``ValueError``.

    The digits are weighted 1, 2, 3, ... starting from the rightmost one,
    and the weighted sum modulo 10 is returned. Only the nine rightmost
    digits take part in the sum.
    """
    weighted_total = 0
    # Walk the digits right-to-left; the slice caps the walk at nine digits.
    for weight, digit in enumerate(cas[::-1][:9], start=1):
        weighted_total += weight * int(digit)
    return weighted_total % 10

if isinstance(s, str):
s = s.strip()
if not s:

def validate_cas_string(cas: Optional[str]) -> Optional[str]:
    """Validate and normalise a CAS registry number.

    Accepted layouts are ``first-second-check`` (two hyphens), the same
    digits with no hyphens at all, and ``first-second-`` where the check
    digit is missing and gets computed.

    Args:
        cas: The raw CAS value. ``None``, NaN, and empty or whitespace-only
            strings mean "no CAS number".

    Returns:
        The hyphenated CAS number with leading zeros stripped, or ``None``
        when no CAS number was given or the value could not be validated.
        Every rejected value is reported through ``logger.warning``; no
        exception is raised for malformed text.
    """
    if cas is None:
        return None
    if isinstance(cas, Number) and np.isnan(cas):
        return None
    if not isinstance(cas, str):
        # Previously this fell through to a `"-" not in cas` TypeError.
        logger.warning(
            "Given CAS can't be validated, expected a string but got {}: {}".format(
                type(cas).__name__, cas
            )
        )
        return None

    cas = cas.strip()
    if not cas:
        return None

    hyphens = cas.count("-")
    if hyphens == 0:
        # Un-hyphenated form: last character is the check digit, the two
        # before it are the middle group, everything else is the first group.
        first, second, third = cas[:-3], cas[-3:-1], cas[-1]
    elif hyphens == 2:
        first, second, third = cas.split("-")
    else:
        logger.warning(
            "Given CAS can't be validated, wrong number of hyphens are present: {}".format(
                cas
            )
        )
        return None

    body = first + second
    # `calculate_check_digit` calls int() on every character, so reject
    # anything that is not a plain ASCII digit (or is empty) up front.
    if not (body.isascii() and body.isdigit()):
        logger.warning(
            "Given CAS can't be validated, non-digit characters are present: {}".format(
                cas
            )
        )
        return None

    expected = str(calculate_check_digit(body))

    if not third:
        # e.g. 1228284-64-
        repaired = (cas + expected).lstrip("0")
        logger.warning(
            "Adding missing CAS check digit, {} -> {}".format(cas, repaired)
        )
        return repaired

    if third != expected:
        # Report the digit that *should* be there, not the one supplied.
        logger.warning(
            "Removing invalid CAS number {}; last digit should be {}".format(
                cas, expected
            )
        )
        return None

    return "-".join([first, second, third]).lstrip("0")
return s.lstrip("0")
293 changes: 293 additions & 0 deletions tests/fixtures/cas_missing_check_number.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,293 @@
{SimaPro 8.0}
{processes}
{Date: 07.03.2014}
{Time: 15:52:27}
{Project: Test}
{CSV Format version: 7.0.0}
{CSV separator: Semicolon}
{Decimal separator: ,}
{Date separator: .}
{Short date format: dd.MM.yyyy}

Process

PlatformId
298f6b5c-46f5-11ec-81d3-0242ac130003

Category type
material

Process identifier
DefaultX25250700002

Type
Unit process

Process name
Test process

Status
Draft

Time period
2005-2009

Geography
Mixed data

Technology
Worst case

Representativeness
Theoretical calculation

Multiple output allocation
Physical causality

Substitution allocation
Actual substitution

Cut off rules
Less than 1% (physical criteria)

Capital goods
First order (only primary flows)

Boundary with nature
Agricultural production is part of production system

Infrastructure
No

Date
24.02.2014

Record
data entry by: [System]

Generator
generated by: [System]

Literature references
Ecoinvent 3;is copyright protected: false

Collection method
text for collection method

Data treatment
text for data treatment

Verification
text for verification

Comment
text for comment

Allocation rules
text for allocation rules

System description
U.S. LCI Database;system description comment

Products
my product;kg;0,5;100;not defined;Agricultural;

Avoided products
Wool, at field/US;kg;1;Undefined;0;0;0;

Resources
Acids;;kg;1;Undefined;0;0;0;

Materials/fuels
Soy oil, refined, at plant/kg/RNA;kg;0;Undefined;0;0;0;

Electricity/heat
Electricity, biomass, at power plant/US;kWh;0,1;Undefined;0;0;0;

Emissions to air
(+-)-Citronellol;low. pop.;kg;1;Lognormal;2;0;0;(1,2,3,4,5)with comment

Emissions to water
(1r,4r)-(+)-Camphor;lake;kg;1;Normal;3;0;0;;80db70fc-46d4-11ec-81d3-0242ac130003

Emissions to soil
1'-Acetoxysafrole;forestry;kg;1;Triangle;0;1;5;

Final waste flows
Asbestos;;kg;1;Uniform;0;1;2;

Non material emissions
Noise from bus km;;km;1;Undefined;0;0;0;

Social issues
venting of argon, crude, liquid;;kg;1;Undefined;0;0;0;

Economic issues
Sample economic issue;;kg;1;Undefined;0;0;0;

Waste to treatment
Dummy, Disposal, msw, to sanitary landfill/kg/GLO;kg;1;Undefined;0;0;0;

Input parameters
input_param;23,4;Uniform;0;13;33;No;this is the parameter commentis this presented in multiple lines?we should even be able tu use ���

Calculated parameters
calc_param;input_param ^ 2;comment for calc_param


End


System description

Name
system name

Category
Others

Description
text for description

Sub-systems
text for sub-systems

Cut-off rules
text for cut-off rules

Energy model
text for energy model

Transport model
text for transport model

Waste model
text for waste model

Other assumptions
text for other assumptions

Other information
text for other information

Allocation rules
text for allocation rules

End

Quantities
Mass;Yes
Energy;Yes
Length;Yes

End


Units
kg;Mass;1;kg
g;Mass;0,001;kg
kWh;Energy;3,6;MJ
MJ;Energy;1;MJ
ton;Mass;1000;kg
µg;Mass;0,000000001;kg
mg;Mass;0,000001;kg
GJ;Energy;1000;MJ
J;Energy;0,000001;MJ
kJ;Energy;0,001;MJ
Mtn;Mass;1000000000;kg
PJ;Energy;1000000000;MJ
TJ;Energy;1000000;MJ
kton;Mass;1000000;kg
ng;Mass;1,0E-12;kg
pg;Mass;1,0E-15;kg
MWh;Energy;3600;MJ
lb;Mass;0,4535924;kg
Btu;Energy;0,001055696;MJ
oz;Mass;0,02834952;kg
tn.sh;Mass;907,1848;kg
tn.lg;Mass;1016,047;kg
km;Length;1000;m
m;Length;1;m
cm;Length;0,01;m
dm;Length;0,1;m
mm;Length;0,001;m
µm;Length;0,000001;m
ft;Length;0,3048;m
inch;Length;0,0254;m
yard;Length;0,9144;m
mile;Length;1609,35;m
kcal;Energy;0,0041855;MJ
Wh;Energy;0,0036;MJ

End


Raw materials
Acids;kg;;

End


Airborne emissions
(+-)-Citronellol;kg;026489-01-0;

End


Waterborne emissions
(1r,4r)-(+)-Camphor;kg;000464-49-3;;80db70fc-46d4-11ec-81d3-0242ac130003

End


Final waste flows
Asbestos;kg;;

End


Emissions to soil
Prothioconazole;kg;178928-70-5;Formula: C14H15Cl2N3OS
Pydiflumetofen;kg;1228284-64-;Formula: C16H16Cl3F2N3O2


End


Non material emissions
Noise from bus km;km;;

End


Social issues
venting of argon, crude, liquid;kg;;

End


Economic issues
Sample economic issue;kg;;

End


Database Input parameters
db_input_param;1;Lognormal;1;0;0;No;database parameter

End

Database Calculated parameters
db_calc_param;db_input_param * 3;calculated database parameter

End

Project Input parameters
proj_input_param;32;Uniform;0;10;35;No;project input parameter

End

Project Calculated parameters
proj_calc_param;db_input_param *4;project calculated parameter

End
Loading

0 comments on commit 77f0256

Please sign in to comment.