#
# convert special foreign characters into ascii characters
#
def convert_chars(name, default = 'X'):
    """Return an ASCII-only identifier derived from name.

    name    -- text to convert; may be None or empty
    default -- value returned when nothing usable remains

    A name that is already purely alphanumeric ASCII is returned unchanged.
    Otherwise the name is transliterated to ASCII with unidecode() and every
    character other than letters, digits and underscore is removed.  If the
    result is empty, default is returned.
    """
    # None or '' has nothing to convert; without this guard the regex call
    # below raises TypeError when name is None
    if not name: return default

    # fullmatch instead of match(r'^...$'): '$' also matches just before a
    # trailing newline, which would let "abc\n" through unconverted
    if re.fullmatch(r'[a-zA-Z0-9]+', name): return name   # conversion not needed

    # imported here so names that need no conversion never touch the
    # third-party package
    from unidecode import unidecode

    decoded_name = unidecode(name)
    # remove any non-alphanumeric and non-underscore characters (this also
    # drops all whitespace, so no separate strip() is needed)
    cleaned_name = re.sub(r'[^a-zA-Z0-9_]', '', decoded_name)

    return cleaned_name or default