diff --git a/pyproject.toml b/pyproject.toml index ea7ca74..e4ad4e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "rda_python_common" -version = "1.0.56" +version = "2.0.0" authors = [ { name="Zaihua Ji", email="zji@ucar.edu" }, ] @@ -33,3 +33,4 @@ pythonpath = [ [project.scripts] pgpassword = "rda_python_common.pgpassword:main" +pg_pass = "rda_python_common.pg_pass:main" diff --git a/src/rda_python_common/PgLOG.py b/src/rda_python_common/PgLOG.py index 8e19d98..f525789 100644 --- a/src/rda_python_common/PgLOG.py +++ b/src/rda_python_common/PgLOG.py @@ -1380,7 +1380,7 @@ def set_common_pglog(): SETPGLOG("TMPDIR", '') if not PGLOG['TMPDIR']: - PGLOG['TMPDIR'] = "/lustre/desc1/gdex/scratch/" + PGLOG['CURUID'] + PGLOG['TMPDIR'] = "/lustre/desc1/scratch/" + PGLOG['CURUID'] os.environ['TMPDIR'] = PGLOG['TMPDIR'] # empty directory for HOST-sync diff --git a/src/rda_python_common/PgSIG.py b/src/rda_python_common/PgSIG.py index abf27ba..4efe376 100644 --- a/src/rda_python_common/PgSIG.py +++ b/src/rda_python_common/PgSIG.py @@ -190,10 +190,10 @@ def check_daemon(aname, uname = None): if uname: check_vuser(uname, aname) pcmd = "ps -u {} -f | grep {} | grep ' 1 '".format(uname, aname) - mp = "^\s*{}\s+(\d+)\s+1\s+".format(uname) + mp = r"^\s*{}\s+(\d+)\s+1\s+".format(uname) else: pcmd = "ps -C {} -f | grep ' 1 '".format(aname) - mp = "^\s*\w+\s+(\d+)\s+1\s+" + mp = r"^\s*\w+\s+(\d+)\s+1\s+" buf = PgLOG.pgsystem(pcmd, PgLOG.LOGWRN, 20+1024) if buf: @@ -220,10 +220,10 @@ def check_application(aname, uname = None, sargv = None): if uname: check_vuser(uname, aname) pcmd = "ps -u {} -f | grep {} | grep -v ' grep '".format(uname, aname) - mp = "^\s*{}\s+(\d+)\s+(\d+)\s+.*{}\S*\s+(.*)$".format(uname, aname) + mp = r"^\s*{}\s+(\d+)\s+(\d+)\s+.*{}\S*\s+(.*)$".format(uname, aname) else: pcmd = "ps -C {} -f".format(aname) - mp = "^\s*\w+\s+(\d+)\s+(\d+)\s+.*{}\S*\s+(.*)$".format(aname) + mp = r"^\s*\w+\s+(\d+)\s+(\d+)\s+.*{}\S*\s+(.*)$".format(aname) buf = PgLOG.pgsystem(pcmd, PgLOG.LOGWRN, 20+1024) if not buf: return 0 @@ -293,10 +293,10 @@ def check_multiple_application(aname, uname = None, sargv = None): if uname: check_vuser(uname, aname) pcmd = "ps -u {} -f | grep {} | grep -v ' grep '".format(uname, aname) - mp = "^\s*{}\s+(\d+)\s+(\d+)\s+.*{}\S*\s+(.*)$".format(uname, aname) + mp = r"^\s*{}\s+(\d+)\s+(\d+)\s+.*{}\S*\s+(.*)$".format(uname, aname) else: pcmd = "ps -C {} -f".format(aname) - mp = "^\s*\w+\s+(\d+)\s+(\d+)\s+.*{}\S*\s+(.*)$".format(aname) + mp = r"^\s*\w+\s+(\d+)\s+(\d+)\s+.*{}\S*\s+(.*)$".format(aname) buf = PgLOG.pgsystem(pcmd, PgLOG.LOGWRN, 20+1024) if not buf: return 0 diff --git a/src/rda_python_common/PgSplit.py b/src/rda_python_common/PgSplit.py index 2f19297..906f823 100644 --- a/src/rda_python_common/PgSplit.py +++ b/src/rda_python_common/PgSplit.py @@ -5,8 +5,8 @@ # Author : Zaihua Ji, zji@ucar.edu # Date : 09/10/2024 # 2025-01-10 transferred to package rda_python_common from -# https://github.com/NCAR/rda-shared-libraries.git -# Purpose : Python library module to handle query and manipulate table wfile +# https://github.com/NCAR/rda-shared-libraries.git +# Purpose : Python library module to handle query and manipulate table wfile # # Github : https://github.com/NCAR/rda-python-common.git # @@ -106,7 +106,7 @@ def get_dsid_condition(dsid, condition): if condition: - if re.search('(^|.| )(wid|dsid)\s*=', condition): + if re.search(r'(^|.| )(wid|dsid)\s*=', condition): return condition else: dscnd = "wfile.dsid = '{}' ".format(dsid)
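Note on the PgSIG and PgSplit hunks above: they only add the `r` prefix to existing patterns. In a plain string literal, `\s`, `\d`, and `\w` are invalid string escapes, which newer Python releases flag (a `DeprecationWarning` since 3.6, upgraded to `SyntaxWarning` in 3.12); a raw string passes the backslashes to `re` untouched. A minimal sketch, with a made-up `ps -f` output line:

```python
import re

# Raw literal: identical pattern text to the old plain string,
# but no string-escape processing, hence no SyntaxWarning.
mp = r"^\s*\w+\s+(\d+)\s+1\s+"

line = "  root  4321  1  0 09:00 ?  00:00:01 mydaemon"
ms = re.match(mp, line)
if ms: print(ms.group(1))  # -> 4321, pid of a process whose parent pid is 1
```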
diff --git a/src/rda_python_common/pg_cmd.py b/src/rda_python_common/pg_cmd.py new file mode 100644 index 0000000..db1dfaa --- /dev/null +++ b/src/rda_python_common/pg_cmd.py @@ -0,0 +1,493 @@ +# +############################################################################### +# +# Title : pg_cmd.py +# Author : Zaihua Ji, zji@ucar.edu +# Date : 08/25/2020 +# 2025-01-10 transferred to package rda_python_common from +# https://github.com/NCAR/rda-shared-libraries.git +# Purpose : python library module for functions to record commands for delayed +# mode or command recovery +# +# Github : https://github.com/NCAR/rda-python-common.git +# +############################################################################### +# +import os +import re +import sys +import time +from .pg_lock import PgLock + +class PgCMD(PgLock): + + def __init__(self): + super().__init__() # initialize parent class + # cached dscheck info + self.DSCHK = {} + self.BOPTIONS = {"hostname" : None, "qoptions" : None, "modules" : None, "environments" : None} + self.BFIELDS = ', '.join(self.BOPTIONS) + self.TRYLMTS = { + 'dsquasar' : 3, + 'dsarch' : 2, + 'default' : 1 + } + self.DLYPTN = r'(^|\s)-(d|BP|BatchProcess|DelayedMode)(\s|$)' + self.DLYOPT = { + 'dsarch' : ' -d', + 'dsupdt' : ' -d', + 'dsrqst' : ' -d' + } + + # params: dict array holding option values + # opt: 2 - each value of the dict array is a list; otherwise 1 + # addhost: 1 to add host name too + # initially set batch options passed in from the command line + def set_batch_options(self, params, opt, addhost = 0): + if 'QS' in params: self.BOPTIONS['qoptions'] = (params['QS'][0] if opt == 2 else params['QS']) + if 'MO' in params: self.BOPTIONS['modules'] = (params['MO'][0] if opt == 2 else params['MO']) + if 'EV' in params: self.BOPTIONS['environments'] = (params['EV'][0] if opt == 2 else params['EV']) + if addhost and 'HN' in params: self.BOPTIONS['hostname'] = (params['HN'][0] if opt == 2 else params['HN']) + + # boptions: dict array holding batch options + # refresh: 1 to clean the previously cached global batch options + # checkkey: 1 to check and validate pre-defined fields + # fill Batch options recorded in RDADB + def fill_batch_options(self, boptions, refresh = 0, checkkey = 0): + if refresh: + for bkey in self.BOPTIONS: + self.BOPTIONS[bkey] = None # clean the hash before filling it up + if not boptions: return + for bkey in boptions: + if not checkkey or bkey in self.BOPTIONS: + self.BOPTIONS[bkey] = boptions[bkey] + + # bkey: batch option field name + # bval: batch option value + # override: 1 to override an existing option + # fill a single Batch option + def set_one_boption(self, bkey, bval, override = 0): + if bval: + if override or not (bkey in self.BOPTIONS and self.BOPTIONS[bkey]): self.BOPTIONS[bkey] = bval + elif override and bkey in self.BOPTIONS and self.BOPTIONS[bkey]: + self.BOPTIONS[bkey] = None + + # fill the passed-in dict record with the pre-saved batch options + def get_batch_options(self, pgrec = None): + record = {} + for bkey in self.BOPTIONS: + if pgrec and bkey in pgrec and pgrec[bkey]: + record[bkey] = pgrec[bkey] + elif self.BOPTIONS[bkey]: + record[bkey] = self.BOPTIONS[bkey] + return record + + # return delayed-mode option to append to argv string for a specified cmd + def append_delayed_mode(self, cmd, argv): + if cmd in self.DLYOPT and not re.search(self.DLYPTN, argv, re.I): + return self.DLYOPT[cmd] + else: + return '' + + # check given doptions and cmd, and return the try limit
and specified hosts + def get_delay_options(self, doptions, cmd): + mcount = 0 + hosts = None + if doptions: + for bval in doptions: + if re.match(r'^(\d+)$', bval): + mcount = int(bval) + if mcount > 99: mcount = 99 + else: + hosts = bval + if mcount == 0: mcount = self.get_try_limit(cmd) + if hosts: self.set_one_boption('hostname', hosts, 1) + return (mcount, hosts) + + # find an existing dscheck record from the cached command argument; create and initialize one if not exist + def init_dscheck(self, oindex, otype, cmd, dsid, action, workdir = None, specialist = None, doptions = None, logact = 0): + cidx = 0 + argv = self.argv_to_string(sys.argv[1:], 0, "Process in Delayed Mode") + argextra = None + if not logact: logact = self.LGEREX + if not workdir: workdir = os.getcwd() + if not specialist: specialist = self.PGLOG['CURUID'] + (mcount, hosts) = self.get_delay_options(doptions, cmd) + if len(argv) > 100: + argextra = argv[100:] + argv = argv[0:100] + bck = self.PGLOG['BCKGRND'] + self.PGLOG['BCKGRND'] = 0 + cinfo = "{}-{}-Chk".format(self.PGLOG['HOSTNAME'], self.current_datetime()) + pgrec = self.get_dscheck(cmd, argv, workdir, specialist, argextra, logact) + if pgrec: # found existing dscheck record + cidx = pgrec['cindex'] + cmsg = "{}{}: {} batch process ".format(cinfo, cidx, self.get_command_info(pgrec)) + cidx = self.lock_dscheck(cidx, 1, self.LOGWRN) + if cidx < 0: + self.pglog(cmsg + "is Running, No restart", self.LOGWRN) + sys.exit(0) + if cidx > 0: + if not hosts and pgrec['hostname']: + hosts = pgrec['hostname'] + self.set_one_boption('hostname', hosts, 0) + if mcount: pgrec['mcount'] = mcount + self.DSCHK['chkcnd'] = "cindex = {}".format(cidx) + if(pgrec['status'] == 'D' or pgrec['fcount'] and pgrec['dcount'] >= pgrec['fcount'] or + pgrec['tcount'] > pgrec['mcount'] or not pgrec['pid'] and pgrec['tcount'] == pgrec['mcount']): + self.pglog("{}is {}".format(cmsg, ('Done' if pgrec['status'] == 'D' else 'Finished')), self.LOGWRN) + self.lock_dscheck(cidx, 0, logact) + sys.exit(0) + if not cidx: # add new dscheck record + record = {} + if hosts and re.match(r'^(ds\d|\d)\d\d.\d$', hosts): + self.pglog(hosts + ": Cannot pass DSID for hostname to submit batch process", self.LGEREX) + if oindex: self.set_command_control(oindex, otype, cmd, logact) + record['oindex'] = oindex + record['dsid'] = dsid + record['action'] = action + record['otype'] = otype + (record['date'], record['time']) = self.get_date_time() + record['command'] = cmd + record['argv'] = argv + if mcount > 0: record['mcount'] = mcount + record['specialist'] = specialist + record['workdir'] = workdir + if argextra: record['argextra'] = argextra + record.update(self.get_batch_options()) + cidx = self.pgadd("dscheck", record, logact|self.AUTOID) + if cidx: + cmsg = "{}{}: {} Adds a new check".format(cinfo, cidx, self.get_command_info(record)) + self.pglog(cmsg, self.LOGWRN) + sys.exit(0) + + (chost, cpid) = self.current_process_info() + (rhost, rpid) = self.current_process_info(1) + + if not self.check_command_specialist_host(hosts, chost, specialist, cmd, action, self.LOGERR): + self.lock_dscheck(cidx, 0, logact) + sys.exit(1) + + record = {} + record['status'] = "R" + if mcount > 0: record['mcount'] = mcount + record['bid'] = (cpid if self.PGLOG['CURBID'] else 0) + if pgrec['stttime'] and pgrec['chktime'] > pgrec['stttime']: + (record['ttltime'], record['quetime']) = self.get_dscheck_runtime(pgrec) + record['chktime'] = record['stttime'] = int(time.time()) + if not pgrec['subtime']: record['subtime'] = record['stttime'] 
+ if dsid and not pgrec['dsid']: record['dsid'] = dsid + if action and not pgrec['action']: record['action'] = action + if oindex and not pgrec['oindex']: record['oindex'] = oindex + if otype and not pgrec['otype']: record['otype'] = otype + if argv and not pgrec['argv']: record['argv'] = argv + record['runhost'] = rhost + if pgrec['command'] == "dsrqst" and pgrec['oindex']: + (record['fcount'], record['dcount'], record['size']) = self.get_dsrqst_counts(pgrec, logact) + self.pgupdt("dscheck", record, self.DSCHK['chkcnd'], logact) + self.DSCHK['dcount'] = pgrec['dcount'] + self.DSCHK['fcount'] = pgrec['fcount'] + self.DSCHK['size'] = pgrec['size'] + self.DSCHK['cindex'] = cidx + self.DSCHK['dflags'] = pgrec['dflags'] + self.PGLOG['DSCHECK'] = self.DSCHK # add global access link + if not self.PGLOG['BCKGRND']: self.PGLOG['BCKGRND'] = 1 # turn off screen output if not yet + tcnt = pgrec['tcount'] + if not pgrec['pid']: tcnt += 1 + tstr = "the {} run".format(self.int2order(tcnt)) if tcnt > 1 else "running" + pstr = "{}<{}>".format(chost, cpid) + if rhost != chost: pstr += "/{}<{}>".format(rhost, rpid) + self.pglog("{}Starts {} ({})".format(cmsg, tstr, pstr), self.LOGWRN) + self.PGLOG['BCKGRND'] = bck + return cidx + + # check and validate if the current host is configured for the specialist + def check_command_specialist_host(self, hosts, chost, specialist, cmd, act = 0, logact = 0): + if cmd == 'dsrqst' and act == 'PR': + mflag = 'G' + else: + cnd = "command = '{}' AND specialist = '{}' AND hostname = '{}'".format(cmd, specialist, chost) + pgrec = self.pgget("dsdaemon", 'matchhost', cnd, logact) + mflag = (pgrec['matchhost'] if pgrec else 'G') + return self.check_process_host(hosts, chost, mflag, "{}-{}".format(specialist, cmd), logact) + + # set command control info + def set_command_control(self, oindex, otype, cmd, logact = 0): + if not oindex: return + pgctl = None + if cmd == "dsrqst": + if otype == 'P': + pgrec = self.pgget("ptrqst", "rindex", "pindex = {}".format(oindex), logact) + if pgrec: pgctl = self.get_partition_control(pgrec, None, None, logact) + else: + pgrec = self.pgget("dsrqst", "dsid, gindex, cindex, rqsttype", "rindex = {}".format(oindex), logact) + if pgrec: pgctl = self.get_dsrqst_control(pgrec, logact) + elif cmd == "dsupdt": + if otype == 'L': + pgrec = self.pgget("dlupdt", "cindex", "lindex = {}".format(oindex), logact) + if not (pgrec and pgrec['cindex']): return + oindex = pgrec['cindex'] + pgctl = self.pgget("dcupdt", self.BFIELDS, "cindex = {}".format(oindex), logact) + if pgctl: + for bkey in pgctl: + self.set_one_boption(bkey, pgctl[bkey], 0) + + # get dsrqst control info + def get_dsrqst_control(self, pgrqst, logact = 0): + cflds = self.BFIELDS + if 'ptcount' in pgrqst and pgrqst['ptcount'] == 0: cflds += ", ptlimit, ptsize" + if pgrqst['cindex']: + pgctl = self.pgget("rcrqst", cflds, "cindex = {}".format(pgrqst['cindex']), logact) + else: + pgctl = None + if not pgctl: + gcnd = "dsid = '{}' AND gindex = ".format(pgrqst['dsid']) + if pgrqst['rqsttype'] in "ST": + tcnd = " AND (rqsttype = 'T' OR rqsttype = 'S')" + else: + tcnd = " AND rqsttype = '{}'".format(pgrqst['rqsttype']) + gindex = pgrqst['gindex'] + while True: + pgctl = self.pgget("rcrqst", cflds, "{}{}{}".format(gcnd, gindex, tcnd), logact) + if pgctl or not gindex: break + pgctl = self.pgget("dsgroup", "pindex", "{}{}".format(gcnd, gindex), logact) + if not pgctl: break + gindex = pgctl['pindex'] + return pgctl + + # get dsrqst partition control info + def get_partition_control(self, pgpart, pgrqst = 
None, pgctl = None, logact = 0): + if not pgctl: + if not pgrqst and pgpart['rindex']: + pgrqst = self.pgget("dsrqst", "dsid, gindex, cindex, rqsttype", "rindex = {}".format(pgpart['rindex']), logact) + if pgrqst: pgctl = self.get_dsrqst_control(pgrqst, logact) + return pgctl + + # build the dynamic options + def get_dynamic_options(self, cmd, oindex, otype): + + if oindex: cmd += " {}".format(oindex) + if otype: cmd += ' ' + otype + ret = options = '' + for loop in range(3): + ret = self.pgsystem(cmd, self.LOGWRN, 279) # 1+2+4+16+256 + if loop < 2 and self.PGLOG['SYSERR'] and 'Connection timed out' in self.PGLOG['SYSERR']: + time.sleep(self.PGSIG['ETIME']) + else: + break + if ret: + ret = ret.strip() + ms = re.match(r'^(-.+)/(-.+)$', ret) + if ms: + options = ms.group(1) if otype == 'R' else ms.group(2) + elif re.match(r'^(-.+)$', ret): + options = ret + if not options: + if ret: self.PGLOG['SYSERR'] += ret + self.PGLOG['SYSERR'] += " for {}".format(cmd) + + return options + + # retrieve a dscheck record for provided cmd, argv and other conditions + def get_dscheck(self, cmd, argv, workdir, specialist, argextra = None, logact = 0): + cnd = "command = '{}' AND specialist = '{}' AND argv = '{}'".format(cmd, specialist, argv) + pgrecs = self.pgmget("dscheck", "*", cnd, logact) + cnt = len(pgrecs['cindex']) if pgrecs else 0 + if cnt == 0 and cmd in self.DLYOPT: + ms = re.match(r'^(.+){}$'.format(self.DLYOPT[cmd]), argv) + if ms: + argv = ms.group(1) + cnt = 1 + elif not argextra: + dopt = self.append_delayed_mode(cmd, argv) + if dopt: + argv += dopt + cnt = 1 + if cnt: + cnd = "command = '{}' AND specialist = '{}' AND argv = '{}'".format(cmd, specialist, argv) + pgrecs = self.pgmget("dscheck", "*", cnd, logact) + cnt = len(pgrecs['cindex']) if pgrecs else 0 + for i in range(cnt): + pgrec = self.onerecord(pgrecs, i) + if pgrec['workdir'] and self.pgcmp(workdir, pgrec['workdir']): continue + if self.pgcmp(argextra, pgrec['argextra']): continue + return pgrec + return None + + # delete one dsceck record fo given cindex + def delete_dscheck(self, pgrec, chkcnd, logact = 0): + if not chkcnd: + if pgrec: + chkcnd = "cindex = {}".format(pgrec['cindex']) + elif 'chkcnd' in self.DSCHK: + chkcnd = self.DSCHK['chkcnd'] + else: + return 0 # nothing to delete + if not pgrec: + pgrec = self.pgget("dscheck", "*", chkcnd, logact) + if not pgrec: return 0 # dscheck record is gone + record = {} + record['cindex'] = pgrec['cindex'] + record['command'] = pgrec['command'] + record['dsid'] = (pgrec['dsid'] if pgrec['dsid'] else self.PGLOG['DEFDSID']) + record['action'] = (pgrec['action'] if pgrec['action'] else "UN") + record['specialist'] = pgrec['specialist'] + record['hostname'] = pgrec['runhost'] + if pgrec['bid']: record['bid'] = pgrec['bid'] + if pgrec['command'] == "dsrqst" and pgrec['oindex']: + (record['fcount'], record['dcount'], record['size']) = self.get_dsrqst_counts(pgrec, logact) + else: + record['fcount'] = pgrec['fcount'] + record['dcount'] = pgrec['dcount'] + record['size'] = pgrec['size'] + record['tcount'] = pgrec['tcount'] + record['date'] = pgrec['date'] + record['time'] = pgrec['time'] + record['closetime'] = self.curtime(1) + (record['ttltime'], record['quetime']) = self.get_dscheck_runtime(pgrec) + record['argv'] = pgrec['argv'] + if pgrec['argextra']: + record['argv'] += pgrec['argextra'] + if len(record['argv']) > 255: record['argv'] = record['argv'][0:255] + if pgrec['errmsg']: record['errmsg'] = pgrec['errmsg'] + record['status'] = ('F' if pgrec['status'] == "R" else pgrec['status']) 
+ if self.pgget("dschkhist", "", chkcnd): + stat = self.pgupdt("dschkhist", record, chkcnd, logact) + else: + stat = self.pgadd("dschkhist", record, logact) + if stat: + cmsg = "{} cleaned as '{}' at {} on {}".format(self.get_command_info(pgrec), record['status'], self.current_datetime(), self.PGLOG['HOSTNAME']) + self.pglog("Chk{}: {}".format(pgrec['cindex'], cmsg), self.LOGWRN|self.FRCLOG) + stat = self.pgdel("dscheck", chkcnd, logact) + if record['status'] == "E" and 'errmsg' in record: + self.pglog("Chk{}: {} Exits with Error\n{}".format(pgrec['cindex'], self.get_command_info(pgrec), record['errmsg']), logact) + return stat + + # get dsrqst fcount and dcount + def get_dsrqst_counts(self, pgchk, logact = 0): + fcount = pgchk['fcount'] + dcount = pgchk['dcount'] + size = pgchk['size'] + if pgchk['otype'] == 'P': + table = 'ptrqst' + cnd = "pindex = {}".format(pgchk['oindex']) + fields = "fcount" + else: + table = 'dsrqst' + cnd = "rindex = {}".format(pgchk['oindex']) + fields = "fcount, pcount, size_input, size_request" + pgrec = self.pgget(table, fields, cnd, logact) + if pgrec: + fcnt = pgrec['fcount'] + else: + fcnt = 0 + pgrec = {'fcount' : 0} + if not fcnt: fcnt = self.pgget("wfrqst", "", cnd, logact) + if fcnt and fcount != fcnt: fcount = fcnt + if fcount: + if 'pcount' in pgrec and pgrec['pcount']: + dcnt = pgrec['pcount'] + else: + dcnt = self.pgget("wfrqst", "", cnd + " AND status = 'O'", logact) + if dcnt and dcnt != dcount: dcount = dcnt + if not size: + if 'size_input' in pgrec and pgrec['size_input']: + if size != pgrec['size_input']: size = pgrec['size_input'] + elif 'size_request' in pgrec and pgrec['size_request']: + if size != pgrec['size_request']: size = pgrec['size_request'] + elif fcnt: # evaluate total size only if file count is set in request/partition record + pgrec = self.pgget("wfrqst", "sum(size) data_size", cnd, logact) + if pgrec and pgrec['data_size']: size = pgrec['data_size'] + return (fcount, dcount, size) + + # set dscheck fcount + def set_dscheck_fcount(self, count, logact = 0): + record = {'fcount' : count, 'chktime' : int(time.time())} + self.pgupdt("dscheck", record, self.DSCHK['chkcnd'], logact) + self.DSCHK['fcount'] = count + return self.DSCHK['dcount'] # return Done count + + # set dscheck dcount + def set_dscheck_dcount(self, count, size, logact = 0): + record = {'dcount' : count, 'size' : size, 'chktime' : int(time.time())} + self.pgupdt("dscheck", record, self.DSCHK['chkcnd'], logact) + self.DSCHK['dcount'] = count + self.DSCHK['size'] = size + return self.DSCHK['dcount'] # return Done count + + # add dscheck dcount + def add_dscheck_dcount(self, count, size, logact = 0): + record = {} + if count: + self.DSCHK['dcount'] += count + record['dcount'] = self.DSCHK['dcount'] + if size: + self.DSCHK['size'] += size + record['size'] = self.DSCHK['size'] + record['chktime'] = int(time.time()) + self.pgupdt("dscheck", record, self.DSCHK['chkcnd'], logact) + return self.DSCHK['dcount'] # return Done count + + # set dscheck source information + def set_dscheck_attribute(self, fname, value, logact = 0): + record = {} + if value: record[fname] = value + record['chktime'] = int(time.time()) + self.pgupdt("dscheck", record, self.DSCHK['chkcnd'], logact) + + # update dscheck status + def record_dscheck_status(self, stat, logact = 0): + pgrec = self.pgget("dscheck", "lockhost, pid", self.DSCHK['chkcnd'], logact) + if not pgrec: return 0 + if not (pgrec['pid'] and pgrec['lockhost']): return 0 + (chost, cpid) = self.current_process_info() + if pgrec['pid'] != 
cpid or pgrec['lockhost'] != chost: return 0 + + # update dscheck status only if it is still locked by the current process + record = {'status' : stat, 'chktime' : int(time.time()), 'pid' : 0} + return self.pgupdt("dscheck", record, self.DSCHK['chkcnd'], logact) + + # get the number of tries to execute for a given cmd under dscheck control + def get_try_limit(self, cmd): + return (self.TRYLMTS[cmd] if cmd in self.TRYLMTS else self.TRYLMTS['default']) + + # get the execution time for a dscheck command + @staticmethod + def get_dscheck_runtime(pgrec, current = 0): + ttime = (0 if current else pgrec['ttltime']) + qtime = (0 if current else pgrec['quetime']) + if pgrec['subtime']: + ttime += (pgrec['chktime'] - pgrec['subtime']) + if pgrec['stttime']: qtime += (pgrec['stttime'] - pgrec['subtime']) + return (ttime, qtime) + + # retrieve a command string from a given dscheck record + @staticmethod + def get_command_info(pgrec): + if pgrec['oindex']: + if pgrec['command'] == "dsupdt": + cinfo = "UC{}".format(pgrec['oindex']) + elif pgrec['command'] == "dsrqst": + if pgrec['otype'] == "P": + cinfo = "RPT{}".format(pgrec['oindex']) + else: + cinfo = "Rqst{}".format(pgrec['oindex']) + else: + cinfo = "{}-{}".format(pgrec['command'], pgrec['oindex']) + else: + cinfo = pgrec['command'] + if pgrec['dsid']: cinfo += " " + pgrec['dsid'] + if pgrec['action']: cinfo += " " + pgrec['action'] + cinfo += " of " + pgrec['specialist'] + return cinfo + + # change the dscheck original command information + def change_dscheck_oinfo(self, oidx, otype, nidx, ntype): + cnd = "oindex = {} AND otype = '{}'".format(oidx, otype) + pgchk = self.pgget('dscheck', 'cindex, oindex, otype', cnd, self.LGEREX) + if not pgchk: return 0 # missing dscheck record to change + record = {} + self.DSCHK['oindex'] = record['oindex'] = nidx + self.DSCHK['otype'] = record['otype'] = ntype + cnd = "cindex = {}".format(pgchk['cindex']) + return self.pgupdt('dscheck', record, cnd, self.LGEREX) diff --git a/src/rda_python_common/pg_dbi.py b/src/rda_python_common/pg_dbi.py new file mode 100644 index 0000000..69cb123 --- /dev/null +++ b/src/rda_python_common/pg_dbi.py @@ -0,0 +1,1885 @@ +# +############################################################################### +# +# Title : pg_dbi.py -- PostgreSQL DataBase Interface +# Author : Zaihua Ji, zji@ucar.edu +# Date : 06/07/2022 +# 2025-01-10 transferred to package rda_python_common from +# https://github.com/NCAR/rda-shared-libraries.git +# 2025-11-24 converted to class PgDBI +# Purpose : Python library module to handle query and manipulate PostgreSQL database +# +# Github : https://github.com/NCAR/rda-python-common.git +# +############################################################################### + +import os +import re +import time +import hvac +from datetime import datetime +import psycopg2 as PgSQL +from psycopg2.extras import execute_values +from psycopg2.extras import execute_batch +from os import path as op +from .pg_log import PgLOG + +class PgDBI(PgLOG): + + # PostgreSQL-specific query timestamp formats; wrapped in staticmethod() + # so instance access does not bind self as the fn argument + fmtyr = staticmethod(lambda fn: "extract(year from {})::int".format(fn)) + fmtqt = staticmethod(lambda fn: "extract(quarter from {})::int".format(fn)) + fmtmn = staticmethod(lambda fn: "extract(month from {})::int".format(fn)) + fmtdt = staticmethod(lambda fn: "date({})".format(fn)) + fmtym = staticmethod(lambda fn: "to_char({}, 'yyyy-mm')".format(fn)) + fmthr = staticmethod(lambda fn: "extract(hour from {})::int".format(fn)) + + def __init__(self): + super().__init__() # initialize parent class + self.pgdb = None # reference to a connected database object +
self.curtran = 0 # 0 - no transaction, 1 - in transaction + self.NMISSES = [] # array of missing userno + self.LMISSES = [] # array of missing logname + self.TABLES = {} # record table field information + self.SEQUENCES = {} # record table sequence fieldnames + self.SPECIALIST = {} # hash array references to specialist info of dsids + self.SYSDOWN = {} + self.PGDBI = {} + self.ADDTBLS = [] + self.PGSIGNS = ['!', '<', '>', '<>'] + self.CHCODE = 1042 + # hard-coded db ports for dbnames + self.DBPORTS = {'default' : 0} + self.DBPASS = {} + self.DBBAOS = {} + # hard-coded db names for given schema names + self.DBNAMES = { + 'ivaddb' : 'ivaddb', + 'cntldb' : 'ivaddb', + 'cdmsdb' : 'ivaddb', + 'ispddb' : 'ispddb', + 'obsua' : 'upadb', + 'default' : 'rdadb', + } + # hard-coded socket paths for machine_dbnames + self.DBSOCKS = {'default' : ''} + # home path for checking db on an alternate host + self.VIEWHOMES = {'default' : self.PGLOG['DSSDBHM']} + # add more to the list if other reserved words are used as field names + self.PGRES = ['end', 'window'] + self.SETPGDBI('DEFDB', 'rdadb') + self.SETPGDBI("DEFSC", 'dssdb') + self.SETPGDBI('DEFHOST', self.PGLOG['PSQLHOST']) + self.SETPGDBI("DEFPORT", 0) + self.SETPGDBI("DEFSOCK", '') + self.SETPGDBI("DBNAME", self.PGDBI['DEFDB']) + self.SETPGDBI("SCNAME", self.PGDBI['DEFSC']) + self.SETPGDBI("LNNAME", self.PGDBI['DEFSC']) + self.SETPGDBI("PWNAME", None) + self.SETPGDBI("DBHOST", (os.environ['DSSDBHOST'] if os.environ.get('DSSDBHOST') else self.PGDBI['DEFHOST'])) + self.SETPGDBI("DBPORT", 0) + self.SETPGDBI("ERRLOG", self.LOGERR) # default error logact + self.SETPGDBI("EXITLG", self.LGEREX) # default exit logact + self.SETPGDBI("DBSOCK", '') + self.SETPGDBI("DATADIR", self.PGLOG['DSDHOME']) + self.SETPGDBI("BCKPATH", self.PGLOG['DSSDBHM'] + "/backup") + self.SETPGDBI("SQLPATH", self.PGLOG['DSSDBHM'] + "/sql") + self.SETPGDBI("VWNAME", self.PGDBI['DEFSC']) + self.SETPGDBI("VWPORT", 0) + self.SETPGDBI("VWSOCK", '') + self.SETPGDBI("BAOURL", 'https://bao.k8s.ucar.edu/') + + self.PGDBI['DBSHOST'] = self.get_short_host(self.PGDBI['DBHOST']) + self.PGDBI['DEFSHOST'] = self.get_short_host(self.PGDBI['DEFHOST']) + self.PGDBI['VWHOST'] = self.PGLOG['PVIEWHOST'] + self.PGDBI['MSHOST'] = self.PGLOG['PMISCHOST'] + self.PGDBI['VWSHOST'] = self.get_short_host(self.PGDBI['VWHOST']) + self.PGDBI['MSSHOST'] = self.get_short_host(self.PGDBI['MSHOST']) + self.PGDBI['VWHOME'] = (self.VIEWHOMES[self.PGLOG['HOSTNAME']] if self.PGLOG['HOSTNAME'] in self.VIEWHOMES else self.VIEWHOMES['default']) + self.PGDBI['SCPATH'] = None # additional schema path for set search_path + self.PGDBI['VHSET'] = 0 + self.PGDBI['PGSIZE'] = 1000 # number of records for page_size + self.PGDBI['MTRANS'] = 5000 # max number of changes in one transaction + self.PGDBI['MAXICNT'] = 6000000 # maximum number of records in each table + + # set environments and defaults + def SETPGDBI(self, name, value): + self.PGDBI[name] = self.get_environment(name, value) + + # create a pgddl command string with + # table name (tname), prefix (pre) and suffix (suf) + def get_pgddl_command(self, tname, pre = None, suf = None, scname = None): + ms = re.match(r'^(.+)\.(.+)$', tname) + if not scname: + if ms: + scname = ms.group(1) + tname = ms.group(2) + else: + scname = self.PGDBI['SCNAME'] + xy = '' + if suf: xy += ' -x ' + suf + if pre: xy += ' -y ' + pre + return "pgddl {} -aa -h {} -d {} -c {} -u {}{}".format(tname, self.PGDBI['DBHOST'], self.PGDBI['DBNAME'], scname, self.PGDBI['LNNAME'], xy)
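A minimal usage sketch of the connection helpers that follow (assuming the inherited PgLOG setup needs no constructor arguments; the schema choices are only examples):

```python
# Hypothetical sketch: retargeting a PgDBI instance.
# The *_dbname() presets and default_scinfo() only record settings;
# set_scname() drops any live connection when something changed, and the
# next pgconnect()/pgcursor() call reconnects with the new settings.
pgdbi = PgDBI()
pgdbi.ivaddb_dbname()                   # preset: ivaddb database, ivaddb schema
pgdbi.default_scinfo('rdadb', 'dssdb')  # back to the defaults, explicitly
```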
+ + # set default connection for dssdb PostgreSQL Server + def dssdb_dbname(self): + self.default_scinfo(self.PGDBI['DEFDB'], self.PGDBI['DEFSC'], self.PGLOG['PSQLHOST']) + dssdb_scname = dssdb_dbname + + # set default connection for obsua PostgreSQL Server + def obsua_dbname(self): + self.default_scinfo('upadb', 'obsua', self.PGLOG['PMISCHOST']) + obsua_scname = obsua_dbname + + # set default connection for ivaddb PostgreSQL Server + def ivaddb_dbname(self): + self.default_scinfo('ivaddb', 'ivaddb', self.PGLOG['PMISCHOST']) + ivaddb_scname = ivaddb_dbname + + # set default connection for ispddb PostgreSQL Server + def ispddb_dbname(self): + self.default_scinfo('ispddb', 'ispddb', self.PGLOG['PMISCHOST']) + ispddb_scname = ispddb_dbname + + # set default schema info with hard-coded info + def default_dbinfo(self, scname = None, dbhost = None, lnname = None, pwname = None, dbport = None, socket = None): + return self.default_scinfo(self.get_dbname(scname), scname, dbhost, lnname, pwname, dbport, socket) + + # set default database/schema info with hard-coded info + def default_scinfo(self, dbname = None, scname = None, dbhost = None, lnname = None, pwname = None, dbport = None, socket = None): + if not dbname: dbname = self.PGDBI['DEFDB'] + if not scname: scname = self.PGDBI['DEFSC'] + if not dbhost: dbhost = self.PGDBI['DEFHOST'] + if dbport is None: dbport = self.PGDBI['DEFPORT'] + if socket is None: socket = self.PGDBI['DEFSOCK'] + self.set_scname(dbname, scname, lnname, pwname, dbhost, dbport, socket) + + # get the database sock file name of a given dbname for local connection + def get_dbsock(self, dbname): + return (self.DBSOCKS[dbname] if dbname in self.DBSOCKS else self.DBSOCKS['default']) + + # get the database port number of a given dbname for remote connection + def get_dbport(self, dbname): + return (self.DBPORTS[dbname] if dbname in self.DBPORTS else self.DBPORTS['default']) + + # get the database name of a given schema name for remote connection + def get_dbname(self, scname): + if scname: + if scname in self.DBNAMES: return self.DBNAMES[scname] + return self.DBNAMES['default'] + return None + + # set connection for viewing database information + def view_dbinfo(self, scname = None, lnname = None, pwname = None): + self.view_scinfo(self.get_dbname(scname), scname, lnname, pwname) + + # set connection for viewing database/schema information + def view_scinfo(self, dbname = None, scname = None, lnname = None, pwname = None): + if not dbname: dbname = self.PGDBI['DEFDB'] + if not scname: scname = self.PGDBI['DEFSC'] + self.set_scname(dbname, scname, lnname, pwname, self.PGLOG['PVIEWHOST'], self.PGDBI['VWPORT']) + + # set connection for given scname + def set_dbname(self, scname = None, lnname = None, pwname = None, dbhost = None, dbport = None, socket = None): + if not scname: scname = self.PGDBI['DEFSC'] + self.set_scname(self.get_dbname(scname), scname, lnname, pwname, dbhost, dbport, socket) + + # set connection for given database & schema names + def set_scname(self, dbname = None, scname = None, lnname = None, pwname = None, dbhost = None, dbport = None, socket = None): + changed = 0 + if dbname and dbname != self.PGDBI['DBNAME']: + self.PGDBI['DBNAME'] = dbname + changed = 1 + if scname and scname != self.PGDBI['SCNAME']: + self.PGDBI['LNNAME'] = self.PGDBI['SCNAME'] = scname + changed = 1 + if lnname and lnname != self.PGDBI['LNNAME']: + self.PGDBI['LNNAME'] = lnname + changed = 1 + if pwname != self.PGDBI['PWNAME']: + self.PGDBI['PWNAME'] = pwname + changed = 1 + if dbhost and dbhost != self.PGDBI['DBHOST']: + self.PGDBI['DBHOST'] =
dbhost + self.PGDBI['DBSHOST'] = self.get_short_host(dbhost) + changed = 1 + if self.PGDBI['DBSHOST'] == self.PGLOG['HOSTNAME']: + if socket is None: socket = self.get_dbsock(dbname) + if socket != self.PGDBI['DBSOCK']: + self.PGDBI['DBSOCK'] = socket + changed = 1 + else: + if not dbport: dbport = self.get_dbport(dbname) + if dbport != self.PGDBI['DBPORT']: + self.PGDBI['DBPORT'] = dbport + changed = 1 + if changed and self.pgdb is not None: self.pgdisconnect(1) + + # start a database transaction and exit if fails + def starttran(self): + if self.curtran == 1: self.endtran() # try to end previous transaction + if not self.pgdb: + self.pgconnect(0, 0, False) + else: + try: + self.pgdb.isolation_level + except PgSQL.OperationalError as e: + self.pgconnect(0, 0, False) + if self.pgdb.closed: + self.pgconnect(0, 0, False) + elif self.pgdb.autocommit: + self.pgdb.autocommit = False + self.curtran = 1 + + # end a transaction with changes committed and exit if fails + def endtran(self, autocommit = True): + if self.curtran and self.pgdb: + if not self.pgdb.closed: self.pgdb.commit() + self.pgdb.autocommit = autocommit + self.curtran = 0 if autocommit else 1 + + # end a transaction without changes committed and exit inside if fails + def aborttran(self, autocommit = True): + if self.curtran and self.pgdb: + if not self.pgdb.closed: self.pgdb.rollback() + self.pgdb.autocommit = autocommit + self.curtran = 0 if autocommit else 1 + + # record error message to dscheck record and clean the lock + def record_dscheck_error(self, errmsg, logact = None): + if logact is None: logact = self.PGDBI['EXITLG'] + cnd = self.PGLOG['DSCHECK']['chkcnd'] + if self.PGLOG['NOQUIT']: self.PGLOG['NOQUIT'] = 0 + dflags = self.PGLOG['DSCHECK']['dflags'] + pgrec = self.pgget("dscheck", "mcount, tcount, lockhost, pid", cnd, logact) + if not pgrec: return 0 + if not pgrec['pid'] and not pgrec['lockhost']: return 0 + (chost, cpid) = self.current_process_info() + if pgrec['pid'] != cpid or pgrec['lockhost'] != chost: return 0 + # update dscheck record only if it is still locked by the current process + record = {} + record['chktime'] = int(time.time()) + if logact&self.EXITLG: + record['status'] = "E" + record['pid'] = 0 # release lock + if dflags: + record['dflags'] = dflags + record['mcount'] = pgrec['mcount'] + 1 + else: + record['dflags'] = '' + if errmsg: + errmsg = self.break_long_string(errmsg, 512, None, 50, None, 50, 25) + if pgrec['tcount'] > 1: errmsg = "Try {}: {}".format(pgrec['tcount'], errmsg) + record['errmsg'] = errmsg + return self.pgupdt("dscheck", record, cnd, logact) + + # local function to log query error + def qelog(self, dberror, sleep, sqlstr, vals, pgcnt, logact = None): + if logact is None: logact = self.PGDBI['ERRLOG'] + retry = " Sleep {}(sec) & ".format(sleep) if sleep else " " + if sqlstr: + if sqlstr.find("Retry ") == 0: + retry += "the {} ".format(self.int2order(pgcnt+1)) + elif sleep: + retry += "the {} Retry: \n".format(self.int2order(pgcnt+1)) + elif pgcnt: + retry = " Error the {} Retry: \n".format(self.int2order(pgcnt)) + else: + retry = "\n" + sqlstr = retry + sqlstr + else: + sqlstr = '' + if vals: sqlstr += " with values: " + str(vals) + if dberror: sqlstr = "{}\n{}".format(dberror, sqlstr) + if logact&self.EXITLG and self.PGLOG['DSCHECK']: self.record_dscheck_error(sqlstr, logact) + self.pglog(sqlstr, logact) + if sleep: time.sleep(sleep) + return self.FAILURE # if not exit in self.pglog() + + # try to add a new table according the table not exist error + def try_add_table(self, 
dberror, logact): + ms = re.match(r'^42P01 ERROR: relation "(.+)" does not exist', dberror) + if ms: + tname = ms.group(1) + self.add_new_table(tname, logact = logact) + + # add a table for given table name + def add_a_table(self, tname, logact): + self.add_new_table(tname, logact = logact) + + # add a new table for given table name + def add_new_table(self, tname, pre = None, suf = None, logact = 0): + if pre: + tbname = '{}_{}'.format(pre, tname) + elif suf: + tbname = '{}_{}'.format(tname, suf) + else: + tbname = tname + if tbname in self.ADDTBLS: return + self.pgsystem(self.get_pgddl_command(tname, pre, suf), logact) + self.ADDTBLS.append(tbname) + + # validate a table for given table name (tname), prefix (pre) and suffix (suf), + # and add it if not existing + def valid_table(self, tname, pre = None, suf = None, logact = 0): + if pre: + tbname = '{}_{}'.format(pre, tname) + elif suf: + tbname = '{}_{}'.format(tname, suf) + else: + tbname = tname + if tbname in self.ADDTBLS: return tbname + if not self.pgcheck(tbname, logact): self.pgsystem(self.get_pgddl_command(tname, pre, suf), logact) + self.ADDTBLS.append(tbname) + return tbname + + # local function to check a query error and decide whether to retry + def check_dberror(self, pgerr, pgcnt, sqlstr, ary, logact = None): + if logact is None: logact = self.PGDBI['ERRLOG'] + ret = self.FAILURE + pgcode = pgerr.pgcode + pgerror = pgerr.pgerror + dberror = "{} {}".format(pgcode, pgerror) if pgcode and pgerror else str(pgerr) + if pgcnt < self.PGLOG['DBRETRY']: + if not pgcode: + if self.PGDBI['DBNAME'] == self.PGDBI['DEFDB'] and self.PGDBI['DBSHOST'] != self.PGDBI['DEFSHOST']: + self.default_dbinfo() + self.qelog(dberror, 0, "Retry Connecting to {} on {}".format(self.PGDBI['DBNAME'], self.PGDBI['DBHOST']), ary, pgcnt, self.MSGLOG) + else: + self.qelog(dberror, 5+5*pgcnt, "Retry Connecting", ary, pgcnt, self.LOGWRN) + return self.SUCCESS + elif re.match(r'^(08|57)', pgcode): + self.qelog(dberror, 0, "Retry Connecting", ary, pgcnt, self.LOGWRN) + self.pgconnect(1, pgcnt + 1) + return (self.FAILURE if not self.pgdb else self.SUCCESS) + elif re.match(r'^55', pgcode): # try to lock again + self.qelog(dberror, 10, "Retry Locking", ary, pgcnt, self.LOGWRN) + return self.SUCCESS + elif pgcode == '25P02': # aborted transaction: roll back and retry + self.qelog(dberror, 0, "Rollback transaction", ary, pgcnt, self.LOGWRN) + self.pgdb.rollback() + return self.SUCCESS + elif pgcode == '42P01' and logact&self.ADDTBL: # try to add table + self.qelog(dberror, 0, "Retry after adding a table", ary, pgcnt, self.LOGWRN) + self.try_add_table(dberror, logact) + return self.SUCCESS + if logact&self.DOLOCK and pgcode and re.match(r'^55\w\w\w$', pgcode): + logact &= ~self.EXITLG # no exit for lock error + return self.qelog(dberror, 0, sqlstr, ary, pgcnt, logact) + + # return hash reference to postgresql batch mode command and output file name + def pgbatch(self, sqlfile, foreground = 0): + dbhost = 'localhost' if self.PGDBI['DBSHOST'] == self.PGLOG['HOSTNAME'] else self.PGDBI['DBHOST'] + options = "-h {} -p {}".format(dbhost, self.PGDBI['DBPORT']) + pwname = self.get_pgpass_password() + os.environ['PGPASSWORD'] = pwname + options += " -U {} {}".format(self.PGDBI['LNNAME'], self.PGDBI['DBNAME']) + if not sqlfile: return options + batch = {} + if foreground: + batch = "psql {} < {} |".format(options, sqlfile) + else: + batch['out'] = sqlfile + if re.search(r'\.sql$', batch['out']): + batch['out'] = re.sub(r'\.sql$', '.out', batch['out']) + else: + batch['out'] += ".out" + batch['cmd'] = "psql {} < {} > {} 2>&1".format(options, sqlfile, batch['out']) + return batch
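A hypothetical caller of `pgbatch()` in background mode, reusing the `pgdbi` instance from the sketch above (the `cmd`/`out` keys come from the code; the SQL file name is invented):

```python
# Hypothetical sketch: run a SQL script via psql and keep its output file.
import subprocess

batch = pgdbi.pgbatch('/tmp/reload_tables.sql')  # {'cmd': ..., 'out': '/tmp/reload_tables.out'}
subprocess.run(batch['cmd'], shell=True)         # psql ... < ...sql > ...out 2>&1
print("psql output captured in", batch['out'])
```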
+ + # start a connection to dssdb database and return a DBI object; None if error + # force a reconnect if reconnect > 0 + def pgconnect(self, reconnect = 0, pgcnt = 0, autocommit = True): + if self.pgdb: + if reconnect and not self.pgdb.closed: return self.pgdb # no need to reconnect + elif reconnect: + reconnect = 0 # initial connection + while True: + config = {'database' : self.PGDBI['DBNAME'], + 'user' : self.PGDBI['LNNAME']} + if self.PGDBI['DBSHOST'] == self.PGLOG['HOSTNAME']: + config['host'] = 'localhost' + else: + config['host'] = self.PGDBI['DBHOST'] if self.PGDBI['DBHOST'] else self.PGDBI['DEFHOST'] + if not self.PGDBI['DBPORT']: self.PGDBI['DBPORT'] = self.get_dbport(self.PGDBI['DBNAME']) + if self.PGDBI['DBPORT']: config['port'] = self.PGDBI['DBPORT'] + config['password'] = '***' + sqlstr = "psycopg2.connect(**{})".format(config) + config['password'] = self.get_pgpass_password() + if self.PGLOG['DBGLEVEL']: self.pgdbg(1000, sqlstr) + try: + self.PGLOG['PGDBBUF'] = self.pgdb = PgSQL.connect(**config) + if reconnect: self.pglog("{} Reconnected at {}".format(sqlstr, self.current_datetime()), self.MSGLOG|self.FRCLOG) + if autocommit: self.pgdb.autocommit = autocommit + return self.pgdb + except PgSQL.Error as pgerr: + if not self.check_dberror(pgerr, pgcnt, sqlstr, None, self.PGDBI['EXITLG']): return self.FAILURE + pgcnt += 1 + + # return a PostgreSQL cursor upon success + def pgcursor(self): + pgcur = None + if not self.pgdb: + self.pgconnect() + if not self.pgdb: return self.FAILURE + pgcnt = 0 + while True: + try: + pgcur = self.pgdb.cursor() + spath = "SET search_path = '{}'".format(self.PGDBI['SCNAME']) + if self.PGDBI['SCPATH'] and self.PGDBI['SCPATH'] != self.PGDBI['SCNAME']: + spath += ", '{}'".format(self.PGDBI['SCPATH']) + pgcur.execute(spath) + except PgSQL.Error as pgerr: + if pgcnt == 0 and self.pgdb.closed: + self.pgconnect(1) + elif not self.check_dberror(pgerr, pgcnt, '', None, self.PGDBI['EXITLG']): + return self.FAILURE + else: + break + pgcnt += 1 + return pgcur + + # disconnect from the dssdb database + def pgdisconnect(self, stopit = 1): + if self.pgdb: + if stopit: self.pgdb.close() + self.PGLOG['PGDBBUF'] = self.pgdb = None + + # gather table field default information as hash array with field names as keys + # and default values as values + # the whole table information is cached to a hash array with table names as keys + def pgtable(self, tablename, logact = None): + if logact is None: logact = self.PGDBI['ERRLOG'] + if tablename in self.TABLES: return self.TABLES[tablename].copy() # cached already + intms = r'^(smallint|bigint|integer)$' + fields = "column_name col, data_type typ, is_nullable nil, column_default def" + condition = self.table_condition(tablename) + pgcnt = 0 + while True: + pgrecs = self.pgmget('information_schema.columns', fields, condition, logact) + cnt = len(pgrecs['col']) if pgrecs else 0 + if cnt: break + if pgcnt == 0 and logact&self.ADDTBL: + self.add_new_table(tablename, logact = logact) + else: + return self.pglog(tablename + ": Table does not exist", logact) + pgcnt += 1 + pgdefs = {} + for i in range(cnt): + name = pgrecs['col'][i] + isint = re.match(intms, pgrecs['typ'][i]) + dflt = pgrecs['def'][i] + if dflt is not None: + if re.match(r'^nextval\(', dflt): + dflt = 0 + else: + dflt = self.check_default_value(dflt, isint) + elif pgrecs['nil'][i] == 'YES': + dflt = None + elif isint: + dflt = 0 + else: + dflt = '' + pgdefs[name] = dflt + self.TABLES[tablename] = pgdefs.copy() + return pgdefs
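To illustrate what `pgtable()` caches, a sketch under an assumed table layout (table and field names invented):

```python
# Hypothetical result of pgtable('wfile'): one default per column, where
# 0/'' stand in for NOT NULL integer/string columns without a default,
# None marks nullable columns, and 0 also marks nextval() sequence fields.
defaults = pgdbi.pgtable('wfile')
# e.g. {'wid': 0, 'wfile': '', 'note': None, ...}
# prepare_default()/prepare_defaults() later substitute these for empty
# values before an INSERT when the DODFLT flag is set in logact.
```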
+ + # get sequence field name for given table name + def pgsequence(self, tablename, logact = None): + if logact is None: logact = self.PGDBI['ERRLOG'] + if tablename in self.SEQUENCES: return self.SEQUENCES[tablename] # cached already + condition = self.table_condition(tablename) + " AND column_default LIKE 'nextval(%'" + pgrec = self.pgget('information_schema.columns', 'column_name', condition, logact) + seqname = pgrec['column_name'] if pgrec else None + self.SEQUENCES[tablename] = seqname + return seqname + + # check default value for integer & string + @staticmethod + def check_default_value(dflt, isint): + if isint: + ms = re.match(r"^'{0,1}(\d+)", dflt) + if ms: dflt = int(ms.group(1)) + elif dflt[0] == "'": + ms = re.match(r"^(.+)::", dflt) + if ms: dflt = ms.group(1) + elif dflt != 'NULL': + dflt = "'{}'".format(dflt) + return dflt + + # local function: prepare INSERT statement for pgadd()/pgmadd() for given table and field names + # according to options of multiple placeholders and returning sequence id + def prepare_insert(self, tablename, fields, multi = True, getid = None): + strfld = self.pgnames(fields, '.', ',') + if multi: + strplc = "(" + ','.join(['%s']*len(fields)) + ")" + else: + strplc = '%s' + sqlstr = "INSERT INTO {} ({}) VALUES {}".format(tablename, strfld, strplc) + if getid: sqlstr += " RETURNING " + getid + if self.PGLOG['DBGLEVEL']: self.pgdbg(1000, sqlstr) + return sqlstr + + # local function: prepare default value for single record + def prepare_default(self, tablename, record, logact = 0): + table = self.pgtable(tablename, logact) + for fld in record: + val = record[fld] + if val is None: + vlen = 0 + elif isinstance(val, str): + vlen = len(val) + else: + vlen = 1 + if vlen == 0: record[fld] = table[fld] + + # local function: prepare default value for multiple records + def prepare_defaults(self, tablename, records, logact = 0): + table = self.pgtable(tablename, logact) + for fld in records: + vals = records[fld] + vcnt = len(vals) + for i in range(vcnt): + if vals[i] is None: + vlen = 0 + elif isinstance(vals[i], str): + vlen = len(vals[i]) + else: + vlen = 1 + if vlen == 0: records[fld][i] = table[fld] + + # insert one record into tablename + # tablename: add record for one table name each call + # record: hash reference with keys as field names and hash values as field values + # return self.SUCCESS or self.FAILURE + def pgadd(self, tablename, record, logact = None, getid = None): + if logact is None: logact = self.PGDBI['ERRLOG'] + if not record: return self.pglog("Nothing to add to " + tablename, logact) + if logact&self.DODFLT: self.prepare_default(tablename, record, logact) + if logact&self.AUTOID and not getid: getid = self.pgsequence(tablename, logact) + sqlstr = self.prepare_insert(tablename, list(record), True, getid) + values = tuple(record.values()) + if self.PGLOG['DBGLEVEL']: self.pgdbg(1000, "Insert: " + str(values)) + ret = acnt = pgcnt = 0 + while True: + pgcur = self.pgcursor() + if not pgcur: return self.FAILURE + try: + pgcur.execute(sqlstr, values) + acnt = 1 + if getid: + ret = pgcur.fetchone()[0] + else: + ret = self.SUCCESS + pgcur.close() + except PgSQL.Error as pgerr: + if not self.check_dberror(pgerr, pgcnt, sqlstr, values, logact): return self.FAILURE + else: + break + pgcnt += 1 + if self.PGLOG['DBGLEVEL']: self.pgdbg(1000, "pgadd: 1 record added to " + tablename + ", return " + str(ret)) + if logact&self.ENDLCK: + self.endtran() + elif self.curtran: + self.curtran += acnt + if self.curtran > self.PGDBI['MTRANS']: self.starttran() + return ret + + # insert
multiple records into tablename + # tablename: add records for one table name each call + # records: dict with field names as keys and each value is a list of field values + # return self.SUCCESS or self.FAILURE + def pgmadd(self, tablename, records, logact = None, getid = None): + if logact is None: logact = self.PGDBI['ERRLOG'] + if not records: return self.pglog("Nothing to insert to table " + tablename, logact) + if logact&self.DODFLT: self.prepare_defaults(tablename, records, logact) + if logact&self.AUTOID and not getid: getid = self.pgsequence(tablename, logact) + multi = True if getid else False + sqlstr = self.prepare_insert(tablename, list(records), multi, getid) + v = records.values() + values = list(zip(*v)) + cntrow = len(values) + ids = [] if getid else None + if self.PGLOG['DBGLEVEL']: + for row in values: self.pgdbg(1000, "Insert: " + str(row)) + count = pgcnt = 0 + while True: + pgcur = self.pgcursor() + if not pgcur: return self.FAILURE + if getid: + while count < cntrow: + record = values[count] + try: + pgcur.execute(sqlstr, record) + ids.append(pgcur.fetchone()[0]) + count += 1 + except PgSQL.Error as pgerr: + if not self.check_dberror(pgerr, pgcnt, sqlstr, record, logact): return self.FAILURE + break + else: + try: + execute_values(pgcur, sqlstr, values, page_size=self.PGDBI['PGSIZE']) + count = cntrow + except PgSQL.Error as pgerr: + if not self.check_dberror(pgerr, pgcnt, sqlstr, values[0], logact): return self.FAILURE + if count >= cntrow: break + pgcnt += 1 + pgcur.close() + if(self.PGLOG['DBGLEVEL']): self.pgdbg(1000, "pgmadd: {} of {} record(s) added to {}".format(count, cntrow, tablename)) + if(logact&self.ENDLCK): + self.endtran() + elif self.curtran: + self.curtran += count + if self.curtran > self.PGDBI['MTRANS']: self.starttran() + return (ids if ids else count) + + # local function: select prepare for pgget() and pgmget() + def prepare_select(self, tablenames, fields = None, condition = None, cndflds = None, logact = 0): + sqlstr = '' + if tablenames: + if fields: + sqlstr = "SELECT " + fields + else: + sqlstr = "SELECT count(*) cntrec" + + sqlstr += " FROM " + tablenames + if condition: + if re.match(r'^\s*(ORDER|GROUP|HAVING|OFFSET|LIMIT)\s', condition, re.I): + sqlstr += " " + condition # no where clause, append directly + else: + sqlstr += " WHERE " + condition + elif cndflds: + sep = 'WHERE' + for fld in cndflds: + sqlstr += " {} {}=%s".format(sep, fld) + sep = 'AND' + if logact&self.DOLOCK: + self.starttran() + sqlstr += " FOR UPDATE" + elif fields: + sqlstr = "SELECT " + fields + elif condition: + sqlstr = condition + if self.PGLOG['DBGLEVEL']: self.pgdbg(1000, sqlstr) + return sqlstr + + # tablenames: comma deliminated string of one or more tables and more than one table for joining, + # fields: comma deliminated string of one or more field names, + # condition: querry conditions for where clause + # return a dict reference with keys as field names upon success + def pgget(self, tablenames, fields, condition = None, logact = 0): + if not logact: logact = self.PGDBI['ERRLOG'] + if fields and condition and not re.search(r'limit 1$', condition, re.I): condition += " LIMIT 1" + sqlstr = self.prepare_select(tablenames, fields, condition, None, logact) + if fields and not re.search(r'(^|\s)limit 1($|\s)', sqlstr, re.I): sqlstr += " LIMIT 1" + ucname = True if logact&self.UCNAME else False + pgcnt = 0 + record = {} + while True: + pgcur = self.pgcursor() + if not pgcur: return self.FAILURE + try: + pgcur.execute(sqlstr) + vals = pgcur.fetchone() + if 
vals: + colcnt = len(pgcur.description) + for i in range(colcnt): + col = pgcur.description[i] + colname = col[0].upper() if ucname else col[0] + val = vals[i] + if col[1] == self.CHCODE and val and val[-1] == ' ': val = val.rstrip() + record[colname] = val + pgcur.close() + except PgSQL.Error as pgerr: + if not self.check_dberror(pgerr, pgcnt, sqlstr, None, logact): return self.FAILURE + else: + break + pgcnt += 1 + if record and tablenames and not fields: + if self.PGLOG['DBGLEVEL']: + self.pgdbg(1000, "pgget: {} record(s) found from {}".format(record['cntrec'], tablenames)) + return record['cntrec'] + elif self.PGLOG['DBGLEVEL']: + cnt = 1 if record else 0 + self.pgdbg(1000, "pgget: {} record retrieved from {}".format(cnt, tablenames)) + return record + + # tablenames: comma deliminated string of one or more tables and more than one table for joining, + # fields: comma deliminated string of one or more field names, + # condition: querry conditions for where clause + # return a dict reference with keys as field names upon success, values for each field name + # are in a list. All lists are the same length with missing values set to None + def pgmget(self, tablenames, fields, condition = None, logact = None): + if logact is None: logact = self.PGDBI['ERRLOG'] + sqlstr = self.prepare_select(tablenames, fields, condition, None, logact) + ucname = True if logact&self.UCNAME else False + count = pgcnt = 0 + records = {} + while True: + pgcur = self.pgcursor() + if not pgcur: return self.FAILURE + try: + pgcur.execute(sqlstr) + rowvals = pgcur.fetchall() + if rowvals: + colcnt = len(pgcur.description) + count = len(rowvals) + colvals = list(zip(*rowvals)) + for i in range(colcnt): + col = pgcur.description[i] + colname = col[0].upper() if ucname else col[0] + vals = list(colvals[i]) + if col[1] == self.CHCODE: + for j in range(count): + if vals[j] and vals[j][-1] == ' ': vals[j] = vals[j].rstrip() + records[colname] = vals + pgcur.close() + except PgSQL.Error as pgerr: + if not self.check_dberror(pgerr, pgcnt, sqlstr, None, logact): return self.FAILURE + else: + break + pgcnt += 1 + if self.PGLOG['DBGLEVEL']: + self.pgdbg(1000, "pgmget: {} record(s) retrieved from {}".format(count, tablenames)) + return records + + # tablenames: comma deliminated string of one or more tables + # fields: comma deliminated string of one or more field names, + # cnddict: condition dict with field names : values + # return a dict(field names : values) upon success + # retrieve one records from tablenames condition dict + def pghget(self, tablenames, fields, cnddict, logact = None): + if logact is None: logact = self.PGDBI['ERRLOG'] + if not tablenames: return self.pglog("Miss Table name to query", logact) + if not fields: return self.pglog("Nothing to query " + tablenames, logact) + if not cnddict: return self.pglog("Miss condition dict values to query " + tablenames, logact) + sqlstr = self.prepare_select(tablenames, fields, None, list(cnddict), logact) + if fields and not re.search(r'limit 1$', sqlstr, re.I): sqlstr += " LIMIT 1" + ucname = True if logact&self.UCNAME else False + values = tuple(cnddict.values()) + if self.PGLOG['DBGLEVEL']: self.pgdbg(1000, "Query from {} for {}".format(tablenames, values)) + pgcnt = 0 + record = {} + while True: + pgcur = self.pgcursor() + if not pgcur: return self.FAILURE + try: + pgcur.execute(sqlstr, values) + vals = pgcur.fetchone() + if vals: + colcnt = len(pgcur.description) + for i in range(colcnt): + col = pgcur.description[i] + colname = col[0].upper() if ucname else 
col[0] + val = vals[i] + if col[1] == self.CHCODE and val and val[-1] == ' ': val = val.rstrip() + record[colname] = val + pgcur.close() + except PgSQL.Error as pgerr: + if not self.check_dberror(pgerr, pgcnt, sqlstr, values, logact): return self.FAILURE + else: + break + pgcnt += 1 + if record and tablenames and not fields: + if self.PGLOG['DBGLEVEL']: + self.pgdbg(1000, "pghget: {} record(s) found from {}".format(record['cntrec'], tablenames)) + return record['cntrec'] + elif self.PGLOG['DBGLEVEL']: + cnt = 1 if record else 0 + self.pgdbg(1000, "pghget: {} record retrieved from {}".format(cnt, tablenames)) + return record + + # tablenames: comma deliminated string of one or more tables + # fields: comma deliminated string of one or more field names, + # cnddicts: condition dict with field names : value lists + # return a dict(field names : value lists) upon success + # retrieve multiple records from tablenames for condition dict + def pgmhget(self, tablenames, fields, cnddicts, logact = None): + if logact is None: logact = self.PGDBI['ERRLOG'] + if not tablenames: return self.pglog("Miss Table name to query", logact) + if not fields: return self.pglog("Nothing to query " + tablenames, logact) + if not cnddicts: return self.pglog("Miss condition dict values to query " + tablenames, logact) + sqlstr = self.prepare_select(tablenames, fields, None, list(cnddicts), logact) + ucname = True if logact&self.UCNAME else False + v = cnddicts.values() + values = list(zip(*v)) + cndcnt = len(values) + if self.PGLOG['DBGLEVEL']: + for row in values: + self.pgdbg(1000, "Query from {} for {}".format(tablenames, row)) + colcnt = ccnt = count = pgcnt = 0 + cols = [] + chrs = [] + records = {} + while True: + pgcur = self.pgcursor() + if not pgcur: return self.FAILURE + while ccnt < cndcnt: + cndvals = values[ccnt] + try: + pgcur.execute(sqlstr, cndvals) + ccnt += 1 + rowvals = pgcur.fetchall() + if rowvals: + if colcnt == 0: + for col in pgcur.description: + colname = col[0].upper() if ucname else col[0] + if col[1] == self.CHCODE: chrs.append(colname) + cols.append(colname) + records[colname] = [] + colcnt = len(cols) + rcnt = len(rowvals) + count += rcnt + colvals = list(zip(*rowvals)) + for i in range(colcnt): + vals = list(colvals[i]) + colname = cols[i] + if chrs and colname in chrs: + for j in range(rcnt): + if vals[j] and vals[j][-1] == ' ': vals[j] = vals[j].rstrip() + records[colname].extend(vals) + except PgSQL.Error as pgerr: + if not self.check_dberror(pgerr, pgcnt, sqlstr, cndvals, logact): return self.FAILURE + break + if ccnt >= cndcnt: break + pgcnt += 1 + pgcur.close() + if self.PGLOG['DBGLEVEL']: + self.pgdbg(1000, "pgmhget: {} record(s) retrieved from {}".format(count, tablenames)) + return records + + # local fucntion: update prepare for pgupdt, pghupdt and pgmupdt + def prepare_update(self, tablename, fields, condition = None, cndflds = None): + strset = [] + # build set string + for fld in fields: + strset.append("{}=%s".format(self.pgname(fld, '.'))) + strflds = ",".join(strset) + # build condition string + if not condition: + cndset = [] + for fld in cndflds: + cndset.append("{}=%s".format(self.pgname(fld, '.'))) + condition = " AND ".join(cndset) + sqlstr = "UPDATE {} SET {} WHERE {}".format(tablename, strflds, condition) + if self.PGLOG['DBGLEVEL']: self.pgdbg(1000, sqlstr) + return sqlstr + + # update one or multiple rows in tablename + # tablename: update for one table name each call + # record: dict with field names : values + # condition: update conditions for where clause) + 
# return number of rows updated upon success + def pgupdt(self, tablename, record, condition, logact = None): + if logact is None: logact = self.PGDBI['ERRLOG'] + if not record: return self.pglog("Nothing to update in " + tablename, logact) + if not condition or isinstance(condition, int): return self.pglog("Missing condition to update " + tablename, logact) + sqlstr = self.prepare_update(tablename, list(record), condition) + if logact&self.DODFLT: self.prepare_default(tablename, record, logact) + values = tuple(record.values()) + if self.PGLOG['DBGLEVEL']: self.pgdbg(1000, "Update {} for {}".format(tablename, values)) + ucnt = pgcnt = 0 + while True: + pgcur = self.pgcursor() + if not pgcur: return self.FAILURE + try: + pgcur.execute(sqlstr, values) + ucnt = pgcur.rowcount + pgcur.close() + except PgSQL.Error as pgerr: + if not self.check_dberror(pgerr, pgcnt, sqlstr, values, logact): return self.FAILURE + else: + break + pgcnt += 1 + if self.PGLOG['DBGLEVEL']: self.pgdbg(1000, "pgupdt: {} record(s) updated to {}".format(ucnt, tablename)) + if logact&self.ENDLCK: + self.endtran() + elif self.curtran: + self.curtran += ucnt + if self.curtran > self.PGDBI['MTRANS']: self.starttran() + return ucnt + + # update one or multiple records in tablename + # tablename: update for one table name each call + # record: update values, dict with field names : values + # cnddict: condition dict with field names : values + # return number of records updated upon success + def pghupdt(self, tablename, record, cnddict, logact = None): + if logact is None: logact = self.PGDBI['ERRLOG'] + if not record: return self.pglog("Nothing to update in " + tablename, logact) + if not cnddict or isinstance(cnddict, int): return self.pglog("Missing condition to update " + tablename, logact) + if logact&self.DODFLT: self.prepare_default(tablename, record, logact) + sqlstr = self.prepare_update(tablename, list(record), None, list(cnddict)) + values = tuple(record.values()) + tuple(cnddict.values()) + if self.PGLOG['DBGLEVEL']: self.pgdbg(1000, "Update {} for {}".format(tablename, values)) + ucnt = count = pgcnt = 0 + while True: + pgcur = self.pgcursor() + if not pgcur: return self.FAILURE + try: + pgcur.execute(sqlstr, values) + count += 1 + ucnt = pgcur.rowcount + pgcur.close() + except PgSQL.Error as pgerr: + if not self.check_dberror(pgerr, pgcnt, sqlstr, values, logact): return self.FAILURE + else: + break + pgcnt += 1 + if self.PGLOG['DBGLEVEL']: self.pgdbg(1000, "pghupdt: {} record(s) updated to {}".format(ucnt, tablename)) + if logact&self.ENDLCK: + self.endtran() + elif self.curtran: + self.curtran += ucnt + if self.curtran > self.PGDBI['MTRANS']: self.starttran() + return ucnt + + # update multiple records in tablename + # tablename: update for one table name each call + # records: update values, dict with field names : value lists + # cnddicts: condition dict with field names : value lists + # return number of records updated upon success + def pgmupdt(self, tablename, records, cnddicts, logact = None): + if logact is None: logact = self.PGDBI['ERRLOG'] + if not records: return self.pglog("Nothing to update in " + tablename, logact) + if not cnddicts or isinstance(cnddicts, int): return self.pglog("Missing condition to update " + tablename, logact) + if logact&self.DODFLT: self.prepare_defaults(tablename, records, logact) + sqlstr = self.prepare_update(tablename, list(records), None, list(cnddicts)) + fldvals = tuple(records.values()) + cntrow = len(fldvals[0]) + cndvals = tuple(cnddicts.values()) + cntcnd = len(cndvals[0]) + if cntcnd != cntrow: return self.pglog("Field/Condition value count mismatch {}/{} to update {}".format(cntrow, cntcnd, tablename), logact)
self.pglog("Field/Condition value counts Miss match {}/{} to update {}".format(cntrow, cntcnd, tablename), logact) + v = fldvals + cndvals + values = list(zip(*v)) + if self.PGLOG['DBGLEVEL']: + for row in values: self.pgdbg(1000, "Update {} for {}".format(tablename, row)) + ucnt = pgcnt = 0 + while True: + pgcur = self.pgcursor() + if not pgcur: return self.FAILURE + try: + execute_batch(pgcur, sqlstr, values, page_size=self.PGDBI['PGSIZE']) + ucnt = cntrow + except PgSQL.Error as pgerr: + if not self.check_dberror(pgerr, pgcnt, sqlstr, values[0], logact): return self.FAILURE + else: + break + pgcnt += 1 + pgcur.close() + if self.PGLOG['DBGLEVEL']: self.pgdbg(1000, "pgmupdt: {} record(s) updated to {}".format(ucnt, tablename)) + if(logact&self.ENDLCK): + self.endtran() + elif self.curtran: + self.curtran += ucnt + if self.curtran > self.PGDBI['MTRANS']: self.starttran() + return ucnt + + # local fucntion: delete prepare for pgdel, pghdel and del + def prepare_delete(self, tablename, condition = None, cndflds = None): + # build condition string + if not condition: + cndset = [] + for fld in cndflds: + cndset.append("{}=%s".format(fld)) + condition = " AND ".join(cndset) + sqlstr = "DELETE FROM {} WHERE {}".format(tablename, condition) + if self.PGLOG['DBGLEVEL']: self.pgdbg(1000, sqlstr) + return sqlstr + + # delete one or mutiple records in tablename according condition + # tablename: delete for one table name each call + # condition: delete conditions for where clause + # return number of records deleted upon success + def pgdel(self, tablename, condition, logact = None): + if logact is None: logact = self.PGDBI['ERRLOG'] + if not condition or isinstance(condition, int): self.pglog("Miss condition to delete from " + tablename, logact) + sqlstr = self.prepare_delete(tablename, condition) + dcnt = pgcnt = 0 + while True: + pgcur = self.pgcursor() + if not pgcur: return self.FAILURE + try: + pgcur.execute(sqlstr) + dcnt = pgcur.rowcount + pgcur.close() + except PgSQL.Error as pgerr: + if not self.check_dberror(pgerr, pgcnt, sqlstr, None, logact): return self.FAILURE + else: + break + pgcnt += 1 + if self.PGLOG['DBGLEVEL']: self.pgdbg(1000, "pgdel: {} record(s) deleted from {}".format(dcnt, tablename)) + if logact&self.ENDLCK: + self.endtran() + elif self.curtran: + self.curtran += dcnt + if self.curtran > self.PGDBI['MTRANS']: self.starttran() + return dcnt + + # delete one or mutiple records in tablename according condition + # tablename: delete for one table name each call + # cndict: delete condition dict for names : values + # return number of records deleted upon success + def pghdel(self, tablename, cnddict, logact = None): + if logact is None: logact = self.PGDBI['ERRLOG'] + if not cnddict or isinstance(cnddict, int): self.pglog("Miss condition dict to delete from " + tablename, logact) + sqlstr = self.prepare_delete(tablename, None, list(cnddict)) + values = tuple(cnddict.values()) + if self.PGLOG['DBGLEVEL']: self.pgdbg(1000, "Delete from {} for {}".format(tablename, values)) + dcnt = pgcnt = 0 + while True: + pgcur = self.pgcursor() + if not pgcur: return self.FAILURE + try: + pgcur.execute(sqlstr, values) + dcnt = pgcur.rowcount + pgcur.close() + except PgSQL.Error as pgerr: + if not self.check_dberror(pgerr, pgcnt, sqlstr, values, logact): return self.FAILURE + else: + break + pgcnt += 1 + if self.PGLOG['DBGLEVEL']: self.pgdbg(1000, "pghdel: {} record(s) deleted from {}".format(dcnt, tablename)) + if logact&self.ENDLCK: + self.endtran() + elif self.curtran: + self.curtran += 
+    # delete multiple records in tablename according to condition
+    # tablename: delete for one table name each call
+    # cnddicts: delete condition dict for names : value lists
+    # return number of records deleted upon success
+    def pgmdel(self, tablename, cnddicts, logact = None):
+        if logact is None: logact = self.PGDBI['ERRLOG']
+        if not cnddicts or isinstance(cnddicts, int): self.pglog("Missing condition dict to delete from " + tablename, logact)
+        sqlstr = self.prepare_delete(tablename, None, list(cnddicts))
+        v = cnddicts.values()
+        values = list(zip(*v))
+        if self.PGLOG['DBGLEVEL']:
+            for row in values:
+                self.pgdbg(1000, "Delete from {} for {}".format(tablename, row))
+        dcnt = pgcnt = 0
+        while True:
+            pgcur = self.pgcursor()
+            if not pgcur: return self.FAILURE
+            try:
+                execute_batch(pgcur, sqlstr, values, page_size=self.PGDBI['PGSIZE'])
+                dcnt = len(values)
+            except PgSQL.Error as pgerr:
+                if not self.check_dberror(pgerr, pgcnt, sqlstr, values[0], logact): return self.FAILURE
+            else:
+                break
+            pgcnt += 1
+        pgcur.close()
+        if self.PGLOG['DBGLEVEL']: self.pgdbg(1000, "pgmdel: {} record(s) deleted from {}".format(dcnt, tablename))
+        if logact&self.ENDLCK:
+            self.endtran()
+        elif self.curtran:
+            self.curtran += dcnt
+            if self.curtran > self.PGDBI['MTRANS']: self.starttran()
+        return dcnt
+
+    # sqlstr: a complete sql string
+    # return number of records affected upon success
+    def pgexec(self, sqlstr, logact = None):
+        if logact is None: logact = self.PGDBI['ERRLOG']
+        if self.PGLOG['DBGLEVEL']: self.pgdbg(100, sqlstr)
+        ret = pgcnt = 0
+        while True:
+            pgcur = self.pgcursor()
+            if not pgcur: return self.FAILURE
+            try:
+                pgcur.execute(sqlstr)
+                ret = pgcur.rowcount
+                pgcur.close()
+            except PgSQL.Error as pgerr:
+                if not self.check_dberror(pgerr, pgcnt, sqlstr, None, logact): return self.FAILURE
+            else:
+                break
+            pgcnt += 1
+        if self.PGLOG['DBGLEVEL']: self.pgdbg(1000, "pgexec: {} record(s) affected for {}".format(ret, sqlstr))
+        if logact&self.ENDLCK:
+            self.endtran()
+        elif self.curtran:
+            self.curtran += ret
+            if self.curtran > self.PGDBI['MTRANS']: self.starttran()
+        return ret
+
+    # tablename: name of the temporary table to create
+    # fromtable: table name the data are gathered from
+    # fields: comma delimited string of field names to gather
+    # condition: query conditions for the where clause
+    # return number of records created upon success
+    def pgtemp(self, tablename, fromtable, fields, condition = None, logact = 0):
+        sqlstr = "CREATE TEMPORARY TABLE {} AS SELECT {} FROM {}".format(tablename, fields, fromtable)
+        if condition: sqlstr += " WHERE " + condition
+        return self.pgexec(sqlstr, logact)
+
+    # get condition for given table name for accessing information_schema
+    def table_condition(self, tablename):
+        ms = re.match(r'(.+)\.(.+)', tablename)
+        if ms:
+            scname = ms.group(1)
+            tbname = ms.group(2)
+        else:
+            scname = self.PGDBI['SCNAME']
+            tbname = tablename
+        return "table_schema = '{}' AND table_name = '{}'".format(scname, tbname)
+
+    # check if a given table name exists or not
+    # tablename: one table name to check
+    def pgcheck(self, tablename, logact = 0):
+        condition = self.table_condition(tablename)
+        ret = self.pgget('information_schema.tables', None, condition, logact)
+        return (self.SUCCESS if ret else self.FAILURE)
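table_condition/pgcheck resolve a possibly schema-qualified name against information_schema; e.g. (schema and table names hypothetical):

    # table_condition('dssdb.user') -> "table_schema = 'dssdb' AND table_name = 'user'"
    # pgcheck('dssdb.user') then runs roughly:
    #   SELECT * FROM information_schema.tables WHERE table_schema = 'dssdb' AND table_name = 'user'
    # a bare table name falls back to PGDBI['SCNAME'] for the schema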
+    # group of functions to check parent records and add an empty one if missed
+    # return user.uid upon success, 0 otherwise
+    def check_user_uid(self, userno, date = None):
+        if not userno: return 0
+        if isinstance(userno, str): userno = int(userno)
+        if date is None:
+            datecond = "until_date IS NULL"
+            date = 'today'
+        else:
+            datecond = "(start_date IS NULL OR start_date <= '{}') AND (until_date IS NULL OR until_date >= '{}')".format(date, date)
+        pgrec = self.pgget("dssdb.user", "uid", "userno = {} AND {}".format(userno, datecond), self.PGDBI['ERRLOG'])
+        if pgrec: return pgrec['uid']
+        if userno not in self.NMISSES:
+            self.pglog("{}: Scientist ID NOT on file for {}".format(userno, date), self.LGWNEM)
+            self.NMISSES.append(userno)
+        # check again if the user is on file with a different date range
+        pgrec = self.pgget("dssdb.user", "uid", "userno = {}".format(userno), self.PGDBI['ERRLOG'])
+        if pgrec: return pgrec['uid']
+        pgrec = self.ucar_user_info(userno)
+        if not pgrec: pgrec = {'userno' : userno, 'stat_flag' : 'M'}
+        uid = self.pgadd("dssdb.user", pgrec, (self.PGDBI['EXITLG']|self.AUTOID))
+        if uid: self.pglog("{}: Scientist ID Added as user.uid = {}".format(userno, uid), self.LGWNEM)
+        return uid
+
+    # return user.uid upon success, 0 otherwise
+    def get_user_uid(self, logname, date = None):
+        if not logname: return 0
+        if not date:
+            date = 'today'
+            datecond = "until_date IS NULL"
+        else:
+            datecond = "(start_date IS NULL OR start_date <= '{}') AND (until_date IS NULL OR until_date >= '{}')".format(date, date)
+        pgrec = self.pgget("dssdb.user", "uid", "logname = '{}' AND {}".format(logname, datecond), self.PGDBI['ERRLOG'])
+        if pgrec: return pgrec['uid']
+        if logname not in self.LMISSES:
+            self.pglog("{}: UCAR Login Name NOT on file for {}".format(logname, date), self.LGWNEM)
+            self.LMISSES.append(logname)
+        # check again if the user is on file with a different date range
+        pgrec = self.pgget("dssdb.user", "uid", "logname = '{}'".format(logname), self.PGDBI['ERRLOG'])
+        if pgrec: return pgrec['uid']
+        pgrec = self.ucar_user_info(0, logname)
+        if not pgrec: pgrec = {'logname' : logname, 'stat_flag' : 'M'}
+        uid = self.pgadd("dssdb.user", pgrec, (self.PGDBI['EXITLG']|self.AUTOID))
+        if uid: self.pglog("{}: UCAR Login Name Added as user.uid = {}".format(logname, uid), self.LGWNEM)
+        return uid
+
+    # get ucar user info for given userno (scientist number) or logname (UCAR login)
+    def ucar_user_info(self, userno, logname = None):
+        matches = {
+            'upid' : "upid",
+            'uid' : "userno",
+            'username' : "logname",
+            'lastName' : "lstname",
+            'firstName' : "fstname",
+            'active' : "stat_flag",
+            'internalOrg' : "division",
+            'externalOrg' : "org_name",
+            'country' : "country",
+            'forwardEmail' : "email",
+            'email' : "ucaremail",
+            'phone' : "phoneno"
+        }
+        buf = self.pgsystem("pgperson " + ("-uid {}".format(userno) if userno else "-username {}".format(logname)), self.LOGWRN, 20)
+        if not buf: return None
+        pgrec = {}
+        for line in buf.split('\n'):
+            ms = re.match(r'^(.+)<=>(.*)$', line)
+            if ms:
+                (key, val) = ms.groups()
+                if key in matches:
+                    if key == 'upid' and 'upid' in pgrec: break   # get one record only
+                    pgrec[matches[key]] = val
+        if not pgrec: return None
+        if userno:
+            pgrec['userno'] = userno
+        elif pgrec['userno']:
+            pgrec['userno'] = userno = int(pgrec['userno'])
+        if pgrec['upid']: pgrec['upid'] = int(pgrec['upid'])
+        if pgrec['stat_flag']: pgrec['stat_flag'] = 'A' if pgrec['stat_flag'] == "True" else 'C'
+        if pgrec['email'] and re.search(r'(@|\.)ucar\.edu$', pgrec['email'], re.I):
+            pgrec['email'] = pgrec['ucaremail']
+            pgrec['org_name'] = 'NCAR'
+        country = pgrec['country'] if 'country' in pgrec else None
+        pgrec['country'] = self.set_country_code(pgrec['email'], country)
+        if pgrec['division']:
+            val = "NCAR"
+        else:
+            val = None
+        pgrec['org_type'] = self.get_org_type(val, pgrec['email'])
+        buf = self.pgsystem("pgusername {}".format(pgrec['logname']), self.LOGWRN, 20)
+        if not buf: return pgrec
+        for line in buf.split('\n'):
+            ms = re.match(r'^(.+)<=>(.*)$', line)
+            if ms:
+                (key, val) = ms.groups()
+                if key == 'startDate':
+                    m = re.match(r'^(\d+-\d+-\d+)\s', val)
+                    if m:
+                        pgrec['start_date'] = m.group(1)
+                    else:
+                        pgrec['start_date'] = val
+                if key == 'endDate':
+                    m = re.match(r'^(\d+-\d+-\d+)\s', val)
+                    if m:
+                        pgrec['until_date'] = m.group(1)
+                    else:
+                        pgrec['until_date'] = val
+        return pgrec
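ucar_user_info parses "key<=>value" lines emitted by the pgperson/pgusername helpers; a standalone illustration of the parsing (the sample output is made up):

    import re
    buf = 'uid<=>12345\nusername<=>jdoe\nactive<=>True'   # hypothetical pgperson output
    info = {}
    for line in buf.split('\n'):
        ms = re.match(r'^(.+)<=>(.*)$', line)
        if ms: info[ms.group(1)] = ms.group(2)
    # info -> {'uid': '12345', 'username': 'jdoe', 'active': 'True'}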
"NCAR" + else: + val = None + pgrec['org_type'] = self.get_org_type(val, pgrec['email']) + buf = self.pgsystem("pgusername {}".format(pgrec['logname']), self.LOGWRN, 20) + if not buf: return pgrec + for line in buf.split('\n'): + ms = re.match(r'^(.+)<=>(.*)$', line) + if ms: + (key, val) = ms.groups() + if key == 'startDate': + m = re.match(r'^(\d+-\d+-\d+)\s', val) + if m: + pgrec['start_date'] = m.group(1) + else: + pgrec['start_date'] = val + if key == 'endDate': + m = re.match(r'^(\d+-\d+-\d+)\s', val) + if m: + pgrec['until_date'] = m.group(1) + else: + pgrec['until_date'] = val + return pgrec + + # set country code for given coutry name or email address + def set_country_code(self, email, country = None): + codes = { + 'CHINA' : "P.R.CHINA", + 'ENGLAND' : "UNITED.KINGDOM", + 'FR' : "FRANCE", + 'KOREA' : "SOUTH.KOREA", + 'USSR' : "RUSSIA", + 'US' : "UNITED.STATES", + 'U.S.A.' : "UNITED.STATES" + } + if country: + country = country.upper() + ms = re.match(r'^(\w+)\s(\w+)$', country) + if ms: + country = ms.group(1) + '.' + ms.group(2) + elif country in codes: + country = codes[country] + else: + country = self.email_to_country(email) + return country + + # return wuser.wuid upon success, 0 otherwise + def check_wuser_wuid(self, email, date = None): + if not email: return 0 + emcond = "email = '{}'".format(email) + if not date: + date = 'today' + datecond = "until_date IS NULL" + else: + datecond = "(start_date IS NULL OR start_date <= '{}') AND (until_date IS NULL OR until_date >= '{}')".format(date, date) + pgrec = self.pgget("wuser", "wuid", "{} AND {}".format(emcond, datecond), self.PGDBI['ERRLOG']) + if pgrec: return pgrec['wuid'] + # check again if a user is on file with different date range + pgrec = self.pgget("wuser", "wuid", emcond, self.LOGERR) + if pgrec: return pgrec['wuid'] + # now add one in + record = {'email' : email} + # check again if a ruser is on file + pgrec = self.pgget("ruser", "*", emcond + " AND end_date IS NULL", self.PGDBI['ERRLOG']) + if not pgrec: pgrec = self.pgget("ruser", "*", emcond, self.PGDBI['ERRLOG']) + if pgrec: + record['ruid'] = pgrec['id'] + record['fstname'] = pgrec['fname'] + record['lstname'] = pgrec['lname'] + record['country'] = pgrec['country'] + record['org_type'] = self.get_org_type(pgrec['org_type'], pgrec['email']) + record['start_date'] = str(pgrec['rdate']) + if pgrec['end_date']: + record['until_date'] = str(pgrec['end_date']) + record['stat_flag'] = 'C' + else: + record['stat_flag'] = 'A' + if pgrec['title']: record['utitle'] = pgrec['title'] + if pgrec['mname']: record['midinit'] = pgrec['mname'][0] + if pgrec['org']: record['org_name'] = pgrec['org'] + else: + record['stat_flag'] = 'M' + record['org_type'] = self.get_org_type('', email) + record['country'] = self.email_to_country(email) + wuid = self.pgadd("wuser", record, self.LOGERR|self.AUTOID) + if wuid: + if pgrec: + self.pglog("{}({}, {}) Added as wuid({})".format(email, pgrec['lname'], pgrec['fname'], wuid), self.LGWNEM) + else: + self.pglog("{} Added as wuid({})".format(email, wuid), self.LGWNEM) + return wuid + return 0 + + # return wuser.wuid upon success, 0 otherwise + def check_cdp_wuser(self, username): + pgrec = self.pgget("wuser", "wuid", "cdpname = '{}'".format(username), self.PGDBI['EXITLG']) + if pgrec: return pgrec['wuid'] + idrec = self.pgget("wuser", "wuid", "email = '{}'".format(pgrec['email']), self.PGDBI['EXITLG']) + wuid = idrec['wuid'] if idrec else 0 + if wuid > 0: + idrec = {} + idrec['cdpid'] = pgrec['cdpid'] + idrec['cdpname'] = pgrec['cdpname'] + 
self.pgupdt("wuser", idrec, "wuid = {}".format(wuid) , self.PGDBI['EXITLG']) + else: + pgrec['stat_flag'] = 'A' + pgrec['org_type'] = self.get_org_type(pgrec['org_type'], pgrec['email']) + pgrec['country'] = self.email_to_country(pgrec['email']) + wuid = self.pgadd("wuser", pgrec, self.PGDBI['EXITLG']|self.AUTOID) + if wuid > 0: + self.pglog("CDP User {} added as wuid = {} in RDADB".format(username, wuid), self.LGWNEM) + return wuid + + # for given email to get long country name + def email_to_country(self, email): + ms = re.search(r'\.(\w\w)$', email) + if ms: + pgrec = self.pgget("countries", "token", "domain_id = '{}'".format(ms.group(1)), self.PGDBI['EXITLG']) + if pgrec: return pgrec['token'] + elif re.search(r'\.(gov|edu|mil|org|com|net)$', email): + return "UNITED.STATES" + else: + return "UNKNOWN" + + # if filelists is published for given dataset, reset it to 'P' + def reset_rdadb_version(self, dsid): + self.pgexec("UPDATE dataset SET version = version + 1 WHERE dsid = '{}'".format(dsid), self.PGDBI['ERRLOG']) + + # check the use rdadb flag in table dataset for a given dataset and given values + def use_rdadb(self, dsid, logact = 0, vals = None): + ret = '' # default to empty in case dataset not in RDADB + if dsid: + pgrec = self.pgget("dataset", "use_rdadb", "dsid = '{}'".format(dsid), self.PGDBI['EXITLG']) + if pgrec: + ret = 'N' # default to 'N' if dataset record in RDADB already + if pgrec['use_rdadb']: + if not vals: vals = "IPYMW" # default to Internal; Publishable; Yes RDADB + if vals.find(pgrec['use_rdadb']) > -1: + ret = pgrec['use_rdadb'] + elif logact: + self.pglog("Dataset '{}' is not in RDADB!".format(dsid), logact) + return ret + + # fld: field name for querry condition + # vals: reference to aaray of values + # isstr: 1 for string values requires quotes and support wildcard + # noand: 1 for skiping the leading ' AND ' for condition + # return a condition string for a given field + def get_field_condition(self, fld, vals, isstr = 0, noand = 0): + cnd = wcnd = negative = '' + sign = "=" + logic = " OR " + count = len(vals) if vals else 0 + if count == 0: return '' + ncnt = scnt = wcnt = cnt = 0 + for i in range(count): + val = vals[i] + if val is None or (i > 0 and val == vals[i-1]): continue + if i == 0 and val == self.PGSIGNS[0]: + negative = "NOT " + logic = " AND " + continue + if scnt == 0 and isinstance(val, str): + ms = re.match(r'^({})$'.format('|'.join(self.PGSIGNS[1:])), val) + if ms: + osign = sign = ms.group(1) + scnt += 1 + if sign == "<>": + scnt += 1 + sign = negative + "BETWEEN" + elif negative: + sign = "<=" if (sign == ">") else ">=" + continue + if isstr: + if not isinstance(val, str): val = str(val) + if sign == "=": + if not val: + ncnt += 1 # found null string + elif val.find('%') > -1: + sign = negative + "LIKE" + elif re.search(r'[\[\(\?\.]', val): + sign = negative + "SIMILAR TO" + if val.find("'") != 0: + val = "'{}'".format(val) + elif isinstance(val, str): + if val.find('.') > -1: + val = float(val) + else: + val = int(val) + if sign == "=": + if cnt > 0: cnd += ", " + cnd += str(val) + cnt += 1 + else: + if sign == "AND": + wcnd += " {} {}".format(sign, val) + else: + if wcnt > 0: wcnd += logic + wcnd += "{} {} {}".format(fld, sign, val) + wcnt += 1 + if re.search(r'BETWEEN$', sign): + sign = "AND" + else: + sign = "=" + scnt = 0 + if scnt > 0: + s = 's' if scnt > 1 else '' + self.pglog("Need {} value{} after sign '{}'".format(scnt, s, osign), self.LGEREX) + if wcnt > 1: wcnd = "({})".format(wcnd) + if cnt > 0: + if cnt > 1: + cnd = "{} 
+    # build up the field-name string for given or default condition
+    def fieldname_string(self, fnames, dnames = None, anames = None, wflds = None):
+        if not fnames:
+            fnames = dnames   # include default field names
+        elif re.match(r'^all$', fnames, re.I):
+            fnames = anames   # include all field names
+        if not wflds: return fnames
+        for wfld in wflds:
+            if not wfld or fnames.find(wfld) > -1: continue   # empty field, or included already
+            if wfld == "Q":
+                pos = fnames.find("R")   # request name
+            elif wfld == "Y":
+                pos = fnames.find("X")   # parent group name
+            elif wfld == "G":
+                pos = fnames.find("I")   # group name
+            else:
+                pos = -1   # prepend other with-field names
+            if pos == -1:
+                fnames = wfld + fnames   # prepend with-field
+            else:
+                fnames = fnames[0:pos] + wfld + fnames[pos:]   # insert with-field
+        return fnames
+
+    # gindex: group index
+    # dsid: dataset id
+    # field: path field name, webpath or savedpath
+    # go up the group tree to find a non-empty path; return it or None
+    def get_group_field_path(self, gindex, dsid, field):
+        if gindex:
+            pgrec = self.pgget("dsgroup", f"pindex, {field}",
+                               f"dsid = '{dsid}' AND gindex = {gindex}", self.PGDBI['EXITLG'])
+        else:
+            pgrec = self.pgget("dataset", field, f"dsid = '{dsid}'", self.PGDBI['EXITLG'])
+        if pgrec:
+            if pgrec[field]:
+                return pgrec[field]
+            elif gindex:
+                return self.get_group_field_path(pgrec['pindex'], dsid, field)
+        return None
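A sketch of the upward walk in get_group_field_path (group indices and paths hypothetical):

    # dsgroup rows for dsid 'd010001': gindex=5 -> pindex=2, webpath=NULL
    #                                  gindex=2 -> pindex=0, webpath='ds010.0/pub'
    # get_group_field_path(5, 'd010001', 'webpath') recurses 5 -> 2 and returns 'ds010.0/pub';
    # gindex 0 falls back to the webpath of the dataset record itself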
+    # get the specialist info for a given dataset
+    def get_specialist(self, dsid, logact = None):
+        if logact is None: logact = self.PGDBI['ERRLOG']
+        if dsid in self.SPECIALIST: return self.SPECIALIST[dsid]
+        pgrec = self.pgget("dsowner, dssgrp", "specialist, lstname, fstname",
+                           "specialist = logname AND dsid = '{}' AND priority = 1".format(dsid), logact)
+        if pgrec:
+            if pgrec['specialist'] in ("datahelp", "dss"):
+                pgrec['lstname'] = "Help"
+                pgrec['fstname'] = "Data"
+        else:
+            pgrec = {'specialist' : "datahelp", 'lstname' : "Help", 'fstname' : "Data"}
+        self.SPECIALIST[dsid] = pgrec   # cache specialist info for dsowner of dsid
+        return pgrec
+
+    # build a customized email from get_email()
+    def build_customized_email(self, table, field, condition, subject, logact = 0):
+        estat = self.FAILURE
+        msg = self.get_email()
+        if not msg: return estat
+        sender = self.PGLOG['CURUID'] + "@ucar.edu"
+        receiver = self.PGLOG['EMLADDR'] if self.PGLOG['EMLADDR'] else (self.PGLOG['CURUID'] + "@ucar.edu")
+        if receiver.find(sender) < 0: self.add_carbon_copy(sender, 1)
+        cc = self.PGLOG['CCDADDR']
+        if not subject: subject = "Message from {}-{}".format(self.PGLOG['HOSTNAME'], self.get_command())
+        estat = self.send_python_email(subject, receiver, msg, sender, cc, logact)
+        if estat != self.SUCCESS:
+            ebuf = "From: {}\nTo: {}\n".format(sender, receiver)
+            if cc: ebuf += "Cc: {}\n".format(cc)
+            ebuf += "Subject: {}!\n\n{}\n".format(subject, msg)
+            if self.PGLOG['EMLSEND']:
+                estat = self.send_customized_email(f"{table}.{condition}", ebuf, logact)
+            if estat != self.SUCCESS:
+                estat = self.cache_customized_email(table, field, condition, ebuf, 0)
+                if estat and logact:
+                    self.pglog("Email to {} cached to '{}.{}' for {}, Subject: {}".format(receiver, table, field, condition, subject), logact)
+        return estat
+
+    # email: full user email address
+    # get the user's real name from table ruser for a given email address
+    #   opts == 1 : include email
+    #   opts == 2 : include org_type
+    #   opts == 4 : include country
+    #   opts == 8 : include valid_email
+    #   opts == 16 : include org
+    def get_ruser_names(self, email, opts = 0, date = None):
+        fields = "lname lstname, fname fstname"
+        if opts&1: fields += ", email"
+        if opts&2: fields += ", org_type"
+        if opts&4: fields += ", country"
+        if opts&8: fields += ", valid_email"
+        if opts&16: fields += ", org"
+        if date:
+            datecond = "rdate <= '{}' AND (end_date IS NULL OR end_date >= '{}')".format(date, date)
+        else:
+            datecond = "end_date IS NULL"
+            date = time.strftime("%Y-%m-%d", (time.gmtime() if self.PGLOG['GMTZ'] else time.localtime()))
+        emcnd = "email = '{}'".format(email)
+        pgrec = self.pgget("ruser", fields, "{} AND {}".format(emcnd, datecond), self.LGEREX)
+        if not pgrec:   # missing user record; log it and retry with a wider date range
+            self.pglog("{}: email not in ruser for {}".format(email, date), self.LOGWRN)
+            # check again if the user is on file with a different date range
+            pgrec = self.pgget("ruser", fields, emcnd, self.LGEREX)
+            if not pgrec and self.pgget("dssdb.user", '', emcnd):
+                fields = "lstname, fstname"
+                if opts&1: fields += ", email"
+                if opts&2: fields += ", org_type"
+                if opts&4: fields += ", country"
+                if opts&8: fields += ", email valid_email"
+                if opts&16: fields += ", org_name org"
+                pgrec = self.pgget("dssdb.user", fields, emcnd, self.LGEREX)
+        if pgrec and pgrec['lstname']:
+            pgrec['name'] = (pgrec['fstname'].capitalize() + ' ') if pgrec['fstname'] else ''
+            pgrec['name'] += pgrec['lstname'].capitalize()
+        else:
+            if not pgrec: pgrec = {}
+            pgrec['name'] = email.split('@')[0]
+            if opts&1: pgrec['email'] = email
+        return pgrec
+
+    # cache a customized email for sending later
+    def cache_customized_email(self, table, field, condition, emlmsg, logact = 0):
+        pgrec = {field: emlmsg}
+        if self.pgupdt(table, pgrec, condition, logact|self.ERRLOG):
+            if logact: self.pglog("Email cached to '{}.{}' for {}".format(table, field, condition), logact&(~self.EXITLG))
+            return self.SUCCESS
+        else:
+            msg = "cache email to '{}.{}' for {}".format(table, field, condition)
+            self.pglog(f"Error {msg}, try to send directly now", logact|self.ERRLOG)
+            return self.send_customized_email(msg, emlmsg, logact)
+
+    # otype: user organization type
+    # email: user email address
+    # return: organization type like DSS, NCAR, UNIV...
+    def get_org_type(self, otype, email):
+        if not otype: otype = "OTHER"
+        if email:
+            ms = re.search(r'(@|\.)ucar\.edu$', email)
+            if ms:
+                mc = ms.group(1)
+                if otype == 'UCAR' or otype == 'OTHER': otype = 'NCAR'
+                if otype == 'NCAR' and mc == '@':
+                    ms = re.match(r'^(.+)@', email)
+                    if ms and self.pgget("dssgrp", "", "logname = '{}'".format(ms.group(1))): otype = 'DSS'
+            else:
+                ms = re.search(r'\.(mil|org|gov|edu|com|net)(\.\w\w|$)', email)
+                if ms:
+                    otype = ms.group(1).upper()
+                    if otype == 'EDU': otype = "UNIV"
+        return otype
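Hypothetical examples of the mapping in get_org_type:

    # get_org_type(None, 'jdoe@ucar.edu')     -> 'NCAR' (or 'DSS' if jdoe is in dssgrp)
    # get_org_type('OTHER', 'a@colorado.edu') -> 'UNIV'
    # get_org_type('', 'b@agency.gov')        -> 'GOV'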
+    # join values and handle null values
+    @staticmethod
+    def join_values(vstr, vals):
+        if vstr:
+            vstr += "\n"
+        elif vstr is None:
+            vstr = ''
+        return "{}Value{}({})".format(vstr, ('s' if len(vals) > 1 else ''), ', '.join(map(str, vals)))
+
+    # check table hostname to find the system down times; cache the result for 10 minutes
+    def get_system_downs(self, hostname, logact = 0):
+        curtime = int(time.time())
+        newhost = 0
+        if hostname not in self.SYSDOWN:
+            self.SYSDOWN[hostname] = {}
+            newhost = 1
+        if newhost or (curtime - self.SYSDOWN[hostname]['chktime']) > 600:
+            self.SYSDOWN[hostname]['chktime'] = curtime
+            self.SYSDOWN[hostname]['start'] = 0
+            self.SYSDOWN[hostname]['end'] = 0
+            self.SYSDOWN[hostname]['active'] = 1
+            self.SYSDOWN[hostname]['path'] = None
+            pgrec = self.pgget('hostname', 'service, domain, downstart, downend',
+                               "hostname = '{}'".format(hostname), logact)
+            if pgrec:
+                if pgrec['service'] == 'N':
+                    self.SYSDOWN[hostname]['start'] = curtime
+                    self.SYSDOWN[hostname]['active'] = 0
+                else:
+                    start = int(datetime.timestamp(pgrec['downstart'])) if pgrec['downstart'] else 0
+                    end = int(datetime.timestamp(pgrec['downend'])) if pgrec['downend'] else 0
+                    if start > 0 and (end == 0 or end > curtime):
+                        self.SYSDOWN[hostname]['start'] = start
+                        self.SYSDOWN[hostname]['end'] = end
+                    if pgrec['service'] == 'S' and pgrec['domain'] and re.match(r'^/', pgrec['domain']):
+                        self.SYSDOWN[hostname]['path'] = pgrec['domain']
+        self.SYSDOWN[hostname]['curtime'] = curtime
+        return self.SYSDOWN[hostname]
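The 10-minute cache discipline used by get_system_downs, in isolation (query_status stands in for the real database lookup):

    import time

    CACHE = {}
    def query_status(host): return 'up'     # hypothetical stand-in
    def get_status(host, ttl=600):
        ent = CACHE.get(host)
        if ent is None or time.time() - ent['chktime'] > ttl:
            ent = {'chktime': time.time(), 'status': query_status(host)}
            CACHE[host] = ent
        return ent['status']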
+    # return seconds for how long the system will continue to be down
+    def system_down_time(self, hostname, offset, logact = 0):
+        down = self.get_system_downs(hostname, logact)
+        if down['start'] and down['curtime'] >= (down['start'] - offset):
+            if not down['end']:
+                if self.PGLOG['PGBATCH'] == self.PGLOG['PBSNAME']:
+                    return self.PGLOG['PBSTIME']
+            elif down['curtime'] <= down['end']:
+                return (down['end'] - down['curtime'])
+        return 0   # the system is not down
+
+    # return a string message if the system is down
+    def system_down_message(self, hostname, path, offset, logact = 0):
+        down = self.get_system_downs(hostname, logact)
+        msg = None
+        if down['start'] and down['curtime'] >= (down['start'] - offset):
+            match = self.match_down_path(path, down['path'])
+            if match:
+                msg = "{}{}:".format(hostname, ('-' + path) if match > 0 else '')
+                if not down['active']:
+                    msg += " Not in Service"
+                else:
+                    msg += " Planned down, started at " + self.current_datetime(down['start'])
+                    if not down['end']:
+                        msg += " And no end time specified"
+                    elif down['curtime'] <= down['end']:
+                        msg += " And will end by " + self.current_datetime(down['end'])
+        return msg
+
+    # return 1 if the given path matches the daemon paths, 0 if not; -1 if they cannot be compared
+    @staticmethod
+    def match_down_path(path, dpaths):
+        if not (path and dpaths): return -1
+        paths = re.split(':', dpaths)
+        for p in paths:
+            if re.match(r'^{}'.format(p), path): return 1
+        return 0
+
+    # validate that the login user is in the DECS group;
+    # check all nodes if skpdsg is false, otherwise check non-DSG nodes only
+    def validate_decs_group(self, cmdname, logname, skpdsg):
+        if skpdsg and self.PGLOG['DSGHOSTS'] and re.search(r'(^|:){}'.format(self.PGLOG['HOSTNAME']), self.PGLOG['DSGHOSTS']): return
+        if not logname: logname = self.PGLOG['CURUID']
+        if not self.pgget("dssgrp", '', "logname = '{}'".format(logname), self.LGEREX):
+            self.pglog("{}: Must be in DECS Group to run '{}' on {}".format(logname, cmdname, self.PGLOG['HOSTNAME']), self.LGEREX)
+
+    # add an allusage record into a yearly table; create a new yearly table if it does not exist
+    #   year -- year to identify the yearly table, evaluated if missing
+    #   records -- dict holding one or multiple records.
+    #      Dict keys: email -- user email address
+    #                 org_type -- organization type
+    #                 country -- country code
+    #                 dsid -- dataset ID
+    #                 date -- date data accessed
+    #                 time -- time data accessed
+    #                 quarter -- quarter of the year data accessed
+    #                 size -- bytes of data accessed
+    #                 method -- delivery methods: MSS,Web,Ftp,Tape,Cd,Disk,Paper,cArt,Micro
+    #                 source -- usage source flag: W - wusage, O - ordusage
+    #                 midx -- refer to mbr2loc.midx if not 0
+    #                 ip -- user IP address
+    #                 region -- user region name; for example, Colorado
+    #   isarray -- if true, multiple records are provided via arrays for each dict key
+    #   docheck -- if 1, check and add only if the record is not on file
+    #   docheck -- if 2, check and add if the record is not on file, and update if it exists
+    #   docheck -- if 4, as 2, but also matching records with NULL email values
+    def add_yearly_allusage(self, year, records, isarray = 0, docheck = 0):
+        acnt = 0
+        if not year:
+            ms = re.match(r'^(\d\d\d\d)', str(records['date'][0] if isarray else records['date']))
+            if ms: year = ms.group(1)
+        tname = "allusage_{}".format(year)
+        if isarray:
+            cnt = len(records['email'])
+            if 'quarter' not in records: records['quarter'] = [0]*cnt
+            for i in range(cnt):
+                if not records['quarter'][i]:
+                    ms = re.search(r'-(\d+)-', str(records['date'][i]))
+                    if ms: records['quarter'][i] = int((int(ms.group(1))-1)/3)+1
+            if docheck:
+                for i in range(cnt):
+                    record = {}
+                    for key in records:
+                        record[key] = records[key][i]
+                    cnd = "email = '{}' AND dsid = '{}' AND method = '{}' AND date = '{}' AND time = '{}'".format(
+                          record['email'], record['dsid'], record['method'], record['date'], record['time'])
+                    pgrec = self.pgget(tname, 'aidx', cnd, self.LOGERR|self.ADDTBL)
+                    if docheck == 4 and not pgrec:
+                        cnd = "email IS NULL AND dsid = '{}' AND method = '{}' AND date = '{}' AND time = '{}'".format(
+                              record['dsid'], record['method'], record['date'], record['time'])
+                        pgrec = self.pgget(tname, 'aidx', cnd, self.LOGERR|self.ADDTBL)
+                    if pgrec:
+                        if docheck > 1: acnt += self.pgupdt(tname, record, "aidx = {}".format(pgrec['aidx']), self.LGEREX)
+                    else:
+                        acnt += self.pgadd(tname, record, self.LGEREX|self.ADDTBL)
+            else:
+                acnt = self.pgmadd(tname, records, self.LGEREX|self.ADDTBL)
+        else:
+            record = records
+            if not ('quarter' in record and record['quarter']):
+                ms = re.search(r'-(\d+)-', str(record['date']))
+                if ms: record['quarter'] = int((int(ms.group(1))-1)/3)+1
+            if docheck:
+                cnd = "email = '{}' AND dsid = '{}' AND method = '{}' AND date = '{}' AND time = '{}'".format(
+                      record['email'], record['dsid'], record['method'], record['date'], record['time'])
+                pgrec = self.pgget(tname, 'aidx', cnd, self.LOGERR|self.ADDTBL)
+                if docheck == 4 and not pgrec:
+                    cnd = "email IS NULL AND dsid = '{}' AND method = '{}' AND date = '{}' AND time = '{}'".format(
+                          record['dsid'], record['method'], record['date'], record['time'])
+                    pgrec = self.pgget(tname, 'aidx', cnd, self.LOGERR|self.ADDTBL)
+                if pgrec:
+                    if docheck > 1: acnt = self.pgupdt(tname, record, "aidx = {}".format(pgrec['aidx']), self.LGEREX)
+                    return acnt
+            acnt = self.pgadd(tname, record, self.LGEREX|self.ADDTBL)
+        return acnt
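The quarter derivation used above, factored out and checked:

    import re
    def quarter_of(date_str):
        ms = re.search(r'-(\d+)-', str(date_str))
        return int((int(ms.group(1)) - 1) / 3) + 1 if ms else 0

    assert quarter_of('2024-02-15') == 1
    assert quarter_of('2024-12-01') == 4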
+    # add a wusage record into a yearly table; create a new yearly table if it does not exist
+    #   year -- year to identify the yearly table, evaluated if missing
+    #   records -- dict holding one or multiple records.
+    #      Dict keys: wid - reference to wfile.wid
+    #                 wuid_read - reference to wuser.wuid, 0 if missing email
+    #                 dsid - reference to dataset.dsid at the time of read
+    #                 date_read - date file read
+    #                 time_read - time file read
+    #                 quarter - quarter of the year data accessed
+    #                 size_read - bytes of data read
+    #                 method - download methods: WEB, CURL, MGET and FTP
+    #                 locflag - location flag: Glade or Object
+    #                 ip - IP address
+    #   isarray -- if true, multiple records are provided via arrays for each dict key
+    def add_yearly_wusage(self, year, records, isarray = 0):
+        acnt = 0
+        if not year:
+            ms = re.match(r'^(\d\d\d\d)', str(records['date_read'][0] if isarray else records['date_read']))
+            if ms: year = ms.group(1)
+        tname = "wusage_{}".format(year)
+        if isarray:
+            if 'quarter' not in records:
+                cnt = len(records['wid'])
+                records['quarter'] = [0]*cnt
+                for i in range(cnt):
+                    ms = re.search(r'-(\d+)-', str(records['date_read'][i]))
+                    if ms: records['quarter'][i] = (int((int(ms.group(1))-1)/3)+1)
+            acnt = self.pgmadd(tname, records, self.LGEREX|self.ADDTBL)
+        else:
+            record = records
+            if 'quarter' not in record:
+                ms = re.search(r'-(\d+)-', str(record['date_read']))
+                if ms: record['quarter'] = (int((int(ms.group(1))-1)/3)+1)
+            acnt = self.pgadd(tname, record, self.LGEREX|self.ADDTBL)
+        return acnt
+
+    # double quote an array of single or sign-delimited strings
+    def pgnames(self, ary, sign = None, joinstr = None):
+        pgary = []
+        for a in ary:
+            pgary.append(self.pgname(a, sign))
+        if joinstr is None:
+            return pgary
+        else:
+            return joinstr.join(pgary)
+
+    # double quote a single or sign-delimited string
+    def pgname(self, str, sign = None):
+        if sign:
+            nstr = ''
+            names = str.split(sign[0])
+            for name in names:
+                if nstr: nstr += sign[0]
+                nstr += self.pgname(name, sign[1:])
+        else:
+            nstr = str.strip()
+            if nstr and nstr.find('"') < 0:
+                if not re.match(r'^[a-z_][a-z0-9_]*$', nstr) or nstr in self.PGRES:
+                    nstr = '"{}"'.format(nstr)
+        return nstr
+
+    # get a postgres password for the given host, port, dbname and username
+    def get_pgpass_password(self):
+        if self.PGDBI['PWNAME']: return self.PGDBI['PWNAME']
+        pwname = self.get_baopassword()
+        if not pwname: pwname = self.get_pgpassword()
+        return pwname
+
+    # get the pg passwords from file .pgpass
+    def get_pgpassword(self):
+        if not self.DBPASS: self.read_pgpass()
+        dbport = str(self.PGDBI['DBPORT']) if self.PGDBI['DBPORT'] else '5432'
+        pwname = self.DBPASS.get((self.PGDBI['DBSHOST'], dbport, self.PGDBI['DBNAME'], self.PGDBI['LNNAME']))
+        if not pwname: pwname = self.DBPASS.get((self.PGDBI['DBHOST'], dbport, self.PGDBI['DBNAME'], self.PGDBI['LNNAME']))
+        return pwname
+
+    # get the pg passwords from OpenBao
+    def get_baopassword(self):
+        dbname = self.PGDBI['DBNAME']
+        if dbname not in self.DBBAOS: self.read_openbao()
+        return self.DBBAOS[dbname].get(self.PGDBI['LNNAME'])
+
+    # read the .pgpass file and cache the credentials in self.DBPASS
+    def read_pgpass(self):
+        pgpass = self.PGLOG['DSSHOME'] + '/.pgpass'
+        if not op.isfile(pgpass): pgpass = self.PGLOG['GDEXHOME'] + '/.pgpass'
+        try:
+            with open(pgpass, "r") as f:
+                for line in f:
+                    line = line.strip()
+                    if not line or line.startswith("#"): continue
+                    dbhost, dbport, dbname, lnname, pwname = line.split(":")
+                    self.DBPASS[(dbhost, dbport, dbname, lnname)] = pwname
+        except Exception as e:
+            self.pglog(str(e), self.PGDBI['ERRLOG'])
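.pgpass entries are colon-separated, hostname:port:database:username:password, which is what the split(':') above expects (the simple split assumes no escaped colons inside the password):

    line = 'dbhost:5432:rdadb:dbuser:secret'   # made-up entry
    dbhost, dbport, dbname, lnname, pwname = line.split(':')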
+    # read OpenBao secrets and cache the credentials in self.DBBAOS
+    def read_openbao(self):
+        dbname = self.PGDBI['DBNAME']
+        self.DBBAOS[dbname] = {}
+        url = 'https://bao.k8s.ucar.edu/'
+        baopath = {
+            'ivaddb' : 'gdex/pgdb03',
+            'ispddb' : 'gdex/pgdb03',
+            'default' : 'gdex/pgdb01'
+        }
+        dbpath = baopath[dbname] if dbname in baopath else baopath['default']
+        client = hvac.Client(url=self.PGDBI.get('BAOURL', url))
+        client.token = self.PGLOG.get('BAOTOKEN')
+        try:
+            read_response = client.secrets.kv.v2.read_secret_version(
+                path=dbpath,
+                mount_point='kv',
+                raise_on_deleted_version=False
+            )
+        except Exception as e:
+            return self.pglog(str(e), self.PGDBI['ERRLOG'])
+        baos = read_response['data']['data']
+        for key in baos:
+            ms = re.match(r'^(\w*)pass(\w*)$', key)
+            if not ms: continue
+            baoname = None
+            pre = ms.group(1)
+            suf = ms.group(2)
+            if pre:
+                baoname = 'metadata' if pre == 'meta' else pre
+            elif suf == 'word':
+                baoname = 'postgres'
+            if baoname: self.DBBAOS[dbname][baoname] = baos[key]
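An end-to-end sketch of the query/update helpers above (module and class names assumed, since only this hunk is visible; table, field and condition values hypothetical):

    from rda_python_common.pg_dbi import PgDBI   # assumed module/class name

    pgdbi = PgDBI()
    rec = pgdbi.pgget('dataset', 'dsid, title', "dsid = 'd010001'")     # one record as a dict
    if rec:
        pgdbi.pgupdt('dataset', {'title': 'Renamed'}, "dsid = 'd010001'")
    pgdbi.pghdel('wfile', {'dsid': 'd010001', 'wfile': 'old.nc'})       # dict-style condition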
diff --git a/src/rda_python_common/pg_file.py b/src/rda_python_common/pg_file.py
new file mode 100644
index 0000000..93e13d9
--- /dev/null
+++ b/src/rda_python_common/pg_file.py
@@ -0,0 +1,2462 @@
+#
+###############################################################################
+#
+#     Title : pg_file.py
+#    Author : Zaihua Ji, zji@ucar.edu
+#      Date : 08/05/2020
+#             2025-01-10 transferred to package rda_python_common from
+#             https://github.com/NCAR/rda-shared-libraries.git
+#             2025-12-01 convert to class PgFile
+#   Purpose : python library module to copy, move and delete data files locally
+#             and remotely
+#
+#    Github : https://github.com/NCAR/rda-python-common.git
+#
+###############################################################################
+#
+import sys
+import os
+from os import path as op
+import pwd
+import grp
+import stat
+import re
+import time
+import glob
+import json
+from .pg_util import PgUtil
+from .pg_sig import PgSIG
+
+class PgFile(PgUtil, PgSIG):
+
+    CMDBTH = (0x0033)   # return both stdout and stderr, 16 + 32 + 2 + 1
+    RETBTH = (0x0030)   # return both stdout and stderr, 16 + 32
+    CMDRET = (0x0110)   # return stdout and save error, 16 + 256
+    CMDERR = (0x0101)   # display command and save error, 1 + 256
+    CMDGLB = (0x0313)   # return stdout and save error for globus, 1+2+16+256+512
+
+    def __init__(self):
+        super().__init__()   # initialize parent classes
+        self.PGCMPS = {
+            # extension   Compress         Uncompress        ArchiveFormat
+            'Z'   : ['compress -f', 'uncompress -f', 'Z'],
+            'zip' : ['zip', 'unzip', 'ZIP'],
+            'gz'  : ['gzip', 'gunzip', 'GZ'],
+            'xz'  : ['xz', 'unxz', 'XZ'],
+            'bz2' : ['bzip2', 'bunzip2', 'BZ2']
+        }
+        self.CMPSTR = '|'.join(self.PGCMPS)
+        self.PGTARS = {
+            # extension    Packing        Unpacking     ArchiveFormat
+            'tar'     : ['tar -cvf', 'tar -xvf', 'TAR'],
+            'tar.Z'   : ['tar -Zcvf', 'tar -xvf', 'TAR.Z'],
+            'zip'     : ['zip -v', 'unzip -v', 'ZIP'],
+            'tgz'     : ['tar -zcvf', 'tar -xvf', 'TGZ'],
+            'tar.gz'  : ['tar -zcvf', 'tar -xvf', 'TAR.GZ'],
+            'txz'     : ['tar -cvJf', 'tar -xvf', 'TXZ'],
+            'tar.xz'  : ['tar -cvJf', 'tar -xvf', 'TAR.XZ'],
+            'tbz2'    : ['tar -cvjf', 'tar -xvf', 'TBZ2'],
+            'tar.bz2' : ['tar -cvjf', 'tar -xvf', 'TAR.BZ2']
+        }
+        self.TARSTR = '|'.join(self.PGTARS)
+        self.TASKIDS = {}   # cache unfinished Globus task ids
+        self.LHOST = "localhost"
+        self.OHOST = self.PGLOG['OBJCTSTR']
+        self.BHOST = self.PGLOG['BACKUPNM']
+        self.DHOST = self.PGLOG['DRDATANM']
+        self.OBJCTCMD = "isd_s3_cli"
+        self.BACKCMD = "dsglobus"
+        self.DIRLVLS = 0
+        # record how many errors happened while working with HPSS, local or remote machines
+        self.ECNTS = {'D' : 0, 'H' : 0, 'L' : 0, 'R' : 0, 'O' : 0, 'B' : 0}
+        # upper limits for how many consecutive errors are allowed
+        self.ELMTS = {'D' : 20, 'H' : 20, 'L' : 20, 'R' : 20, 'O' : 10, 'B' : 10}
+        # down storage hostnames & paths
+        self.DHOSTS = {
+            'G' : self.PGLOG['GPFSNAME'],
+            'O' : self.OHOST,
+            'B' : self.BHOST,
+            'D' : self.DHOST
+        }
+        self.DPATHS = {
+            'G' : self.PGLOG['DSSDATA'],
+            'O' : self.PGLOG['OBJCTBKT'],
+            'B' : '/' + self.PGLOG['DEFDSID'],   # backup globus endpoint
+            'D' : '/' + self.PGLOG['DEFDSID']    # disaster recovery globus endpoint
+        }
+        self.QSTATS = {
+            'A' : 'ACTIVE',
+            'I' : 'INACTIVE',
+            'S' : 'SUCCEEDED',
+            'F' : 'FAILED',
+        }
+        self.QPOINTS = {
+            'L' : 'gdex-glade',
+            'B' : 'gdex-quasar',
+            'D' : 'gdex-quasar-drdata'
+        }
+        self.QHOSTS = {
+            'gdex-glade' : self.LHOST,
+            'gdex-quasar' : self.BHOST,
+            'gdex-quasar-drdata' : self.DHOST
+        }
+        self.ENDPOINTS = {
+            'gdex-glade' : "NCAR GDEX GLADE",
+            'gdex-quasar' : "NCAR GDEX Quasar",
+            'gdex-quasar-drdata' : "NCAR GDEX Quasar DRDATA"
+        }
+        self.BFILES = {}   # cache backup file names and dates for each bid
+
+    # reset the upper limit for a specified error type
+    def reset_error_limit(self, etype, lmt):
+        self.ELMTS[etype] = lmt
+
+    # wrapper of self.pglog() that shows an error without a fatal exit on the first call, so the action can be retried
+    def errlog(self, msg, etype, retry = 0, logact = 0):
+        bckgrnd = self.PGLOG['BCKGRND']
+        logact |= self.ERRLOG
+        if not retry:
+            if msg and not re.search(r'\n$', msg): msg += "\n"
+            msg += "[The same execution will be retried in {} Seconds]".format(self.PGSIG['ETIME'])
+            self.PGLOG['BCKGRND'] = 1
+            logact &= ~(self.EMEROL|self.EXITLG)
+        elif self.ELMTS[etype]:
+            self.ECNTS[etype] += 1
+            if self.ECNTS[etype] >= self.ELMTS[etype]:
+                logact |= self.EXITLG
+                self.ECNTS[etype] = 0
+        if self.PGLOG['DSCHECK'] and logact&self.EXITLG: self.record_dscheck_error(msg, logact)
+        self.pglog(msg, logact)
+        self.PGLOG['BCKGRND'] = bckgrnd
+        if not retry: time.sleep(self.PGSIG['ETIME'])
+        return self.FAILURE
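The retry discipline behind errlog and the copy/delete loops below, in isolation (the 30-second wait is a placeholder for PGSIG['ETIME']):

    import time
    def with_retry(action, retries=1, wait=30):
        for attempt in range(retries + 1):
            if action(): return True
            if attempt < retries: time.sleep(wait)   # pause before the retry, as errlog does
        return False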
+    # Copy a file from one host (including the local host) to another host (including the local host);
+    # copying from a remote host to another remote host is excluded; copying in background is permitted
+    #   tofile - target file name
+    #   fromfile - source file name
+    #   tohost - target host name, defaults to self.LHOST
+    #   fromhost - original host name, defaults to self.LHOST
+    # Return 1 if successful, 0 if failed with the error message generated in self.pgsystem() cached in self.PGLOG['SYSERR']
+    def copy_gdex_file(self, tofile, fromfile, tohost = None, fromhost = None, logact = 0):
+        if tohost is None: tohost = self.LHOST
+        if fromhost is None: fromhost = self.LHOST
+        thost = self.strip_host_name(tohost)
+        fhost = self.strip_host_name(fromhost)
+        if self.pgcmp(thost, fhost, 1) == 0:
+            if self.pgcmp(thost, self.LHOST, 1) == 0:
+                return self.local_copy_local(tofile, fromfile, logact)
+        elif self.pgcmp(fhost, self.LHOST, 1) == 0:
+            if self.pgcmp(thost, self.OHOST, 1) == 0:
+                return self.local_copy_object(tofile, fromfile, None, None, logact)
+            elif self.pgcmp(thost, self.BHOST, 1) == 0:
+                return self.local_copy_backup(tofile, fromfile, self.QPOINTS['B'], logact)
+            elif self.pgcmp(thost, self.DHOST, 1) == 0:
+                return self.local_copy_backup(tofile, fromfile, self.QPOINTS['D'], logact)
+            else:
+                return self.local_copy_remote(tofile, fromfile, tohost, logact)
+        elif self.pgcmp(thost, self.LHOST, 1) == 0:
+            if self.pgcmp(fhost, self.OHOST, 1) == 0:
+                return self.object_copy_local(tofile, fromfile, None, logact)
+            elif self.pgcmp(fhost, self.BHOST, 1) == 0:
+                return self.backup_copy_local(tofile, fromfile, self.QPOINTS['B'], logact)
+            elif self.pgcmp(fhost, self.DHOST, 1) == 0:
+                return self.backup_copy_local(tofile, fromfile, self.QPOINTS['D'], logact)
+            else:
+                return self.remote_copy_local(tofile, fromfile, fromhost, logact)
+        return self.errlog("{}-{}->{}-{}: Cannot copy file".format(fhost, fromfile, thost, tofile), 'O', 1, self.LGEREX)
+    copy_rda_file = copy_gdex_file
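Hypothetical calls showing the host dispatch in copy_gdex_file:

    pgf = PgFile()
    pgf.copy_gdex_file('/data/d010001/new.nc', '/tmp/new.nc')                     # local -> local
    pgf.copy_gdex_file('/d010001/back.tar', '/data/d010001/back.tar', pgf.BHOST)  # local -> Quasar backup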
+    # Copy a file locally
+    #   tofile - target file name
+    #   fromfile - source file name
+    def local_copy_local(self, tofile, fromfile, logact = 0):
+        finfo = self.check_local_file(fromfile, 0, logact)
+        if not finfo:
+            if finfo != None: return self.FAILURE
+            return self.lmsg(fromfile, "{} to copy to {}".format(self.PGLOG['MISSFILE'], tofile), logact)
+        target = tofile
+        ms = re.match(r'^(.+)/$', tofile)
+        if ms:
+            dir = ms.group(1)
+            tofile += op.basename(fromfile)
+        else:
+            dir = self.get_local_dirname(tofile)
+        if not self.make_local_directory(dir, logact): return self.FAILURE
+        cmd = "cp -{} {} {}".format(('f' if finfo['isfile'] else "rf"), fromfile, target)
+        reset = loop = 0
+        while (loop-reset) < 2:
+            info = None
+            self.PGLOG['ERR2STD'] = ['are the same file']
+            ret = self.pgsystem(cmd, logact, self.CMDERR)
+            self.PGLOG['ERR2STD'] = []
+            if ret:
+                info = self.check_local_file(tofile, 143, logact)   # 1+2+4+8+128
+                if info:
+                    if not info['isfile']:
+                        self.set_local_mode(tofile, 0, 0, info['mode'], info['logname'], logact)
+                        return self.SUCCESS
+                    elif info['data_size'] == finfo['data_size']:
+                        self.set_local_mode(tofile, 1, 0, info['mode'], info['logname'], logact)
+                        return self.SUCCESS
+                elif info != None:
+                    break
+            if self.PGLOG['SYSERR']:
+                errmsg = self.PGLOG['SYSERR']
+            else:
+                errmsg = "Error of '{}': Missing target file {}".format(cmd, tofile)
+            self.errlog(errmsg, 'L', (loop - reset), logact)
+            if loop == 0: reset = self.reset_local_info(tofile, info, logact)
+            loop += 1
+        return self.FAILURE
+
+    # Copy a local file to a remote host
+    #   tofile - target file name
+    #   fromfile - source file name
+    #   host - remote host name
+    def local_copy_remote(self, tofile, fromfile, host, logact = 0):
+        finfo = self.check_local_file(fromfile, 0, logact)
+        if not finfo:
+            if finfo != None: return self.FAILURE
+            return self.lmsg(fromfile, "{} to copy to {}-{}".format(self.PGLOG['MISSFILE'], host, tofile), logact)
+        target = tofile
+        ms = re.match(r'^(.+)/$', tofile)
+        if ms:
+            dir = ms.group(1)
+            tofile += op.basename(fromfile)
+        else:
+            dir = op.dirname(tofile)
+        if not self.make_remote_directory(dir, host, logact): return self.FAILURE
+        cmd = self.get_sync_command(host)
+        cmd += " {} {}".format(fromfile, target)
+        for loop in range(2):
+            if self.pgsystem(cmd, logact, self.CMDERR):
+                info = self.check_remote_file(tofile, host, 0, logact)
+                if info:
+                    if not finfo['isfile']:
+                        self.set_remote_mode(tofile, 0, host, self.PGLOG['EXECMODE'])
+                        return self.SUCCESS
+                    elif info['data_size'] == finfo['data_size']:
+                        self.set_remote_mode(tofile, 1, host, self.PGLOG['FILEMODE'])
+                        return self.SUCCESS
+                elif info != None:
+                    break
+            self.errlog(self.PGLOG['SYSERR'], 'R', loop, logact)
+        return self.FAILURE
+
+    # Copy a local file to the object store
+    #   tofile - target file name
+    #   fromfile - source file name
+    #   bucket - bucket name on the object store
+    #   meta - dict of metadata
+    def local_copy_object(self, tofile, fromfile, bucket = None, meta = None, logact = 0):
+        if not bucket: bucket = self.PGLOG['OBJCTBKT']
+        if meta is None: meta = {}
+        if 'user' not in meta: meta['user'] = self.PGLOG['CURUID']
+        if 'group' not in meta: meta['group'] = self.PGLOG['GDEXGRP']
+        uinfo = json.dumps(meta)
+        finfo = self.check_local_file(fromfile, 0, logact)
+        if not finfo:
+            if finfo != None: return self.FAILURE
+            return self.lmsg(fromfile, "{} to copy to {}-{}".format(self.PGLOG['MISSFILE'], self.OHOST, tofile), logact)
+        if not logact&self.OVRIDE:
+            tinfo = self.check_object_file(tofile, bucket, 0, logact)
+            if tinfo and tinfo['data_size'] > 0:
+                return self.pglog("{}-{}-{}: file exists already".format(self.OHOST, bucket, tofile), logact)
+        cmd = "{} ul -lf {} -b {} -k {} -md '{}'".format(self.OBJCTCMD, fromfile, bucket, tofile, uinfo)
+        for loop in range(2):
+            buf = self.pgsystem(cmd, logact, self.CMDBTH)
+            tinfo = self.check_object_file(tofile, bucket, 0, logact)
+            if tinfo:
+                if tinfo['data_size'] == finfo['data_size']:
+                    return self.SUCCESS
+            elif tinfo != None:
+                break
+            self.errlog("Error Execute: {}\n{}".format(cmd, buf), 'O', loop, logact)
+        return self.FAILURE
+
+    # Copy multiple files from one Globus endpoint to another
+    #   tofiles - target file name list, each name leading with /dsnnn.n/ on Quasar and
+    #             with /data/ or /decsdata/ on local glade disk
+    #   fromfiles - source file name list, in the same format as tofiles
+    #   topoint - target endpoint name, 'gdex-glade', 'gdex-quasar' or 'gdex-quasar-drdata'
+    #   frompoint - source endpoint name, the same choices as topoint
+    def quasar_multiple_trasnfer(self, tofiles, fromfiles, topoint, frompoint, logact = 0):
+        ret = self.FAILURE
+        fcnt = len(fromfiles)
+        transfer_files = {"files": []}
+        for i in range(fcnt):
+            transfer_files["files"].append({
+                "source_file": fromfiles[i],
+                "destination_file": tofiles[i]
+            })
+        qstr = json.dumps(transfer_files)
+        action = 'transfer'
+        source_endpoint = frompoint
+        destination_endpoint = topoint
+        label = f"{self.ENDPOINTS[frompoint]} to {self.ENDPOINTS[topoint]} {action}"
+        verify_checksum = True
+        cmd = f'{self.BACKCMD} {action} -se {source_endpoint} -de {destination_endpoint} --label "{label}"'
+        if verify_checksum:
+            cmd += ' -vc'
+        cmd += ' --batch -'
+        task = self.submit_globus_task(cmd, topoint, logact, qstr)
+        if task['stat'] == 'S':
+            ret = self.SUCCESS
+        elif task['stat'] == 'A':
+            self.TASKIDS["{}-{}".format(topoint, tofiles[0])] = task['id']
+            ret = self.FINISH
+        return ret
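The --batch payload built above has this shape (file names hypothetical):

    {"files": [
        {"source_file": "/data/d010001/a.nc", "destination_file": "/d010001/a.nc"},
        {"source_file": "/data/d010001/b.nc", "destination_file": "/d010001/b.nc"}
    ]}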
+    # Copy a file from one Globus endpoint to another
+    #   tofile - target file name, leading with /dsnnn.n/ on Quasar and
+    #            with /data/ or /decsdata/ on local glade disk
+    #   fromfile - source file, in the same format as tofile
+    #   topoint - target endpoint name, 'gdex-glade', 'gdex-quasar' or 'gdex-quasar-drdata'
+    #   frompoint - source endpoint name, the same choices as topoint
+    def endpoint_copy_endpoint(self, tofile, fromfile, topoint, frompoint, logact = 0):
+        ret = self.FAILURE
+        finfo = self.check_globus_file(fromfile, frompoint, 0, logact)
+        if not finfo:
+            if finfo != None: return ret
+            return self.lmsg(fromfile, "{} to copy {} file to {}-{}".format(self.PGLOG['MISSFILE'], frompoint, topoint, tofile), logact)
+        if not logact&self.OVRIDE:
+            tinfo = self.check_globus_file(tofile, topoint, 0, logact)
+            if tinfo and tinfo['data_size'] > 0:
+                return self.pglog("{}-{}: file exists already".format(topoint, tofile), logact)
+        action = 'transfer'
+        cmd = f'{self.BACKCMD} {action} -se {frompoint} -de {topoint} -sf {fromfile} -df {tofile} -vc'
+        task = self.submit_globus_task(cmd, topoint, logact)
+        if task['stat'] == 'S':
+            ret = self.SUCCESS
+        elif task['stat'] == 'A':
+            self.TASKIDS["{}-{}".format(topoint, tofile)] = task['id']
+            ret = self.FINISH
+        return ret
+
+    # submit a globus task and return a task id
+    def submit_globus_task(self, cmd, endpoint, logact = 0, qstr = None):
+        task = {'id' : None, 'stat' : 'U'}
+        loop = reset = 0
+        while (loop-reset) < 2:
+            buf = self.pgsystem(cmd, logact, self.CMDGLB, qstr)
+            syserr = self.PGLOG['SYSERR']
+            if buf and buf.find('a task has been created') > -1:
+                ms = re.search(r'Task ID:\s+(\S+)', buf)
+                if ms:
+                    task['id'] = ms.group(1)
+                    lp = 0
+                    while lp < 2:
+                        task['stat'] = self.check_globus_status(task['id'], endpoint, logact)
+                        if task['stat'] == 'S': break
+                        time.sleep(self.PGSIG['ETIME'])
+                        lp += 1
+                    if task['stat'] == 'S' or task['stat'] == 'A': break
+                    if task['stat'] == 'F' and not syserr: break
+            errmsg = "Error Execute: " + cmd
+            if qstr: errmsg += " with stdin:\n" + qstr
+            if syserr:
+                errmsg += "\n" + syserr
+            (hstat, msg) = self.host_down_status('', self.QHOSTS[endpoint], 1, logact)
+            if hstat: errmsg += "\n" + msg
+            self.errlog(errmsg, 'B', (loop - reset), logact)
+            if loop == 0 and syserr and syserr.find('This user has too many pending jobs') > -1: reset = 1
+            loop += 1
+        if task['stat'] == 'S' or task['stat'] == 'A': self.ECNTS['B'] = 0   # reset error count
+        return task
+
+    # check Globus transfer status for the given taskid; cancel the task
+    # if self.NOWAIT is set and Details is neither OK nor Queued
+    def check_globus_status(self, taskid, endpoint = None, logact = 0):
+        ret = 'U'
+        if not taskid: return ret
+        if not endpoint: endpoint = self.PGLOG['BACKUPEP']
+        mp = r'Status:\s+({})'.format('|'.join(self.QSTATS.values()))
+        cmd = f"{self.BACKCMD} get-task {taskid}"
+        astats = ['OK', 'Queued']
+        for loop in range(2):
+            buf = self.pgsystem(cmd, logact, self.CMDRET)
+            if buf:
+                ms = re.search(mp, buf)
+                if ms:
+                    ret = ms.group(1)[0]
+                    if ret == 'A':
+                        ms = re.search(r'Details:\s+(\S+)', buf)
+                        if ms:
+                            detail = ms.group(1)
+                            if detail not in astats:
+                                if logact&self.NOWAIT:
+                                    errmsg = "{}: Cancel Task due to {}:\n{}".format(taskid, detail, buf)
+                                    self.errlog(errmsg, 'B', 1, logact)
+                                    ccmd = f"{self.BACKCMD} cancel-task {taskid}"
+                                    self.pgsystem(ccmd, logact, 7)
+                                else:
+                                    time.sleep(self.PGSIG['ETIME'])
+                                    continue
+                    break
+            errmsg = "Error Execute: " + cmd
+            if self.PGLOG['SYSERR']:
+                errmsg += "\n" + self.PGLOG['SYSERR']
+            (hstat, msg) = self.host_down_status('', self.QHOSTS[endpoint], 1, logact)
+            if hstat: errmsg += "\n" + msg
+            self.errlog(errmsg, 'B', loop, logact)
+        if ret == 'S' or ret == 'A': self.ECNTS['B'] = 0   # reset error count
+        return ret
+
+    # return SUCCESS if the Globus transfer is done; FAILURE otherwise
+    def check_globus_finished(self, tofile, topoint, logact = 0):
+        ret = self.SUCCESS
+        ckey = "{}-{}".format(topoint, tofile)
+        if ckey in self.TASKIDS:
+            taskid = self.TASKIDS[ckey]
+        else:
+            self.errlog(ckey + ": Missing Task ID to check Status", 'B', 1, logact)
+            return self.FAILURE
+        lp = 0
+        if logact&self.NOWAIT:
+            act = logact&(~self.NOWAIT)
+            lps = 2
+        else:
+            act = logact
+            lps = 0
+        while True:
+            stat = self.check_globus_status(taskid, topoint, act)
+            if stat == 'A':
+                if lps:
+                    lp += 1
+                    if lp > lps: act = logact
+                time.sleep(self.PGSIG['ETIME'])
+            else:
+                if stat == 'S':
+                    del self.TASKIDS[ckey]
+                else:
+                    status = self.QSTATS[stat] if stat in self.QSTATS else 'UNKNOWN'
+                    self.errlog("{}: Status '{}' for Task {}".format(ckey, status, taskid), 'B', 1, logact)
+                    ret = self.FAILURE
+                break
+        return ret
+    # Copy a local file to the Quasar backup tape system
+    #   tofile - target file name, leading with /dsnnn.n/
+    #   fromfile - source file name, leading with /data/ or /decsdata/
+    #   endpoint - endpoint name on the Quasar backup server
+    def local_copy_backup(self, tofile, fromfile, endpoint = None, logact = 0):
+        if not endpoint: endpoint = self.PGLOG['BACKUPEP']
+        return self.endpoint_copy_endpoint(tofile, fromfile, endpoint, 'gdex-glade', logact)
+
+    # Copy a Quasar backup file to the local Globus endpoint
+    #   tofile - target file name, leading with /data/ or /decsdata/
+    #   fromfile - source file name, leading with /dsnnn.n/
+    #   endpoint - endpoint name on the Quasar backup server
+    def backup_copy_local(self, tofile, fromfile, endpoint = None, logact = 0):
+        if not endpoint: endpoint = self.PGLOG['BACKUPEP']
+        return self.endpoint_copy_endpoint(tofile, fromfile, 'gdex-glade', endpoint, logact)
+
+    # Copy a remote file to local
+    #   tofile - target file name
+    #   fromfile - source file name
+    #   host - remote host name
+    def remote_copy_local(self, tofile, fromfile, host, logact = 0):
+        cmd = self.get_sync_command(host)
+        finfo = self.check_remote_file(fromfile, host, 0, logact)
+        if not finfo:
+            if finfo != None: return self.FAILURE
+            return self.errlog("{}-{}: {} to copy to {}".format(host, fromfile, self.PGLOG['MISSFILE'], tofile), 'R', 1, logact)
+        target = tofile
+        ms = re.match(r'^(.+)/$', tofile)
+        if ms:
+            dir = ms.group(1)
+            tofile += op.basename(fromfile)
+        else:
+            dir = self.get_local_dirname(tofile)
+        if not self.make_local_directory(dir, logact): return self.FAILURE
+        cmd += " -g {} {}".format(fromfile, target)
+        loop = reset = 0
+        while (loop-reset) < 2:
+            info = None
+            if self.pgsystem(cmd, logact, self.CMDERR):
+                info = self.check_local_file(tofile, 143, logact)   # 1+2+4+8+128
+                if info:
+                    if not info['isfile']:
+                        self.set_local_mode(tofile, 0, self.PGLOG['EXECMODE'])
+                        return self.SUCCESS
+                    elif info['data_size'] == finfo['data_size']:
+                        self.set_local_mode(tofile, 1, self.PGLOG['FILEMODE'])
+                        return self.SUCCESS
+                elif info != None:
+                    break
+            self.errlog(self.PGLOG['SYSERR'], 'L', (loop - reset), logact)
+            if loop == 0: reset = self.reset_local_info(tofile, info, logact)
+            loop += 1
+        return self.FAILURE
+
+    # Copy an object file to local
+    #   tofile - target file name
+    #   fromfile - source file name
+    #   bucket - bucket name on the object store
+    def object_copy_local(self, tofile, fromfile, bucket = None, logact = 0):
+        ret = self.FAILURE
+        if not bucket: bucket = self.PGLOG['OBJCTBKT']
+        finfo = self.check_object_file(fromfile, bucket, 0, logact)
+        if not finfo:
+            if finfo != None: return ret
+            return self.lmsg(fromfile, "{}-{} to copy to {}".format(self.OHOST, self.PGLOG['MISSFILE'], tofile), logact)
+        cmd = "{} go -k {} -b {}".format(self.OBJCTCMD, fromfile, bucket)
+        fromname = op.basename(fromfile)
+        toname = op.basename(tofile)
+        if toname == tofile:
+            dir = odir = None
+        else:
+            dir = op.dirname(tofile)
+            odir = self.change_local_directory(dir, logact)
+        loop = reset = 0
+        while (loop-reset) < 2:
+            buf = self.pgsystem(cmd, logact, self.CMDBTH)
+            info = self.check_local_file(fromname, 143, logact)   # 1+2+4+8+128
+            if info:
+                if info['data_size'] == finfo['data_size']:
+                    self.set_local_mode(fromfile, info['isfile'], 0, info['mode'], info['logname'], logact)
+                    if toname == fromname or self.move_local_file(toname, fromname, logact):
+                        ret = self.SUCCESS
+                        break
+            elif info != None:
+                break
+            self.errlog("Error Execute: {}\n{}".format(cmd, buf), 'L', (loop - reset), logact)
+            if loop == 0: reset = self.reset_local_info(tofile, info, logact)
+            loop += 1
+        if odir and odir != dir:
+            self.change_local_directory(odir, logact)
+        return ret
+    # Copy a remote file to the object store
+    #   tofile - target object file name
+    #   fromfile - source remote file name
+    #   host - remote host name
+    #   bucket - bucket name on the object store
+    #   meta - dict of metadata
+    def remote_copy_object(self, tofile, fromfile, host, bucket = None, meta = None, logact = 0):
+        if self.is_local_host(host): return self.local_copy_object(tofile, fromfile, bucket, meta, logact)
+        locfile = "{}/{}".format(self.PGLOG['TMPPATH'], op.basename(tofile))
+        ret = self.remote_copy_local(locfile, fromfile, host, logact)
+        if ret:
+            ret = self.local_copy_object(tofile, locfile, bucket, meta, logact)
+            self.delete_local_file(locfile, logact)
+        return ret
+
+    # Copy an object file to a remote host
+    #   tofile - target remote file name
+    #   fromfile - source object file name
+    #   host - remote host name
+    #   bucket - bucket name on the object store
+    def object_copy_remote(self, tofile, fromfile, host, bucket = None, logact = 0):
+        if self.is_local_host(host): return self.object_copy_local(tofile, fromfile, bucket, logact)
+        locfile = "{}/{}".format(self.PGLOG['TMPPATH'], op.basename(tofile))
+        ret = self.object_copy_local(locfile, fromfile, bucket, logact)
+        if ret:
+            ret = self.local_copy_remote(tofile, locfile, host, logact)
+            self.delete_local_file(locfile, logact)
+        return ret
+
+    # Delete a file/directory on a given host name (including the local host); no background process for deleting
+    #   file - file name to be deleted
+    #   host - host name the file is on, defaults to self.LHOST
+    # Return 1 if successful, 0 if failed with the error message generated in self.pgsystem() cached in self.PGLOG['SYSERR']
+    def delete_gdex_file(self, file, host, logact = 0):
+        shost = self.strip_host_name(host)
+        if self.pgcmp(shost, self.LHOST, 1) == 0:
+            return self.delete_local_file(file, logact)
+        elif self.pgcmp(shost, self.OHOST, 1) == 0:
+            return self.delete_object_file(file, None, logact)
+        else:
+            return self.delete_remote_file(file, host, logact)
+    delete_rda_file = delete_gdex_file
+
+    # Delete a local file/directory
+    def delete_local_file(self, file, logact = 0):
+        info = self.check_local_file(file, 0, logact)
+        if not info: return self.FAILURE
+        cmd = "rm -rf " + file
+        loop = reset = 0
+        while (loop-reset) < 2:
+            if self.pgsystem(cmd, logact, self.CMDERR):
+                info = self.check_local_file(file, 14, logact)
+                if info is None:
+                    if self.DIRLVLS: self.record_delete_directory(op.dirname(file), self.LHOST)
+                    return self.SUCCESS
+                elif not info:
+                    break   # error checking file
+            self.errlog(self.PGLOG['SYSERR'], 'L', (loop - reset), logact)
+            if loop == 0: reset = self.reset_local_info(file, info, logact)
+            loop += 1
+        return self.FAILURE
+
+    # Delete a file/directory on a remote host
+    def delete_remote_file(self, file, host, logact = 0):
+        if not self.check_remote_file(file, host, logact): return self.FAILURE
+        cmd = self.get_sync_command(host)
+        for loop in range(2):
+            if self.pgsystem("{} -d {}".format(cmd, file), logact, self.CMDERR):
+                if self.DIRLVLS: self.record_delete_directory(op.dirname(file), host)
+                return self.SUCCESS
+            self.errlog(self.PGLOG['SYSERR'], 'R', loop, logact)
+        return self.FAILURE
self.pgsystem(cmd, logact, self.CMDERR): + errmsg = self.PGLOG['SYSERR'] + break + list = self.object_glob(file, bucket, 0, logact) + if not list: return self.SUCCESS + if errmsg: self.errlog(errmsg, 'O', loop, logact) + return self.FAILURE + + # Delete a backup file on Quasar Server + def delete_backup_file(self, file, endpoint = None, logact = 0): + if not endpoint: endpoint = self.PGLOG['BACKUPEP'] + info = self.check_backup_file(file, endpoint, 0, logact) + if not info: return self.FAILURE + cmd = f"{self.BACKCMD} delete -ep {endpoint} -tf {file}" + task = self.submit_globus_task(cmd, endpoint, logact) + if task['stat'] == 'S': + return self.SUCCESS + elif task['stat'] == 'A': + self.TASKIDS["{}-{}".format(endpoint, file)] = task['id'] + return self.FINISH + return self.FAILURE + + # reset local file/directory information to make them writable for self.PGLOG['GDEXUSER'] + # file - file name (mandatory) + # info - gathered file info with option 14, None means file not exists + def reset_local_info(self, file, info = None, logact = 0): + ret = 0 + if info: + if info['isfile']: + ret += self.reset_local_file(file, info, logact) + dir = self.get_local_dirname(file) + info = self.check_local_file(dir, 14, logact) + else: + dir = file + else: + dir = self.get_local_dirname(file) + info = self.check_local_file(dir, 14, logact) + if info: ret += self.reset_local_directory(dir, info, logact) + return 1 if ret else 0 + + # reset local directory group/mode + def reset_local_directory(self, dir, info = None, logact = 0): + ret = 0 + if not (info and 'mode' in info and 'group' in info and 'logname' in info): + info = self.check_local_file(dir, 14, logact) + if info: + if info['mode'] and info['mode'] != 0o775: + ret += self.set_local_mode(dir, 0, 0o775, info['mode'], info['logname'], logact) + if info['group'] and self.PGLOG['GDEXGRP'] != info['group']: + ret += self.change_local_group(dir, self.PGLOG['GDEXGRP'], info['group'], info['logname'], logact) + return 1 if ret else 0 + + # reset local file group/mode + def reset_local_file(self, file, info = None, logact = 0): + ret = 0 + if not (info and 'mode' in info and 'group' in info and 'logname' in info): + info = self.check_local_file(file, 14, logact) + if info: + if info['mode'] != 0o664: + ret += self.set_local_mode(file, 1, 0o664, info['mode'], info['logname'], logact) + if self.PGLOG['GDEXGRP'] != info['group']: + ret += self.change_local_group(file, self.PGLOG['GDEXGRP'], info['group'], info['logname'], logact) + return ret + + # Move file locally or remotely on the same host no background process for moving + # tofile - target file name + # fromfile - original file name + # host - host name the file is moved on, default to self.LHOST + # Return self.SUCCESS if successful self.FAILURE otherwise + def move_gdex_file(self, tofile, fromfile, host, logact = 0): + shost = self.strip_host_name(host) + if self.pgcmp(shost, self.LHOST, 1) == 0: + return self.move_local_file(tofile, fromfile, logact) + elif self.pgcmp(shost, self.OHOST, 1) == 0: + return self.move_object_file(tofile, fromfile, None, None, logact) + else: + return self.move_remote_file(tofile, fromfile, host, logact) + move_rda_file = move_gdex_file + + # Move a file locally + # tofile - target file name + # fromfile - source file name + def move_local_file(self, tofile, fromfile, logact = 0): + dir = self.get_local_dirname(tofile) + info = self.check_local_file(fromfile, 0, logact) + tinfo = self.check_local_file(tofile, 0, logact) + if not info: + if info != None: return 
self.FAILURE + if tinfo: + self.pglog("{}: Moved to {} already".format(fromfile, tofile), self.LOGWRN) + return self.SUCCESS + else: + return self.errlog("{}: {} to move".format(fromfile, self.PGLOG['MISSFILE']), 'L', 1, logact) + if tinfo: + if tinfo['data_size'] > 0 and not logact&self.OVRIDE: + return self.errlog("{}: File exists, cannot move {} to it".format(tofile, fromfile), 'L', 1, logact) + elif tinfo != None: + return self.FAILURE + if not self.make_local_directory(dir, logact): return self.FAILURE + cmd = "mv {} {}".format(fromfile, tofile) + loop = reset = 0 + while (loop-reset) < 2: + if self.pgsystem(cmd, logact, self.CMDERR): + if self.DIRLVLS: self.record_delete_directory(op.dirname(fromfile), self.LHOST) + return self.SUCCESS + self.errlog(self.PGLOG['SYSERR'], 'L', (loop - reset), logact) + if loop == 0: reset = self.reset_local_info(tofile, info, logact) + loop += 1 + return self.FAILURE + + # Move a remote file on the same host + # tofile - target file name + # fromfile - original file name + # host - remote host name + # locfile - local copy of tofile + def move_remote_file(self, tofile, fromfile, host, logact = 0): + if self.is_local_host(host): return self.move_local_file(tofile, fromfile, logact) + ret = self.FAILURE + dir = op.dirname(tofile) + info = self.check_remote_file(fromfile, host, 0, logact) + tinfo = self.check_remote_file(tofile, host, 0, logact) + if not info: + if info != None: return self.FAILURE + if tinfo: + self.pglog("{}-{}: Moved to {} already".format(host, fromfile, tofile), self.LOGWRN) + return self.SUCCESS + else: + return self.errlog("{}-{}: {} to move".format(host, fromfile, self.PGLOG['MISSFILE']), 'R', 1, logact) + if tinfo: + if tinfo['data_size'] > 0 and not logact&self.OVRIDE: + return self.errlog("{}-{}: File exists, cannot move {} to it".format(host, tofile, fromfile), 'R', 1, logact) + elif tinfo != None: + return self.FAILURE + if self.make_remote_directory(dir, host, logact): + locfile = "{}/{}".format(self.PGLOG['TMPPATH'], op.basename(tofile)) + if self.remote_copy_local(locfile, fromfile, host, logact): + ret = self.local_copy_remote(tofile, locfile, host, logact) + self.delete_local_file(locfile, logact) + if ret: + ret = self.delete_remote_file(fromfile, host, logact) + if self.DIRLVLS: self.record_delete_directory(op.dirname(fromfile), host) + return ret + + # Move an object file on Object Store + # tofile - target file name + # fromfile - original file name + # tobucket - target bucket name + # frombucket - original bucket name + def move_object_file(self, tofile, fromfile, tobucket, frombucket, logact = 0): + ret = self.FAILURE + if not tobucket: tobucket = self.PGLOG['OBJCTBKT'] + if not frombucket: frombucket = tobucket + finfo = self.check_object_file(fromfile, frombucket, 0, logact) + tinfo = self.check_object_file(tofile, tobucket, 0, logact) + if not finfo: + if finfo != None: return self.FAILURE + if tinfo: + self.pglog("{}-{}: Moved to {}-{} already".format(frombucket, fromfile, tobucket, tofile), self.LOGWRN) + return self.SUCCESS + else: + return self.errlog("{}-{}: {} to move".format(frombucket, fromfile, self.PGLOG['MISSFILE']), 'R', 1, logact) + if tinfo: + if tinfo['data_size'] > 0 and not logact&self.OVRIDE: + return self.errlog("{}-{}: Object File exists, cannot move {}-{} to it".format(tobucket, tofile, frombucket, fromfile), 'R', 1, logact) + elif tinfo != None: + return self.FAILURE + cmd = "{} mv -b {} -db {} -k {} -dk {}".format(self.OBJCTCMD, frombucket, tobucket, fromfile, tofile) + ucmd = "{} gm -k 
{} -b {}".format(self.OBJCTCMD, fromfile, frombucket) + ubuf = self.pgsystem(ucmd, self.LOGWRN, self.CMDRET) + if ubuf and re.match(r'^\{', ubuf): cmd += " -md '{}'".format(ubuf) + for loop in range(2): + buf = self.pgsystem(cmd, logact, self.CMDBTH) + tinfo = self.check_object_file(tofile, tobucket, 0, logact) + if tinfo: + if tinfo['data_size'] == finfo['data_size']: + return self.SUCCESS + elif tinfo != None: + break + self.errlog("Error Execute: {}\n{}".format(cmd, buf), 'O', loop, logact) + return self.FAILURE + + # Move an object path on Object Store and all the file keys under it + # topath - target path name + # frompath - original path name + # tobucket - target bucket name + # frombucket - original bucket name + def move_object_path(self, topath, frompath, tobucket, frombucket, logact = 0): + ret = self.FAILURE + if not tobucket: tobucket = self.PGLOG['OBJCTBKT'] + if not frombucket: frombucket = tobucket + fcnt = self.check_object_path(frompath, frombucket, logact) + tcnt = self.check_object_path(topath, tobucket, logact) + if not fcnt: + if fcnt == None: return self.FAILURE + if tcnt: + self.pglog("{}-{}: Moved to {}-{} already".format(frombucket, frompath, tobucket, topath), self.LOGWRN) + return self.SUCCESS + else: + return self.errlog("{}-{}: {} to move".format(frombucket, frompath, self.PGLOG['MISSFILE']), 'R', 1, logact) + cmd = "{} mv -b {} -db {} -k {} -dk {}".format(self.OBJCTCMD, frombucket, tobucket, frompath, topath) + for loop in range(2): + buf = self.pgsystem(cmd, logact, self.CMDBTH) + fcnt = self.check_object_path(frompath, frombucket, logact) + if not fcnt: return self.SUCCESS + self.errlog("Error Execute: {}\n{}".format(cmd, buf), 'O', loop, logact) + return self.FAILURE + + # Move a backup file on Quasar Server + # tofile - target file name + # fromfile - source file name + # endpoint - Globus endpoint + def move_backup_file(self, tofile, fromfile, endpoint = None, logact = 0): + ret = self.FAILURE + if not endpoint: endpoint = self.PGLOG['BACKUPEP'] + finfo = self.check_backup_file(fromfile, endpoint, 0, logact) + tinfo = self.check_backup_file(tofile, endpoint, 0, logact) + if not finfo: + if finfo != None: return ret + if tinfo: + self.pglog("{}: Moved to {} already".format(fromfile, tofile), self.LOGWRN) + return self.SUCCESS + else: + return self.errlog("{}: {} to move".format(fromfile, self.PGLOG['MISSFILE']), 'B', 1, logact) + if tinfo: + if tinfo['data_size'] > 0 and not logact&self.OVRIDE: + return self.errlog("{}: File exists, cannot move {} to it".format(tofile, fromfile), 'B', 1, logact) + elif tinfo != None: + return ret + cmd = f"{self.BACKCMD} rename -ep {endpoint} --old-path {fromfile} --new-path {tofile}" + loop = 0 + while loop < 2: + buf = self.pgsystem(cmd, logact, self.CMDRET) + syserr = self.PGLOG['SYSERR'] + if buf: + if buf.find('File or directory renamed successfully') > -1: + ret = self.SUCCESS + break + if syserr: + if syserr.find("No such file or directory") > -1: + if self.make_backup_directory(op.dirname(tofile), endpoint, logact): continue + errmsg = "Error Execute: {}\n{}".format(cmd, syserr) + (hstat, msg) = self.host_down_status('', self.QHOSTS[endpoint], 1, logact) + if hstat: errmsg += "\n" + msg + self.errlog(errmsg, 'B', loop, logact) + loop += 1 + if ret == self.SUCCESS: self.ECNTS['B'] = 0 # reset error count + return ret + + # Make a directory on a given host name (including local host) + # dir - directory path to be made + # host - host name the directory on, default to self.LHOST + # Return self.SUCCESS(1) if 
successful or self.FAILURE(0) if failed
+    def make_gdex_directory(self, dir, host, logact = 0):
+        if not dir: return self.SUCCESS
+        shost = self.strip_host_name(host)
+        if self.pgcmp(shost, self.LHOST, 1) == 0:
+            return self.make_local_directory(dir, logact)
+        else:
+            return self.make_remote_directory(dir, host, logact)
+    make_rda_directory = make_gdex_directory
+
+    # Make a local directory
+    # dir - directory path to be made
+    def make_local_directory(self, dir, logact = 0):
+        return self.make_one_local_directory(dir, None, logact)
+
+    # Make a local directory recursively
+    def make_one_local_directory(self, dir, odir = None, logact = 0):
+        if not dir or op.isdir(dir): return self.SUCCESS
+        if op.isfile(dir): return self.errlog(dir + ": is file, cannot make directory", 'L', 1, logact)
+        if not odir: odir = dir
+        if self.is_root_directory(dir, 'L', self.LHOST, "make directory " + odir, logact): return self.FAILURE
+        if not self.make_one_local_directory(op.dirname(dir), odir, logact): return self.FAILURE
+        loop = reset = 0
+        while (loop-reset) < 2:
+            try:
+                os.mkdir(dir, self.PGLOG['EXECMODE'])
+            except Exception as e:
+                errmsg = str(e)
+                if errmsg.find('File exists') > -1: return self.SUCCESS
+                self.errlog(errmsg, 'L', (loop - reset), logact)
+                if loop == 0: reset = self.reset_local_info(dir, None, logact)
+                loop += 1
+            else:
+                return self.SUCCESS
+        return self.FAILURE
+
+    # Make a directory on a remote host name
+    # dir - directory path to be made
+    # host - host name the directory is on
+    def make_remote_directory(self, dir, host, logact = 0):
+        return self.make_one_remote_directory(dir, None, host, logact)
+
+    def make_one_remote_directory(self, dir, odir, host, logact = 0):
+        info = self.check_remote_file(dir, host, 0, logact)
+        if info:
+            if info['isfile']: return self.errlog("{}-{}: is file, cannot make directory".format(host, dir), 'R', 1, logact)
+            return self.SUCCESS
+        elif info != None:
+            return self.FAILURE
+        if not odir: odir = dir
+        if self.is_root_directory(dir, 'R', host, "make directory {} on {}".format(odir, host), logact): return self.FAILURE
+        if self.make_one_remote_directory(op.dirname(dir), odir, host, logact):
+            tmpsync = self.get_tmpsync_path()
+            if self.pgsystem("{} {} {}".format(self.get_sync_command(host), tmpsync, dir), logact, 5):
+                self.set_remote_mode(dir, 0, host, self.PGLOG['EXECMODE'])
+                return self.SUCCESS
+        return self.FAILURE
+
+    # Make a Quasar directory
+    # dir - directory path to be made
+    def make_backup_directory(self, dir, endpoint, logact = 0):
+        return self.make_one_backup_directory(dir, None, endpoint, logact)
+
+    # Make a Quasar directory recursively
+    def make_one_backup_directory(self, dir, odir, endpoint = None, logact = 0):
+        if not dir or dir == '/': return self.SUCCESS
+        if not endpoint: endpoint = self.PGLOG['BACKUPEP']
+        info = self.check_backup_file(dir, endpoint, 0, logact)
+        if info:
+            if info['isfile']: return self.errlog("{}-{}: is file, cannot make backup directory".format(endpoint, dir), 'B', 1, logact)
+            return self.SUCCESS
+        elif info != None:
+            return self.FAILURE
+        if not odir: odir = dir
+        if not self.make_one_backup_directory(op.dirname(dir), odir, endpoint, logact): return self.FAILURE
+        cmd = f"{self.BACKCMD} mkdir -ep {endpoint} -p {dir}"
+        ret = self.FAILURE  # initialize so ret is always bound when the loop falls through
+        for loop in range(2):
+            buf = self.pgsystem(cmd, logact, self.CMDRET)
+            syserr = self.PGLOG['SYSERR']
+            if buf:
+                if(buf.find('The directory was created successfully') > -1 or
+                   buf.find("Path '{}' already exists".format(dir)) > -1):
+                    ret = self.SUCCESS
+                    break
+            if syserr:
+                if syserr.find("No such file or directory") > -1:
+                    ret = self.make_one_backup_directory(op.dirname(dir), odir, endpoint, logact)
+                    if ret == self.SUCCESS or loop: break
+                    time.sleep(self.PGSIG['ETIME'])
+                else:
+                    errmsg = "Error Execute: {}\n{}".format(cmd, syserr)
+                    (hstat, msg) = self.host_down_status('', self.QHOSTS[endpoint], 1, logact)
+                    if hstat: errmsg += "\n" + msg
+                    self.errlog(errmsg, 'B', loop, logact)
+        if ret == self.SUCCESS: self.ECNTS['B'] = 0 # reset error count
+        return ret
+
+    # check and return 1 if a root directory
+    def is_root_directory(self, dir, etype, host = None, action = None, logact = 0):
+        ret = cnt = 0
+        if re.match(r'^{}'.format(self.PGLOG['DSSDATA']), dir):
+            ms = re.match(r'^({})(.*)$'.format(self.PGLOG['GPFSROOTS']), dir)
+            if ms:
+                m2 = ms.group(2)
+                if not m2 or m2 == '/': ret = 1
+            else:
+                cnt = 4
+        else:
+            ms = re.match(r'^({})(.*)$'.format(self.PGLOG['HOMEROOTS']), dir)
+            if ms:
+                m2 = ms.group(2)
+                if not m2 or m2 == '/': ret = 1
+            else:
+                cnt = 2
+        if cnt and re.match(r'^(/[^/]+){0,%d}(/*)$' % cnt, dir):
+            ret = 1
+        if ret and action:
+            cnt = 0
+            errmsg = "{}: Cannot {} from {}".format(dir, action, self.PGLOG['HOSTNAME'])
+            (hstat, msg) = self.host_down_status(dir, host, 0, logact)
+            if hstat: errmsg += "\n" + msg
+            self.errlog(errmsg, etype, 1, logact|self.ERRLOG)
+        return ret
+
+    # set mode for a given directory/file on a given host (including local host)
+    def set_gdex_mode(self, file, isfile, host, nmode = None, omode = None, logname = None, logact = 0):
+        shost = self.strip_host_name(host)
+        if self.pgcmp(shost, self.LHOST, 1) == 0:
+            return self.set_local_mode(file, isfile, nmode, omode, logname, logact)
+        else:
+            return self.set_remote_mode(file, isfile, host, nmode, omode, logact)
+    set_rda_mode = set_gdex_mode
+
+    # set mode for given local directory or file
+    def set_local_mode(self, file, isfile = 1, nmode = 0, omode = 0, logname = None, logact = 0):
+        if not nmode: nmode = (self.PGLOG['FILEMODE'] if isfile else self.PGLOG['EXECMODE'])
+        if not (omode and logname):
+            info = self.check_local_file(file, 6)
+            if not info:
+                if info != None: return self.FAILURE
+                return self.lmsg(file, "{} to set mode({})".format(self.PGLOG['MISSFILE'], self.int2base(nmode, 8)), logact)
+            omode = info['mode']
+            logname = info['logname']
+        if nmode == omode: return self.SUCCESS
+        try:
+            os.chmod(file, nmode)
+        except Exception as e:
+            return self.errlog(str(e), 'L', 1, logact)
+        return self.SUCCESS
+
+    # set mode for given directory or file on remote host
+    def set_remote_mode(self, file, isfile, host, nmode = 0, omode = 0, logact = 0):
+        if not nmode: nmode = (self.PGLOG['FILEMODE'] if isfile else self.PGLOG['EXECMODE'])
+        if not omode:
+            info = self.check_remote_file(file, host, 6)
+            if not info:
+                if info != None: return self.FAILURE
+                return self.errlog("{}-{}: {} to set mode({})".format(host, file, self.PGLOG['MISSFILE'], self.int2base(nmode, 8)), 'R', 1, logact)
+            omode = info['mode']
+        if nmode == omode: return self.SUCCESS
+        return self.pgsystem("{} -m {} {}".format(self.get_sync_command(host), self.int2base(nmode, 8), file), logact, 5)
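+
+    # Illustrative usage (not part of this patch): set_local_mode() falls back to
+    # self.PGLOG['FILEMODE'] for files and self.PGLOG['EXECMODE'] for directories
+    # when nmode is 0, so typical calls look like (the paths are hypothetical):
+    #
+    #    self.set_local_mode('/data/ds540.0/foo.nc')            # default file mode
+    #    self.set_local_mode('/data/ds540.0', 0)                # default directory mode
+    #    self.set_local_mode('/data/ds540.0/foo.nc', 1, 0o664)  # explicit mode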
+
+    # change group for given local directory or file
+    def change_local_group(self, file, ngrp = None, ogrp = None, logname = None, logact = 0):
+        if not ngrp:
+            ngid = self.PGLOG['GDEXGID']
+        else:
+            ngid = grp.getgrnam(ngrp).gr_gid  # getgrnam is a function; indexing it with [] raised TypeError
+        if logact and logact&self.EXITLG: logact &= ~self.EXITLG
+        if not (ogrp and logname):
+            info = self.check_local_file(file, 10, logact)
+            if not info:
+                if info != None: return self.FAILURE
+                return self.errlog("{}: {} to change group({})".format(file, self.PGLOG['MISSFILE'], ngrp), 'L', 1, logact)
+            ogid = info['gid']
+            ouid = info['uid']
+        else:
+            ouid = pwd.getpwnam(logname).pw_uid
+            ogid = grp.getgrnam(ogrp).gr_gid  # look up the old group by its own name (was logname)
+        if ngid == ogid: return self.SUCCESS
+        try:
+            os.chown(file, ouid, ngid)
+        except Exception as e:
+            return self.errlog(str(e), 'L', 1, logact)
+        return self.SUCCESS
+
+    # Check if a given path on a specified host, or the host itself, is down
+    # path: path name to be checked
+    # host: host name the file is on, default to self.LHOST
+    # chkopt: 1 - do a file/path check, 0 - do not
+    # Return a pair (hstat, msg)
+    # hstat: 0 if system is up and accessible,
+    #        1 - host is down,
+    #        2 - path not accessible,
+    #        negative values for a planned system down
+    # msg: None if hstat == 0,
+    #      a non-empty system-down message string if hstat != 0
+    def host_down_status(self, path, host, chkopt = 0, logact = 0):
+        shost = self.strip_host_name(host)
+        hstat = 0
+        rets = [0, None]
+        msg = hostname = None
+        if self.pgcmp(shost, self.LHOST, 1) == 0:
+            if not path or (chkopt and self.check_local_file(path)): return rets
+            msg = path + ": is not accessible"
+            flag = "L"
+            if re.match(r'^(/{}/|{})'.format(self.PGLOG['GPFSNAME'], self.PGLOG['DSSDATA']), path):
+                hstat = 1
+                hostname = self.PGLOG['GPFSNAME']
+            else:
+                hstat = 2
+        elif self.pgcmp(shost, self.PGLOG['GPFSNAME'], 1) == 0:
+            if not path or (chkopt and self.check_local_file(path)): return rets
+            msg = path + ": is not accessible"
+            flag = "L"
+            hstat = 1
+            hostname = self.PGLOG['GPFSNAME']
+        elif self.pgcmp(shost, self.BHOST, 1) == 0:
+            if path:
+                hstat = 2
+            else:
+                hstat = 1
+                path = self.DPATHS['B']
+            if chkopt and self.check_backup_file(path, self.QPOINTS['B']): return rets
+            hostname = self.BHOST
+            msg = "{}-{}: is not accessible".format(hostname, path)
+            flag = "B"
+        elif self.pgcmp(shost, self.DHOST, 1) == 0:
+            if path:
+                hstat = 2
+            else:
+                hstat = 1
+                path = self.DPATHS['D']  # use the 'D' default path to match the 'D' endpoint (was DPATHS['B'])
+            if chkopt and self.check_backup_file(path, self.QPOINTS['D']): return rets
+            hostname = self.DHOST
+            msg = "{}-{}: is not accessible".format(hostname, path)
+            flag = "D"
+        elif self.pgcmp(shost, self.OHOST, 1) == 0:
+            if path:
+                hstat = 2
+            else:
+                hstat = 1
+                path = self.PGLOG['OBJCTBKT']
+            if chkopt and self.check_object_file(path): return rets
+            hostname = self.OHOST
+            msg = "{}-{}: is not accessible".format(hostname, path)
+            flag = "O"
+        elif self.pgcmp(shost, self.PGLOG['PGBATCH'], 1):
+            if path and chkopt and self.check_remote_file(path, host): return rets
+            estat = self.ping_remote_host(host)
+            if estat:
+                hstat = 1
+                hostname = host
+            else:
+                if not path: return rets
+                if re.match(r'^/{}/'.format(self.PGLOG['GPFSNAME']), path):
+                    hstat = 1
+                    hostname = self.PGLOG['GPFSNAME']
+                else:
+                    hstat = 2
+                    hostname = host
+            flag = "R"
+            msg = "{}-{}: is not accessible".format(host, path)
+        elif self.get_host(1) == self.PGLOG['PGBATCH']: # local host is a batch node
+            if not path or (chkopt and self.check_local_file(path)): return rets
+            msg = path + ": is not accessible"
+            flag = "L"
+            if re.match(r'^(/{}/|{})'.format(self.PGLOG['GPFSNAME'], self.PGLOG['DSSDATA']), path):
+                hstat = 1
+                hostname = self.PGLOG['GPFSNAME']
+            else:
+                hstat = 2
+        msg += " at the moment. Checked on " + self.PGLOG['HOSTNAME']
+        if hostname:
+            estat = self.system_down_message(hostname, path, 0, logact)
+            if estat:
+                hstat = -hstat
+                msg += "\n" + estat
+        if logact and (chkopt or hstat < 0): self.errlog(msg, flag, 1, logact)
+        return (hstat, msg)
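+
+    # Illustrative sketch (assumption, not in the original patch): callers of
+    # host_down_status() usually branch only on hstat, e.g.
+    #
+    #    (hstat, msg) = self.host_down_status('/data/ds540.0', None)  # hypothetical path
+    #    if hstat == 0:  pass                                  # up and accessible
+    #    elif hstat < 0: self.pglog(msg, self.LOGWRN)          # planned downtime
+    #    else:           self.pglog(msg, self.LOGWRN)          # host (1) or path (2) problem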
+
+    # Check if a given path on a specified host is down or not
+    # path: path name to be checked
+    # host: host name the file is on, default to self.LHOST
+    # Return errmsg if not accessible and None otherwise
+    def check_host_down(self, path, host, logact = 0):
+        (hstat, msg) = self.host_down_status(path, host, 1, logact)
+        return msg if hstat else None
+
+    # Check if given service name is accessible from a specified host
+    # sname: service name to be checked
+    # fhost: from host name to connect to service, default to self.LHOST
+    # reset the service flag to A or I accordingly
+    # Return None if accessible, an error message if not, and -1 if it cannot be checked
+    def check_service_accessibilty(self, sname, fhost = None, logact = 0):
+        if not fhost: fhost = self.PGLOG['HOSTNAME']
+        pgrec = self.pgget("dsservice", "*", "service = '{}' AND hostname = '{}'".format(sname, fhost), logact)
+        if not pgrec:
+            self.pglog("dsservice: Access {} from {} is not defined in GDEX Configuration".format(sname, fhost), logact)
+            return -1
+        path = sname if (pgrec['flag'] == "H" or pgrec['flag'] == "G") else None
+        (hstat, msg) = self.host_down_status(path, fhost, 1, logact)
+        return msg if hstat else None
+
+    # check if this host is a local host for given host name
+    def is_local_host(self, host):
+        host = self.strip_host_name(host)
+        if host == self.LHOST or self.valid_batch_host(host): return 1
+        return 0
+
+    # check and return action string on a node other than local one
+    def local_host_action(self, host, action, info, logact = 0):
+        if self.is_local_host(host): return 1
+        if not logact: return 0
+        if host == "partition":
+            msg = "for individual partition"
+        elif host == "rda_config":
+            msg = "via https://gdex.ucar.edu/rda_pg_config"
+        elif host in self.BCHCMDS:
+            msg = "on a {} Node".format(host)
+        else:
+            msg = "on " + host
+        return self.pglog("{}: Cannot {}, try {}".format(info, action, msg), logact)
+
+    # ping a given remote host name
+    # return None if the system is up, an error message if not
+    def ping_remote_host(self, host):
+        while True:
+            buf = self.pgsystem("ping -c 3 " + host, self.LOGWRN, self.CMDRET)
+            if buf:
+                ms = re.search(r'3 packets transmitted, (\d)', buf)
+                if ms:
+                    if int(ms.group(1)) > 0:
+                        return None
+                    else:
+                        return host + " seems down, not accessible"
+            if self.PGLOG['SYSERR']:
+                if self.PGLOG['SYSERR'].find("ping: unknown host") > -1 and host.find('.') < 0:
+                    host += ".ucar.edu"  # retry a bare host name with the domain appended (was host.find('.') > -1, which skipped bare names)
+                    continue
+                return self.PGLOG['SYSERR']
+            else:
+                return "Cannot ping " + host
+
+    # compare given two host names, return 1 if same and 0 otherwise
+    def same_hosts(self, host1, host2):
+        host1 = self.strip_host_name(host1)
+        host2 = self.strip_host_name(host2)
+        return (1 if self.pgcmp(host1, host2, 1) == 0 else 0)
+
+    # strip and identify the proper host name
+    def strip_host_name(self, host):
+        if not host: return self.LHOST
+        ms = re.match(r'^([^\.]+)\.', host)
+        if ms: host = ms.group(1)
+        if self.pgcmp(host, self.PGLOG['HOSTNAME'], 1) == 0:
+            return self.LHOST
+        else:
+            return host
+
+    # Check file status info on a given host name (including local host); no background process for checking
+    # file: file name to be checked
+    # host: host name the file is on, default to self.LHOST
+    # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
+    #      1 - get date/time modified (date_modified, time_modified)
+    #      2 - get file owner's login name (logname)
+    #      4 - get permission mode in 3 octal digits (mode)
+    #      8 - get group name (group)
+    #      16 - get week day 0-Sunday, 1-Monday (week_day)
+    #      32 - get checksum (checksum), works for local files only
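+    #      (the opt values are bit flags and can be OR-ed together; for example,
+    #       opt = 1|4|8 returns modified date/time, mode and group in one call:
+    #       info = self.check_gdex_file('/data/ds540.0/foo.nc', None, 1|4|8) --
+    #       an illustrative, hypothetical path, not part of this patch)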
+ # Return a dict of file info, or None if file not exists + def check_gdex_file(self, file, host = None, opt = 0, logact = 0): + if host is None: host = self.LHOST + shost = self.strip_host_name(host) + if self.pgcmp(shost, self.LHOST, 1) == 0: + return self.check_local_file(file, opt, logact) + elif self.pgcmp(shost, self.OHOST, 1) == 0: + return self.check_object_file(file, None, opt, logact) + elif self.pgcmp(shost, self.BHOST, 1) == 0: + return self.check_backup_file(file, self.QPOINTS['B'], opt, logact) + elif self.pgcmp(shost, self.DHOST, 1) == 0: + return self.check_backup_file(file, self.QPOINTS['D'], opt, logact) + else: + return self.check_remote_file(file, host, opt, logact) + check_rda_file = check_gdex_file + + # wrapper to self.check_local_file() and self.check_globus_file() to check info for a file + # on local or remote Globus endpoints + def check_globus_file(self, file, endpoint = None, opt = 0, logact = 0): + if not endpoint: endpoint = self.PGLOG['BACKUPEP'] + if endpoint == 'gdex-glade': + if re.match(r'^/(data|decsdata)/', file): file = self.PGLOG['DSSDATA'] + file + return self.check_local_file(file, opt, logact) + else: + return self.check_backup_file(file, endpoint, opt, logact) + + # check and get local file status information + # file: local File name + # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename + # 1 - get date/time modified (date_modified, time_modfied) + # 2 - get file owner's login name (logname) + # 4 - get permission mode in 3 octal digits (mode) + # 8 - get group name (group) + # 16 - get week day 0-Sunday, 1-Monday (week_day) + # 32 - get checksum (checksum) + # 64 - remove file too small + # 128 - check twice for missing file + # Return: a dict of file info, or None if not exists + def check_local_file(self, file, opt = 0, logact = 0): + ret = None + if not file: return ret + loop = 0 + while loop < 2: + if op.exists(file): + try: + fstat = os.stat(file) + ret = self.local_file_stat(file, fstat, opt, logact) + break + except Exception as e: + errmsg = "{}: {}".format(file, str(e)) + (hstat, msg) = self.host_down_status(file, self.LHOST, 0, logact) + if hstat: errmsg += "\n" + msg + self.errlog(errmsg, 'L', loop, logact) + else: + if loop > 0 or opt&128 == 0: break + self.pglog(file + ": check it again in a moment", self.LOGWRN) + time.sleep(6) + loop += 1 + if loop > 1: return self.FAILURE + self.ECNTS['L'] = 0 # reset error count + return ret + + # local function to get local file stat + def local_file_stat(self, file, fstat, opt, logact): + if not fstat: + self.errlog(file + ": Error check file stat", 'L', 1, logact) + return None + info = {} + info['isfile'] = (1 if stat.S_ISREG(fstat.st_mode) else 0) + if info['isfile'] == 0 and logact&self.PFSIZE: + info['data_size'] = self.local_path_size(file) + else: + info['data_size'] = fstat.st_size + info['fname'] = op.basename(file) + if not opt: return info + if opt&64 and info['isfile'] and info['data_size'] < self.PGLOG['MINSIZE']: + self.pglog("{}: Remove {} file".format(file, ("Small({}B)".format(info['data_size']) if info['data_size'] else "Empty")), logact&~self.EXITLG) + self.delete_local_file(file, logact) + return None + if opt&17: + mdate, mtime = self.get_date_time(fstat.st_mtime) + if opt&1: + info['date_modified'] = mdate + info['time_modified'] = mtime + cdate, ctime = self.get_date_time(fstat.st_ctime) + info['date_created'] = cdate + info['time_created'] = ctime + if opt&16: info['week_day'] = self.get_weekday(mdate) + if opt&2: + info['uid'] = 
fstat.st_uid + info['logname'] = pwd.getpwuid(info['uid']).pw_name + if opt&4: info['mode'] = stat.S_IMODE(fstat.st_mode) + if opt&8: + info['gid'] = fstat.st_gid + info['group'] = grp.getgrgid(info['gid']).gr_name + if opt&32 and info['isfile']: info['checksum'] = self.get_md5sum(file, 0, logact) + return info + + # get total size of files under a given path + @staticmethod + def local_path_size(pname): + if not pname: pname = '.' # To get size of current directory + size = 0 + for path, dirs, files in os.walk(pname): + for f in files: + size += os.path.getsize(os.path.join(path, f)) + return size + + # check and get file status information of a file on remote host + # file: remote File name + # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename + # 1 - get date/time modified (date_modified, time_modfied) + # 2 - file owner's login name (logname), assumed 'gdexdata' + # 4 - get permission mode in 3 octal digits (mode) + # 8 - get group name (group), assumed 'dss' + # 16 - get week day 0-Sunday, 1-Monday (week_day) + # Return: a dict of file info, or None if not exists + def check_remote_file(self, file, host, opt = 0, logact = 0): + if not file: return None + ms = re.match(r'^(.+)/$', file) + if ms: file = ms.group(1) # remove ending '/' in case + cmd = "{} {}".format(self.get_sync_command(host), file) + loop = 0 + while loop < 2: + buf = self.pgsystem(cmd, self.LOGWRN, self.CMDRET) + if buf or not self.PGLOG['SYSERR'] or self.PGLOG['SYSERR'].find(self.PGLOG['MISSFILE']) > -1: break + errmsg = self.PGLOG['SYSERR'] + (hstat, msg) = self.host_down_status(file, host, 0, logact) + if hstat: errmsg += "\n" + msg + self.errlog(errmsg, 'R', loop, logact) + loop += 1 + if loop > 1: return self.FAILURE + self.ECNTS['R'] = 0 # reset error count + if buf: + for line in re.split(r'\n', buf): + info = self.remote_file_stat(line, opt) + if info: return info + return None + + # local function to get remote file stat + def remote_file_stat(self, line, opt): + info = {} + items = re.split(r'\s+', line) + if len(items) < 5 or items[4] == '.': return None + ms = re.match(r'^([d\-])([\w\-]{9})$', items[0]) + info['isfile'] = (1 if ms and ms.group(1) == "-" else 0) + if opt&4: info['mode'] = self.get_file_mode(ms.group(2)) + fsize = items[1] + if fsize.find(',') > -1: fsize = re.sub(r',', '', fsize) + info['data_size'] = int(fsize) + info['fname'] = op.basename(items[4]) + if not opt: return info + if opt&17: + mdate = self.format_date(items[2], "YYYY-MM-DD", "YYYY/MM/DD") + mtime = items[3] + if self.PGLOG['GMTZ']: (mdate, mtime) = self.addhour(mdate, mtime, self.PGLOG['GMTZ']) + if opt&1: + info['date_modified'] = mdate + info['time_modified'] = mtime + if opt&16: info['week_day'] = self.get_weekday(mdate) + if opt&2: info['logname'] = "gdexdata" + if opt&8: info['group'] = self.PGLOG['GDEXGRP'] + return info + + # check and get object file status information + # file: object store File key name + # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename + # 1 - get date/time modified (date_modified, time_modfied) + # 2 - get file owner's login name (logname) + # 4 - get metadata hash + # 8 - get group name (group) + # 16 - get week day 0-Sunday, 1-Monday (week_day) + # 32 - get checksum (checksum) + # 64 - check once, no rechecking + # Return a dict of file info, or None if file not exists + def check_object_file(self, file, bucket = None, opt = 0, logact = 0): + if not bucket: bucket = self.PGLOG['OBJCTBKT'] + ret = None + if not file: return ret 
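+        # (clarifying note, not in the original patch: like the other check_*
+        # routines in this module, this returns a dict when the object exists,
+        # None when it does not, and self.FAILURE on an error while checking;
+        # callers use 'if not info: if info != None: ...' to tell the two
+        # falsy outcomes apart)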
+ cmd = "{} lo {} -b {}".format(self.OBJCTCMD, file, bucket) + ucmd = "{} gm -k {} -b {}".format(self.OBJCTCMD, file, bucket) if opt&14 else None + loop = 0 + while loop < 2: + buf = self.pgsystem(cmd, self.LOGWRN, self.CMDRET) + if buf: + if re.match(r'^\[\]', buf): break + if re.match(r'^\[\{', buf): + ary = json.loads(buf) + cnt = len(ary) + if cnt > 1: return self.pglog("{}-{}: {} records returned\n{}".format(bucket, file, cnt, buf), logact|self.ERRLOG) + hash = ary[0] + uhash = None + if ucmd: + ubuf = self.pgsystem(ucmd, self.LOGWRN, self.CMDRET) + if ubuf and re.match(r'^\{', ubuf): uhash = json.loads(ubuf) + ret = self.object_file_stat(hash, uhash, opt) + break + if opt&64: return self.FAILURE + errmsg = "Error Execute: {}\n{}".format(cmd, self.PGLOG['SYSERR']) + (hstat, msg) = self.host_down_status(bucket, self.OHOST, 0, logact) + if hstat: errmsg += "\n" + msg + self.errlog(errmsg, 'O', loop, logact) + loop += 1 + if loop > 1: return self.FAILURE + self.ECNTS['O'] = 0 # reset error count + return ret + + # check an object path status information + # path: object store path name + # Return count of object key names, 0 if not file exists; None if error checking + def check_object_path(self, path, bucket = None, logact = 0): + if not bucket: bucket = self.PGLOG['OBJCTBKT'] + ret = None + if not path: return ret + cmd = "{} lo {} -ls -b {}".format(self.OBJCTCMD, path, bucket) + loop = 0 + while loop < 2: + buf = self.pgsystem(cmd, self.LOGWRN, self.CMDRET) + if buf: + ary = json.loads(buf) + return len(ary) + errmsg = "Error Execute: {}\n{}".format(cmd, self.PGLOG['SYSERR']) + (hstat, msg) = self.host_down_status(bucket, self.OHOST, 0, logact) + if hstat: errmsg += "\n" + msg + self.errlog(errmsg, 'O', loop, logact) + loop += 1 + self.ECNTS['O'] = 0 # reset error count + return ret + + # object store function to get file stat + def object_file_stat(self, hash, uhash, opt): + info = {'isfile' : 1, 'data_size' : int(hash['Size']), 'fname' : op.basename(hash['Key'])} + if not opt: return info + if opt&17: + ms = re.match(r'^(\d+-\d+-\d+)\s+(\d+:\d+:\d+)', hash['LastModified']) + if ms: + (mdate, mtime) = ms.groups() + if self.PGLOG['GMTZ']: (mdate, mtime) = self.addhour(mdate, mtime, self.PGLOG['GMTZ']) + if opt&1: + info['date_modified'] = mdate + info['time_modified'] = mtime + if opt&16: info['week_day'] = self.get_weekday(mdate) + if opt&32: + ms = re.match(r'"(.+)"', hash['ETag']) + if ms: info['checksum'] = ms.group(1) + if uhash: + if opt&2: info['logname'] = uhash['user'] + if opt&4: info['meta'] = uhash + if opt&8: info['group'] = uhash['group'] + return info + + # check and get backup file status information + # file: backup File key name + # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename + # 1 - get date/time modified (date_modified, time_modfied) + # 2 - get file owner's login name (logname) + # 4 - get metadata hash + # 8 - get group name (group) + # 16 - get week day 0-Sunday, 1-Monday (week_day) + # 64 - rechecking + # Return a dict of file info, or None if file not exists + def check_backup_file(self, file, endpoint = None, opt = 0, logact = 0): + ret = None + if not file: return ret + if not endpoint: endpoint = self.PGLOG['BACKUPEP'] + bdir = op.dirname(file) + bfile = op.basename(file) + cmd = f"{self.BACKCMD} ls -ep {endpoint} -p {bdir} --filter {bfile}" + ccnt = loop = 0 + while loop < 2: + buf = self.pgsystem(cmd, logact, self.CMDRET) + syserr = self.PGLOG['SYSERR'] + if buf: + getstat = 0 + for line in re.split(r'\n', buf): + if 
re.match(r'^(User|-+)\s*\|', line): + getstat += 1 + elif getstat > 1: + ret = self.backup_file_stat(line, opt) + if ret: break + if ret: break + if loop or opt&64 == 0: return ret + time.sleep(self.PGSIG['ETIME']) + elif syserr: + if syserr.find("Directory '{}' not found on endpoint".format(bdir)) > -1: + if loop or opt&64 == 0: return ret + time.sleep(self.PGSIG['ETIME']) + elif ccnt < 2 and syserr.find("The connection to the server was broken") > -1: + time.sleep(self.PGSIG['ETIME']) + ccnt += 1 + continue + else: + if opt&64 == 0: return self.FAILURE + errmsg = "Error Execute: {}\n{}".format(cmd, syserr) + (hstat, msg) = self.host_down_status('', self.QHOSTS[endpoint], 0, logact) + if hstat: errmsg += "\n" + msg + self.errlog(errmsg, 'B', loop, logact) + loop += 1 + if ret: self.ECNTS['B'] = 0 # reset error count + return ret + + # backup store function to get file stat + def backup_file_stat(self, line, opt): + info = {} + items = re.split(r'[\s\|]+', line) + if len(items) < 8: return None + info['isfile'] = (1 if items[6] == 'file' else 0) + info['data_size'] = int(items[3]) + info['fname'] = items[7] + if not opt: return info + if opt&17: + mdate = items[4] + mtime = items[5] + ms = re.match(r'^(\d+:\d+:\d+)', mtime) + if ms: mtime = ms.group(1) + if self.PGLOG['GMTZ']: (mdate, mtime) = self.addhour(mdate, mtime, self.PGLOG['GMTZ']) + if opt&1: + info['date_modified'] = mdate + info['time_modified'] = mtime + if opt&16: info['week_day'] = self.get_weekday(mdate) + if opt&2: info['logname'] = items[0] + if opt&4: info['mode'] = self.get_file_mode(items[2]) + if opt&8: info['group'] = items[1] + return info + + # check and get a file status information inside a tar file + # file: File name to be checked + # tfile: the tar file name + # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename + # 1 - get date/time modified (date_modified, time_modfied) + # 2 - get file owner's login name (logname) + # 4 - get permission mode in 3 octal digits (mode) + # 8 - get group name (group) + # 16 - get week day 0-Sunday, 1-Monday (week_day) + # Return a dict of file info, or None if file not exists + def check_tar_file(self, file, tfile, opt = 0, logact = 0): + ret = None + if not (file and tfile): return ret + for loop in range(2): + buf = self.pgsystem("tar -tvf {} {}".format(tfile, file), self.LOGWRN, self.CMDRET) + if buf or not self.PGLOG['SYSERR'] or self.PGLOG['SYSERR'].find('Not found in archive') > -1: break + errmsg = self.PGLOG['SYSERR'] + (hstat, msg) = self.host_down_status(tfile, self.LHOST, 0, logact) + self.errlog(errmsg, 'L', loop, logact) + if loop > 0: return self.FAILURE + if buf: + for line in re.split(r'\n', buf): + ret = self.tar_file_stat(line, opt) + if ret: break + self.ECNTS['L'] = 0 # reset error count + return ret + + # local function to get file stat in a tar file + def tar_file_stat(self, line, opt): + items = re.split(r'\s+', line) + if len(items) < 6: return None + ms = re.match(r'^([d\-])([\w\-]{9})$', items[0]) + if not ms: return None + info = {} + info['isfile'] = (1 if ms and ms.group(1) == "-" else 0) + info['data_size'] = int(items[2]) + info['fname'] = op.basename(items[5]) + if not opt: return info + if opt&4: info['mode'] = self.get_file_mode(ms.group(2)) + if opt&17: + mdate = items[3] + mtime = items[4] + if self.PGLOG['GMTZ']: (mdate, mtime) = self.addhour(mdate, mtime, self.PGLOG['GMTZ']) + if opt&1: + info['date_modified'] = mdate + info['time_modified'] = mtime + if opt&16: info['week_day'] = self.get_weekday(mdate) + if 
opt&10: + ms = re.match(r'^(\w+)/(\w+)', items[1]) + if ms: + if opt&2: info['logname'] = ms.group(1) + if opt&8: info['group'] = ms.group(2) + return info + + # check and get a file status information on ftp server + # file: File name to be checked + # name: login user name + # pswd: login password + # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename + # 1 - get date/time modified (date_modified, time_modfied) + # 2 - get file owner's login name (logname) + # 4 - get permission mode in 3 octal digits (mode) + # 8 - get group name (group) + # 16 - get week day 0-Sunday, 1-Monday (week_day) + # Return a dict of file info, or None if file not exists + def check_ftp_file(self, file, opt = 0, name = None, pswd = None, logact = 0): + if not file: return None + ms = re.match(r'^(.+)/$', file) + if ms: file = ms.group(1) # remove ending '/' in case + cmd = "ncftpls -l " + if name: cmd += "-u {} ".format(name) + if pswd: cmd += "-p {} ".format(pswd) + fname = op.basename(file) + for loop in range(2): + buf = self.pgsystem(cmd + file, self.LOGWRN, self.CMDRET) + if buf: break + if self.PGLOG['SYSERR']: + self.errlog(self.PGLOG['SYSERR'], 'O', loop, logact|self.LOGERR) + if loop == 0: file = op.dirname(file) + '/' + if loop > 1: return self.FAILURE + for line in re.split(r'\n', buf): + if not line or line.find(fname) < 0: continue + info = self.ftp_file_stat(line, opt) + if info: return info + return None + + # local function to get stat of a file on ftp server + def ftp_file_stat(self, line, opt): + items = re.split(r'\s+', line) + if len(items) < 9: return None + ms = re.match(r'^([d\-])([\w\-]{9})$', items[0]) + info = {} + info['isfile'] = (1 if ms and ms.group(1) == "-" else 0) + info['data_size'] = int(items[4]) + info['fname'] = op.basename(items[8]) + if not opt: return info + if opt&4: info['mode'] = self.get_file_mode(ms.group(2)) + if opt&17: + dy = int(items[6]) + mn = self.get_month(items[5]) + if re.match(r'^\d+$', items[7]): + yr = int(items[7]) + mtime = "00:00:00" + else: + mtime = items[7] + ":00" + cdate = self.curdate() + ms = re.match(r'^(\d+)-(\d\d)', cdate) + if ms: + yr = int(ms.group(1)) + cm = int(ms.group(2)) # current month + if cm < mn: yr -= 1 # previous year + mdate = "{}-{:02}-{:02}".format(yr, mn, dy) + if opt&1: + info['date_modified'] = mdate + info['time_modified'] = mtime + if opt&16: info['week_day'] = self.get_weekday(mdate) + if opt&2: info['logname'] = items[2] + if opt&8: info['group'] = items[3] + return info + + # get an array of directories/files under given dir on a given host name (including local host) + # dir: directory name to be listed + # host: host name the directory on, default to self.LHOST + # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename + # 1 - get date/time modified (date_modified, time_modfied) + # 2 - get file owner's login name (logname) + # 4 - get permission mode in 3 octal digits (mode) + # 8 - get group name (group) + # 16 - get week day 0-Sunday, 1-Monday (week_day) + # 32 - get checksum (checksum), work for local file only + # Return: a dict with filenames as keys None if empty directory + def gdex_glob(self, dir, host, opt = 0, logact = 0): + shost = self.strip_host_name(host) + if self.pgcmp(shost, self.LHOST, 1) == 0: + return self.local_glob(dir, opt, logact) + elif self.pgcmp(shost, self.OHOST, 1) == 0: + return self.object_glob(dir, None, opt, logact) + elif self.pgcmp(shost, self.BHOST, 1) == 0: + return self.backup_glob(dir, None, opt, logact) + else: + 
return self.remote_glob(dir, host, opt, logact)
+    rda_glob = gdex_glob
+
+    # get a dict of directories/files under a given dir on the local host
+    # dir: directory name to be listed
+    # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
+    #      1 - get date/time modified (date_modified, time_modified)
+    #      2 - get file owner's login name (logname)
+    #      4 - get permission mode in 3 octal digits (mode)
+    #      8 - get group name (group)
+    #      16 - get week day 0-Sunday, 1-Monday (week_day)
+    #      32 - get checksum (checksum), works for local files only
+    #      256 - get files only and ignore directories
+    # Return: a dict with filenames as keys, empty if nothing matches
+    def local_glob(self, dir, opt = 0, logact = 0):
+        flist = {}
+        if not re.search(r'[*?]', dir):
+            if op.exists(dir):
+                dir = self.join_paths(dir, "*")
+            else:
+                dir += "*"
+        for file in glob.glob(dir):
+            info = self.check_local_file(file, opt, logact)
+            if info and (info['isfile'] or not 256&opt): flist[file] = info
+        return flist
+
+    # check and get file status information for the files under a directory on a remote host
+    # dir: remote directory name
+    # host: host name the directory is on, default to self.LHOST
+    # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
+    #      1 - get date/time modified (date_modified, time_modified)
+    #      2 - file owner's login name (logname), assumed 'gdexdata'
+    #      4 - get permission mode in 3 octal digits (mode)
+    #      8 - get group name (group), assumed 'dss'
+    #      16 - get week day 0-Sunday, 1-Monday (week_day)
+    # Return: a dict with filenames as keys, empty if nothing matches
+    def remote_glob(self, dir, host, opt = 0, logact = 0):
+        flist = {}
+        if not re.search(r'/$', dir): dir += '/'
+        buf = self.pgsystem("{} {}".format(self.get_sync_command(host), dir), self.LOGWRN, self.CMDRET)  # list the given directory (the original concatenated the literal string " dir")
+        if not buf:
+            if self.PGLOG['SYSERR'] and self.PGLOG['SYSERR'].find(self.PGLOG['MISSFILE']) < 0:
+                self.errlog("{}-{}: Error list directory\n{}".format(host, dir, self.PGLOG['SYSERR']), 'R', 1, logact)
+            return flist
+        for line in re.split(r'\n', buf):
+            info = self.remote_file_stat(line, opt)
+            if info: flist[dir + info['fname']] = info
+        return flist
+
+    # check and get multiple object store file status information
+    # dir: object directory name
+    # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
+    #      1 - get date/time modified (date_modified, time_modified)
+    #      2 - get file owner's login name (logname)
+    #      8 - get group name (group)
+    #      16 - get week day 0-Sunday, 1-Monday (week_day)
+    # Return: a dict with filenames as keys, or self.FAILURE on a listing error
+    def object_glob(self, dir, bucket = None, opt = 0, logact = 0):
+        flist = {}
+        if not bucket: bucket = self.PGLOG['OBJCTBKT']
+        ms = re.match(r'^(.+)/$', dir)
+        if ms: dir = ms.group(1)
+        cmd = "{} lo {} -b {}".format(self.OBJCTCMD, dir, bucket)
+        ary = err = None
+        buf = self.pgsystem(cmd, self.LOGWRN, self.CMDRET)
+        if buf:
+            if re.match(r'^\[\{', buf):
+                ary = json.loads(buf)
+            elif not re.match(r'^\[\]', buf):
+                err = "{}\n{}".format(self.PGLOG['SYSERR'], buf)
+        else:
+            err = self.PGLOG['SYSERR']
+        if not ary:
+            if err:
+                self.errlog("{}-{}-{}: Error list files\n{}".format(self.OHOST, bucket, dir, err), 'O', 1, logact)
+                return self.FAILURE
+            else:
+                return flist
+        for hash in ary:
+            uhash = None
+            if opt&10:
+                ucmd = "{} gm -l {} -b {}".format(self.OBJCTCMD, hash['Key'], bucket)
+                ubuf = self.pgsystem(ucmd, self.LOGWRN, self.CMDRET)
+                if ubuf and re.match(r'^\{.+', ubuf): uhash = json.loads(ubuf)
+            info = self.object_file_stat(hash, uhash, opt)
+            if info: flist[hash['Key']] = info
+        return flist
+
+    # check and get multiple Quasar backup file status information
+    # dir: backup path
+    # opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
+    #      1 - get date/time modified (date_modified, time_modified)
+    #      2 - get file owner's login name (logname)
+    #      8 - get group name (group)
+    #      16 - get week day 0-Sunday, 1-Monday (week_day)
+    #      64 - rechecking
+    # Return: a dict with filenames as keys; None if nothing is found, or self.FAILURE on error
+    def backup_glob(self, dir, endpoint = None, opt = 0, logact = 0):
+        if not dir: return None
+        if not endpoint: endpoint = self.PGLOG['BACKUPEP']
+        cmd = f"{self.BACKCMD} ls -ep {endpoint} -p {dir}"
+        flist = {}
+        for loop in range(2):
+            buf = self.pgsystem(cmd, logact, self.CMDRET)
+            syserr = self.PGLOG['SYSERR']
+            if buf:
+                getstat = 0
+                for line in re.split(r'\n', buf):
+                    if re.match(r'^(User|-+)\s*\|', line):
+                        getstat += 1
+                    elif getstat > 1:
+                        info = self.backup_file_stat(line, opt)
+                        if info: flist[info['fname']] = info
+                if flist: break
+                if loop or opt&64 == 0: return None
+                time.sleep(self.PGSIG['ETIME'])
+            elif syserr:
+                if syserr.find("Directory '{}' not found on endpoint".format(dir)) > -1:
+                    if loop or opt&64 == 0: return None
+                    time.sleep(self.PGSIG['ETIME'])
+                else:
+                    if opt&64 == 0: return self.FAILURE
+                    errmsg = "Error Execute: {}\n{}".format(cmd, syserr)
+                    (hstat, msg) = self.host_down_status('', self.QHOSTS[endpoint], 0, logact)
+                    if hstat: errmsg += "\n" + msg
+                    self.errlog(errmsg, 'B', loop, logact)
+        if flist:
+            self.ECNTS['B'] = 0 # reset error count
+            return flist
+        else:
+            return self.FAILURE
+
+    # local function to get the file/directory mode for a given permission string, for example rw-rw-r--
+    @staticmethod
+    def get_file_mode(perm):
+        mbits = [4, 2, 1]
+        mults = [64, 8, 1]
+        plen = len(perm)
+        if plen == 4:
+            perm = perm[1:]
+            plen = 3
+        mode = 0
+        for i in range(3):
+            for j in range(3):
+                pidx = 3*i+j
+                if pidx < plen and perm[pidx] != "-": mode += mults[i]*mbits[j]
+        return mode
+
+    # Evaluate md5 checksums
+    # file: a file name, or an array of file names, for MD5 checksums
+    # count: the array length if file is an array; 0 if file is a single name
+    # Return: one, or an array of, 128-bit md5 'fingerprints'; None if failed
+    def get_md5sum(self, file, count = 0, logact = 0):
+        cmd = 'md5sum '
+        if count > 0:
+            checksum = [None]*count
+            for i in range(count):
+                if op.isfile(file[i]):
+                    chksm = self.pgsystem(cmd + file[i], logact, 20)
+                    if chksm:
+                        ms = re.search(r'(\w{32})', chksm)
+                        if ms: checksum[i] = ms.group(1)
+        else:
+            checksum = None
+            if op.isfile(file):
+                chksm = self.pgsystem(cmd + file, logact, 20)
+                if chksm:
+                    ms = re.search(r'(\w{32})', chksm)
+                    if ms: checksum = ms.group(1)
+        return checksum
+
+    # Evaluate md5 checksums and compare them for two given files
+    # file1, file2: file names
+    # Return: 0 if same and 1 if not
+    def compare_md5sum(self, file1, file2, logact = 0):
+        if op.isdir(file1) or op.isdir(file2):
+            files1 = self.get_directory_files(file1)
+            fcnt1 = len(files1) if files1 else 0
+            files2 = self.get_directory_files(file2)
+            fcnt2 = len(files2) if files2 else 0
+            if fcnt1 != fcnt2: return 1
+            chksm1 = self.get_md5sum(files1, fcnt1, logact)
+            chksm1 = ''.join(chksm1)
+            chksm2 = self.get_md5sum(files2, fcnt2, logact)  # checksum the second file list (was files1, comparing the first list to itself)
+            chksm2 = ''.join(chksm2)
+        else:
+            chksm1 = self.get_md5sum(file1, 0, logact)
+            chksm2 = self.get_md5sum(file2, 0, logact)
+        return (0 if (chksm1 and chksm2 and chksm1 == chksm2) else 1)
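+
+    # Worked example (illustrative, not part of this patch): get_file_mode()
+    # converts an 'ls -l' style 9-character permission string into an integer
+    # mode, e.g.
+    #
+    #    self.get_file_mode('rw-rw-r--')  # (4+2)*64 + (4+2)*8 + 4 = 436 = 0o664
+    #    self.get_file_mode('rwxr-xr-x')  # 7*64 + 5*8 + 5 = 493 = 0o755
+    #
+    # A 4-character string is treated as a type flag plus the 3 owner bits;
+    # full 10-character strings like 'drwxr-xr-x' are not stripped, so callers
+    # pass only the 9 permission characters.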
+
+    # change local directory to todir, and return odir upon success
+    def change_local_directory(self, todir, logact = 0):
+        if logact:
+            lact = logact&~(self.EXITLG|self.ERRLOG)
+        else:
+            logact = lact = self.LOGWRN
+        if not op.isdir(todir):
+            if op.isfile(todir): return self.errlog(todir + ": is file, cannot change directory", 'L', 1, logact)
+            if not self.make_local_directory(todir, logact): return self.FAILURE
+        odir = self.PGLOG['CURDIR']
+        if todir == odir:
+            self.pglog(todir + ": in Directory", lact)
+            return odir
+        try:
+            os.chdir(todir)
+        except Exception as e:
+            return self.errlog(str(e), 'L', 1, logact)
+        else:
+            if not op.isabs(todir): todir = os.getcwd()
+            self.PGLOG['CURDIR'] = todir
+            self.pglog(todir + ": Change to Directory", lact)
+            return odir
+
+    # record the directory for a deleted file
+    # pass dir as None to turn recording of delete directories on
+    def record_delete_directory(self, dir, val):
+        if dir is None:
+            if isinstance(val, int):
+                self.DIRLVLS = val
+            elif re.match(r'^\d+$', val):  # val was missing from the re.match() call
+                self.DIRLVLS = int(val)
+        elif dir and not re.match(r'^(\.|\./|/)$', dir) and dir not in self.DELDIRS:
+            self.DELDIRS[dir] = val
+
+    # remove the recorded delete directories if they are empty
+    def clean_delete_directory(self, logact = 0):
+        if not self.DIRLVLS: return
+        if logact:
+            lact = logact&~(self.EXITLG)
+        else:
+            logact = lact = self.LOGWRN
+        lvl = self.DIRLVLS
+        self.DIRLVLS = 0 # set to 0 to stop recording directory
+        while lvl > 0:
+            lvl -= 1
+            dirs = {}
+            for dir in self.DELDIRS:
+                host = self.DELDIRS[dir]
+                dinfo = (dir if host == self.LHOST else "{}-{}".format(host, dir))
+                dstat = self.gdex_empty_directory(dir, self.DELDIRS[dir])
+                if dstat == 0:
+                    if self.delete_gdex_file(dir, host, logact):
+                        self.pglog(dinfo + ": Empty directory removed", lact)
+                elif dstat > 0:
+                    if dstat == 1 and lvl > 0: self.pglog(dinfo + ": Directory not empty yet", lact)
+                    continue
+                if lvl: dirs[op.dirname(dir)] = host
+            if not dirs: break
+            self.DELDIRS = dirs
+        self.DELDIRS = {} # empty cache afterward
+
+    # remove the given empty directory and all its empty subdirectories
+    # return 1 if the empty directory is removed, 0 otherwise
+    def clean_empty_directory(self, dir, host, logact = 0):
+        if not dir: return 0
+        dirs = self.gdex_glob(dir, host)
+        cnt = 0
+        if logact:
+            lact = logact&~self.EXITLG
+        else:
+            lact = logact = self.LOGWRN
+        if dirs:
+            for name in dirs:
+                cnt += 1
+                if dirs[name]['isfile']: continue
+                cnt -= self.clean_empty_directory(name, host, logact)
+
+        dinfo = (dir if self.same_hosts(host, self.LHOST) else "{}-{}".format(host, dir))
+        if cnt == 0:
+            if self.delete_gdex_file(dir, host, logact):
+                self.pglog(dinfo + ": Empty directory removed", lact)
+                return 1
+        else:
+            self.pglog(dinfo + ": Directory not empty yet", lact)
+        return 0
+
+    # check if given directory is empty
+    # Return: 0 if empty directory, 1 if not empty, 2 if a root directory and -1 if invalid
+    def gdex_empty_directory(self, dir, host):
+        shost = self.strip_host_name(host)
+        if self.pgcmp(shost, self.LHOST, 1) == 0:
+            return self.local_empty_directory(dir)
+        else:
+            return self.remote_empty_directory(dir, host)
+    rda_empty_directory = gdex_empty_directory
+
+    # return 0 if empty local directory, 1 if not, 2 if a root directory; -1 if it cannot be checked
+    def local_empty_directory(self, dir):
+        if not op.isdir(dir): return -1
+        if self.is_root_directory(dir, 'L'): return 2
+        if not re.search(r'/$', dir): dir += '/'
+        dir += '*'
+        return (1 if glob.glob(dir) else 0)
+
+    # return 0 if empty remote directory, 1 if not, 2 if a root directory; -1 if it cannot be checked
+    def remote_empty_directory(self, dir, host):
+        if self.is_root_directory(dir, 'R', host): return 2
if not re.search(r'/$', dir): dir += '/' + buf = self.pgsystem("{} {}".format(self.get_sync_command(host), dir), self.LOGWRN, self.CMDRET) + if not buf: return -1 + for line in re.split(r'\n', buf): + if self.remote_file_stat(line, 0): return 1 + return 0 + + # get sizes of files on a given host + # files: file names to get sizes + # host: host name the file on, default to self.LHOST + # return: array of file sizes size is -1 if file does not exist + def gdex_file_sizes(self, files, host, logact = 0): + sizes = [] + for file in files: sizes.append(self.gdex_file_size(file, host, 2, logact)) + return sizes + rda_file_sizes = gdex_file_sizes + + # get sizes of local files + # files: file names to get sizes + # return: array of file sizes size is -1 if file does not exist + def local_file_sizes(self, files, logact = 0): + sizes = [] + for file in files: sizes.append(self.local_file_size(file, 6, logact)) + return sizes + + # check if a file on a given host is empty or too small to be considered valid + # file: file name to be checked + # host: host name the file on, default to self.LHOST + # opt: 1 - to remove empty file + # 2 - show message for empty file + # 4 - show message for non-existing file + # return: file size in unit of byte + # 0 - empty file or small file, with size < self.PGLOG['MINSIZE'] + # -1 - file not exists + # -2 - error check file + def gdex_file_size(self, file, host, opt = 0, logact = 0): + info = self.check_gdex_file(file, host, 0, logact) + if info: + if info['isfile'] and info['data_size'] < self.PGLOG['MINSIZE']: + if opt: + if opt&2: self.errlog("{}-{}: {} file".format(host, file, ("Too small({}B)".format(info['data_size']) if info['data_size'] > 0 else "Empty")), + 'O', 1, logact) + if opt&1: self.delete_gdex_file(file, host, logact) + return 0 + else: + return info['data_size'] # if not regular file or not empty + elif info != None: + return -2 # error access + else: + if opt&4: self.errlog("{}-{}: {}".format(host, file, self.PGLOG['MISSFILE']), 'O', 1, logact) + return -1 # file not exist + rda_file_size = gdex_file_size + + # check if a local file is empty or too small to be considered valid + # file: file name to be checked + # opt: 1 - to remove empty file + # 2 - show message for empty file + # 4 - show message for non-existing file + # return: file size in unit of byte + # 0 - empty file or small file, with size < self.PGLOG['MINSIZE'] + # -1 - file not exists + # -2 - error check file + def local_file_size(self, file, opt = 0, logact = 0): + if not op.exists(file): + if opt&4: self.lmsg(file, self.PGLOG['MISSFILE'], logact) + return -1 # file not eixsts + info = self.check_local_file(file, 0, logact|self.PFSIZE) + if info: + if info['isfile'] and info['data_size'] < self.PGLOG['MINSIZE']: + if opt: + if opt&2: self.lmsg(file, ("Too small({}B)".format(info['data_size']) if info['data_size'] > 0 else "Empty file") , logact) + if opt&1: self.delete_local_file(file, logact) + return 0 + else: + return info['data_size'] # if not regular file or not empty + elif info != None: + return -2 # error check file + + # compress/uncompress a single local file + # ifile: file name to be compressed/uncompressed + # fmt: archive format + # act: 0 - uncompress + # 1 - compress + # 2 - get uncompress file name + # 3 - get compress file name + # return: array of new file name and archive format if changed otherwise original one + def compress_local_file(self, ifile, fmt = None, act = 0, logact = 0): + ms = re.match(r'^(.+)\.({})'.format(self.CMPSTR), ifile) + if ms: + ofile 
= ms.group(1)
+        else:
+            ofile = ifile
+        if fmt:
+            if act&1:
+                for ext in self.PGCMPS:
+                    if re.search(r'(^|\.)({})(\.|$)'.format(ext), fmt, re.I):
+                        ofile += '.' + ext
+                        break
+            else:
+                ms = re.search(r'(^|\.)({})$'.format(self.CMPSTR), fmt, re.I)
+                if ms: fmt = re.sub(r'{}{}$'.format(ms.group(1), ms.group(2)), '', fmt, 1)
+        if act < 2 and ifile != ofile: self.convert_files(ofile, ifile, 0, logact)
+        return (ofile, fmt)
+
+    # get the file archive format from a given file name; None if not found
+    def get_file_format(self, fname):
+        ms = re.search(r'\.({})$'.format(self.TARSTR), fname, re.I)
+        if ms: return self.PGTARS[ms.group(1)][2]
+        ms = re.search(r'\.({})$'.format(self.CMPSTR), fname, re.I)
+        if ms: return self.PGCMPS[ms.group(1)][2]
+        return None
+
+    # tar/untar multiple local files into/from a single tar/tar.gz/tgz/zip file
+    # tfile: tar file name to be tarred/untarred
+    # files: member file names in the tar file
+    # fmt: archive format (defaults to the tar file name extension); must be defined in self.PGTARS
+    # act: 0 - untar
+    #      1 - tar
+    # return: self.SUCCESS upon success, self.FAILURE otherwise
+    def tar_local_file(self, tfile, files, fmt, act, logact = 0):
+        if not fmt:
+            ms = re.search(r'\.({})$'.format(self.TARSTR), tfile, re.I)
+            if ms: fmt = ms.group(1)
+        logact |= self.ERRLOG
+        if not fmt: return self.pglog(tfile + ": Miss archive format", logact)
+        if fmt not in self.PGTARS: return self.pglog("{}: unknown format {} provided".format(tfile, fmt), logact)
+        tarray = self.PGTARS[fmt]
+        if not act: #untar member files
+            cmd = "{} {}".format(tarray[1], tfile)
+            if files: cmd += ' ' + ' '.join(files)
+        else:
+            if not files: return self.pglog(tfile + ": Miss member file to archive", logact)
+            cmd = "{} {} {}".format(tarray[0], tfile, ' '.join(files))
+        return self.pgsystem(cmd, logact, 7)
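+
+    # Illustrative usage (assumption, not in the original patch): fmt must be a
+    # key of self.PGTARS and is normally inferred from the file extension, e.g.
+    #
+    #    self.tar_local_file('out.tar', ['a.nc', 'b.nc'], None, 1)  # archive two members
+    #    self.tar_local_file('out.tar', None, None, 0)              # extract everything
+    #
+    # 'out.tar', 'a.nc' and 'b.nc' are hypothetical file names.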
+    # get the local file archive format by checking the extension of a given local file name
+    # file: local file name
+    def local_archive_format(self, file):
+        ms = re.search(r'\.({})$'.format(self.CMPSTR), file)
+        if ms:
+            fmt = ms.group(1)
+            if re.search(r'\.tar\.{}$'.format(fmt), file):
+                return "TAR." + fmt.upper()
+            else:
+                return fmt.upper()
+        elif re.search(r'\.tar$', file):
+            return "TAR"
+        return ''
+
+    # local function to show a message with the full local file path
+    def lmsg(self, file, msg, logact = 0):
+        if not op.isabs(file): file = self.join_paths(os.getcwd(), file)
+        return self.errlog("{}: {}".format(file, msg), 'L', 1, logact)
+
+    # check if a given path is executable locally
+    # return self.SUCCESS if yes, self.FAILURE if not
+    def check_local_executable(self, path, actstr = '', logact = 0):
+        if os.access(path, os.X_OK): return self.SUCCESS
+        if self.check_local_accessible(path, actstr, logact):
+            if actstr: actstr += '-'
+            self.errlog("{}{}: Accessible, but Unexecutable on '{}'".format(actstr, path, self.PGLOG['HOSTNAME']), 'L', 1, logact)
+        return self.FAILURE
+
+    # check if a given path is writable locally
+    # return self.SUCCESS if yes, self.FAILURE if not
+    def check_local_writable(self, path, actstr = '', logact = 0):
+        if os.access(path, os.W_OK): return self.SUCCESS
+        if self.check_local_accessible(path, actstr, logact):
+            if actstr: actstr += '-'
+            self.errlog("{}{}: Accessible, but Unwritable on '{}'".format(actstr, path, self.PGLOG['HOSTNAME']), 'L', 1, logact)
+        return self.FAILURE
+
+    # check if a given path is accessible locally
+    # return self.SUCCESS if yes, self.FAILURE if not
+    def check_local_accessible(self, path, actstr = '', logact = 0):
+        if os.access(path, os.F_OK): return self.SUCCESS
+        if actstr: actstr += '-'
+        self.errlog("{}{}: Inaccessible on '{}'".format(actstr, path, self.PGLOG['HOSTNAME']), 'L', 1, logact)
+        return self.FAILURE
+
+    # check if a given webfile under self.PGLOG['DSSDATA'] is writable
+    # return self.SUCCESS if yes, self.FAILURE if not
+    def check_webfile_writable(self, action, wfile, logact = 0):
+        ms = re.match(r'^({}/\w+)'.format(self.PGLOG['DSSDATA']), wfile)
+        if ms:
+            return self.check_local_writable(ms.group(1), "{} {}".format(action, wfile), logact)
+        else:
+            return self.SUCCESS   # no need to check
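+
+    # A hypothetical guard (the file name is made up): allow a move only when
+    # the owning dataset directory under PGLOG['DSSDATA'] is writable:
+    #    if self.check_webfile_writable('Move', wfile): pass   # proceed with the move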
+    # convert one file to another via uncompress, move/copy, and/or compress
+    def convert_files(self, ofile, ifile, keep = 0, logact = 0):
+        if ofile == ifile: return self.SUCCESS
+        oname = ofile
+        iname = ifile
+        if keep: kfile = ifile + ".keep"
+        oext = iext = None
+        for ext in self.PGCMPS:
+            if oext is None:
+                ms = re.match(r'^(.+)\.{}$'.format(ext), ofile)
+                if ms:
+                    oname = ms.group(1)
+                    oext = ext
+            if iext is None:
+                ms = re.match(r'^(.+)\.{}$'.format(ext), ifile)
+                if ms:
+                    iname = ms.group(1)
+                    iext = ext
+        if iext and oext and oext == iext:
+            oext = iext = None
+            iname = ifile
+            oname = ofile
+        if iext:   # uncompress
+            if keep:
+                if iext == 'zip':
+                    kfile = ifile
+                else:
+                    self.local_copy_local(kfile, ifile, logact)
+            if self.pgsystem("{} {}".format(self.PGCMPS[iext][1], ifile), logact, 5):
+                if iext == "zip":
+                    path = op.dirname(iname)
+                    if path and path != '.': self.move_local_file(iname, op.basename(iname), logact)
+                    if not keep: self.delete_local_file(ifile, logact)
+        if oname != iname:   # move/copy
+            path = op.dirname(oname)
+            if path and not op.exists(path): self.make_local_directory(path, logact)
+            if keep and not op.exists(kfile):
+                self.local_copy_local(oname, iname, logact)
+                kfile = iname
+            else:
+                self.move_local_file(oname, iname, logact)
+        if oext:   # compress
+            if keep and not op.exists(kfile):
+                if oext == "zip":
+                    kfile = oname
+                else:
+                    self.local_copy_local(kfile, oname, logact)
+            if oext == "zip":
+                path = op.dirname(oname)
+                if path:
+                    if path != '.': path = self.change_local_directory(path, logact)
+                    bname = op.basename(oname)
+                    self.pgsystem("{} {}.zip {}".format(self.PGCMPS[oext][0], bname, bname), logact, 5)
+                    if path != '.': self.change_local_directory(path, logact)
+                else:
+                    self.pgsystem("{} {} {}".format(self.PGCMPS[oext][0], ofile, oname), logact, 5)
+                if not keep and op.exists(ofile): self.delete_local_file(oname, logact)
+            else:
+                self.pgsystem("{} {}".format(self.PGCMPS[oext][0], oname), logact, 5)
+        if keep and op.exists(kfile) and kfile != ifile:
+            if op.exists(ifile):
+                self.delete_local_file(kfile, logact)
+            else:
+                self.move_local_file(ifile, kfile, logact)
+        if op.exists(ofile):
+            return self.SUCCESS
+        else:
+            return self.errlog("{}: ERROR convert from {}".format(ofile, ifile), 'L', 1, logact)
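+
+    # A minimal sketch (hypothetical names, assuming both extensions are defined
+    # in self.PGCMPS): recompress a gzipped input to bzip2, keeping the original:
+    #    self.convert_files('data.grb.bz2', 'data.grb.gz', keep = 1)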
+    # compare two files from two given hash references to the file information
+    # return 0 if same, 1 if different, -1 if they cannot be compared
+    @staticmethod
+    def compare_file_info(ainfo, binfo):
+        if not (ainfo and binfo): return -1   # at least one is missing
+        return (0 if (ainfo['data_size'] == binfo['data_size'] and
+                      ainfo['date_modified'] == binfo['date_modified'] and
+                      ainfo['time_modified'] == binfo['time_modified']) else 1)
+
+    # get the local dirname
+    @staticmethod
+    def get_local_dirname(file):
+        dir = op.dirname(file)
+        if dir == '.': dir = os.getcwd()
+        return dir
+
+    # collect valid file names under a given directory, the current directory if empty
+    def get_directory_files(self, dir = None, limit = 0, level = 0):
+        files = []
+        if dir:
+            if level == 0 and op.isfile(dir):
+                files.append(dir)
+                return files
+            dir += "/*"
+        else:
+            dir = "*"
+        for file in glob.glob(dir):
+            if op.isdir(file):
+                if limit == 0 or (limit-level) > 0:
+                    fs = self.get_directory_files(file, limit, level+1)
+                    if fs: files.extend(fs)
+            else:
+                files.append(file)
+        return files if files else None
+
+    # read a local file into a string and return it
+    def read_local_file(self, file, logact = 0):
+        try:
+            fd = open(file, 'r')
+        except Exception as e:
+            return self.errlog("{}: {}".format(file, str(e)), 'L', 1, logact)
+        else:
+            fstr = fd.read()
+            fd.close()
+            return fstr
+
+    # open a local file and return the file handle
+    def open_local_file(self, file, mode = 'r', logact = None):
+        if logact is None: logact = self.LOGERR
+        try:
+            fd = open(file, mode)
+        except Exception as e:
+            return self.errlog("{}: {}".format(file, str(e)), 'L', 1, logact)
+        return fd
+
+    # change absolute paths to relative paths
+    def get_relative_paths(self, files, cdir, logact = 0):
+        cnt = len(files)
+        if cnt == 0: return files
+        if not cdir: cdir = os.getcwd()
+        for i in range(cnt):
+            afile = files[i]
+            if op.isabs(afile):
+                files[i] = self.join_paths(afile, cdir, 1)
+            else:
+                self.pglog("{}: is not under the working directory '{}'".format(afile, cdir), logact)
+        return files
+
+    # check if the action on the given path is blocked
+    def check_block_path(self, path, act = '', logact = 0):
+        blockpath = self.PGLOG['USRHOME']
+        if not act: act = 'Copy'
+        if re.match(r'^{}'.format(blockpath), path):
+            return self.pglog("{}: {} to {} is blocked".format(path, act, blockpath), logact)
+        else:
+            return 1
+
+    # join two file names by using the common prefix/suffix and keeping the different main bodies;
+    # the bodies are separated by sep; replace fext with text if provided
+    def join_filenames(self, name1, name2, sep = '-', fext = None, text = None):
+        if fext:
+            name1 = self.remove_file_extention(name1, fext)
+            name2 = self.remove_file_extention(name2, fext)
+        if name1 == name2:
+            fname = name1
+        else:
+            fname = suffix = ''
+            cnt1 = len(name1)
+            cnt2 = len(name2)
+            cnt = (cnt1 if cnt1 < cnt2 else cnt2)
+            # get common prefix
+            for pcnt in range(cnt):
+                if name1[pcnt] != name2[pcnt]: break
+            # get common suffix
+            cnt -= pcnt
+            for scnt in range(0, cnt):
+                if name1[cnt1-scnt-1] != name2[cnt2-scnt-1]: break
+            body1 = name1[pcnt:(cnt1-scnt)]
+            body2 = name2[pcnt:(cnt2-scnt)]
+            if scnt > 0:
+                suffix = name1[(cnt1-scnt):cnt1]
+                if name1[cnt1-scnt-1].isnumeric():
+                    ms = re.match(r'^([\d\.-]*\d)', suffix)
+                    if ms: body1 += ms.group(1)   # include the suffix's leading digit chars in body1
+            if pcnt > 0:
+                fname = name1[0:pcnt]
+                if name2[pcnt].isnumeric():
+                    ms = re.search(r'(\d[\d\.-]*)$', fname)
+                    if ms: body2 = ms.group(1) + body2   # include the prefix's trailing digit chars in body2
+            fname += body1 + sep + body2
+            if suffix: fname += suffix
+        if text: fname += "." + text
+        return fname
+
+    # remove the given file extension if provided;
+    # otherwise try to remove a predefined compression extension in self.PGCMPS
+    def remove_file_extention(self, fname, fext):
+        if not fname: return ''
+        if fext:
+            fname = re.sub(r'\.{}$'.format(fext), '', fname, 1, re.I)
+        else:
+            for fext in self.PGCMPS:
+                mp = r'\.{}$'.format(fext)
+                if re.search(mp, fname):
+                    fname = re.sub(mp, '', fname, 1, re.I)
+                    break
+        return fname
+
+    # check if a previously down storage system is up now for the given dflag
+    # return an error message if the check fails, and None otherwise
+    def check_storage_down(self, dflag, dpath, dscheck, logact = 0):
+        if dflag not in self.DHOSTS:
+            if logact: self.pglog(dflag + ": Unknown Down Flag for Storage Systems", logact)
+            return None
+        dhost = self.DHOSTS[dflag]
+        if not dpath and dflag in self.DPATHS: dpath = self.DPATHS[dflag]
+        for loop in range(2):
+            (stat, msg) = self.host_down_status(dpath, dhost, 1, logact)
+            if stat < 0: break   # stop retry for planned down
+
+        if not dscheck and self.PGLOG['DSCHECK']: dscheck = self.PGLOG['DSCHECK']
+        if dscheck:
+            didx = dscheck['dflags'].find(dflag)
+            if msg:
+                if didx < 0: dscheck['dflags'] += dflag
+            else:
+                if didx > -1: dscheck['dflags'] = dscheck['dflags'].replace(dflag, '', 1)
+
+        return msg
+
+    # check the previously down storage systems recorded in dflags
+    # return an array of strings for storage systems that are still down,
+    # and an empty array if all are up
+    def check_storage_dflags(self, dflags, dscheck = None, logact = 0):
+        if not dflags: return []
+        isdict = isinstance(dflags, dict)
+        msgary = []
+        for dflag in dflags:
+            msg = self.check_storage_down(dflag, dflags[dflag] if isdict else None, dscheck, logact)
+            if msg: msgary.append(msg)
+        if not msgary:
+            if not dscheck and self.PGLOG['DSCHECK']: dscheck = self.PGLOG['DSCHECK']
+            cidx = dscheck['cindex'] if dscheck else 0
+            # clean dflags if the down storage systems are all up
+            if cidx: self.pgexec("UPDATE dscheck SET dflags = '' WHERE cindex = {}".format(cidx), logact)
+        return msgary
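+
+    # A hypothetical pre-flight check: skip an archive task while any storage
+    # system recorded in a dscheck record's dflags is still down:
+    #    downs = self.check_storage_dflags(dscheck['dflags'], dscheck)
+    #    if downs: return   # messages for the still-down systems are in downs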
+    # check whether a GDEX file is backed up for a given file record;
+    # clear the cached bfile records if frec is None
+    # return 0 if not yet, 1 if backed up, or -1 if backed up but modified since
+    def file_backup_status(self, frec, chgdays = 1, logact = 0):
+        if frec is None:
+            self.BFILES.clear()
+            return 0
+        bid = frec['bid']
+        if not bid: return 0
+        fields = 'bfile, dsid, date_modified'
+        if chgdays > 0: fields += ', note'
+        if bid not in self.BFILES: self.BFILES[bid] = self.pgget('bfile', fields, 'bid = {}'.format(bid), logact)
+        brec = self.BFILES[bid]
+        if not brec: return 0
+        if 'sfile' in frec:
+            fname = frec['sfile']
+            ftype = 'Saved'
+        else:
+            fname = frec['wfile']
+            ftype = 'Web'
+        ret = 1
+        fdate = frec['date_modified']
+        bdate = brec['date_modified']
+        if chgdays > 0 and self.diffdate(fdate, bdate) >= chgdays:
+            ret = -1
+            if brec['note']:
+                mp = r'{}<:>{}<:>(\d+)<:>(\w+)<:>'.format(fname, frec['type'])
+                ms = re.search(mp, brec['note'])
+                if ms:
+                    fsize = int(ms.group(1))
+                    cksum = ms.group(2)
+                    if cksum and cksum == frec['checksum'] or not cksum and fsize == frec['data_size']:
+                        ret = 1
+        if logact:
+            if ret == 1:
+                msg = "{}-{}: {} file backed up to /{}/{} by {}".format(frec['dsid'], fname, ftype, brec['dsid'], brec['bfile'], bdate)
+            else:
+                msg = "{}-{}: {} file changed on {}".format(frec['dsid'], fname, ftype, fdate)
+            self.pglog(msg, logact)
+        return ret
diff --git a/src/rda_python_common/pg_lock.py b/src/rda_python_common/pg_lock.py
new file mode 100644
index 0000000..4a45ae7
--- /dev/null
+++ b/src/rda_python_common/pg_lock.py
@@ -0,0 +1,533 @@
+#
+###############################################################################
+#
+#     Title : pg_lock.py
+#    Author : Zaihua Ji, zji@ucar.edu
+#      Date : 08/18/2020
+#             2025-01-10 transferred to package rda_python_common from
+#             https://github.com/NCAR/rda-shared-libraries.git
+#             2025-12-01 convert to class PgLock
+#   Purpose : Python library module for functions to lock RDADB records
+#
+#    Github : https://github.com/NCAR/rda-python-common.git
+#
+###############################################################################
+#
+import re
+import time
+from .pg_file import PgFile
+
+class PgLock(PgFile):
+
+    def __init__(self):
+        super().__init__()   # initialize parent class
+        self.DOLOCKS = {-2 : 'Force Unlock', -1 : 'Unlock', 0 : 'Unlock', 1 : 'Relock', 2 : 'Force Relock'}
+
+    def end_db_transaction(self, idx):
+        if idx > 0:
+            self.endtran()
+        else:
+            self.aborttran()
+        return idx
+
+    # check and return the running process status: 1 - running/uncheckable, 0 - stopped
+    def check_process_running_status(self, host, pid, dolock, lmsg, logact):
+        if not self.local_host_action(host, self.DOLOCKS[dolock], lmsg, logact): return 1
+        stat = self.check_host_pid(host, pid)
+        if stat > 0:
+            if logact: self.pglog("{}: Cannot {}".format(lmsg, self.DOLOCKS[dolock]), logact)
+            return 1
+        if stat < 0 and dolock > -2 and dolock < 2:
+            if logact: self.pglog("{}: Fail checking lock info to {}".format(lmsg, self.DOLOCKS[dolock]), logact)
+            return 1
+        return 0
+
+    # lock/unlock a dscheck record
+    # lock if dolock > 0, unlock if <= 0; skip if locked on a different host for 0 or 1;
+    # force unlock if < -1, or force relock if 2
+    def lock_dscheck(self, cidx, dolock, logact = 0):
+        if not cidx: return 0
+        if logact:
+            logerr = logact|self.ERRLOG
+            logout = logact&(~self.EXITLG)
+        else:
+            logerr = self.LOGERR
+            logout = self.LOGWRN if dolock > 1 or dolock < 0 else 0
+        table = "dscheck"
+        cnd = "cindex = {}".format(cidx)
+        fields = "command, pid, lockhost, lockcmd"
+        pgrec = self.pgget(table, fields, cnd, logerr)
+        if not pgrec: return 0   # dscheck is gone or db error
+        pid = 
pgrec['pid'] + host = pgrec['lockhost'] + lockcmd = pgrec['lockcmd'] + (chost, cpid) = self.current_process_info() + clockcmd = self.get_command() + if pid == 0 and dolock <= 0: return cidx # no need unlock + lckpid = -pid if pid > 0 and pid == cpid and not self.pgcmp(host, chost, 1) else pid + if dolock > 0 and lckpid < 0: return cidx # no need lock again + cinfo = "{}-{}-Chk{}({})".format(self.PGLOG['HOSTNAME'], self.current_datetime(), cidx, pgrec['command']) + if lckpid > 0 and (clockcmd == "dscheck" or lockcmd != "dscheck"): + lmsg = "{} Locked by {}/{}/{}".format(cinfo, pid, host, lockcmd) + if self.check_process_running_status(host, pid, dolock, lmsg, logout): return -cidx + record = {} + if dolock > 0: + if pid != cpid: record['pid'] = cpid + if host != chost: record['lockhost'] = chost + if lockcmd != clockcmd: record['lockcmd'] = clockcmd + else: + if pid: record['pid'] = 0 + if not record: return cidx + lkrec = self.pgget(table, fields, cnd, logerr|self.DOLOCK) + if not lkrec: return self.end_db_transaction(0) # dscheck is gone or db error + if (not lkrec['pid'] or + lkrec['pid'] == pid and self.pgcmp(lkrec['lockhost'], host, 1) == 0 or + lkrec['pid'] == cpid and self.pgcmp(lkrec['lockhost'], chost, 1) == 0): + if not self.pgupdt(table, record, cnd, logerr): + if logout: self.pglog(cinfo + ": Error update lock", logout) + cidx = -cidx + else: + if logout: self.pglog("{}: Relocked {}/{}".format(cinfo, lkrec['pid'], lkrec['lockhost']), logout) + cidx = -cidx + return self.end_db_transaction(cidx) + + # lock dscheck record for given cidx, pid and host + def lock_host_dscheck(self, cidx, pid, host, logact = 0): + if not (cidx and pid): return 0 + if logact: + logerr = logact|self.ERRLOG + logout = logact&(~self.EXITLG) + else: + logerr = self.LOGERR + logout = 0 + table = "dscheck" + cnd = "cindex = {}".format(cidx) + fields = "command, pid, lockhost, lockcmd" + pgrec = self.pgget(table, fields, cnd, logerr) + if not pgrec: return 0 # dscheck is gone or db error + (chost, cpid) = self.current_process_info() + cinfo = "{}-{}-Chk{}({})".format(self.PGLOG['HOSTNAME'], self.current_datetime(), cidx, pgrec['command']) + if pgrec['pid']: + if pid == pgrec['pid'] and self.pgcmp(pgrec['lockhost'], host, 1) == 0: + return -cidx # locked by the real process already + elif cpid != pgrec['pid'] or self.pgcmp(pgrec['lockhost'], chost, 1): + if logout: + lmsg = "{} Locked by {}/{}/{}".format(cinfo, pid, host, pgrec['lockcmd']) + self.pglog(lmsg +": Cannot Lock", logout) + return -cidx # locked by other process + record = {} + record['pid'] = pid + record['lockhost'] = host + record['lockcmd'] = self.get_command(pgrec['command']) + lkrec = self.pgget(table, fields, cnd, logerr|self.DOLOCK) + if not lkrec: return self.end_db_transaction(0) + if (not lkrec['pid'] or + lkrec['pid'] == pid and self.pgcmp(lkrec['lockhost'], host, 1) == 0 or + lkrec['pid'] == cpid and self.pgcmp(lkrec['lockhost'], chost, 1) == 0): + if not self.pgupdt(table, record, cnd, logerr): + if logout: self.pglog(cinfo + ": Error update lock", logout) + cidx = -cidx + else: + if logout: self.pglog("{}: Relocked {}/{}".format(cinfo, lkrec['pid'], lkrec['lockhost']), logout) + cidx = -cidx + return self.end_db_transaction(cidx) + + # lock/unlock data request record + # lock if dolock > 0, unlock if <= 0, skip for locked on different host if 0 or 1 + # force unlock if < -1 or 2 + def lock_request(self, ridx, dolock, logact = 0): + if not ridx: return 0 + if logact: + logerr = logact|self.ERRLOG + logout = logact&(~self.EXITLG) + 
else: + logerr = self.LOGERR + logout = self.LOGWRN if dolock > 1 or dolock < 0 else 0 + table = "dsrqst" + cnd = "rindex = {}".format(ridx) + fields = "pid, lockhost" + pgrec = self.pgget(table, fields, cnd, logerr) + if not pgrec: return 0 # request is gone or db error + pid = pgrec['pid'] + host = pgrec['lockhost'] + (chost, cpid) = self.current_process_info() + if pid == 0 and dolock <= 0: return ridx # no need unlock + lckpid = -pid if pid > 0 and pid == cpid and not self.pgcmp(host, chost, 1) else pid + if dolock > 0 and lckpid < 0: return ridx # no need lock again + rinfo = "{}-{}-Rqst{}".format(self.PGLOG['HOSTNAME'], self.current_datetime(), ridx) + if lckpid > 0: + lmsg = "{} Locked by {}/{}".format(rinfo, pid, host) + if self.check_process_running_status(host, pid, dolock, lmsg, logout): return -ridx + record = {} + if dolock > 0: + if pid != cpid: record['pid'] = cpid + if host != chost: record['lockhost'] = chost + if record: record['locktime'] = int(time.time()) + else: + if pid: record['pid'] = 0 + if host: record['lockhost'] = "" + if not record: return ridx + lkrec = self.pgget(table, fields, cnd, logerr|self.DOLOCK) + if not lkrec: return self.end_db_transaction(0) # request is gone or db error + if (not lkrec['pid'] or + lkrec['pid'] == pid and self.pgcmp(lkrec['lockhost'], host, 1) == 0 or + lkrec['pid'] == cpid and self.pgcmp(lkrec['lockhost'], chost, 1) == 0): + if not self.pgupdt(table, record, cnd, logerr): + if logout: self.pglog(rinfo + ": Error update lock", logout) + ridx = -ridx + else: + if logout: self.pglog("{}: Relocked {}/{}".format(rinfo, lkrec['pid'], lkrec['lockhost']), logout) + ridx = -ridx + return self.end_db_transaction(ridx) + + # lock dsrqst record for given cidx, pid and host + def lock_host_request(self, ridx, pid, host, logact = 0): + if not (ridx and pid): return 0 + if logact: + logerr = logact|self.ERRLOG + logout = logact&(~self.EXITLG) + else: + logerr = self.LOGERR + logout = 0 + table = "dsrqst" + cnd = "rindex = {}".format(ridx) + fields = "pid, lockhost" + pgrec = self.pgget(table, fields, cnd, logerr) + if not pgrec: return 0 # dscheck is gone or db error + rinfo = "{}-{}-Rqst{}".format(self.PGLOG['HOSTNAME'], self.current_datetime(), ridx) + if pgrec['pid']: + if pid == pgrec['pid'] and self.pgcmp(pgrec['lockhost'], host, 1) == 0: return ridx + if logout: + lmsg = "{} Locked by {}/{}".format(rinfo, pid, host) + self.pglog(lmsg +": Cannot Lock", logout) + return -ridx + record = {} + record['pid'] = pid + record['lockhost'] = host + record['locktime'] = int(time.time()) + pgrec = self.pgget(table, fields, cnd, logerr|self.DOLOCK) + if not pgrec: return self.end_db_transaction(0) + if not pgrec['pid'] or pid == pgrec['pid'] and self.pgcmp(pgrec['lockhost'], host, 1) == 0: + if not self.pgupdt(table, record, cnd, logerr): + if logout: self.pglog(rinfo + ": Error update lock", logout) + ridx = -ridx + else: + if logout: self.pglog("{}: Relocked {}/{}".format(rinfo, pgrec['pid'], pgrec['lockhost']), logout) + ridx = -ridx + return self.end_db_transaction(ridx) + + # lock/unlock dataset update record + # lock if dolock > 0, unlock if <= 0, skip for locked on different host if 0 or 1 + # force unlock if < -1 or 2 + def lock_update(self, lidx, linfo, dolock, logact = 0): + if not lidx: return 0 + if logact: + logerr = logact|self.ERRLOG + logout = logact&(~self.EXITLG) + else: + logerr = self.LOGERR + logout = self.LOGWRN if dolock > 1 or dolock < 0 else 0 + table = "dlupdt" + cnd = "lindex = {}".format(lidx) + fields = "pid, hostname" + 
pgrec = self.pgget(table, fields, cnd, logerr) + if not pgrec: return 0 # update record is deleted + pid = pgrec['pid'] + host = pgrec['hostname'] + (chost, cpid) = self.current_process_info() + if pid == 0 and dolock <= 0: return lidx # no need unlock + lckpid = -pid if pid > 0 and pid == cpid and not self.pgcmp(host, chost, 1) else pid + if dolock > 0 and lckpid < 0: return lidx # no need lock again + if not linfo: linfo = "{}-{}-Updt{}".format(self.PGLOG['HOSTNAME'], self.current_datetime(), lidx) + if lckpid > 0: + lmsg = "{} Locked by {}/{}".format(linfo, pid, host) + if self.check_process_running_status(host, pid, dolock, lmsg, logout): return -lidx + record = {} + if dolock > 0: + if pid != cpid: record['pid'] = cpid + if host != chost: record['hostname'] = chost + if record: record['locktime'] = int(time.time()) + else: + if pid: record['pid'] = 0 + if host: record['hostname'] = '' + if not record: return lidx + lkrec = self.pgget(table, fields, cnd, logerr|self.DOLOCK) + if not lkrec: return self.end_db_transaction(0) # update record is deleted + if not lkrec['pid'] or lkrec['pid'] == pid and self.pgcmp(lkrec['hostname'], host, 1) == 0: + if not self.pgupdt(table, record, cnd, logerr): + if logout: self.pglog(linfo + ": Error update lock", logout) + lidx = -lidx + else: + if logout: self.pglog("{}: Relocked {}/{}".format(linfo, lkrec['pid'], lkrec['hostname']), logout) + lidx = -lidx + return self.end_db_transaction(lidx) + + # lock/unlock dataset update control record + # lock if dolock > 0, unlock if <= 0, skip for locked on different host if 0 or 1, + # unlock dead process if < -1 or 2, force unlock if -2 + def lock_update_control(self, cidx, dolock, logact = 0): + if not cidx: return 0 + if logact: + logerr = logact|self.ERRLOG + logout = logact&(~self.EXITLG) + else: + logerr = self.LOGERR + logout = self.LOGWRN if dolock > 1 or dolock < 0 else 0 + table = "dcupdt" + cnd = "cindex = {}".format(cidx) + fields = "pid, lockhost" + pgrec = self.pgget(table, fields, cnd, logerr) + if not pgrec: return 0 # update control record is deleted + pid = pgrec['pid'] + host = pgrec['lockhost'] + (chost, cpid) = self.current_process_info() + if pid == 0 and dolock <= 0: return cidx # no need unlock + lckpid = -pid if pid > 0 and pid == cpid and not self.pgcmp(host, chost, 1) else pid + if dolock > 0 and lckpid < 0: return cidx # no need lock again + cinfo = "{}-{}-UC{}".format(self.PGLOG['HOSTNAME'], self.current_datetime(), cidx) + if lckpid > 0: + lmsg = "{} Locked by {}/{}".format(cinfo, pid, host) + if self.check_process_running_status(host, pid, dolock, lmsg, logout): return -cidx + record = {} + if dolock > 0: + if pid != cpid: record['pid'] = cpid + if host != chost: record['lockhost'] = chost + if record: record['chktime'] = int(time.time()) + else: + if pid: record['pid'] = 0 + if host: record['lockhost'] = '' + if not record: return cidx + lkrec = self.pgget(table, fields, cnd, logerr|self.DOLOCK) + if not lkrec: return self.end_db_transaction(0) # update control record is deleted + if (not lkrec['pid'] or + lkrec['pid'] == pid and self.pgcmp(lkrec['lockhost'], host, 1) == 0 or + lkrec['pid'] == cpid and self.pgcmp(lkrec['lockhost'], chost, 1) == 0): + if not self.pgupdt(table, record, cnd, logerr): + if logout: self.pglog(cinfo + ": Error update lock", logout) + cidx = -cidx + else: + if logout: self.pglog("{}: Relocked {}/{}".format(cinfo, lkrec['pid'], lkrec['lockhost']), logout) + cidx = -cidx + return self.end_db_transaction(cidx) + + # lock dscheck record for given cidx, pid 
and host + def lock_host_update_control(self, cidx, pid, host, logact = 0): + if not (cidx and pid): return 0 + if logact: + logerr = logact|self.ERRLOG + logout = logact&(~self.EXITLG) + else: + logerr = self.LOGERR + logout = 0 + table = "dcupdt" + cnd = "cindex = {}".format(cidx) + fields = "pid, lockhost" + pgrec = self.pgget(table, fields, cnd, logerr) + if not pgrec: return 0 # dscheck is gone or db error + cinfo = "{}-{}-UC{}".format(self.PGLOG['HOSTNAME'], self.current_datetime(), cidx) + if pgrec['pid']: + if pid == pgrec['pid'] and self.pgcmp(pgrec['lockhost'], host, 1) == 0: return cidx + if logout: + lmsg = "{} Locked by {}/{}".format(cinfo, pid, host) + self.pglog(lmsg +": Cannot Lock", logout) + return -cidx + record = {} + record['pid'] = pid + record['lockhost'] = host + record['chktime'] = int(time.time()) + pgrec = self.pgget(table, fields, cnd, logerr|self.DOLOCK) + if not pgrec: return self.end_db_transaction(0) + if not pgrec['pid'] or pid == pgrec['pid'] and self.pgcmp(pgrec['lockhost'], host, 1) == 0: + if not self.pgupdt(table, record, cnd, logerr): + if logout: self.pglog(cinfo + ": Error update lock", logout) + cidx = -cidx + else: + if logout: self.pglog("{}: Relocked {}/{}".format(cinfo, pgrec['pid'], pgrec['lockhost']), logout) + cidx = -cidx + return self.end_db_transaction(cidx) + + # return lock information of a locked process + @staticmethod + def lock_process_info(pid, lockhost, runhost = None, pcnt = 0): + retstr = " {}<{}".format(lockhost, pid) + if pcnt: retstr += "/{}".format(pcnt) + retstr += ">" + if runhost and runhost != lockhost: retstr += '/' + runhost + return retstr + + # lock/unlock data request partition record + # lock if dolock > 0, unlock if <= 0, skip for locked on different host if 0 or 1 + # force unlock if < -1 or 2 + def lock_partition(self, pidx, dolock, logact = 0): + if not pidx: return 0 + if logact: + logerr = logact|self.ERRLOG + logout = logact&(~self.EXITLG) + else: + logerr = self.LOGERR + logout = self.LOGWRN if dolock > 1 or dolock < 0 else 0 + table = "ptrqst" + cnd = "pindex = {}".format(pidx) + fields = "pid, lockhost" + pgrec = self.pgget(table, "rindex, ptorder, " + fields, cnd, logerr) + if not pgrec: return 0 # request is gone or db error + ridx = pgrec['rindex'] + pid = pgrec['pid'] + host = pgrec['lockhost'] + (chost, cpid) = self.current_process_info() + if pid == 0 and dolock <= 0: return pidx # no need unlock + lckpid = -pid if pid > 0 and pid == cpid and not self.pgcmp(host, chost, 1) else pid + if dolock > 0 and lckpid < 0: return pidx # no need lock again + pinfo = "{}-{}-RPT{}(Rqst{}/PTO{})".format(self.PGLOG['HOSTNAME'], self.current_datetime(), pidx, ridx, pgrec['ptorder']) + if lckpid > 0: + lmsg = "{} Locked by {}/{}".format(pinfo, pid, host) + if self.check_process_running_status(host, pid, dolock, lmsg, logout): return -pidx + record = {} + if dolock > 0: + if pid != cpid: record['pid'] = cpid + if host != chost: record['lockhost'] = chost + if record: record['locktime'] = int(time.time()) + else: + if pid: record['pid'] = 0 + if host: record['lockhost'] = "" + if not record: return pidx + lkrec = self.pgget(table, fields, cnd, logerr|self.DOLOCK) + if not lkrec: return self.end_db_transaction(0) # request partition is gone or db error + if (not lkrec['pid'] or + lkrec['pid'] == pid and self.pgcmp(lkrec['lockhost'], host, 1) == 0 or + lkrec['pid'] == cpid and self.pgcmp(lkrec['lockhost'], chost, 1) == 0): + lmsg = self.update_partition_lock(ridx, record, logout) + if lmsg: + if logout: self.pglog("{}: 
{}".format(pinfo, lmsg), logout)
+                pidx = -pidx
+            elif not self.pgupdt(table, record, cnd, logerr):
+                if logout: self.pglog(pinfo + ": Error update lock", logout)
+                pidx = -pidx
+        else:
+            if logout: self.pglog("{}: Relocked {}/{}".format(pinfo, lkrec['pid'], lkrec['lockhost']), logout)
+            pidx = -pidx
+        return self.end_db_transaction(pidx)
+
+    # lock a dsrqst partition record for given pidx, pid and host
+    def lock_host_partition(self, pidx, pid, host, logact = 0):
+        if not (pidx and pid): return 0
+        if logact:
+            logerr = logact|self.ERRLOG
+            logout = logact&(~self.EXITLG)
+        else:
+            logerr = self.LOGERR
+            logout = 0
+        table = "ptrqst"
+        cnd = "pindex = {}".format(pidx)
+        fields = "pid, lockhost"
+        pgrec = self.pgget(table, "rindex, ptorder, " + fields, cnd, logerr)
+        if not pgrec: return 0   # partition is gone or db error
+        ridx = pgrec['rindex']
+        pinfo = "{}-{}-RPT{}(Rqst{}/PTO{})".format(self.PGLOG['HOSTNAME'], self.current_datetime(), pidx, ridx, pgrec['ptorder'])
+        if pgrec['pid']:
+            if pid == pgrec['pid'] and self.pgcmp(pgrec['lockhost'], host, 1) == 0: return pidx
+            if logout:
+                lmsg = "{} Locked by {}/{}".format(pinfo, pid, host)
+                self.pglog(lmsg + ": Cannot Lock", logout)
+            return -pidx
+        record = {}
+        record['pid'] = pid
+        record['lockhost'] = host
+        record['locktime'] = int(time.time())
+        pgrec = self.pgget(table, fields, cnd, logerr|self.DOLOCK)
+        if not pgrec: return self.end_db_transaction(0)
+        if not pgrec['pid'] or pid == pgrec['pid'] and self.pgcmp(pgrec['lockhost'], host, 1) == 0:
+            lmsg = self.update_partition_lock(ridx, record, logout)
+            if lmsg:
+                if logout: self.pglog("{}: {}".format(pinfo, lmsg), logout)
+                pidx = -pidx
+            elif not self.pgupdt(table, record, cnd, logerr):
+                if logout: self.pglog(pinfo + ": Error update lock", logout)
+                pidx = -pidx
+        else:
+            if logout: self.pglog("{}: Relocked {}/{}".format(pinfo, pgrec['pid'], pgrec['lockhost']), logout)
+            pidx = -pidx
+        return self.end_db_transaction(pidx)
+
+    # update the dsrqst lock info for a given partition lock status
+    # return None if all is fine; an error message otherwise
+    def update_partition_lock(self, ridx, ptrec, logact = 0):
+        if not ridx: return 0
+        if logact:
+            logerr = logact|self.ERRLOG
+            logout = logact&(~self.EXITLG)
+        else:
+            logerr = self.LOGERR
+            logout = self.LOGWRN
+        table = "dsrqst"
+        lockhost = "partition"
+        cnd = "rindex = {}".format(ridx)
+        pgrec = self.pgget(table, "pid, lockhost", cnd, logact|self.DOLOCK)
+        if not pgrec: return "Error get Rqst{} record".format(ridx)   # should not happen
+        if pgrec['pid'] > 0 and pgrec['lockhost'] != lockhost:
+            return "Rqst{} locked by non-lockhost process ({}/{})".format(ridx, pgrec['pid'], pgrec['lockhost'])
+        record = {}
+        if ptrec['pid'] > 0:
+            record['pid'] = pgrec['pid'] + 1
+            record['lockhost'] = lockhost
+            record['locktime'] = ptrec['locktime']
+        else:
+            if pgrec['pid'] > 1:
+                pcnt = self.pgget('ptrqst', '', cnd + " AND pid > 0")
+                if pgrec['pid'] > pcnt: pgrec['pid'] = pcnt
+                record['pid'] = pgrec['pid'] - 1
+                record['lockhost'] = lockhost
+            else:
+                record['pid'] = 0
+                record['lockhost'] = ''
+        if not self.pgupdt(table, record, cnd, logact):
+            return "Error update Rqst{} lock".format(ridx)
+        return None
+
+    # lock/unlock a dataset record for Quasar backup
+    # lock if dolock > 0, unlock if <= 0; skip if locked on a different host for 0 or 1;
+    # unlock a dead process if < -1 or 2, force unlock if -2
+    def lock_dataset(self, dsid, dolock, logact = 0):
+        if not dsid: return 0
+        if logact:
+            logerr = logact|self.ERRLOG
+            logout = logact&(~self.EXITLG)
+        else:
+            logerr = self.LOGERR
+            logout = self.LOGWRN if dolock > 1 or dolock < 0 else 0
+        table = "dataset"
+        cnd = "dsid = '{}'".format(dsid)
+        fields = "pid, lockhost"
+        pgrec = self.pgget(table, fields, cnd, logerr)
+        if not pgrec: return 0   # dataset does not exist
+        pid = pgrec['pid']
+        host = pgrec['lockhost']
+        (chost, cpid) = self.current_process_info()
+        if pid == 0 and dolock <= 0: return 1   # no need to unlock
+        lckpid = -pid if pid > 0 and pid == cpid and not self.pgcmp(host, chost, 1) else pid
+        if dolock > 0 and lckpid < 0: return 1   # no need to lock again
+        dinfo = "{}-{}-{}".format(self.PGLOG['HOSTNAME'], self.current_datetime(), dsid)
+        if lckpid > 0:
+            lmsg = "{} Locked by {}/{}".format(dinfo, pid, host)
+            if self.check_process_running_status(host, pid, dolock, lmsg, logout): return -1
+        record = {}
+        if dolock > 0:
+            if pid != cpid: record['pid'] = cpid
+            if host != chost: record['lockhost'] = chost
+        else:
+            if pid: record['pid'] = 0
+        if not record: return 1
+        lkrec = self.pgget(table, fields, cnd, logerr|self.DOLOCK)
+        if not lkrec: return self.end_db_transaction(0)   # dataset is gone or db error
+        lstat = 1
+        if (not lkrec['pid'] or
+            lkrec['pid'] == pid and self.pgcmp(lkrec['lockhost'], host, 1) == 0 or
+            lkrec['pid'] == cpid and self.pgcmp(lkrec['lockhost'], chost, 1) == 0):
+            if not self.pgupdt(table, record, cnd, logerr):
+                if logout: self.pglog(dinfo + ": Error update lock", logout)
+                lstat = -1
+        else:
+            if logout: self.pglog("{}: Relocked {}/{}".format(dinfo, lkrec['pid'], lkrec['lockhost']), logout)
+            lstat = -1
+        return self.end_db_transaction(lstat)
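+
+# A hedged usage sketch (the surrounding workflow is hypothetical; the methods
+# and dolock values are from this module): lock a request record, work, unlock:
+#    pglock = PgLock()
+#    if pglock.lock_request(ridx, 1) > 0:
+#        pass                            # safe to process request ridx
+#        pglock.lock_request(ridx, 0)    # release the lock when done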
diff --git a/src/rda_python_common/pg_log.py b/src/rda_python_common/pg_log.py
new file mode 100644
index 0000000..dac2ec8
--- /dev/null
+++ b/src/rda_python_common/pg_log.py
@@ -0,0 +1,1352 @@
+#
+###############################################################################
+#
+#     Title : pg_log.py -- Module for logging messages
+#    Author : Zaihua Ji, zji@ucar.edu
+#      Date : 03/02/2016
+#             2025-01-10 transferred to package rda_python_common from
+#             https://github.com/NCAR/rda-shared-libraries.git
+#             2025-11-20 convert to class PgLOG
+#   Purpose : Python library module to log messages and perform other actions
+#             according to the value of logact, such as displaying the error
+#             message on screen and exiting the script
+#
+#    Github : https://github.com/NCAR/rda-python-common.git
+#
+###############################################################################
+
+import sys
+import os
+import re
+import pwd
+import grp
+import shlex
+import smtplib
+from email.message import EmailMessage
+from subprocess import Popen, PIPE, TimeoutExpired
+from os import path as op
+import time
+import socket
+import shutil
+import traceback
+from unidecode import unidecode
+
+class PgLOG:
+
+    # define some constants for logging actions
+    MSGLOG = (0x00001)    # logging message
+    WARNLG = (0x00002)    # show logging message as warning
+    EXITLG = (0x00004)    # exit after logging
+    LOGWRN = (0x00003)    # MSGLOG|WARNLG
+    LOGEXT = (0x00005)    # MSGLOG|EXITLG
+    WRNEXT = (0x00006)    # WARNLG|EXITLG
+    LGWNEX = (0x00007)    # MSGLOG|WARNLG|EXITLG
+    EMLLOG = (0x00008)    # append message to email buffer
+    LGWNEM = (0x0000B)    # MSGLOG|WARNLG|EMLLOG
+    LWEMEX = (0x0000F)    # MSGLOG|WARNLG|EMLLOG|EXITLG
+    ERRLOG = (0x00010)    # error log only, output to STDERR
+    LOGERR = (0x00011)    # MSGLOG|ERRLOG
+    LGEREX = (0x00015)    # MSGLOG|ERRLOG|EXITLG
+    LGEREM = (0x00019)    # MSGLOG|ERRLOG|EMLLOG
+    DOLOCK = (0x00020)    # action to lock table record(s)
+    ENDLCK = (0x00040)    # action to end locking table record(s)
+    AUTOID = (0x00080)    # action to retrieve the last auto added id
+    DODFLT = (0x00100)    # action to set empty values to default ones
+    SNDEML = (0x00200)    # action to send email now
+    RETMSG = (0x00400)    # action to return the message back
+    FRCLOG = (0x00800)    # force logging message
+    SEPLIN = (0x01000)    # add a separating line for email/STDOUT/STDERR
+    BRKLIN = (0x02000)    # add a line break for email/STDOUT/STDERR
+    EMLTOP = (0x04000)    # prepend message to email buffer
+    RCDMSG = (0x00814)    # make sure to record logging message
+    MISLOG = (0x00811)    # cannot access logfile
+    EMLSUM = (0x08000)    # record as email summary
+    EMEROL = (0x10000)    # record error as email only
+    EMLALL = (0x1D208)    # all email acts
+    DOSUDO = (0x20000)    # add 'sudo -u self.PGLOG['GDEXUSER']'
+    NOTLOG = (0x40000)    # do not log anything
+    OVRIDE = (0x80000)    # do override existing file or record
+    NOWAIT = (0x100000)   # do not wait on globus task to finish
+    ADDTBL = (0x200000)   # action to add a new table if it does not exist
+    SKPTRC = (0x400000)   # action to skip tracing when logging errors
+    UCNAME = (0x800000)   # action to change query field names to upper case
+    UCLWEX = (0x800015)   # UCNAME|MSGLOG|WARNLG|EXITLG
+    PFSIZE = (0x1000000)  # total file size under a path
+    SUCCESS = 1           # successful function call
+    FINISH = 2            # went through a function, including time out
+    FAILURE = 0           # unsuccessful function call
+
+    def __init__(self):
+        self.PGLOG = {
+            # more defined in untaint_suid() with environment variables
+            'EMLADDR' : '',
+            'CCDADDR' : '',
+            'SEPLINE' : "===========================================================\n",
+            'TWOGBS'  : 2147483648,
+            'ONEGBS'  : 1073741824,
+            'MINSIZE' : 100,         # minimal file size in bytes to be valid
+            'LOGMASK' : (0xFFFFFF),  # log mask to turn off certain log action bits
+            'BCKGRND' : 0,           # background process flag -b
+            'ERRCNT'  : 0,           # record number of errors for email
+            'ERRMSG'  : '',          # record error message for email
+            'SUMMSG'  : '',          # record summary message for email
+            'EMLMSG'  : '',          # record detail message for email
+            'PRGMSG'  : '',          # record progressing message for email, replaced each time
+            'GMTZ'    : 0,           # 0 - use local time, 1 - use Greenwich Mean Time
+            'NOLEAP'  : 0,           # 1 - skip February 29 while adding days to a date
+            'GMTDIFF' : 6,           # GMT is 6 hours ahead of us
+            'CURUID'  : None,        # the login name who executes the program
+            'SETUID'  : '',          # the login name for suid if it is different from CURUID
+            'FILEMODE': 0o664,       # default 8-base file mode
+            'EXECMODE': 0o775,       # default 8-base executable file mode or directory mode
+            'GDEXUSER' : "gdexdata", # common gdex user name
+            'GDEXEMAIL' : "zji",     # specialist to receive email instead of the common gdex user name
+            'SUDOGDEX' : 0,          # 1 to allow sudo to self.PGLOG['GDEXUSER']
+            'HOSTNAME' : '',         # current host name the process is running on
+            'OBJCTSTR' : "object",
+            'BACKUPNM' : "quasar",
+            'DRDATANM' : "drdata",
+            'GPFSNAME' : "glade",
+            'PBSNAME'  : "PBS",
+            'DSIDCHRS' : "d",
+            'DOSHELL'  : False,
+            'NEWDSID'  : True,
+            'PUSGDIR'  : None,
+            'BCHHOSTS' : "PBS",
+            'HOSTTYPE' : 'dav',      # default HOSTTYPE
+            'EMLMAX'  : 256,         # upper limit of email line count
+            'PGBATCH' : '',          # current batch service name, SLURM or PBS
+            'PGBINDIR' : '',
+            'SLMTIME' : 604800,      # max runtime for a SLURM batch job (7x24x60x60 seconds)
+            'PBSTIME' : 86400,       # max runtime for a PBS batch job (24x60x60 seconds)
+            'MSSGRP'  : None,        # set if set to a different HPSS group
+            'GDEXGRP' : "decs",
+            'EMLSEND' : None,        # path to sendmail, None if it does not exist
+            'DSCHECK' : None,        # carry some cached dscheck information
+            'PGDBBUF' : None,        # reference to a connected database object
+            'HPSSLMT' : 10,          # upper limit of HPSS streams
+            'NOQUIT'  : 0,           # do not quit if this flag is set for daemons
+            'DBRETRY' : 2,           # db retry count after error
+            'TIMEOUT' : 15,          # default timeout (in seconds) for tosystem()
+            'CMDTIME' : 120,         # default command time (in seconds) for pgsystem() to record end time
+            'SYSERR'  : None,        # cache the error message generated inside pgsystem()
+            'ERR2STD' : [],          # if a non-empty array of strings, change stderr to stdout on match
+            'STD2ERR' : [],          # if a non-empty array of strings, change stdout to stderr on match
+            'MISSFILE': "No such file or directory",
+            'GITHUB'  : "https://github.com",   # github server
+            'EMLSRVR' : "ndir.ucar.edu",        # UCAR email server
+            'EMLPORT' : 25                      # UCAR email port
+        }
+        self.PGLOG['RDAUSER'] = self.PGLOG['GDEXUSER']
+        self.PGLOG['RDAGRP'] = self.PGLOG['GDEXGRP']
+        self.PGLOG['RDAEMAIL'] = self.PGLOG['GDEXEMAIL']
+        self.PGLOG['SUDORDA'] = self.PGLOG['SUDOGDEX']
+        self.HOSTTYPES = {
+            'rda'    : 'dsg_mach',
+            'casper' : 'dav',
+            'crhtc'  : 'dav',
+            'cron'   : 'dav',
+        }
+        self.CPID = {
+            'PID'  : "",
+            'CTM'  : int(time.time()),
+            'CMD'  : "",
+            'CPID' : "",
+        }
+        self.BCHCMDS = {'PBS' : 'qsub'}
+        # global dicts to cache information
+        self.COMMANDS = {}
+        self.SLMHOSTS = []
+        self.SLMSTATS = {}
+        self.PBSHOSTS = []
+        self.PBSSTATS = {}
+        # set additional common PGLOG values
+        self.set_common_pglog()
+
+    # get a time string in format YYYYMMDDHHNNSS for the given ctime, or the current time if ctime is 0
+    def current_datetime(self, ctime = 0):
+        if self.PGLOG['GMTZ']:
+            dt = time.gmtime(ctime) if ctime else time.gmtime()
+        else:
+            dt = time.localtime(ctime) if ctime else time.localtime()
+        return "{:02}{:02}{:02}{:02}{:02}{:02}".format(dt[0], dt[1], dt[2], dt[3], dt[4], dt[5])
+
+    # get an environment variable and untaint it
+    def get_environment(self, name, default = None, logact = 0):
+        env = 
os.getenv(name, default) + if env is None and logact: + self.pglog(name + ": Environment variable is not defined", logact) + return env + + # cache the msg string to global email entries for later call of send_email() + def set_email(self, msg, logact = 0): + if logact and msg: + if logact&self.EMLTOP: + if self.PGLOG['PRGMSG']: + msg = self.PGLOG['PRGMSG'] + "\n" + msg + self.PGLOG['PRGMSG'] = "" + if self.PGLOG['ERRCNT'] == 0: + if not re.search(r'\n$', msg): msg += "!\n" + else: + if self.PGLOG['ERRCNT'] == 1: + msg += " with 1 Error:\n" + else: + msg += " with {} Errors:\n".format(self.PGLOG['ERRCNT']) + msg += self.break_long_string(self.PGLOG['ERRMSG'], 512, None, self.PGLOG['EMLMAX']/2, None, 50, 25) + self.PGLOG['ERRCNT'] = 0 + self.PGLOG['ERRMSG'] = '' + if self.PGLOG['SUMMSG']: + msg += self.PGLOG['SEPLINE'] + if self.PGLOG['SUMMSG']: msg += "Summary:\n" + msg += self.break_long_string(self.PGLOG['SUMMSG'], 512, None, self.PGLOG['EMLMAX']/2, None, 50, 25) + if self.PGLOG['EMLMSG']: + msg += self.PGLOG['SEPLINE'] + if self.PGLOG['SUMMSG']: msg += "Detail Information:\n" + self.PGLOG['EMLMSG'] = msg + self.break_long_string(self.PGLOG['EMLMSG'], 512, None, self.PGLOG['EMLMAX'], None, 50, 40) + self.PGLOG['SUMMSG'] = "" # in case not + else: + if logact&self.ERRLOG: # record error for email summary + self.PGLOG['ERRCNT'] += 1 + if logact&self.BRKLIN: self.PGLOG['ERRMSG'] += "\n" + self.PGLOG['ERRMSG'] += "{}. {}".format(self.PGLOG['ERRCNT'], msg) + elif logact&self.EMLSUM: + if self.PGLOG['SUMMSG']: + if logact&self.BRKLIN: self.PGLOG['SUMMSG'] += "\n" + if logact&self.SEPLIN: self.PGLOG['SUMMSG'] += self.PGLOG['SEPLINE'] + self.PGLOG['SUMMSG'] += msg # append + if logact&self.EMLLOG: + if self.PGLOG['EMLMSG']: + if logact&self.BRKLIN: self.PGLOG['EMLMSG'] += "\n" + if logact&self.SEPLIN: self.PGLOG['EMLMSG'] += self.PGLOG['SEPLINE'] + self.PGLOG['EMLMSG'] += msg # append + elif msg is None: + self.PGLOG['EMLMSG'] = "" + + # retrieve the cached email message + def get_email(self): + return self.PGLOG['EMLMSG'] + + # send a customized email with all entries included + def send_customized_email(self, logmsg, emlmsg, logact = None): + if logact is None: logact = self.LOGWRN + entries = { + 'fr' : ["From", 1, None], + 'to' : ["To", 1, None], + 'cc' : ["Cc", 0, ''], + 'sb' : ["Subject", 1, None] + } + if logmsg: + logmsg += ': ' + else: + logmsg = '' + msg = emlmsg + for ekey in entries: + entry = entries[ekey][0] + ms = re.search(r'(^|\n)({}: *(.*)\n)'.format(entry), emlmsg, re.I) + if ms: + vals = ms.groups() + msg = msg.replace(vals[1], '') + if vals[2]: entries[ekey][2] = vals[2] + elif entries[ekey][1]: + return self.pglog("{}Missing Entry '{}' for sending email".format(logmsg, entry), logact|self.ERRLOG) + ret = self.send_python_email(entries['sb'][2], entries['to'][2], msg, entries['fr'][2], entries['cc'][2], logact) + if ret == self.SUCCESS or not self.PGLOG['EMLSEND']: return ret + # try commandline sendmail + ret = self.pgsystem(self.PGLOG['EMLSEND'], logact, 4, emlmsg) + logmsg += "Email " + entries['to'][2] + if entries['cc'][2]: logmsg += " Cc'd " + entries['cc'][2] + logmsg += " Subject: " + entries['sb'][2] + if ret: + self.log_email(emlmsg) + self.pglog(logmsg, logact&(~self.EXITLG)) + else: + errmsg = "Error sending email: " + logmsg + self.pglog(errmsg, (logact|self.ERRLOG)&~self.EXITLG) + return ret + + # send an email; if empty msg send email message saved in self.PGLOG['EMLMSG'] instead + def send_email(self, subject = None, receiver = None, msg = None, sender = 
None, logact = None):
+        if logact is None: logact = self.LOGWRN
+        return self.send_python_email(subject, receiver, msg, sender, None, logact)
+
+    # send an email via the python module smtplib; if msg is empty, send the email message
+    # saved in self.PGLOG['EMLMSG'] instead; pass cc = '' to skip 'Cc: '
+    def send_python_email(self, subject = None, receiver = None, msg = None, sender = None, cc = None, logact = None):
+        if logact is None: logact = self.LOGWRN
+        if not msg:
+            if self.PGLOG['EMLMSG']:
+                msg = self.PGLOG['EMLMSG']
+                self.PGLOG['EMLMSG'] = ''
+            else:
+                return ''
+        docc = False if cc else True
+        if not sender:
+            sender = self.PGLOG['CURUID']
+            if sender != self.PGLOG['GDEXUSER']: docc = False
+        if sender == self.PGLOG['GDEXUSER']: sender = self.PGLOG['GDEXEMAIL']
+        if sender.find('@') == -1: sender += "@ucar.edu"
+        if not receiver:
+            receiver = self.PGLOG['EMLADDR'] if self.PGLOG['EMLADDR'] else self.PGLOG['CURUID']
+        if receiver == self.PGLOG['GDEXUSER']: receiver = self.PGLOG['GDEXEMAIL']
+        if receiver.find('@') == -1: receiver += "@ucar.edu"
+        if docc and not re.match(self.PGLOG['GDEXUSER'], sender): self.add_carbon_copy(sender, 1)
+        emlmsg = EmailMessage()
+        emlmsg.set_content(msg)
+        emlmsg['From'] = sender
+        emlmsg['To'] = receiver
+        logmsg = "Email " + receiver
+        if cc is None: cc = self.PGLOG['CCDADDR']
+        if cc:
+            emlmsg['Cc'] = cc
+            logmsg += " Cc'd " + cc
+        if not subject: subject = "Message from {}-{}".format(self.PGLOG['HOSTNAME'], self.get_command())
+        # if not re.search(r'!$', subject): subject += '!'
+        emlmsg['Subject'] = subject
+        if self.CPID['CPID']: logmsg += " in " + self.CPID['CPID']
+        logmsg += ", Subject: {}\n".format(subject)
+        try:
+            with smtplib.SMTP(self.PGLOG['EMLSRVR'], self.PGLOG['EMLPORT']) as eml:
+                eml.send_message(emlmsg)
+        except smtplib.SMTPException as err:
+            errmsg = f"Error sending email:\n{err}\n{logmsg}"
+            return self.pglog(errmsg, (logact|self.ERRLOG)&~self.EXITLG)
+        self.log_email(str(emlmsg))
+        self.pglog(logmsg, logact&~self.EXITLG)
+        return self.SUCCESS
+
+    # log email sent
+    def log_email(self, emlmsg):
+        if not self.CPID['PID']: self.CPID['PID'] = "{}-{}-{}".format(self.PGLOG['HOSTNAME'], self.get_command(), self.PGLOG['CURUID'])
+        cmdstr = "{} {} at {}\n".format(self.CPID['PID'], self.break_long_string(self.CPID['CMD'], 40, "...", 1), self.current_datetime())
+        fn = "{}/{}".format(self.PGLOG['LOGPATH'], self.PGLOG['EMLFILE'])
+        try:
+            f = open(fn, 'a')
+            f.write(cmdstr + emlmsg)
+            f.close()
+        except FileNotFoundError as e:
+            print(e)
+
+    # Function: cmdlog(cmdline, ctime, logact)
+    # cmdline - program name and all arguments
+    # ctime   - time (in seconds) when the command starts
+    def cmdlog(self, cmdline = None, ctime = 0, logact = None):
+        if logact is None: logact = self.MSGLOG|self.FRCLOG
+        if not ctime: ctime = int(time.time())
+        if not cmdline or re.match('(end|quit|exit|abort)', cmdline, re.I):
+            cmdline = cmdline.capitalize() if cmdline else "Ends"
+            cinfo = self.cmd_execute_time("{} {}".format(self.CPID['PID'], cmdline), (ctime - self.CPID['CTM'])) + ": "
+            if self.CPID['CPID']: cinfo += self.CPID['CPID'] + " <= "
+            cinfo += self.break_long_string(self.CPID['CMD'], 40, "...", 1)
+            if logact: self.pglog(cinfo, logact)
+        else:
+            cinfo = self.current_datetime(ctime)
+            if re.match(r'CPID \d+', cmdline):
+                self.CPID['PID'] = "{}({})-{}{}".format(self.PGLOG['HOSTNAME'], os.getpid(), self.PGLOG['CURUID'], cinfo)
+                if logact: self.pglog("{}: {}".format(self.CPID['PID'], cmdline), logact)
+                self.CPID['CPID'] = cmdline
+            elif self.CPID['PID'] and re.match(r'(starts|catches) ', cmdline):
+                if logact: self.pglog("{}: {} at {}".format(self.CPID['PID'], cmdline, cinfo), logact)
+            else:
+                self.CPID['PID'] = "{}({})-{}{}".format(self.PGLOG['HOSTNAME'], os.getpid(), self.PGLOG['CURUID'], cinfo)
+                if logact: self.pglog("{}: {}".format(self.CPID['PID'], cmdline), logact)
+            self.CPID['CMD'] = cmdline
+            self.CPID['CTM'] = ctime
+
+    # Function: pglog(msg, logact) -- returns self.FAILURE, or the message itself, if it does not exit
+    # msg    -- message to log
+    # logact -- logging actions: MSGLOG, WARNLG, ERRLOG, EXITLG, EMLLOG, & SNDEML
+    # log and display the message/error and exit the program according to the logact value
+    def pglog(self, msg, logact = None):
+        if logact is None: logact = self.MSGLOG
+        retmsg = None
+        logact &= self.PGLOG['LOGMASK']   # filter the log actions
+        if logact&self.RCDMSG: logact |= self.MSGLOG
+        if self.PGLOG['NOQUIT']: logact &= ~self.EXITLG
+        if logact&self.EMEROL:
+            if logact&self.EMLLOG: logact &= ~self.EMLLOG
+            if not logact&self.ERRLOG: logact &= ~self.EMEROL
+        msg = msg.lstrip() if msg else ''   # remove leading whitespaces from the logging message
+        if logact&self.EXITLG:
+            ext = "Exit 1 in {}\n".format(os.getcwd())
+            if msg: msg = msg.rstrip() + "; "
+            msg += ext
+        else:
+            if msg and not re.search(r'(\n|\r)$', msg): msg += "\n"
+        if logact&self.RETMSG: retmsg = msg
+        if logact&self.EMLALL:
+            if logact&self.SNDEML or not msg:
+                title = (msg if msg else "Message from {}-{}".format(self.PGLOG['HOSTNAME'], self.get_command()))
+                msg = title
+                self.send_email(title.rstrip())
+            elif msg:
+                self.set_email(msg, logact)
+        if not msg: return (retmsg if retmsg else self.FAILURE)
+        if logact&self.EXITLG and (self.PGLOG['EMLMSG'] or self.PGLOG['SUMMSG'] or self.PGLOG['ERRMSG'] or self.PGLOG['PRGMSG']):
+            if not logact&self.EMLALL: self.set_email(msg, logact)
+            title = "ABORTS {}-{}".format(self.PGLOG['HOSTNAME'], self.get_command())
+            self.set_email((("ABORTS " + self.CPID['PID']) if self.CPID['PID'] else title), self.EMLTOP)
+            msg = title + '\n' + msg
+            self.send_email(title)
+        if logact&self.LOGERR:   # make sure error is always logged
+            msg = self.break_long_string(msg)
+            if logact&(self.ERRLOG|self.EXITLG):
+                cmdstr = self.get_error_command(int(time.time()), logact)
+                msg = cmdstr + msg
+            if not logact&self.NOTLOG:
+                if logact&self.ERRLOG:
+                    if not self.PGLOG['ERRFILE']: self.PGLOG['ERRFILE'] = re.sub(r'.log$', '.err', self.PGLOG['LOGFILE'])
+                    self.write_message(msg, f"{self.PGLOG['LOGPATH']}/{self.PGLOG['ERRFILE']}", logact)
+                    if logact&self.EXITLG:
+                        self.write_message(cmdstr, f"{self.PGLOG['LOGPATH']}/{self.PGLOG['LOGFILE']}", logact)
+                else:
+                    self.write_message(msg, f"{self.PGLOG['LOGPATH']}/{self.PGLOG['LOGFILE']}", logact)
+            if not self.PGLOG['BCKGRND'] and logact&(self.ERRLOG|self.WARNLG):
+                self.write_message(msg, None, logact)
+
+        if logact&self.EXITLG:
+            self.pgexit(1)
+        else:
+            return (retmsg if retmsg else self.FAILURE)
+
+    # write a log message
+    def write_message(self, msg, file, logact):
+        doclose = False
+        errlog = logact&self.ERRLOG
+        if file:
+            try:
+                OUT = open(file, 'a')
+                doclose = True
+            except FileNotFoundError:
+                OUT = sys.stderr if logact&(self.ERRLOG|self.EXITLG) else sys.stdout
+                OUT.write(f"Log File not found: {file}\n")
+        else:
+            OUT = sys.stderr if logact&(self.ERRLOG|self.EXITLG) else sys.stdout
+        if logact&self.BRKLIN: OUT.write("\n")
+        if logact&self.SEPLIN: OUT.write(self.PGLOG['SEPLINE'])
+        OUT.write(msg)
+        if errlog and file and not logact&(self.EMLALL|self.SKPTRC): OUT.write(self.get_call_trace())
+        if doclose: OUT.close()
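+
+    # A hypothetical call mixing action bits: log the message, echo it as a
+    # warning, and append it to the email buffer in one shot:
+    #    self.pglog("archive done", self.LOGWRN|self.EMLLOG)   # equals LGWNEM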
+
+    # check and disconnect the database before exit
+    def pgexit(self, stat = 0):
+        if self.PGLOG['PGDBBUF']: self.PGLOG['PGDBBUF'].close()
+        sys.exit(stat)
+
+    # get a command string for error log dump
+    def get_error_command(self, ctime, logact):
+        if not self.CPID['PID']: self.CPID['PID'] = "{}-{}-{}".format(self.PGLOG['HOSTNAME'], self.get_command(), self.PGLOG['CURUID'])
+        cmdstr = "{} {}".format((("ABORTS" if logact&self.ERRLOG else "QUITS") if logact&self.EXITLG else "ERROR"), self.CPID['PID'])
+        cmdstr = self.cmd_execute_time(cmdstr, (ctime - self.CPID['CTM']))
+        if self.CPID['CPID']: cmdstr += " {} <=".format(self.CPID['CPID'])
+        cmdstr += " {} at {}\n".format(self.break_long_string(self.CPID['CMD'], 40, "...", 1), self.current_datetime(ctime))
+        return cmdstr
+
+    # get the call trace track
+    @staticmethod
+    def get_call_trace(cut = 1):
+        t = traceback.extract_stack()
+        n = len(t) - cut
+        str = ''
+        sep = 'Trace: '
+        for i in range(n):
+            tc = t[i]
+            str += "{}{}({}){}".format(sep, tc[0], tc[1], ("" if tc[2] == '' else "{%s()}" % tc[2]))
+            if i == 0: sep = '=>'
+        return str + "\n" if str else ""
+
+    # get the caller file name
+    @staticmethod
+    def get_caller_file(cidx = 0):
+        return traceback.extract_stack()[cidx][0]
+
+    # log message, msg, for debugging processes according to the debug level
+    def pgdbg(self, level, msg = None, do_trace = True):
+        if not self.PGLOG['DBGLEVEL']: return   # no further action
+        if not isinstance(level, int):
+            ms = re.match(r'^(\d+)', level)
+            level = int(ms.group(1)) if ms else 0
+        levels = [0, 0]
+        if isinstance(self.PGLOG['DBGLEVEL'], int):
+            levels[1] = self.PGLOG['DBGLEVEL']
+        else:
+            ms = re.match(r'^(\d+)$', self.PGLOG['DBGLEVEL'])
+            if ms:
+                levels[1] = int(ms.group(1))
+            else:
+                ms = re.match(r'(\d*)-(\d*)', self.PGLOG['DBGLEVEL'])
+                if ms:
+                    levels[0] = int(ms.group(1)) if ms.group(1) else 0
+                    levels[1] = int(ms.group(2)) if ms.group(2) else 9999
+        if level > levels[1] or level < levels[0]: return   # debug level is out of range
+        if 'DBGPATH' in self.PGLOG:
+            dfile = self.PGLOG['DBGPATH'] + '/' + self.PGLOG['DBGFILE']
+        else:
+            dfile = self.PGLOG['DBGFILE']
+        if not msg:
+            self.pglog("Append debug Info (levels {}-{}) to {}".format(levels[0], levels[1], dfile), self.WARNLG)
+            msg = "DEBUG for " + self.CPID['PID'] + " "
+            if self.CPID['CPID']: msg += self.CPID['CPID'] + " <= "
+            msg += self.break_long_string(self.CPID['CMD'], 40, "...", 1)
+        # logging debug info
+        DBG = open(dfile, 'a')
+        DBG.write("{}:{}\n".format(level, msg))
+        if do_trace: DBG.write(self.get_call_trace())
+        DBG.close()
+
+    # return the trimmed string (strip leading and trailing spaces); remove comments led by '#' if rmcmt > 0
+    @staticmethod
+    def pgtrim(line, rmcmt = 1):
+        if line:
+            if rmcmt:
+                if re.match(r'^\s*#', line):   # comment line
+                    line = ''
+                elif rmcmt > 1:
+                    ms = re.search(r'^(.+)\s\s+\#', line)
+                    if ms: line = ms.group(1)   # remove comment and its leading whitespaces
+                else:
+                    ms = re.search(r'^(.+)\s+\#', line)
+                    if ms: line = ms.group(1)   # remove comment and its leading whitespace
+            line = line.strip()   # remove leading and trailing whitespaces
+        return line
+
+    # set self.PGLOG['PUSGDIR'] from the program file with full path
+    def set_help_path(self, progfile):
+        self.PGLOG['PUSGDIR'] = op.dirname(op.abspath(progfile))
+
+    # Function: show_usage(progname: Python program name to get file "progname.usg")
+    # show the program usage in file "self.PGLOG['PUSGDIR']/progname.usg" on screen with the
+    # unix command 'more'; exit the program when done.
+    def show_usage(self, progname, opts = None):
+        if self.PGLOG['PUSGDIR'] is None: self.set_help_path(self.get_caller_file(1))
+        usgname = self.join_paths(self.PGLOG['PUSGDIR'], progname + '.usg')
+        if opts:   # show usage for individual options of dsarch
+            for opt in opts:
+                if opts[opt][0] == 0:
+                    msg = "Mode"
+                elif opts[opt][0] == 1:
+                    msg = "Single-Value Information"
+                elif opts[opt][0] == 2:
+                    msg = "Multi-Value Information"
+                else:
+                    msg = "Action"
+                sys.stdout.write("\nDescription of {} Option -{}:\n".format(msg, opt))
+                IN = open(usgname, 'r')
+                nilcnt = begin = 0
+                for line in IN:
+                    if begin == 0:
+                        rx = " -{} or -".format(opt)
+                        if re.match(rx, line): begin = 1
+                    elif re.match(r'^\s*$', line):
+                        if nilcnt: break
+                        nilcnt = 1
+                    else:
+                        if re.match(r'\d[\.\s\d]', line): break   # section title
+                        if nilcnt and re.match(r' -\w\w or -', line): break
+                        nilcnt = 0
+                    if begin: sys.stdout.write(line)
+                IN.close()
+        else:
+            os.system("more " + usgname)
+        self.pgexit(0)
+
+    # compare error message to patterns saved in self.PGLOG['ERR2STD']
+    # return 1 if matched; 0 otherwise
+    def err2std(self, line):
+        for err in self.PGLOG['ERR2STD']:
+            if line.find(err) > -1: return 1
+        return 0
+
+    # compare message to patterns saved in self.PGLOG['STD2ERR']
+    # return 1 if matched; 0 otherwise
+    def std2err(self, line):
+        for out in self.PGLOG['STD2ERR']:
+            if line.find(out) > -1: return 1
+        return 0
+
+    # Function: pgsystem(pgcmd, logact, cmdopt, instr, seconds)
+    #   pgcmd - Linux system command; can be a string, "ls -l", or a list, ['ls', '-l']
+    #   logact - logging action option, defaults to self.LOGWRN
+    #   cmdopt - command control option, defaults to 5 (1+4)
+    #      0 - no command control,
+    #      1 - log pgcmd (including the sub command calls),
+    #      2 - log standard output,
+    #      4 - log error output,
+    #      7 - log all (pgcmd, and standard/error outputs),
+    #      8 - log command with time,
+    #     16 - return standard output message upon success,
+    #     32 - log error as standard output,
+    #     64 - force returning self.FAILURE if the called process aborts,
+    #    128 - try a failed command twice before quitting,
+    #    256 - cache standard error message,
+    #    512 - log instr & seconds with pgcmd if cmdopt&1,
+    #   1024 - turn on shell
+    #   instr - input string passed to the command via stdin if not None
+    #   seconds - number of seconds to wait before timing out the process if > 0
+    def pgsystem(self, pgcmd, logact = None, cmdopt = 5, instr = None, seconds = 0):
+        if logact is None: logact = self.LOGWRN
+        ret = self.SUCCESS
+        if not pgcmd: return ret   # empty command
+        act = logact&~self.EXITLG
+        if act&self.ERRLOG:
+            act &= ~self.ERRLOG
+            act |= self.WARNLG
+        if act&self.MSGLOG: act |= self.FRCLOG   # make sure system calls are always logged
+        cmdact = act if cmdopt&1 else 0
+        doshell = True if cmdopt&1024 else self.PGLOG['DOSHELL']
+        if isinstance(pgcmd, str):
+            cmdstr = pgcmd
+            if not doshell and re.search(r'[*?<>|;]', pgcmd): doshell = True
+            execmd = pgcmd if doshell else shlex.split(pgcmd)
+        else:
+            cmdstr = shlex.join(pgcmd)
+            execmd = cmdstr if doshell else pgcmd
+        if cmdact:
+            if cmdopt&8:
+                self.cmdlog("starts '{}'".format(cmdstr), None, cmdact)
+            else:
+                self.pglog("> " + cmdstr, cmdact)
+            if cmdopt&512 and (instr or seconds):
+                msg = ''
+                if seconds: msg = 'Timeout = {} Seconds'.format(seconds)
+                if instr: msg += ' With STDIN:\n' + instr
+                if msg: self.pglog(msg, cmdact)
+        stdlog = act if cmdopt&2 else 0
+        cmdflg = cmdact|stdlog
+        abort = -1 if cmdopt&64 else 0
+        loops = 2 if cmdopt&128 else 1
+        self.PGLOG['SYSERR'] = error = retbuf = outbuf = errbuf = ''
+        for loop in range(1, loops+1):
+            last = time.time()
+            try:
+                if instr:
+                    FD = Popen(execmd, shell=doshell, stdout=PIPE, stderr=PIPE, stdin=PIPE)
+                    if seconds:
+                        outbuf, errbuf = FD.communicate(input=instr.encode(), timeout=seconds)
+                    else:
+                        outbuf, errbuf = FD.communicate(input=instr.encode())
+                else:
+                    FD = Popen(execmd, shell=doshell, stdout=PIPE, stderr=PIPE)
+                    if seconds:
+                        outbuf, errbuf = FD.communicate(timeout=seconds)
+                    else:
+                        outbuf, errbuf = FD.communicate()
+            except Exception as e:
+                errbuf = str(e)
+                try:   # terminate the child process if it is still running, e.g. upon a timeout
+                    FD.kill()
+                except Exception:
+                    pass
+                ret = self.FAILURE
+            else:
+                ret = self.FAILURE if FD.returncode else self.SUCCESS
+            if isinstance(outbuf, bytes): outbuf = str(outbuf, errors='replace')
+            if isinstance(errbuf, bytes): errbuf = str(errbuf, errors='replace')
+            if errbuf and cmdopt&32:
+                outbuf += errbuf
+                if cmdopt&256: self.PGLOG['SYSERR'] = errbuf
+                errbuf = ''
+            if outbuf:
+                lines = outbuf.split('\n')
+                for line in lines:
+                    line = self.strip_output_line(line.strip())
+                    if not line: continue
+                    if self.PGLOG['STD2ERR'] and self.std2err(line):
+                        if cmdopt&260: error += line + "\n"
+                        if abort == -1 and re.match('ABORTS ', line): abort = 1
+                    else:
+                        if re.match(r'^>+ ', line):
+                            line = '>' + line
+                            if cmdflg: self.pglog(line, cmdflg)
+                        elif stdlog:
+                            self.pglog(line, stdlog)
+                        if cmdopt&16: retbuf += line + "\n"
+            if errbuf:
+                lines = errbuf.split('\n')
+                for line in lines:
+                    line = self.strip_output_line(line.strip())
+                    if not line: continue
+                    if self.PGLOG['ERR2STD'] and self.err2std(line):
+                        if stdlog: self.pglog(line, stdlog)
+                        if cmdopt&16: retbuf += line + "\n"
+                    else:
+                        if cmdopt&260: error += line + "\n"
+                        if abort == -1 and re.match('ABORTS ', line): abort = 1
+            if ret == self.SUCCESS and abort == 1: ret = self.FAILURE
+            end = time.time()
+            last = end - last
+            if error:
+                if ret == self.FAILURE:
+                    error = "Error Executing: {}\n{}".format(cmdstr, error)
+                else:
+                    error = "Error From: {}\n{}".format(cmdstr, error)
+                if loop > 1: error = "Retry " + error
+                if cmdopt&256: self.PGLOG['SYSERR'] += error
+                if cmdopt&4:
+                    errlog = (act|self.ERRLOG)
+                    if ret == self.FAILURE and loop >= loops: errlog |= logact
+                    self.pglog(error, errlog)
+            if last > self.PGLOG['CMDTIME'] and not re.search(r'(^|/|\s)(dsarch|dsupdt|dsrqst)\s', cmdstr):
+                cmdstr = "> {} Ends By {}".format(self.break_long_string(cmdstr, 100, "...", 1), self.current_datetime())
+                self.cmd_execute_time(cmdstr, last, cmdact)
+            if ret == self.SUCCESS or loop >= loops: break
+            time.sleep(6)
+        if ret == self.FAILURE and retbuf and cmdopt&272 == 272:
+            if self.PGLOG['SYSERR']: self.PGLOG['SYSERR'] += '\n'
+            self.PGLOG['SYSERR'] += retbuf
+            retbuf = ''
+        return (retbuf if cmdopt&16 else ret)
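+
+    # Illustrative calls (hypothetical commands): with cmdopt 1|16 the command is
+    # logged and its standard output returned on success; with the default 5 the
+    # command and its error output are logged and SUCCESS/FAILURE returned:
+    #   out = self.pgsystem("ls -l /tmp", cmdopt = 1|16)
+    #   ret = self.pgsystem("gzip /tmp/data.txt")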
+
+    # strip carriage returns '\r' but keep the ending newline '\n'
+    @staticmethod
+    def strip_output_line(line):
+        ms = re.search(r'\r([^\r]+)\r*$', line)
+        if ms: return ms.group(1)
+        ms = re.search(r'\s\.+\s+(\d+)%\s+', line)
+        if ms and int(ms.group(1)) != 100: return None
+        return line
+
+    # show command running time string formatted by seconds_to_string_time()
+    def cmd_execute_time(self, cmdstr, last, logact = None):
+        msg = cmdstr
+        if last >= self.PGLOG['CMDTIME']:   # show running time only if at least one minute
+            msg += " ({})".format(self.seconds_to_string_time(last))
+        if logact:
+            return self.pglog(msg, logact)
+        else:
+            return msg
+
+    # convert given seconds to a string time with units S-Second, M-Minute, H-Hour and D-Day
+    @staticmethod
+    def seconds_to_string_time(seconds, showzero = 0):
+        msg = ''
+        s = m = h = 0
+        if seconds > 0:
+            s = seconds%60                # seconds (0-59)
+            minutes = int(seconds/60)     # total minutes
+            m = minutes%60                # minutes (0-59)
+            if minutes >= 60:
+                hours = int(minutes/60)   # total hours
+                h = hours%24              # hours (0-23)
+                if hours >= 24:
+                    msg += "{}D".format(int(hours/24))   # days
+                if h: msg += "{}H".format(h)
+            if m: msg += "{}M".format(m)
+            if s:
+                msg += "%dS"%(s) if isinstance(s, int) else "{:.3f}S".format(s)
+        elif showzero:
+            msg = "0S"
+        return msg
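+
+    # Worked example (illustrative): 93784 seconds is 1 day, 2 hours, 3 minutes
+    # and 4 seconds, so seconds_to_string_time(93784) returns '1D2H3M4S';
+    # seconds_to_string_time(45.5) returns '45.500S'.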
+
+    # wrap function to call pgsystem() with a timeout control
+    # return self.FAILURE upon error or timeout
+    def tosystem(self, cmd, timeout = 0, logact = None, cmdopt = 5, instr = None):
+        if logact is None: logact = self.LOGWRN
+        if not timeout: timeout = self.PGLOG['TIMEOUT']   # set the default timeout if not given
+        return self.pgsystem(cmd, logact, cmdopt, instr, timeout)
+
+    # insert breaks (default '\n') into a long string for every limit (default 1024)
+    # characters; return up to mline lines, keeping the last eline lines, if mline is given
+    @staticmethod
+    def break_long_string(lstr, limit = 1024, bsign = "\n", mline = 200, bchars = ' &;', minlmt = 20, eline = 0):
+        length = len(lstr) if lstr else 0
+        if length <= limit: return lstr
+        if bsign is None: bsign = "\n"
+        if bchars is None: bchars = ' &;'
+        addbreak = offset = 0
+        retstr = ""
+        elines = []
+        if eline > mline: eline = mline
+        mcnt = mline - eline
+        ecnt = 0
+        while offset < length:
+            bpos = lstr[offset:].find(bsign)
+            blen = bpos if bpos > -1 else (length - offset)
+            if blen == 0:
+                offset += 1
+                substr = "" if addbreak else bsign
+                addbreak = 0
+            elif blen <= limit:
+                blen += 1
+                substr = lstr[offset:(offset+blen)]
+                offset += blen
+                addbreak = 0
+            else:
+                substr = lstr[offset:(offset+limit)]
+                bpos = limit - 1
+                while bpos > minlmt:
+                    char = substr[bpos]
+                    if bchars.find(char) >= 0: break
+                    bpos -= 1
+                if bpos > minlmt:
+                    bpos += 1
+                    substr = substr[:bpos]
+                    offset += bpos
+                else:
+                    offset += limit
+                addbreak = 1
+                substr += bsign
+            if mcnt:
+                retstr += substr
+                mcnt -= 1
+                if mcnt == 0 and eline == 0: break
+            elif eline > 0:
+                elines.append(substr)
+                ecnt += 1
+            else:
+                break
+        if ecnt > 0:
+            if ecnt > eline:
+                retstr += "..." + bsign
+                mcnt = ecnt - eline
+            else:
+                mcnt = 0
+            while mcnt < ecnt:
+                retstr += elines[mcnt]
+                mcnt += 1
+        return retstr
+
+    # join two paths by removing overlapping directories
+    # diff = 0: join the given paths
+    #        1: remove path1 from path2
+    @staticmethod
+    def join_paths(path1, path2, diff = 0):
+        if not path2: return path1
+        if not path1 or not diff and re.match('/', path2): return path2
+        if diff:
+            ms = re.match(r'{}/(.*)'.format(path1), path2)
+            if ms: return ms.group(1)
+        adir1 = path1.split('/')
+        adir2 = path2.split('/')
+        while adir2 and not adir2[0]: adir2.pop(0)
+        while adir1 and adir2 and adir2[0] == "..":
+            adir2.pop(0)
+            adir1.pop()
+        while adir2 and adir2[0] == ".": adir2.pop(0)
+        if adir1 and adir2:
+            len1 = len(adir1)
+            len2 = len(adir2)
+            idx1 = len1-1
+            idx2 = mcnt = 0
+            while idx2 < len1 and idx2 < len2:
+                if adir1[idx1] == adir2[idx2]:
+                    mcnt = 1
+                    break
+                idx2 += 1
+            if mcnt > 0:
+                while mcnt <= idx2:
+                    if adir1[idx1-mcnt] != adir2[idx2-mcnt]: break
+                    mcnt += 1
+                if mcnt > idx2:   # remove the mcnt matching directories
+                    while mcnt > 0:
+                        adir2.pop(0)
+                        mcnt -= 1
+        if diff:
+            return '/'.join(adir2)
+        else:
+            return '/'.join(adir1 + adir2)
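+
+    # Worked examples (illustrative): join_paths('/data/web', 'web/datasets')
+    # drops the overlapping 'web' and returns '/data/web/datasets', while
+    # join_paths('/data/web', '/data/web/datasets', 1) returns 'datasets'.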
+
+    # validate if a command for a given BATCH host is accessible and executable
+    # return self.SUCCESS if valid; self.FAILURE if not
+    def valid_batch_host(self, host, logact = 0):
+        HOST = host.upper()
+        return self.SUCCESS if HOST in self.BCHCMDS and self.valid_command(self.BCHCMDS[HOST], logact) else self.FAILURE
+
+    # validate if a given command is accessible and executable
+    # return the full command path if valid; '' if not
+    def valid_command(self, cmd, logact = 0):
+        ms = re.match(r'^(\S+)( .*)$', cmd)
+        if ms:
+            option = ms.group(2)
+            cmd = ms.group(1)
+        else:
+            option = ''
+        if cmd not in self.COMMANDS:
+            buf = shutil.which(cmd)
+            if buf is None:
+                if logact: self.pglog(cmd + ": executable command not found", logact)
+                buf = ''
+            elif option:
+                buf += option
+            self.COMMANDS[cmd] = buf
+        return self.COMMANDS[cmd]
+
+    # add carbon copies to self.PGLOG['CCDADDR']
+    def add_carbon_copy(self, cc = None, isstr = None, exclude = 0, specialist = None):
+        if not cc:
+            if cc is None and isstr is None: self.PGLOG['CCDADDR'] = ''
+        else:
+            emails = re.split(r'[,\s]+', cc) if isstr else cc
+            for email in emails:
+                if not email or email.find('/') >= 0 or email == 'N': continue
+                if email == "S":
+                    if not specialist: continue
+                    email = specialist
+                if email.find('@') == -1: email += "@ucar.edu"
+                if exclude and exclude.find(email) > -1: continue
+                if self.PGLOG['CCDADDR']:
+                    if self.PGLOG['CCDADDR'].find(email) > -1: continue   # email Cc'd already
+                    self.PGLOG['CCDADDR'] += ", "
+                self.PGLOG['CCDADDR'] += email
+
+    # get the current host name; or the batch server name if getbatch is 1
+    def get_host(self, getbatch = 0):
+        if getbatch and self.PGLOG['CURBID'] != 0:
+            host = self.PGLOG['PGBATCH']
+        elif self.PGLOG['HOSTNAME']:
+            return self.PGLOG['HOSTNAME']
+        else:
+            host = socket.gethostname()
+        return self.get_short_host(host)
+
+    #
+    # strip domain names and return the server name itself
+    #
+    def get_short_host(self, host):
+        if not host: return ''
+        ms = re.match(r'^([^\.]+)\.', host)
+        if ms: host = ms.group(1)
+        if self.PGLOG['HOSTNAME'] and (host == 'localhost' or host == self.PGLOG['HOSTNAME']): return self.PGLOG['HOSTNAME']
+        HOST = host.upper()
+        if HOST in self.BCHCMDS: return HOST
+        return host
+
+    # get a live PBS host name
+    def get_pbs_host(self):
+        if not self.PBSSTATS and self.PGLOG['PBSHOSTS']:
+            self.PBSHOSTS = self.PGLOG['PBSHOSTS'].split(':')
+            for host in self.PBSHOSTS:
+                self.PBSSTATS[host] = 1
+        for host in self.PBSHOSTS:
+            if host in self.PBSSTATS and self.PBSSTATS[host]: return host
+        return None
+
+    # set host status, 0 dead & 1 live, for one or all available PBS hosts
+    def set_pbs_host(self, host = None, stat = 0):
+        if host:
+            self.PBSSTATS[host] = stat
+        else:
+            if not self.PBSHOSTS and self.PGLOG['PBSHOSTS']:
+                self.PBSHOSTS = self.PGLOG['PBSHOSTS'].split(':')
+            for host in self.PBSHOSTS:
+                self.PBSSTATS[host] = stat
+
+    # reset the batch host name in case it was not set properly
+    def reset_batch_host(self, bhost, logact = None):
+        if logact is None: logact = self.LOGWRN
+        bchhost = bhost.upper()
+        if bchhost != self.PGLOG['PGBATCH']:
+            if self.PGLOG['CURBID'] > 0:
+                self.pglog("{}-{}: Batch ID is set, cannot change Batch host to {}".format(self.PGLOG['PGBATCH'], self.PGLOG['CURBID'], bchhost), logact)
+            else:
+                ms = re.search(r'(^|:){}(:|$)'.format(bchhost), self.PGLOG['BCHHOSTS'])
+                if ms:
+                    self.PGLOG['PGBATCH'] = bchhost
+                    if self.PGLOG['CURBID'] == 0: self.PGLOG['CURBID'] = -1
+                elif self.PGLOG['PGBATCH']:
+                    self.PGLOG['PGBATCH'] = ''
+                    self.PGLOG['CURBID'] = 0
+
+    # return the base command name of the current process
+    @staticmethod
+    def get_command(cmdstr = None):
+        if not cmdstr: cmdstr = sys.argv[0]
+        cmdstr = op.basename(cmdstr)
+        ms = re.match(r'^(.+)\.(py|pl)$', cmdstr)
+        if ms:
+            return ms.group(1)
+        else:
+            return cmdstr
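+
+    # Worked example (illustrative): get_command('/usr/local/bin/dsarch.py')
+    # returns 'dsarch'; with no argument it uses sys.argv[0].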
+
+    # wrap a given command cmd with either sudo or the setuid wrapper pgstart_<username>
+    # to run it as user asuser
+    def get_local_command(self, cmd, asuser = None):
+        cuser = self.PGLOG['SETUID'] if self.PGLOG['SETUID'] else self.PGLOG['CURUID']
+        if not asuser or cuser == asuser: return cmd
+        if cuser == self.PGLOG['GDEXUSER']:
+            wrapper = "pgstart_" + asuser
+            if self.valid_command(wrapper): return "{} {}".format(wrapper, cmd)
+        elif self.PGLOG['SUDOGDEX'] and asuser == self.PGLOG['GDEXUSER']:
+            return "sudo -u {} {}".format(self.PGLOG['GDEXUSER'], cmd)   # sudo as user gdexdata
+        return cmd
+
+    # wrap a given command cmd with either sudo or the setuid wrapper pgstart_<username>
+    # to run it as user asuser on a given remote host
+    def get_remote_command(self, cmd, host, asuser = None):
+        return self.get_local_command(cmd, asuser)
+
+    # wrap a given sync command for a given host name with/without sudo
+    def get_sync_command(self, host, asuser = None):
+        host = self.get_short_host(host)
+        if (not (self.PGLOG['SETUID'] and self.PGLOG['SETUID'] == self.PGLOG['GDEXUSER']) and
+            (not asuser or asuser == self.PGLOG['GDEXUSER'])):
+            return "sync" + host
+        return host + "-sync"
+
+    # set self.PGLOG['SETUID'] as needed
+    def set_suid(self, cuid = 0):
+        if not cuid: cuid = self.PGLOG['EUID']
+        if cuid != self.PGLOG['EUID'] or cuid != self.PGLOG['RUID']:
+            os.setreuid(cuid, cuid)
+            self.PGLOG['SETUID'] = pwd.getpwuid(cuid).pw_name
+            if not (self.PGLOG['SETUID'] == self.PGLOG['GDEXUSER'] or cuid == self.PGLOG['RUID']):
+                self.set_specialist_environments(self.PGLOG['SETUID'])
+                self.PGLOG['CURUID'] = self.PGLOG['SETUID']   # set CURUID to a specific specialist
+
+    # set common PGLOG values
+    def set_common_pglog(self):
+        self.PGLOG['CURDIR'] = os.getcwd()
+        # set current user id
+        self.PGLOG['RUID'] = os.getuid()
+        self.PGLOG['EUID'] = os.geteuid()
+        self.PGLOG['CURUID'] = pwd.getpwuid(self.PGLOG['RUID']).pw_name
+        try:
+            self.PGLOG['RDAUID'] = self.PGLOG['GDEXUID'] = pwd.getpwnam(self.PGLOG['GDEXUSER']).pw_uid
+            self.PGLOG['RDAGID'] = self.PGLOG['GDEXGID'] = grp.getgrnam(self.PGLOG['GDEXGRP']).gr_gid
+        except Exception:
+            self.PGLOG['RDAUID'] = self.PGLOG['GDEXUID'] = 0
+            self.PGLOG['RDAGID'] = self.PGLOG['GDEXGID'] = 0
+        if self.PGLOG['CURUID'] == self.PGLOG['GDEXUSER']: self.PGLOG['SETUID'] = self.PGLOG['GDEXUSER']
+        self.PGLOG['HOSTNAME'] = self.get_host()
+        for htype in self.HOSTTYPES:
+            ms = re.match(r'^{}(-|\d|$)'.format(htype), self.PGLOG['HOSTNAME'])
+            if ms:
+                self.PGLOG['HOSTTYPE'] = self.HOSTTYPES[htype]
+                break
+        self.PGLOG['DEFDSID'] = 'd000000' if self.PGLOG['NEWDSID'] else 'ds000.0'
+        self.SETPGLOG("USRHOME", "/glade/u/home")
+        self.SETPGLOG("DSSHOME", "/glade/u/home/gdexdata")
+        self.SETPGLOG("GDEXHOME", "/data/local")
+        self.SETPGLOG("ADDPATH", "")
+        self.SETPGLOG("ADDLIB", "")
+        self.SETPGLOG("OTHPATH", "")
+        self.SETPGLOG("PSQLHOME", "")
+        self.SETPGLOG("DSGHOSTS", "")
+        self.SETPGLOG("DSIDCHRS", "d")
+        if not os.getenv('HOME'): os.environ['HOME'] = "{}/{}".format(self.PGLOG['USRHOME'], self.PGLOG['CURUID'])
+        self.SETPGLOG("HOMEBIN", os.environ.get('HOME') + "/bin")
+        if 'PBS_JOBID' in os.environ:
+            sbid = os.getenv('PBS_JOBID')
+            ms = re.match(r'^(\d+)', sbid)
+            self.PGLOG['CURBID'] = int(ms.group(1)) if ms else -1
+            self.PGLOG['PGBATCH'] = self.PGLOG['PBSNAME']
+        else:
+            self.PGLOG['CURBID'] = 0
+            self.PGLOG['PGBATCH'] = ''
+        pgpath = self.PGLOG['HOMEBIN']
+        self.PGLOG['LOCHOME'] = "/ncar/rda/setuid"
+        if not op.isdir(self.PGLOG['LOCHOME']): self.PGLOG['LOCHOME'] = "/usr/local/decs"
+        pgpath += ":{}/bin".format(self.PGLOG['LOCHOME'])
+        locpath = "{}/bin/{}".format(self.PGLOG['DSSHOME'], self.PGLOG['HOSTTYPE'])
+        if op.isdir(locpath): pgpath += ":" + locpath
+        pgpath = self.add_local_path("{}/bin".format(self.PGLOG['DSSHOME']), pgpath, 1)
+        if self.PGLOG['PSQLHOME']:
+            locpath = self.PGLOG['PSQLHOME'] + "/bin"
+            if op.isdir(locpath): pgpath += ":" + locpath
+        pgpath = self.add_local_path(os.getenv('PATH'), pgpath, 1)
+        if self.PGLOG['HOSTTYPE'] == 'dav': pgpath = self.add_local_path('/glade/u/apps/opt/qstat-cache/bin:/opt/pbs/bin', pgpath, 1)
+        if 'OTHPATH' in self.PGLOG and self.PGLOG['OTHPATH']:
+            pgpath = self.add_local_path(self.PGLOG['OTHPATH'], pgpath, 1)
+        if self.PGLOG['ADDPATH']: pgpath = self.add_local_path(self.PGLOG['ADDPATH'], pgpath, 1)
+        pgpath = self.add_local_path("/bin:/usr/bin:/usr/local/bin:/usr/sbin", pgpath, 1)
+        os.environ['PATH'] = pgpath
+        os.environ['SHELL'] = '/bin/sh'
+        # set self.PGLOG values with environments and defaults
+        self.SETPGLOG("DSSDBHM", self.PGLOG['DSSHOME']+"/dssdb")   # dssdb home dir
+        self.SETPGLOG("LOGPATH", self.PGLOG['DSSDBHM']+"/log")   # path to log file
+        self.SETPGLOG("LOGFILE", "pgdss.log")   # log file name
+        self.SETPGLOG("EMLFILE", "pgemail.log")   # email log file name
+        self.SETPGLOG("ERRFILE", '')   # error file name
+        sm = "/usr/sbin/sendmail"
+        if self.valid_command(sm): self.SETPGLOG("EMLSEND", f"{sm} -t")   # send email command
+        self.SETPGLOG("DBGLEVEL", '')   # debug level
+        self.SETPGLOG("BAOTOKEN", 's.lh2t2kDjrqs3V8y2BU2zOocT')   # OpenBao token
+        self.SETPGLOG("DBGPATH", self.PGLOG['DSSDBHM']+"/log")   # path to debug log file
+        self.SETPGLOG("OBJCTBKT", "gdex-data")   # default Bucket on Object Store
+        self.SETPGLOG("BACKUPEP", "gdex-quasar")   # default Globus Endpoint on Quasar
+        self.SETPGLOG("DRDATAEP", "gdex-quasar-drdata")   # DRDATA Globus Endpoint on Quasar
+        self.SETPGLOG("DBGFILE", "pgdss.dbg")   # debug file name
+        self.SETPGLOG("CNFPATH", self.PGLOG['DSSHOME']+"/config")   # path to configuration files
+        self.SETPGLOG("DSSURL", "https://gdex.ucar.edu")   # current dss web URL
+        self.SETPGLOG("RQSTURL", "/datasets/request")   # request URL path
+        self.SETPGLOG("WEBSERVERS", "")   # webserver names for Web server
+        self.PGLOG['WEBHOSTS'] = self.PGLOG['WEBSERVERS'].split(':') if self.PGLOG['WEBSERVERS'] else []
+        self.SETPGLOG("DBMODULE", '')
+        self.SETPGLOG("LOCDATA", "/data")
+        # set dss web homedir
+        self.SETPGLOG("DSSWEB", self.PGLOG['LOCDATA']+"/web")
+        self.SETPGLOG("DSWHOME", self.PGLOG['DSSWEB']+"/datasets")   # dataset web root path
+        self.PGLOG['HOMEROOTS'] = "{}|{}".format(self.PGLOG['DSSHOME'], self.PGLOG['DSWHOME'])
+        self.SETPGLOG("DSSDATA", "/glade/campaign/collections/gdex")   # dss data root path
+        self.SETPGLOG("DSDHOME", self.PGLOG['DSSDATA']+"/data")   # dataset data root path
+        self.SETPGLOG("DECSHOME", self.PGLOG['DSSDATA']+"/decsdata")   # dataset decsdata root path
+        self.SETPGLOG("DSHHOME", self.PGLOG['DECSHOME']+"/helpfiles")   # dataset help root path
+        self.SETPGLOG("GDEXWORK", "/lustre/desc1/gdex/work")   # gdex work path
+        self.SETPGLOG("UPDTWKP", self.PGLOG['GDEXWORK'])   # dsupdt work root path
+        self.SETPGLOG("TRANSFER", "/lustre/desc1/gdex/transfer")   # gdex transfer path
+        self.SETPGLOG("RQSTHOME", self.PGLOG['TRANSFER']+"/dsrqst")   # dsrqst home
+        self.SETPGLOG("DSAHOME", "")   # dataset data alternate root path
+        self.SETPGLOG("RQSTALTH", "")   # alternate dsrqst path
+        self.SETPGLOG("GPFSHOST", "")   # empty if writable to glade
+        self.SETPGLOG("PSQLHOST", "rda-db.ucar.edu")   # host name for postgresql server
+        self.SETPGLOG("SLMHOSTS", "cheyenne:casper")   # host names for SLURM server
+        self.SETPGLOG("PBSHOSTS", "cron:casper")   # host names for PBS server
+        self.SETPGLOG("CHKHOSTS", "")   # host names for dscheck daemon
+        self.SETPGLOG("PVIEWHOST", "pgdb02.k8s.ucar.edu")   # host name for view only postgresql server
+        self.SETPGLOG("PMISCHOST", "pgdb03.k8s.ucar.edu")   # host name for misc postgresql server
+        self.SETPGLOG("FTPUPLD", self.PGLOG['TRANSFER']+"/rossby")   # ftp upload path
+        self.PGLOG['GPFSROOTS'] = "{}|{}|{}".format(self.PGLOG['DSDHOME'], self.PGLOG['UPDTWKP'], self.PGLOG['RQSTHOME'])
+        if 'ECCODES_DEFINITION_PATH' not in os.environ:
+            os.environ['ECCODES_DEFINITION_PATH'] = "/usr/local/share/eccodes/definitions"
+        os.environ['history'] = '0'
+        # set tmp dir
+        self.SETPGLOG("TMPPATH", self.PGLOG['GDEXWORK'] + "/ptmp")
+        if not self.PGLOG['TMPPATH']: self.PGLOG['TMPPATH'] = "/data/ptmp"
+        self.SETPGLOG("TMPDIR", '')
+        if not self.PGLOG['TMPDIR']:
+            self.PGLOG['TMPDIR'] = "/lustre/desc1/scratch/" + self.PGLOG['CURUID']
+        os.environ['TMPDIR'] = self.PGLOG['TMPDIR']
+        # empty directory for HOST-sync
+        self.PGLOG['TMPSYNC'] = self.PGLOG['DSSDBHM'] + "/tmp/.syncdir"
+        os.umask(2)
+
+    # check and return the TMPSYNC path; create it if it does not exist
+    def get_tmpsync_path(self):
+        if 'DSSHOME' in self.PGLOG and self.PGLOG['DSSHOME'] and not op.exists(self.PGLOG['TMPSYNC']):
+            self.pgsystem("mkdir " + self.PGLOG['TMPSYNC'], self.LGWNEX, 4)
+            self.pgsystem("chmod 775 " + self.PGLOG['TMPSYNC'], self.LOGWRN, 4)
+        return self.PGLOG['TMPSYNC']
+
+    # append or prepend locpath to pgpath
+    def add_local_path(self, locpath, pgpath, append = 0):
+        if not locpath:
+            return pgpath
+        elif not pgpath:
+            return locpath
+        paths = locpath.split(':')
+        for path in paths:
+            if re.match(r'^\./*$', path): continue
+            path = path.rstrip('\\')
+            ms = re.search(r'(^|:){}(:|$)'.format(path), pgpath)
+            if ms: continue
+            if append:
+                pgpath += ":" + path
+            else:
+                pgpath = path + ":" + pgpath
+        return pgpath
+
+    # set self.PGLOG[name] from the environment if defined, otherwise from the given
+    # default value; keep the previous value if both are empty
+    def SETPGLOG(self, name, value = ''):
+        oval = self.PGLOG[name] if name in self.PGLOG else ''
+        nval = self.get_environment(name, ('' if re.match('PG', value) else value))
+        self.PGLOG[name] = nval if nval else oval
+
+    # set the specialist home and return the default shell
+    def set_specialist_home(self, specialist):
+        if specialist == self.PGLOG['CURUID']: return   # no need to reset
+        if 'MAIL' in os.environ and re.search(self.PGLOG['CURUID'], os.environ['MAIL']):
+            os.environ['MAIL'] = re.sub(self.PGLOG['CURUID'], specialist, os.environ['MAIL'])
+        home = "{}/{}".format(self.PGLOG['USRHOME'], specialist)
+        shell = "tcsh"
+        buf = self.pgsystem("grep ^{}: /etc/passwd".format(specialist), self.LOGWRN, 20)
+        if buf:
+            lines = buf.split('\n')
+            for line in lines:
+                ms = re.search(r':(/.+):(/.+)', line)
+                if ms:
+                    home = ms.group(1)
+                    shell = op.basename(ms.group(2))
+                    break
+        if home != os.environ['HOME'] and op.exists(home):
+            os.environ['HOME'] = home
+        return shell
+
+    # set environments for a specified specialist
+    def set_specialist_environments(self, specialist):
+        shell = self.set_specialist_home(specialist)
+        resource = os.environ['HOME'] + "/.tcshrc"
+        checkif = 0   # 0 outside of if; 1 started if; 2 checking envs; -1 checked already
+        missthen = 0
+        try:
+            rf = open(resource, 'r')
+        except Exception:
+            return   # skip if cannot open
+        nline = rf.readline()
+        while nline:
+            line = self.pgtrim(nline)
+            nline = rf.readline()
+            if not line: continue
+            if checkif == 0:
+                ms = re.match(r'^if(\s|\()', line)
+                if ms: checkif = 1   # start if
+            elif missthen:
+                missthen = 0
+                if re.match(r'^then$', line): continue   # then on next line
+                checkif = 0   # end of inline if
+            elif re.match(r'^endif', line):
+                checkif = 0   # end of if
+                continue
+            elif checkif == -1:   # skip the line
+                continue
+            elif checkif == 2 and re.match(r'^else', line):
+                checkif = -1   # done checking envs in if
+                continue
+            if checkif == 1:
+                if line == 'else':
+                    checkif = 2
+                    continue
+                elif re.search(r'if\W', line):
+                    if(re.search(r'host.*!', line, re.I) and not re.search(self.PGLOG['HOSTNAME'], line) or
+                       re.search(r'host.*=', line, re.I) and re.search(self.PGLOG['HOSTNAME'], line)):
+                        checkif = 2
+                        if re.search(r'\sthen$', line):
+                            continue
+                        else:
+                            missthen = 1
+                    if checkif == 1: continue
+                else:
+                    continue
+            ms = re.match(r'^setenv\s+(.*)', line)
+            if ms: self.one_specialist_environment(ms.group(1))
+        rf.close()
+        self.SETPGLOG("HOMEBIN", self.PGLOG['PGBINDIR'])
+        os.environ['PATH'] = self.add_local_path(self.PGLOG['HOMEBIN'], os.environ['PATH'], 0)
+
+    # set one environment variable for the specialist
+    def one_specialist_environment(self, line):
+        ms = re.match(r'^(\w+)[=\s]+(.+)$', line)
+        if not ms: return
+        (var, val) = ms.groups()
+        if re.match(r'^(PATH|SHELL|IFS|CDPATH)$', var): return
+        if val.find('$') > -1: val = self.replace_environments(val)
+        ms = re.match(r'^(\"|\')(.*)(\"|\')$', val)
+        if ms: val = ms.group(2)   # remove quotes
+        os.environ[var] = val
+
+    # get and replace environment variables in a given string; default to the values in self.PGLOG
+    def replace_environments(self, envstr, default = '', logact = 0):
+        ishash = isinstance(default, dict)
+        ms = re.search(r'(^|.)\$({*)(\w+)(}*)', envstr)
+        if ms:
+            lead = ms.group(1)
+            name = ms.group(3)
+            rep = ms.group(2) + name + ms.group(4)
+            env = self.get_environment(name, (self.PGLOG[name] if name in self.PGLOG else (default[name] if ishash else default)), logact)
+            pre = (lead if (env or lead != ":") else '')
+            envstr = re.sub(r'{}\${}'.format(lead, rep), (pre+env), envstr)
+        return envstr
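+
+    # Illustrative call (hypothetical value): with self.PGLOG['DSSHOME'] set to
+    # '/glade/u/home/gdexdata' and no DSSHOME environment variable overriding it,
+    # replace_environments('$DSSHOME/bin') returns '/glade/u/home/gdexdata/bin'.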
+
+    # validate if the current host is a valid host to process on
+    def check_process_host(self, hosts, chost = None, mflag = None, pinfo = None, logact = None):
+        ret = 1
+        error = ''
+        if not mflag: mflag = 'G'
+        if not chost: chost = self.get_host(1)
+        if mflag == 'M':   # exact match
+            if not hosts or hosts != chost:
+                ret = 0
+                if pinfo: error = "not matched exactly"
+        elif mflag == 'I':   # inclusive match
+            if not hosts or hosts.find('!') == 0 or hosts.find(chost) < 0:
+                ret = 0
+                if pinfo: error = "not matched inclusively"
+        elif hosts:
+            if hosts.find(chost) >= 0:
+                if hosts.find('!') == 0:
+                    ret = 0
+                    if pinfo: error = "matched exclusively"
+            elif hosts.find('!') != 0:
+                ret = 0
+                if pinfo: error = "not matched"
+        if error:
+            if logact is None: logact = self.LOGERR
+            self.pglog("{}: CANNOT be processed on {}: hostname {}".format(pinfo, chost, error), logact)
+        return ret
+
+    # convert special foreign characters into ASCII characters
+    @staticmethod
+    def convert_chars(name, default = 'X'):
+        if not name: return default
+        if re.match(r'^[a-zA-Z0-9]+$', name): return name   # conversion not needed
+        decoded_name = unidecode(name).strip()
+        # remove any non-alphanumeric and non-underscore characters
+        cleaned_name = re.sub(r'[^a-zA-Z0-9_]', '', decoded_name)
+        if cleaned_name:
+            return cleaned_name
+        else:
+            return default
+
+    # retrieve host and process id
+    def current_process_info(self, realpid = 0):
+        if realpid or self.PGLOG['CURBID'] < 1:
+            return [self.PGLOG['HOSTNAME'], os.getpid()]
+        else:
+            return [self.PGLOG['PGBATCH'], self.PGLOG['CURBID']]
+
+    # convert given argv to a string; quote the entries with special characters
+    def argv_to_string(self, argv = None, quote = 1, action = None):
+        argstr = ''
+        if argv is None: argv = sys.argv[1:]
+        for arg in argv:
+            if argstr: argstr += ' '
+            ms = re.search(r'([<>\|\s])', arg)
+            if ms:
+                if action:
+                    self.pglog("{}: Cannot {} for special character '{}' in argument value".format(arg, action, ms.group(1)), self.LGEREX)
+                if quote:
+                    if re.search(r"\'", arg):
+                        arg = "\"{}\"".format(arg)
+                    else:
+                        arg = "'{}'".format(arg)
+            argstr += arg
+        return argstr
+
+    # convert an integer to a non-10 based string
+    @staticmethod
+    def int2base(x, base):
+        if x == 0: return '0'
+        negative = 0
+        if x < 0:
+            negative = 1
+            x = -x
+        dgts = []
+        while x:
+            dgts.append(str(int(x%base)))
+            x = int(x/base)
+        if negative: dgts.append('-')
+        dgts.reverse()
+        return ''.join(dgts)
+
+    # convert a non-10 based string to an integer
+    @staticmethod
+    def base2int(x, base):
+        if not isinstance(x, int): x = int(x)
+        if x == 0: return 0
+        negative = 0
+        if x < 0:
+            negative = 1
+            x = -x
+        num = 0
+        fact = 1
+        while x:
+            num += (x%10)*fact
+            fact *= base
+            x = int(x/10)
+        if negative: num = -num
+        return num
+
+    # convert an integer to an ordinal string
+    @staticmethod
+    def int2order(num):
+        ordstr = ['th', 'st', 'nd', 'rd']
+        snum = str(num)
+        num %= 100
+        if num > 19: num %= 10
+        if num > 3: num = 0
+        return snum + ordstr[num]
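+
+    # Worked examples (illustrative): int2base(13, 2) returns '1101' and
+    # base2int('1101', 2) converts it back to 13; int2order(22) returns '22nd'.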
diff --git a/src/rda_python_common/pg_opt.py b/src/rda_python_common/pg_opt.py
new file mode 100644
index 0000000..e606a4a
--- /dev/null
+++ b/src/rda_python_common/pg_opt.py
@@ -0,0 +1,1447 @@
+#
+###############################################################################
+#
+#     Title : pg_opt.py
+#    Author : Zaihua Ji, zji@ucar.edu
+#      Date : 08/26/2020
+#             2025-01-10 transferred to package rda_python_common from
+#             https://github.com/NCAR/rda-shared-libraries.git
+#             2025-12-01 converted to class PgOPT
+#   Purpose : python library module for holding global variables and
+#             functions for processing options and other global functions
+#
+#    Github : https://github.com/NCAR/rda-python-common.git
+#
+###############################################################################
+#
+import os
+import sys
+import re
+import time
+from os import path as op
+from .pg_file import PgFile
+
+class PgOPT(PgFile):
+
+    def __init__(self):
+        super().__init__()   # initialize parent class
+        self.OUTPUT = None
+        self.CMDOPTS = {}
+        self.INOPTS = {}
+        # global variables are used by all applications and this package;
+        # they need to be initialized in application-specific packages
+        self.ALIAS = {}
+        self.TBLHASH = {}
+        ###############################################################################
+        # valid options: the first hash value 0 means mode option, 1 means single-value
+        # option, 2 means multiple-value option, and >=4 means action option. The second
+        # hash values are long option names; either hash keys (considered as short
+        # option names) or the associated long names can be used. All options, except
+        # for multi-line value ones, can be specified on the command line, while
+        # single-value and multi-value options, except option -IM for input files, can
+        # also be given in input files. Long value option names are used in output
+        # files. All letters of option names are case-insensitive.
+        #
+        # The third hash value defines bit flags,
+        # For Action Options:
+        #    -1 - VSN card actions
+        #    >0 - sections
+        #
+        # For Mode Options:
+        #    1 - mode for archiving actions
+        #    2 - mode for set actions
+        #
+        # For Single-Value Info Options:
+        #    1(0x001) - auto set value
+        #    2(0x002) - manually set value
+        #    16(0x010) - convert to integer from command line and input files, set to 0 if empty
+        #    32(0x020) - time field
+        #    128(0x080) - '' allowed for single letter value
+        #    256(0x100) - date field
+        #
+        # For Multi-Value Info Options:
+        #    1(0x001) - one for multiple
+        #    2(0x002) - auto-set,
+        #    4(0x004) - expanded from one
+        #    8(0x008) - validated
+        #    16(0x010) - convert to integer from command line and input files, set to 0 if empty
+        #    32(0x020) - time field
+        #    64(0x040) - text field allowing multiple lines
+        #    128(0x080) - '' allowed for single letter value
+        #    256(0x100) - date field
+        #
+        # The fourth hash value defines restrictions for single letter values
+        ###############################################################################
+        self.OPTS = {}
+        # global initial optional values
+        self.PGOPT = {
+            'ACTS' : 0,   # carry current action bits
+            'UACTS' : 0,   # carry dsarch skip check UD action bits
+            'CACT' : '',   # current short action name
+            'IFCNT' : 0,   # 1 to read a single Input File at a time
+            'ANAME' : '',   # cache the application name if set
+            'TABLE' : '',   # table name the action is on
+            'UID' : 0,   # user.uid
+            'MSET' : 'SA',   # Action for multiple sets
+            'WIDTH' : 128,   # max column width
+            'TXTBIT' : 64,   # text field bit (0x1000) allow multiple lines
+            'PEMAX' : 12,   # max count of request partition errors for auto reprocesses
+            'PTMAX' : 24,   # max number of partitions for a single request
+            'REMAX' : 2,   # max count of request errors for auto reprocesses
+            'RSMAX' : 100,   # max count of gatherxml with options -R -S
+            'RCNTL' : None,   # placeholder for a request control record
+            'dcm' : "dcm",
+            'sdp' : "sdp",
+            'rcm' : "rcm",
+            'scm' : "scm",
+            'wpg' : "",
+            'gatherxml' : "gatherxml",
+            'cosconvert' : "cosconvert",
+            'emllog' : self.LGWNEM,
+            'emlerr' : self.LOGERR|self.EMEROL,
+            'emerol' : self.LOGWRN|self.EMEROL,
+            'emlsum' : self.LOGWRN|self.EMLSUM,
+            'emlsep' : self.LGWNEM|self.SEPLIN,
+            'wrnlog' : self.LOGWRN,
+            'errlog' : self.LOGERR,
+            'extlog' : self.LGEREX,
+            'PTYPE' : "CPRV",
+            'WDTYP' : "ADNU",
+            'HFTYP' : "DS",
+            'SDTYP' : "PORWUV",
+            'GXTYP' : "DP"
+        }
+        # global default parameters
+        self.params = {
+            'ES' : "<=>",
+            'AO' : "",
+            'DV' : "<:>"
+        }
+        self.WTYPE = {
+            'A' : "ARCO",
+            'D' : "DATA",
+            'N' : "NCAR",
+            'U' : "UNKNOWN",
+        }
+        self.HTYPE = {
+            'D' : "DOCUMENT",
+            'S' : "SOFTWARE",
+            'U' : "UNKNOWN"
+        }
+        self.HPATH = {
+            'D' : "docs",
+            'S' : "software",
+            'U' : "help"
+        }
+        self.MTYPE = {
+            'P' : "PRIMARY",
+            'A' : "ARCHIVING",
+            'V' : "VERSION",
+            'W' : "WORKING",
+            'R' : "ORIGINAL",
+            'B' : "BACKUP",
+            'O' : "OFFSITE",
+            'C' : "CHRONOPOLIS",
+            'U' : "UNKNOWN"
+        }
+        self.STYPE = {
+            'O' : "OFFLINE",
+            'P' : "PRIMARY",
+            'R' : "ORIGINAL",
+            'V' : "VERSION",
+            'W' : "WORKING",
+            'U' : "UNKNOWN"
+        }
+        self.BTYPE = {
+            'B' : "BACKUPONLY",
+            'D' : "BACKDRDATA",
+        }
+
+    # process and parse input information
+    # aname - application name such as 'dsarch', 'dsupdt', and 'dsrqst'
+    def parsing_input(self, aname):
+        self.PGLOG['LOGFILE'] = aname + ".log"
+        self.PGOPT['ANAME'] = aname
+        self.dssdb_dbname()
+        argv = sys.argv[1:]
+        if not argv: self.show_usage(aname)
+        self.cmdlog("{} {}".format(aname, ' '.join(argv)))
+        # process command line options to fill option values
+        option = infile = None
+        needhelp = 0
+        helpopts = {}
+        for param in argv:
+            if re.match(r'^(-{0,2}help|-H)$', param, re.I):
+                if option: helpopts[option] = self.OPTS[option]
+                needhelp = 1
+                continue
+            ms = re.match(r'^-([a-zA-Z]\w*)$', param)
+            if ms:   # option parameter
+                param = ms.group(1)
+                if option and not needhelp and option not in self.params:
+                    val = self.get_default_info(option)
+                    if val is not None:
+                        self.set_option_value(option, val)
+                    else:
+                        self.parameter_error("-" + option, "missval")
+                option = self.get_option_key(param)
+                if needhelp:
+                    helpopts[option] = self.OPTS[option]
+                    break
+                # set mode/action options
+                if self.OPTS[option][0]&3 == 0: self.set_option_value(option)
+            elif option:
+                ms = re.match(r"^\'(.*)\'$", param)
+                if ms: param = ms.group(1)
+                self.set_option_value(option, param)
+            elif self.find_dataset_id(param):
+                self.set_option_value('DS', param)
+            else:
+                option = self.get_option_key(param, 3, 1)
+                if option:
+                    self.set_option_value(option)
+                    if needhelp:
+                        helpopts[option] = self.OPTS[option]
+                        break
+                elif op.exists(param):   # assume input file
+                    infile = param
+                else:
+                    self.parameter_error(param)
+        if needhelp: self.show_usage(aname, helpopts)
+        if option and option not in self.params:
+            val = self.get_default_info(option)
+            if val is not None:
+                self.set_option_value(option, val)
+            else:
+                self.parameter_error("-" + option, "missval")
+        # check if only an input filename is given on the command line following aname
+        if infile:
+            if 'IF' in self.params:
+                self.parameter_error(infile)
+            else:
+                self.params['IF'] = [infile]
+        # process one or multiple given input files to fill option values
+        if 'IF' in self.params:
+            self.PGOPT['IFCNT'] = 1 if self.PGOPT['CACT'] == 'AQ' else 0
+            if self.OPTS['DS'][0] == 1:
+                param = self.validate_infile_names(self.params['DS']) if 'DS' in self.params else 0
+            else:
+                param = 1
+            self.get_input_info(self.params['IF'])
+            if not param and 'DS' in self.params: self.validate_infile_names(self.params['DS'])
+        if not self.PGOPT['ACTS']: self.parameter_error(aname, "missact")   # no action entered
+        if 'DB' in self.params:
+            dcnt = len(self.params['DB'])
+            for i in range(dcnt):
+                if i == 0:
+                    self.PGLOG['DBGLEVEL'] = self.params['DB'][0]
+                elif i == 1:
+                    self.PGLOG['DBGPATH'] = self.params['DB'][1]
+                elif i == 2:
+                    self.PGLOG['DBGFILE'] = self.params['DB'][2]
+            self.pgdbg(self.PGLOG['DBGLEVEL'])
+        if 'GZ' in self.params: self.PGLOG['GMTZ'] = self.diffgmthour()
+        if 'BG' in self.params: self.PGLOG['BCKGRND'] = 1
+
+    # check and get the default value for an info option; return None if not available
+    def get_default_info(self, opt):
+        olist = self.OPTS[opt]
+        if olist[0]&3 and len(olist) > 3:
+            odval = olist[3]
+            if not odval or isinstance(odval, int):
+                return odval
+            else:
+                return odval[0]   # return the first char of a default string
+        return None
+
+    # set the output file handler now
+    def open_output(self, outfile = None):
+        if outfile:   # result output file
+            try:
+                self.OUTPUT = open(outfile, 'w')
+            except Exception as e:
+                self.pglog("{}: Error opening file to write - {}".format(outfile, str(e)), self.PGOPT['extlog'])
+        else:   # result to STDOUT
+            self.OUTPUT = sys.stdout
+
+    # return the count of valid infile names; aborts otherwise
+    def validate_infile_names(self, dsid):
+        i = 0
+        for infile in self.params['IF']:
+            if not self.validate_one_infile(infile, dsid): return self.FAILURE
+            i += 1
+            if self.PGOPT['IFCNT'] and i >= self.PGOPT['IFCNT']: break
+        return i
+
+    # validate an input filename against dsid
+    def validate_one_infile(self, infile, dsid):
+        ndsid = self.find_dataset_id(infile)
+        if ndsid is None:
+            return self.pglog("{}: No dsid identified in Input file name {}!".format(dsid, infile), self.PGOPT['extlog'])
+        fdsid = self.format_dataset_id(ndsid)
+        if fdsid != dsid:
+            return self.pglog("{}: Different dsid {} found in Input file name {}!".format(dsid, fdsid, infile), self.PGOPT['extlog'])
+        return self.SUCCESS
+
+    # gather input information from input files
+    def get_input_info(self, infiles, table = None):
+        i = 0
+        for file in infiles:
+            i += self.process_infile(file, table)
+            if not self.PGOPT['IFCNT'] and self.PGOPT['CACT'] == 'AQ': self.PGOPT['IFCNT'] = 1
+            if self.PGOPT['IFCNT']: break
+        return i
+
+    # validate and get info from a single input file
+    def read_one_infile(self, infile):
+        dsid = self.params['DS']
+        del self.params['DS']
+        if self.OPTS['DS'][2]&2: self.OPTS['DS'][2] &= ~2
+        if 'DS' in self.CMDOPTS: del self.CMDOPTS['DS']
+        self.clean_input_values()
+        self.process_infile(infile)
+        if 'DS' in self.params: dsid = self.params['DS']
+        if dsid: self.validate_one_infile(infile, dsid)
+        return dsid
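+
+    # Illustrative input file accepted by process_infile() below, using the
+    # default separators ES '<=>' and DV '<:>' (option names are hypothetical):
+    #   DS<=>d010000
+    #   LF<:>WF
+    #   file1.txt<:>web1.txt
+    #   file2.txt<:>web2.txt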
+
+    # gather input option values from one input file
+    # return 0 if nothing is retrieved when table is given
+    def process_infile(self, infile, table = None):
+        if not op.exists(infile): self.pglog(infile + ": Input file does not exist", self.PGOPT['extlog'])
+        if table:
+            self.pglog("Gather '{}' information from input file '{}'...".format(table, infile), self.PGOPT['wrnlog'])
+        else:
+            self.pglog("Gather information from input file '{}'...".format(infile), self.PGOPT['wrnlog'])
+        try:
+            fd = open(infile, 'r')
+        except Exception as e:
+            self.pglog("{}: Error opening input file - {}!".format(infile, str(e)), self.PGOPT['extlog'])
+        else:
+            lines = fd.readlines()
+            fd.close()
+        opt = None
+        columns = []
+        chktbl = 1 if table else -1
+        mpes = r'^(\w+)\s*{}\s*(.*)$'.format(self.params['ES'])
+        mpao = r'^(\w+)\s*{}'.format(self.params['AO'])
+        # column count, column index, value count, value index, line index, option-set count, end divider flag
+        colcnt = colidx = valcnt = validx = linidx = setcnt = enddiv = 0
+        for line in lines:
+            linidx += 1
+            if linidx%50000 == 0:
+                self.pglog("{}: {} lines read".format(infile, linidx), self.PGOPT['wrnlog'])
+            if 'NT' not in self.params: line = self.pgtrim(line, 2)
+            if not line:
+                if opt: self.set_option_value(opt, '', 1, linidx, line, infile)
+                continue   # skip empty lines
+            if chktbl > 0:
+                if re.match(r'^\[{}\]$'.format(table), line, re.I):   # found entry for table
+                    chktbl = 0
+                    self.clean_input_values()   # clean previously saved input values
+                continue
+            else:
+                ms = re.match(r'^\[(\w+)\]$', line)
+                if ms:
+                    if chktbl == 0: break   # stop at next sub-title
+                    if not self.PGOPT['MSET']:
+                        self.input_error(linidx, line, infile, ms.group(1) + ": Cannot process sub-title")
+                    elif self.PGOPT['CACT'] != self.PGOPT['MSET']:
+                        self.input_error(linidx, line, infile, "Use Action -{} to Set multiple sub-titles".format(self.PGOPT['MSET']))
+                    break   # stop getting info if no table given or a different table
+            if colcnt == 0:   # check single value and action lines first
+                ms = re.match(mpes, line)
+                if ms:   # one value assignment
+                    key = ms.group(1).strip()
+                    val = ms.group(2)
+                    if val and 'NT' not in self.params: val = val.strip()
+                    opt = self.get_option_key(key, 1, 0, linidx, line, infile, table)
+                    self.set_option_value(opt, val, 0, linidx, line, infile)
+                    if not self.OPTS[opt][2]&self.PGOPT['TXTBIT']: opt = None
+                    setcnt += 1
+                    continue
+                ms = re.match(mpao, line)
+                if ms:   # set mode or action option
+                    key = self.get_option_key(ms.group(1).strip(), 4, 0, linidx, line, infile, table)
+                    self.set_option_value(key, '', 0, linidx, line, infile)
+                    setcnt += 1
+                    continue
+            # check multiple value assignment for one or more multi-value options
+            values = line.split(self.params['DV'])
+            valcnt = len(values)
+            if colcnt == 0:
+                while colcnt < valcnt:
+                    key = values[colcnt].strip()
+                    if not key: break
+                    opt = self.get_option_key(key, 2, 1, linidx, line, infile, table)
+                    if not opt: break
+                    columns.append(opt)
+                    if opt in self.params: del self.params[opt]
+                    colcnt += 1
+                if colcnt < valcnt:
+                    if colcnt == (valcnt-1):
+                        enddiv = 1
+                    else:
+                        self.input_error(linidx, line, infile, "Multi-value Option Name missed for column {}".format(colcnt+1))
+                opt = None
+                continue
+            elif valcnt == 1:
+                if re.match(mpes, line):
+                    self.input_error(linidx, line, infile, "Cannot set single value option after Multi-value Options")
+                elif re.match(mpao, line):
+                    self.input_error(linidx, line, infile, "Cannot set action/mode option after Multi-value Options")
+            if opt:   # add to multiple-line value
+                val = values.pop(0)
+                valcnt -= 1
+                if val and 'NT' not in self.params: val = val.strip()
+                self.set_option_value(opt, val, 1, linidx, line, infile)
+                setcnt += 1
+                if valcnt == 0: continue   # continue to check multiple line value
+                colidx += 1
+                opt = None
+            reduced = 0
+            valcnt += colidx
+            if valcnt > colcnt:
+                if enddiv:
+                    val = values.pop()
+                    if not val.strip():
+                        valcnt -= 1
+                        reduced = 1
+                if valcnt > colcnt:
+                    self.input_error(linidx, line, infile, "Too many values({}) provided for {} columns".format(valcnt+colidx, colcnt))
+            if values:
+                for val in values:
+                    opt = columns[colidx]
+                    colidx += 1
+                    if val and 'NT' not in self.params: val = val.strip()
+                    self.set_option_value(opt, val, 0, linidx, line, infile)
+                    setcnt += 1
+                colidx += (reduced-enddiv)
+            if colidx == colcnt:
+                colidx = 0   # done with gathering values of a multi-value line
+                opt = None
+            elif opt and not self.OPTS[opt][2]&self.PGOPT['TXTBIT']:
+                colidx += 1
+                opt = None
+        if setcnt > 0:
+            if colidx:
+                if colidx < colcnt:
+                    self.input_error(linidx, '', infile, "{} of {} values missed".format(colcnt-colidx, colcnt))
+                elif enddiv:
+                    self.input_error(linidx, '', infile, "Missing end divider '{}'".format(self.params['DV']))
+            return 1   # read something
+        else:
+            if table: self.pglog("No option information found for '{}'".format(table), self.WARNLG)
+            return 0   # read nothing
+
+    # clean self.params of input option values when setting multiple tables
+    def clean_input_values(self):
+        # clean previously saved input values if any
+        for opt in self.INOPTS:
+            del self.params[opt]
+        self.INOPTS = {}
+
+    # build a hash record for add or update of a table record
+    def build_record(self, flds, pgrec, tname, idx = 0):
+        record = {}
+        if not flds: return record
+        hash = self.TBLHASH[tname]
+        for key in flds:
+            if key not in hash: continue
+            opt = hash[key][0]
+            field = hash[key][3] if len(hash[key]) == 4 else hash[key][1]
+            ms = re.search(r'\.(.+)$', field)
+            if ms: field = ms.group(1)
+            if opt in self.params:
+                if self.OPTS[opt][0] == 1:
+                    val = self.params[opt]
+                else:
+                    if self.OPTS[opt][2]&2 and pgrec and field in pgrec and pgrec[field]: continue
+                    val = self.params[opt][idx]
+                sval = pgrec[field] if pgrec and field in pgrec else None
+                if sval is None:
+                    if val == '': val = None
+                elif isinstance(sval, int):
+                    if isinstance(val, str): val = (int(val) if val else None)   # change '' to None for int
+                if self.pgcmp(sval, val, 1): record[field] = val   # record new or changed value
+        return record
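+
+    # Illustrative sketch (hypothetical table hash and option; real TBLHASH/OPTS
+    # entries are defined by each application): with
+    #   self.TBLHASH['dataset'] = {'T' : ['TL', 'title']}, self.OPTS['TL'][0] == 1
+    # and self.params['TL'] = 'New Title', build_record('T', None, 'dataset')
+    # returns {'title' : 'New Title'}.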
+
+    # set global variable self.PGOPT['UID'] with the value of user.uid; fatal if unsuccessful
+    def set_uid(self, aname):
+        self.set_email_logact()
+        if 'LN' not in self.params:
+            self.params['LN'] = self.PGLOG['CURUID']
+        elif self.params['LN'] != self.PGLOG['CURUID']:
+            self.params['MD'] = 1   # make sure this is set if running as another user
+        if 'NE' not in self.params: self.PGLOG['EMLADDR'] = self.params['LN']
+        if 'DM' in self.params and re.match(r'^(start|begin)$', self.params['DM'], re.I):
+            msg = "'{}' must start Daemon '{} -{}' as '{}'".format(self.PGLOG['CURUID'], aname, self.PGOPT['CACT'], self.params['LN'])
+        else:
+            msg = "'{}' runs '{} -{}' as '{}'!".format(self.PGLOG['CURUID'], aname, self.PGOPT['CACT'], self.params['LN'])
+        self.pglog(msg, self.PGOPT['wrnlog'])
+        self.set_specialist_environments(self.params['LN'])
+        if 'LN' not in self.params: self.pglog("Could not get user login name", self.PGOPT['extlog'])
+        self.validate_dataset()
+        if self.OPTS[self.PGOPT['CACT']][2] > 0: self.validate_dsowner(aname)
+        pgrec = self.pgget("dssdb.user", "uid", "logname = '{}' AND until_date IS NULL".format(self.params['LN']), self.PGOPT['extlog'])
+        if not pgrec: self.pglog("Could not get user.uid for " + self.params['LN'], self.PGOPT['extlog'])
+        self.PGOPT['UID'] = pgrec['uid']
+        self.open_output(self.params['OF'] if 'OF' in self.params else None)
+
+    # set global variable self.PGOPT['UID'] as 0 for a sudo user
+    def set_sudo_uid(self, aname, uid):
+        self.set_email_logact()
+        if self.PGLOG['CURUID'] != uid:
+            if 'DM' in self.params and re.match(r'^(start|begin)$', self.params['DM'], re.I):
+                msg = "'{}': must start Daemon '{} -{}' as '{}'".format(self.PGLOG['CURUID'], aname, self.PGOPT['CACT'], uid)
+            else:
+                msg = "'{}': must run '{} -{}' as '{}'".format(self.PGLOG['CURUID'], aname, self.PGOPT['CACT'], uid)
+            self.pglog(msg, self.PGOPT['extlog'])
+        self.PGOPT['UID'] = 0
+        self.params['LN'] = self.PGLOG['CURUID']
+
+    # set global variable self.PGOPT['UID'] as 0 for the root user
+    def set_root_uid(self, aname):
+        self.set_email_logact()
+        if self.PGLOG['CURUID'] != "root":
+            if 'DM' in self.params and re.match(r'^(start|begin)$', self.params['DM'], re.I):
+                msg = "'{}': you must start Daemon '{} -{}' as 'root'".format(self.PGLOG['CURUID'], aname, self.PGOPT['CACT'])
+            else:
+                msg = "'{}': you must run '{} -{}' as 'root'".format(self.PGLOG['CURUID'], aname, self.PGOPT['CACT'])
+            self.pglog(msg, self.PGOPT['extlog'])
+        self.PGOPT['UID'] = 0
+        self.params['LN'] = self.PGLOG['CURUID']
+
+    # set email logging bits
+    def set_email_logact(self):
+        if 'NE' in self.params:
+            self.PGLOG['LOGMASK'] &= ~self.EMLALL   # remove all email bits
+        elif 'SE' in self.params:
+            self.PGLOG['LOGMASK'] &= ~self.EMLLOG   # no normal email
+
+    # validate the dataset owner
+    # return: 0 or fatal if not valid, 1 if valid, -1 if it cannot be validated
+    def validate_dsowner(self, aname, dsid = None, logname = None, pgds = 0, logact = 0):
+        if not logname: logname = (self.params['LN'] if 'LN' in self.params else self.PGLOG['CURUID'])
+        if logname == self.PGLOG['GDEXUSER']: return 1
+        dsids = {}
+        if dsid:
+            dsids[dsid] = 1
+        elif 'DS' in self.params:
+            if self.OPTS['DS'][0] == 2:
+                for dsid in self.params['DS']:
+                    dsids[dsid] = 1
+            else:
+                dsids[self.params['DS']] = 1
+        else:
+            return -1
+        if not pgds and 'MD' in self.params: pgds = 1
+        if not logact: logact = self.PGOPT['extlog']
+        for dsid in dsids:
+            if not self.pgget("dsowner", "", "dsid = '{}' AND specialist = '{}'".format(dsid, logname), self.PGOPT['extlog']):
+                if not self.pgget("dssgrp", "", "logname = '{}'".format(logname), self.PGOPT['extlog']):
+                    return self.pglog("'{}' is not a DSS Specialist!".format(logname), logact)
+                elif not pgds:
+                    return self.pglog("'{}' is not listed as a Specialist of '{}'\nRun '{}' with Option -MD!".format(logname, dsid, aname), logact)
+        return 1
+
+    # validate the dataset
+    def validate_dataset(self):
+        cnt = 1
+        if 'DS' in self.params:
+            if self.OPTS['DS'][0] == 2:
+                for dsid in self.params['DS']:
+                    cnt = self.pgget("dataset", "", "dsid = '{}'".format(dsid), self.PGOPT['extlog'])
+                    if cnt == 0: break
+            else:
+                dsid = self.params['DS']
+                cnt = self.pgget("dataset", "", "dsid = '{}'".format(dsid), self.PGOPT['extlog'])
+            if not cnt: self.pglog(dsid + " does not exist in RDADB!", self.PGOPT['extlog'])
+
+    # validate given group indices or group names
+    def validate_groups(self, parent = 0):
+        if parent:
+            gi = 'PI'
+            gn = 'PN'
+        else:
+            gi = 'GI'
+            gn = 'GN'
+        if (self.OPTS[gi][2]&8): return   # already validated
+        dcnd = "dsid = '{}'".format(self.params['DS'])
+        if gi in self.params:
+            grpcnt = len(self.params[gi])
+            i = 0
+            while i < grpcnt:
+                gidx = self.params[gi][i]
+                if not isinstance(gidx, int) and re.match(r'^(!|<|>|<>)$', gidx): break
+                i += 1
+            if i >= grpcnt:   # normal group index given
+                for i in range(grpcnt):
+                    gidx = self.params[gi][i]
+                    gidx = int(gidx) if gidx else 0
+                    self.params[gi][i] = gidx
+                    if gidx == 0 or (i > 0 and gidx == self.params[gi][i-1]): continue
+                    if not self.pgget("dsgroup", '', "{} AND gindex = {}".format(dcnd, gidx), self.PGOPT['extlog']):
+                        if i > 0 and parent and self.params['GI']:
+                            j = 0
+                            while j < i:
+                                if gidx == self.params['GI'][j]: break
+                                j += 1
+                            if j < i: continue
+                        self.pglog("Group Index {} not in RDADB for {}".format(gidx, self.params['DS']), self.PGOPT['extlog'])
+            else:   # found non-equal condition sign
+                pgrec = self.pgmget("dsgroup", "DISTINCT gindex", dcnd + self.get_field_condition("gindex", self.params[gi]), self.PGOPT['extlog'])
+                grpcnt = (len(pgrec['gindex']) if pgrec else 0)
+                if grpcnt == 0:
+                    self.pglog("No Group matches given Group Index condition for " + self.params['DS'], self.PGOPT['extlog'])
+                self.params[gi] = pgrec['gindex']
+        elif gn in self.params:
+            self.params[gi] = self.group_id_to_index(self.params[gn])
+        self.OPTS[gi][2] |= 8   # set validated flag
+
+    # get a group index array from given group IDs
+    def group_id_to_index(self, grpids):
+        count = len(grpids) if grpids else 0
+        if count == 0: return None
+        indices = []
+        dcnd = "dsid = '{}'".format(self.params['DS'])
+        i = 0
+        while i < count:
+            gid = grpids[i]
+            if gid and (re.match(r'^(!|<|>|<>)$', gid) or gid.find('%') > -1): break
+            i += 1
+        if i >= count:   # normal group id given
+            for i in range(count):
+                gid = grpids[i]
+                if not gid:
+                    indices.append(0)
+                elif i and gid == grpids[i-1]:
+                    indices.append(indices[i-1])
+                else:
+                    pgrec = self.pgget("dsgroup", "gindex", "{} AND grpid = '{}'".format(dcnd, gid), self.PGOPT['extlog'])
+                    if not pgrec: self.pglog("Group ID {} not in RDADB for {}".format(gid, self.params['DS']), self.PGOPT['extlog'])
+                    indices.append(pgrec['gindex'])
+            return indices
+        else:   # found wildcard and/or non-equal condition sign
+            pgrec = self.pgmget("dsgroup", "DISTINCT gindex", dcnd + self.get_field_condition("grpid", grpids, 1), self.PGOPT['extlog'])
+            count = (len(pgrec['gindex']) if pgrec else 0)
+            if count == 0: self.pglog("No Group matches given Group ID condition for " + self.params['DS'], self.PGOPT['extlog'])
+            return pgrec['gindex']
+
+    # get a group ID array from given group indices
+    def group_index_to_id(self, indices):
+        count = len(indices) if indices else 0
+        if count == 0: return None
+        grpids = []
+        dcnd = "dsid = '{}'".format(self.params['DS'])
+        i = 0
+        while i < count:
+            gidx = indices[i]
+            if not isinstance(gidx, int) and re.match(r'^(!|<|>|<>)$', gidx): break
+            i += 1
+        if i >= count:   # normal group index given
+            for i in range(count):
+                gidx = indices[i]
+                if not gidx:
+                    grpids.append('')   # default value
+                elif i and gidx == indices[i-1]:
+                    grpids.append(grpids[i-1])
+                else:
+                    pgrec = self.pgget("dsgroup", "grpid", "{} AND gindex = {}".format(dcnd, gidx), self.PGOPT['extlog'])
+                    if not pgrec: self.pglog("Group Index {} not in RDADB for {}".format(gidx, self.params['DS']), self.PGOPT['extlog'])
+                    grpids.append(pgrec['grpid'])
+            return grpids
+        else:   # found non-equal condition sign
+            pgrec = self.pgmget("dsgroup", "DISTINCT grpid", dcnd + self.get_field_condition("gindex", indices), self.PGOPT['extlog'])
+            count = (len(pgrec['grpid']) if pgrec else 0)
+            if count == 0: self.pglog("No Group matches given Group Index condition for " + self.params['DS'], self.PGOPT['extlog'])
+            return pgrec['grpid']
+
+    # validate order fields and
+    # get a string of order fields that are not in the given fields
+    def append_order_fields(self, oflds, flds, tname, excludes = None):
+        orders = ''
+        hash = self.TBLHASH[tname]
+        for ofld in oflds:
+            ufld = ofld.upper()
+            if ufld not in hash or excludes and excludes.find(ufld) > -1: continue
+            if flds and flds.find(ufld) > -1: continue
+            orders += ofld
+        return orders
+
+    # validate multiple values for given fields
+    def validate_multiple_values(self, tname, count, flds = None):
+        opts = []
+        hash = self.TBLHASH[tname]
+        if flds:
+            for fld in flds:
+                if fld in hash: opts.append(hash[fld][0])
+        else:
+            for fld in hash:
+                opts.append(hash[fld][0])
+        self.validate_multiple_options(count, opts, (1 if tname == 'htarfile' else 0))
+
+    # validate multiple values for given options
+    def validate_multiple_options(self, count, opts, remove = 0):
+        for opt in opts:
+            if opt not in self.params or self.OPTS[opt][0] != 2: continue   # no value given or not a multiple value option
+            cnt = len(self.params[opt])
+            if cnt == 1 and count > 1 and self.OPTS[opt][2]&1:
+                val0 = self.params[opt][0]
+                self.params[opt] = [val0]*count
+                self.OPTS[opt][2] |= 4   # expanded
+                cnt = count
+            if cnt != count:
+                if count == 1 and cnt > 1 and self.OPTS[opt][2]&self.PGOPT['TXTBIT']:
+                    self.params[opt][0] = ' '.join(self.params[opt])
+                elif remove and cnt == 1 and count > 1:
+                    del self.params[opt]
+                elif cnt < count:
+                    self.pglog("Multi-value Option {}({}): {} Given and {} needed".format(opt, self.OPTS[opt][1], cnt, count), self.PGOPT['extlog'])
+
+    # get field keys for a RDADB table; include all if include is empty
+    def get_field_keys(self, tname, include = None, exclude = None):
+        fields = ''
+        hash = self.TBLHASH[tname]
+        for fld in hash:
+            if include and include.find(fld) < 0: continue
+            if exclude and exclude.find(fld) > -1: continue
+            opt = hash[fld][0]
+            if opt in self.params: fields += fld
+        return fields if fields else None
+
+    # get a string for fields of a RDADB table
+    def get_string_fields(self, flds, tname, include = None, exclude = None):
+        fields = []
+        hash = self.TBLHASH[tname]
+        for fld in flds:
+            ufld = fld.upper()   # in case a lowercase key is given
+            if include and include.find(ufld) < 0: continue
+            if exclude and exclude.find(ufld) > -1: continue
+            if ufld not in hash:
+                self.pglog("Invalid field '{}' to get from '{}'".format(fld, tname), self.PGOPT['extlog'])
+            elif hash[ufld][0] not in self.OPTS:
+                self.pglog("Option '{}' is not defined for field '{} - {}'".format(hash[ufld][0], ufld, hash[ufld][1]), self.PGOPT['extlog'])
+            if len(hash[ufld]) == 4:
+                fname = "{} {}".format(hash[ufld][3], hash[ufld][1])
+            else:
+                fname = hash[ufld][1]
+            fields.append(fname)
+        return ', '.join(fields)
+
+    # get the max count for given options
+    def get_max_count(self, opts):
+        count = 0
+        for opt in opts:
+            if opt not in self.params: continue
+            cnt = len(self.params[opt])
+            if cnt > count: count = cnt
+        return count
+
+    # get a string of fields of a RDADB table for sorting
+    def get_order_string(self, flds, tname, exclude = None):
+        orders = []
+        hash = self.TBLHASH[tname]
+        for fld in flds:
+            if fld.islower():
+                desc = " DESC"
+                fld = fld.upper()
+            else:
+                desc = ""
+            if exclude and exclude.find(fld) > -1: continue
+            orders.append(hash[fld][1] + desc)
+        return (" ORDER BY " + ', '.join(orders)) if orders else ''
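+
+    # Worked example (hypothetical hash entries): if TBLHASH['wfile'] maps
+    # 'F' to ['WF', 'wfile'] and 'T' to ['WT', 'type'], then
+    # get_order_string('Ft', 'wfile') returns " ORDER BY wfile, type DESC",
+    # since a lowercase field key requests a descending sort.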
+
+    # set values to given options; ignore options set in input files if the options
+    # are already set on command line
+    def set_option_value(self, opt, val = None, cnl = 0, lidx = 0, line = None, infile = None):
+        if opt in self.CMDOPTS and lidx:   # in input file, but given on command line already
+            if opt not in self.params: self.params[opt] = self.CMDOPTS[opt]
+            return
+        if val is None: val = ''
+        if self.OPTS[opt][0]&3:
+            if self.OPTS[opt][2]&16:
+                if not val:
+                    val = 0
+                elif re.match(r'^\d+$', val):
+                    val = int(val)
+            elif val and (opt == 'DS' or opt == 'OD'):
+                val = self.format_dataset_id(val)
+        errmsg = None
+        if not cnl and self.OPTS[opt][0]&3:
+            if opt in self.params:
+                if self.OPTS[opt][0] == 2:
+                    if self.OPTS[opt][2]&2: del self.params[opt]   # clean auto set values
+                elif self.params[opt] != val and not self.OPTS[opt][2]&1:
+                    errmsg = "'{}', multiple values not allowed for Single-Value Option".format(val)
+            if not errmsg and (not self.PGOPT['CACT'] or self.OPTS[self.PGOPT['CACT']][2]):
+                dstr = self.OPTS[opt][3] if len(self.OPTS[opt]) > 3 else None
+                if dstr:
+                    vlen = len(val)
+                    ms = re.match(r'^!(\w*)', dstr)
+                    if ms:
+                        dstr = ms.group(1)
+                        if vlen == 1 and dstr.find(val) > -1: errmsg = "{}: character must not be one of '{}'".format(val, dstr)
+                    elif vlen > 1 or (vlen == 0 and not self.OPTS[opt][2]&128) or (vlen == 1 and dstr.find(val) < 0):
+                        errmsg = "{} single-letter value must be one of '{}'".format(val, dstr)
+        if not errmsg:
+            if self.OPTS[opt][0] == 2:   # multiple value option
+                if opt not in self.params:
+                    self.params[opt] = [val]   # set the first value
+                    if opt == 'QF' and self.PGOPT['ACTS'] == self.OPTS['DL'][0]: self.OPTS['FS'][3] = 'ANT'
+                else:
+                    if cnl:
+                        rowidx = len(self.params[opt]) - 1
+                        if self.params[opt][rowidx]:
+                            if not re.match(r'^(DE|DI|DM|DW)$', opt):
+                                errmsg = "Multi-line value not allowed"
+                            else:
+                                self.params[opt][rowidx] += "\n" + val   # multiple line value
+                        else:
+                            self.params[opt][rowidx] = val
+                    else:
+                        self.params[opt].append(val)   # add next value
+            elif self.OPTS[opt][0] == 1:   # single value option
+                if cnl and opt in self.params:
+                    if val: errmsg = "Multi-line value not allowed"
+                elif self.OPTS[opt][2]&2 and self.pgcmp(self.params[opt], val):
+                    errmsg = "{}: Single-Value Info Option has value '{}' already".format(val, self.params[opt])
+                else:
+                    self.params[opt] = val
+                    self.OPTS[opt][2] |= 2
+            elif val:
+                if self.OPTS[opt][0] == 0 and re.match(r'^(Y|N)$', val, re.I):
+                    self.params[opt] = 1 if (val == 'Y' or val == 'y') else 0
+                else:
+                    self.parameter_error(val, opt, lidx, line, infile)   # no value allowed for flag or action options
+            elif opt not in self.params:
+                self.params[opt] = 1   # set flag or action option
+                if self.OPTS[opt][0] > 2:
+                    if self.PGOPT['ACTS']: self.parameter_error(opt, "duplicate", lidx, line, infile)   # no duplicated action options
+                    self.PGOPT['ACTS'] = self.OPTS[opt][0]   # add action bit
+                    self.PGOPT['CACT'] = opt   # add action name
+                    if opt == "SB": self.PGOPT['MSET'] = opt
+        if errmsg:
+            if lidx:
+                self.input_error(lidx, line, infile, "{}({}) - {}".format(opt, self.OPTS[opt][1], errmsg))
+            else:
+                self.pglog("ERROR: {}({}) - {}".format(opt, self.OPTS[opt][1], errmsg), self.PGOPT['extlog'])
+        if not lidx: self.CMDOPTS[opt] = self.params[opt]   # record options set on command lines
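set_option_value() accumulates values for multi-value options but rejects a second, different value for a single-value option. The core of that rule, reduced to a standalone sketch (hypothetical params/OPTS, not the real tables):

    # sketch: single-value options take one value; multi-value options append
    params = {}
    OPTS = {'DS': 1, 'WF': 2}   # 1 = single-value, 2 = multi-value

    def set_value(opt, val):
        if OPTS[opt] == 2:
            params.setdefault(opt, []).append(val)
        elif opt in params and params[opt] != val:
            raise ValueError("multiple values not allowed for -" + opt)
        else:
            params[opt] = val

    set_value('DS', 'd010000')
    set_value('WF', 'a.nc')
    set_value('WF', 'b.nc')
    print(params)   # {'DS': 'd010000', 'WF': ['a.nc', 'b.nc']}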
+
+    # get width for a single row if in column format
+    def get_row_width(self, pgrec):
+        slen = len(self.params['DV'])
+        width = 0
+        for key in pgrec:
+            wd = 0
+            for val in pgrec[key]:
+                if not val: continue
+                if not isinstance(val, str): val = str(val)
+                if key == 'note':
+                    vlen = val.find('\n') + 1
+                else:
+                    vlen = 0
+                if vlen < 1: vlen = len(val)
+                if vlen > wd: wd = vlen   # get max width of each column
+            # accumulate all column widths plus length of delimiter to get row width
+            if width: width += slen
+            width += wd
+        return width
+
+    # get a short option name by searching dicts self.OPTS and self.ALIAS
+    def get_short_option(self, p):
+        plen = len(p)
+        if plen == 2:
+            p = p.upper()
+            if p in self.OPTS: return p
+        for opt in self.OPTS:   # get main option first
+            if not self.pgcmp(self.OPTS[opt][1], p, 1): return opt
+        for opt in self.ALIAS:   # then check alias options
+            for key in self.ALIAS[opt]:
+                if not self.pgcmp(key, p, 1): return opt
+        return None
+
+    # print result in column format, with multiple values each row
+    def print_column_format(self, pgrec, flds, hash, lens, retbuf = 0):
+        rowcnt = -1
+        colcnt = len(flds)
+        buf = ''
+        fields = []
+        flens = []
+        for i in range(colcnt):
+            fld = flds[i]
+            if fld in hash:
+                fld = hash[fld][1]
+                ms = re.search(r'\.(.+)$', fld)
+                if ms: fld = ms.group(1)
+            if fld in pgrec:
+                fields.append(fld)
+                flens.append((lens[i] if lens else 0))
+                if rowcnt < 0: rowcnt = len(pgrec[fld])
+            else:
+                self.pglog(fld + ": Unknown field name", self.PGOPT['extlog'])
+        colcnt = len(fields)
+        for i in range(rowcnt):
+            offset = 0
+            values = []
+            for j in range(colcnt):
+                fld = fields[j]
+                idx = -1
+                val = pgrec[fld][i]
+                slen = flens[j]
+                if val is None:
+                    val = ''
+                elif isinstance(val, str):
+                    idx = val.find("\n")
+                    if idx > 0:
+                        val = "\n" + val
+                        idx = 0
+                else:
+                    val = str(val)
+                if slen:
+                    if idx < 0:
+                        val = "{:{}}".format(val, slen)
+                    else:
+                        val += "\n{:{}}".format(' ', offset)
+                    offset += slen
+                values.append(val)
+            line = self.params['DV'].join(values) + self.params['DV'] + "\n"
+            if retbuf:
+                buf += line
+            else:
+                self.OUTPUT.write(line)
+        return buf if retbuf else rowcnt
+
+    # print result in row format, with single value on each row
+    def print_row_format(self, pgrec, flds, hash):
+        for fld in flds:
+            if fld not in hash: continue
+            line = "{}{}".format(self.OPTS[hash[fld][0]][1], self.params['ES'])
+            field = hash[fld][1]
+            ms = re.search(r'\.(.+)$', field)
+            if ms: field = ms.group(1)
+            if field in pgrec:
+                value = pgrec[field]
+                if value is not None: line += str(value)
+            self.OUTPUT.write(line + "\n")
+
+    # compress/uncompress given files and change the formats accordingly
+    def compress_files(self, files, formats, count):
+        if 'UZ' in self.params:
+            strcmp = 'Uncompress'
+            actcmp = 0
+        else:
+            strcmp = 'Compress'
+            actcmp = 1
+        fmtcnt = len(formats)
+        if not fmtcnt: return files   # just in case
+        s = 's' if count > 1 else ''
+        self.pglog("{}ing {} File{} for {} ...".format(strcmp, count, s, self.params['DS']), self.PGOPT['wrnlog'])
+        cmpcnt = 0
+        for i in range(count):
+            fmt = formats[i] if (i < fmtcnt and formats[i]) else formats[0]
+            (ofile, fmt) = self.compress_local_file(files[i], fmt, actcmp, self.PGOPT['extlog'])
+            if ofile != files[i]:
+                files[i] = ofile
+                cmpcnt += 1
+        self.pglog("{}/{} Files {}ed for {}".format(cmpcnt, count, strcmp, self.params['DS']), self.PGOPT['emllog'])
+        if 'ZD' in self.params: del self.params['ZD']
+        if 'UZ' in self.params: del self.params['UZ']
+        return files
+
+    # get hash condition
+    # tname - table name to identify a table hash
+    # noand - 1 for not adding leading 'AND'
+    def get_hash_condition(self, tname, include = None, exclude = None, noand = 0):
+        condition = ''
+        hash = self.TBLHASH[tname]
+        for key in hash:
+            if include and include.find(key) < 0: continue
+            if exclude and exclude.find(key) > -1: continue
+            opt = hash[key][0]
+            if opt not in self.params: continue   # no option value
+            flg = hash[key][2]
+            if flg < 0:   # condition is ignored for this option
+                self.pglog("Condition given per Option -{} (-{}) is ignored".format(opt, self.OPTS[opt][1]), self.PGOPT['errlog'])
+                continue
+            fld = hash[key][1]
+            condition += self.get_field_condition(fld, self.params[opt], flg, noand)
+            noand = 0
+        return condition
+
+    # set default self.params value for given opt; empty the value if 'all' is given
+    def set_default_value(self, opt, dval = None):
+        flag = self.OPTS[opt][0]
+        if flag&3 == 0: return   # skip if not single&multiple value options
+        oval = 0
+        if opt in self.params:
+            if flag == 1:
+                oval = self.params[opt]
+            else:
+                count = len(self.params[opt])
+                if count == 1:
+                    oval = self.params[opt][0]
+                elif count > 1:
+                    return   # multiple values given already
+        if oval:
+            if re.match(r'^all$', oval, re.I):
+                del self.params[opt]   # remove option value for all
+            return   # value given already
+        if dval:
+            # set default value
+            if flag == 1:
+                self.params[opt] = dval
+            else:
+                self.params[opt] = [dval]
+
+    # add/strip COS block for given file name and cosflg if given/not-given cosfile
+    # return the file size after the conversion
+    def cos_convert(self, locfile, cosflg, cosfile = None):
+        if cosfile:
+            cmd = "cosconvert -{} {} {}".format(cosflg, cosfile, locfile)
+        else:
+            cmd = "cosconvert -{} {}".format(cosflg.lower(), locfile)
+            cosfile = locfile
+        self.pgsystem(cmd)
+        info = self.check_local_file(cosfile)
+        if not info:
+            return self.pglog("Error - " + cmd, self.PGOPT['errlog'])   # should not happen
+        else:
+            return info['data_size']
+
+    # evaluate count of values for given options
+    def get_option_count(self, opts):
+        count = 0
+        for opt in opts:
+            if opt in self.params:
+                cnt = len(self.params[opt])
+                if cnt > count: count = cnt
+        if count > 0: self.validate_multiple_options(count, opts)
+        return count
+
+    # gather subgroup indices recursively for given condition
+    # dcnd: dataset id condition
+    # pidx: parent group index
+    # gtype: group type (if not empty, P - public groups only)
+    # Return: array reference of group indices
+    def get_all_subgroups(self, dcnd, pidx, gtype = None):
+        gidxs = [pidx]
+        gflds = "gindex"
+        if gtype: gflds += ", grptype"
+        grecs = self.pgmget("dsgroup", gflds, "{} and pindex = {}".format(dcnd, pidx), self.LGWNEX)
+        if not grecs: return gidxs
+        gcnt = len(grecs['gindex'])
+        for i in range(gcnt):
+            gidx = grecs['gindex'][i]
+            if abs(gidx) <= abs(pidx) or gtype and grecs['grptype'][i] != gtype: continue
+            subs = self.get_all_subgroups(dcnd, gidx, gtype)
+            gidxs.extend(subs)
+        return gidxs
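get_all_subgroups() walks the dsgroup table recursively, returning the parent index followed by every descendant. The same traversal against an in-memory parent-to-children map (made-up indices, no database involved):

    # sketch: depth-first gathering of a group index and all of its subgroups
    CHILDREN = {0: [1, 2], 1: [3], 2: [], 3: []}   # pindex -> child gindex list

    def all_subgroups(pidx):
        gidxs = [pidx]
        for gidx in CHILDREN.get(pidx, []):
            gidxs.extend(all_subgroups(gidx))
        return gidxs

    print(all_subgroups(0))   # [0, 1, 3, 2]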
+
+    # gather public subgroup indices recursively for given condition. A group index is
+    # gathered only if there are data files right under it. The pidx is included too
+    # if its file count is larger than zero.
+    # dcnd: dataset id condition
+    # pidx: parent group index
+    # cfld: count field (dwebcnt, nwebcnt, savedcnt)
+    # pfcnt: file count for parent group index pidx (0 to skip)
+    # Return: array reference of group indices
+    def get_data_subgroups(self, dcnd, pidx, cfld, pfcnt = 0):
+        if not pfcnt:   # get file count for the parent group
+            pfcnt = self.group_file_count(dcnd, pidx, cfld)
+            if not pfcnt: return None
+        gflds = "gindex, " + cfld
+        gcnd = "{} AND pindex = {} AND {} > 0".format(dcnd, pidx, cfld)
+        grecs = self.pgmget("dsgroup", gflds, gcnd, self.LGWNEX)
+        if not grecs: return ([pidx] if pfcnt > 0 else None)
+        gcnt = len(grecs['gindex'])
+        gidxs = []
+        for i in range(gcnt):
+            gidx = grecs['gindex'][i]
+            fcnt = grecs[cfld][i]
+            if fcnt == 0 or abs(gidx) <= abs(pidx): continue
+            subs = self.get_data_subgroups(dcnd, gidx, cfld, fcnt)
+            if subs: gidxs.extend(subs)
+            pfcnt -= fcnt
+        if pfcnt > 0: gidxs.insert(0, pidx)
+        return (gidxs if gidxs else None)
+
+    # get group file count for given count field name
+    def group_file_count(self, cnd, gidx, cfld):
+        if gidx:
+            table = "dsgroup"
+            cnd += " AND gindex = {}".format(gidx)
+        else:
+            table = "dataset"
+        pgrec = self.pgget(table, cfld, cnd)
+        return (pgrec[cfld] if pgrec else 0)
+
+    # set file format for actions -AM/-AW from given local files
+    def set_file_format(self, count):
+        if 'LF' in self.params:
+            files = self.params['LF']
+        else:
+            return
+        fmtcnt = 0
+        fmts = [None] * count
+        for i in range(count):
+            fmt = self.get_file_format(files[i])
+            if fmt:
+                fmtcnt += 1
+                fmts[i] = fmt
+        if fmtcnt:
+            self.params['AF'] = fmts
+            self.OPTS['AF'][2] |= 2
+
+    # get frequency information
+    @staticmethod
+    def get_control_frequency(frequency):
+        val = nf = 0
+        unit = None
+        ms = re.match(r'^(\d+)([YMWDHNS])$', frequency, re.I)
+        if ms:
+            val = int(ms.group(1))
+            unit = ms.group(2).upper()
+        else:
+            ms = re.match(r'^(\d+)M/(\d+)', frequency, re.I)
+            if ms:
+                val = int(ms.group(1))
+                nf = int(ms.group(2))
+                unit = 'M'
+                if nf < 2 or nf > 10 or (30%nf): val = 0
+        if not val:
+            if nf:
+                unit = "fraction of month frequency '{}' MUST be (2,3,5,6,10)".format(frequency)
+            elif unit:
+                unit = "frequency '{}' MUST be larger than 0".format(frequency)
+            elif re.search(r'/(\d+)$', frequency):
+                unit = "fractional frequency '{}' for month ONLY".format(frequency)
+            else:
+                unit = "invalid frequency '{}', unit must be (Y,M,W,D,H)".format(frequency)
+            return (None, unit)
+        freq = [0]*7   # initialize the frequency list
+        uidx = {'Y' : 0, 'D' : 2, 'H' : 3, 'N' : 4, 'S' : 5}
+        if unit == 'M':
+            freq[1] = val
+            if nf: freq[6] = nf   # number of fractions in a month
+        elif unit == 'W':
+            freq[2] = 7 * val
+        elif unit in uidx:
+            freq[uidx[unit]] = val
+        return (freq, unit)
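The frequency strings accepted above are a count plus a unit letter ('6H'), or a fraction-of-month form ('1M/2' for half-month steps, where the divisor must divide 30 evenly). A standalone sketch of the parse, mirroring the two regular expressions in get_control_frequency():

    import re

    # sketch: parse 'NU' (count + unit) or 'NM/F' (fraction-of-month) frequencies
    def parse_frequency(frequency):
        ms = re.match(r'^(\d+)([YMWDHNS])$', frequency, re.I)
        if ms: return int(ms.group(1)), ms.group(2).upper(), 0
        ms = re.match(r'^(\d+)M/(\d+)$', frequency, re.I)
        if ms:
            nf = int(ms.group(2))
            if 2 <= nf <= 10 and 30 % nf == 0:
                return int(ms.group(1)), 'M', nf
        return None

    print(parse_frequency('6H'))     # (6, 'H', 0)
    print(parse_frequency('1M/2'))   # (1, 'M', 2)
    print(parse_frequency('1M/7'))   # None: 7 does not divide 30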
+
+    # check if valid data time for given pindex
+    def valid_data_time(self, pgrec, cstr = None, logact = 0):
+        if pgrec['pindex'] and pgrec['datatime']:
+            (freq, unit) = self.get_control_frequency(pgrec['frequency'])
+            if not freq:
+                if cstr: self.pglog("{}: {}".format(cstr, unit), logact)
+                return self.FAILURE
+            dtime = self.adddatetime(pgrec['datatime'], freq[0], freq[1], freq[2], freq[3], freq[4], freq[5], freq[6])
+            if self.pgget("dcupdt", "", "cindex = {} AND datatime < '{}'".format(pgrec['pindex'], dtime), self.PGOPT['extlog']):
+                if cstr: self.pglog("{}: MUST be processed After Control Index {}".format(cstr, pgrec['pindex']), logact)
+                return self.FAILURE
+        return self.SUCCESS
+
+    # publish filelists for given datasets
+    def publish_dataset_filelist(self, dsids):
+        for dsid in dsids:
+            self.pgsystem("publish_filelist " + dsid, self.PGOPT['wrnlog'], 7)
+
+    # get the current active version index for given dsid
+    def get_version_index(self, dsid, logact = 0):
+        pgrec = self.pgget("dsvrsn", "vindex", "dsid = '{}' AND status = 'A'".format(dsid), logact)
+        return (pgrec['vindex'] if pgrec else 0)
+
+    # append given format (data or archive) sfmt to format string sformat
+    @staticmethod
+    def append_format_string(sformat, sfmt, chkend = 0):
+        mp = r'(^|\.){}$' if chkend else r'(^|\.){}(\.|$)'
+        if sfmt:
+            if not sformat:
+                sformat = sfmt
+            else:
+                for fmt in re.split(r'\.', sfmt):
+                    if not re.search(mp.format(fmt), sformat, re.I): sformat += '.' + fmt
+        return sformat
+
+    # get request type string or shared info
+    @staticmethod
+    def request_type(rtype, idx = 0):
+        RTYPE = {
+            'C' : ["Customized Data", 0],
+            'D' : ["CDP Link", 0],
+            'M' : ["Delayed Mode Data", 1],
+            'N' : ["NCARDAP(THREDDS) Data Server", 0],
+            'Q' : ["Database Query", 0],
+            'R' : ["Realtime Data", 0],
+            'S' : ["Subset Data", 0],
+            'T' : ["Subset/Format-Conversion Data", 0],
+            'F' : ["Format Conversion Data", 1],   # web
+            'A' : ["Archive Format Conversion", 1],   # web
+            'P' : ["Plot Chart", 0],
+            'U' : ["Data", 0]
+        }
+        if rtype not in RTYPE: rtype = 'U'
+        return RTYPE[rtype][idx]
+
+    # send email notice to user for a request
+    def send_request_email_notice(self, pgrqst, errmsg, fcount, rstat, readyfile = None, pgpart = None):
+        pgcntl = self.PGOPT['RCNTL']
+        rhome = self.params['WH'] if 'WH' in self.params and self.params['WH'] else self.PGLOG['RQSTHOME']
+        if errmsg:
+            if pgpart:
+                if self.cache_partition_email_error(pgpart['rindex'], errmsg): return rstat
+                enote = "email_part_error"
+            else:
+                enote = "email_error"
+        elif fcount == 0:
+            if pgcntl and pgcntl['empty_out'] == 'Y':
+                enote = "email_empty"
+            else:
+                errmsg = "NO output data generated"
+                if pgpart:
+                    if self.cache_partition_email_error(pgpart['rindex'], errmsg): return rstat
+                    enote = "email_part_error"
+                else:
+                    enote = "email_error"
+        elif 'EN' in self.params and self.params['EN'][0]:
+            enote = self.params['EN'][0]
+        elif pgrqst['enotice']:
+            enote = pgrqst['enotice']
+        elif pgcntl and pgcntl['enotice']:
+            enote = pgcntl['enotice']
+        elif pgrqst['globus_transfer'] == 'Y' and pgrqst['task_id']:
+            enote = "email_notice_globus"
+        else:
+            enote = "email_" + ("command" if pgrqst['location'] else "notice")
+        if enote[0] not in '/.': enote = "{}/notices/{}".format(rhome, enote)
+        finfo = self.check_local_file(enote, 128)
+        if not finfo:
+            if finfo is None:
+                ferror = "file does not exist"
+            else:
+                ferror = "error checking file"
+        else:
+            ef = open(enote, 'r')   # open email notice file
+            ferror = None
+        if ferror:
+            if errmsg:
+                self.pglog("{}: {}\nCannot email error to {}@ucar.edu: {}".format(enote, ferror, self.PGLOG['CURUID'], errmsg),
+                           (self.PGOPT['errlog'] if rstat else self.PGOPT['extlog']))
+                return "E"
+            else:
+                errmsg = self.pglog("{}: {}\nCannot email notice to {}".format(enote, ferror, pgrqst['email']), self.PGOPT['errlog']|self.RETMSG)
+                enote = rhome + "/notices/email_error"
+                ef = open(enote, 'r')
+                rstat = 'E'
+        ebuf = ''
+        ebuf += ef.read()
+        ef.close()
+        einfo = {}
+        einfo['HOSTNAME'] = self.PGLOG['HOSTNAME']
+        einfo['DSID'] = pgrqst['dsid']
+        einfo['DSSURL'] = self.PGLOG['DSSURL']
+        if pgrqst['location']:
+            einfo['WHOME'] = pgrqst['location']
+        else:
+            einfo['WHOME'] = self.PGLOG['RQSTURL']
+        einfo['SENDER'] = pgrqst['specialist'] + "@ucar.edu"
+        einfo['RECEIVER'] = pgrqst['email']
+        einfo['RTYPE'] = self.request_type(pgrqst['rqsttype'])
+        self.add_carbon_copy()   # clean carbon copy email in case not empty
+        exclude = (einfo['SENDER'] if errmsg else einfo['RECEIVER'])
+        if not errmsg and pgcntl and pgcntl['ccemail']:
+            self.add_carbon_copy(pgcntl['ccemail'], 1, exclude, pgrqst['specialist'])
+        if self.PGLOG['CURUID'] != pgrqst['specialist'] and self.PGLOG['CURUID'] != self.PGLOG['GDEXUSER']:
+            self.add_carbon_copy(self.PGLOG['CURUID'], 1, exclude)
+        if 'CC' in self.params: self.add_carbon_copy(self.params['CC'], 0, exclude)
+        einfo['CCD'] = self.PGLOG['CCDADDR']
+        einfo['RINDEX'] = str(pgrqst['rindex'])
+        einfo['RQSTID'] = pgrqst['rqstid']
+        pgrec = self.pgget("dataset", "title", "dsid = '{}'".format(pgrqst['dsid']), self.PGOPT['extlog'])
+        einfo['DSTITLE'] = pgrec['title'] if pgrec and pgrec['title'] else ''
+        einfo['SUBJECT'] = ''
+        if errmsg:
+            einfo['ERRMSG'] = self.get_error_command(int(time.time()), self.PGOPT['errlog']) + errmsg
+            einfo['SUBJECT'] = "Error "
+            if pgpart:
+                einfo['PARTITION'] = " partition"
+                einfo['PTIDX'] = "(PTIDX{})".format(pgpart['pindex'])
+                einfo['SUBJECT'] += "Process Partitions of "
+            else:
+                einfo['PARTITION'] = einfo['PTIDX'] = ''
+                einfo['SUBJECT'] += "Build "
+            einfo['SUBJECT'] += "{} Rqst{} from {}".format(einfo['RTYPE'], pgrqst['rindex'], pgrqst['dsid'])
+        else:
+            if fcount == 0:
+                einfo['SUBJECT'] += "NO Output:"
+            else:
+                einfo['SUBJECT'] += "Completed:"
+            einfo['DAYS'] = str(self.PGOPT['VP'])
+            pgrec = self.pgget("dssgrp", "lstname, fstname, phoneno",
+                               "logname = '{}'".format(self.PGLOG['CURUID']), self.PGOPT['extlog'])
+            if pgrec:
+                einfo['SPECIALIST'] = "{} {}".format(pgrec['fstname'], pgrec['lstname'])
+                einfo['PHONENO'] = pgrec['phoneno']
+            einfo['SUBJECT'] += f" {pgrqst['dsid']} {einfo['RTYPE']} request {pgrqst['rindex']}"
+            if pgrqst['note']:
+                einfo['RNOTE'] = "\nRequest Detail:\n{}\n".format(pgrqst['note'])
+            elif fcount > 0 and pgrqst['rinfo']:
+                einfo['RNOTE'] = "\nRequest Detail:\n{}\n".format(pgrqst['rinfo'])
+            else:
+                einfo['RNOTE'] = ""
+            if pgrqst['globus_transfer'] == 'Y' and pgrqst['task_id']:
+                einfo['GLOBUS_TASK_URL'] = "https://app.globus.org/activity/" + pgrqst['task_id']
+        for ekey in einfo:
+            if ekey == 'CCD' and not einfo['CCD']:
+                mp = r'Cc:\s*<CCD>\s*'
+                rep = ''
+            else:
+                mp = r'<{}>'.format(ekey)
+                rep = einfo[ekey]
+                if rep is None:
+                    self.pglog("{}.{}: None ekey value for request email".format(pgrqst['rindex'], ekey),
+                               self.PGOPT['wrnlog']|self.FRCLOG)
+                    rep = ''
+            ebuf = re.sub(mp, rep, ebuf)
+        if self.PGLOG['DSCHECK'] and not pgpart:
+            tbl = "dscheck"
+            cnd = "cindex = {}".format(self.PGLOG['DSCHECK']['cindex'])
+        else:
+            tbl = "dsrqst"
+            cnd = "rindex = {}".format(pgrqst['rindex'])
+        if self.send_customized_email(f"{tbl}.{cnd}", ebuf, 0):
+            if errmsg:
+                self.pglog("Error Email sent to {} for {}.{}:\n{}".format(einfo['SENDER'], tbl, cnd, errmsg), self.PGOPT['errlog'])
+                readyfile = None
+            else:
+                self.pglog("{}Email sent to {} for {}.{}\nSubject: {}".format(("Customized " if pgrqst['enotice'] else ""), einfo['RECEIVER'], tbl, cnd, einfo['SUBJECT']),
+                           self.PGOPT['wrnlog']|self.FRCLOG)
+        else:
+            if not self.cache_customized_email(tbl, "einfo", cnd, ebuf, 0): return 'E'
+            if errmsg:
+                self.pglog("Error Email {} cached to {}.einfo for {}:\n{}".format(einfo['SENDER'], tbl, cnd, errmsg), self.PGOPT['errlog'])
+                readyfile = None
+            else:
+                self.pglog("{}Email {} cached to {}.einfo for {}\nSubject: {}".format(("Customized " if pgrqst['enotice'] else ""), einfo['RECEIVER'], tbl, cnd, einfo['SUBJECT']),
+                           self.PGOPT['wrnlog']|self.FRCLOG)
+        if readyfile:
+            rf = open(readyfile, 'w')
+            rf.write(ebuf)
+            rf.close()
+            self.set_local_mode(readyfile, 1, self.PGLOG['FILEMODE'])
+        return rstat
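The notice files are plain-text templates in which each <KEY> placeholder is replaced by the loop above; an empty CCD removes the whole Cc line instead. A minimal reproduction of that substitution on a made-up template:

    import re

    # sketch: fill <KEY> placeholders in an email template buffer
    einfo = {'RECEIVER': 'user@example.com', 'DSID': 'd010000', 'CCD': ''}
    ebuf = "To: <RECEIVER>\nCc: <CCD>\nYour <DSID> request is ready.\n"
    for ekey in einfo:
        if ekey == 'CCD' and not einfo['CCD']:
            ebuf = re.sub(r'Cc:\s*<CCD>\s*', '', ebuf)   # drop the empty Cc line
        else:
            ebuf = re.sub('<{}>'.format(ekey), einfo[ekey], ebuf)
    print(ebuf)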
+
+    # cache partition process error to existing email buffer
+    def cache_partition_email_error(self, ridx, errmsg):
+        pkey = "<ERRMSG>"   # placeholder marker expected in the cached email buffer
+        pgrec = self.pgget("dsrqst", 'einfo', "rindex = {}".format(ridx), self.PGOPT['extlog'])
+        if not (pgrec and pgrec['einfo'] and pgrec['einfo'].find(pkey) > -1): return 0
+        errmsg = self.get_error_command(int(time.time()), self.PGOPT['errlog']) + ("{}\n{}".format(errmsg, pkey))
+        pgrec['einfo'] = re.sub(pkey, errmsg, pgrec['einfo'])
+        return self.pgupdt("dsrqst", pgrec, "rindex = {}".format(ridx), self.PGOPT['extlog'])
diff --git a/src/rda_python_common/pg_pass.py b/src/rda_python_common/pg_pass.py
new file mode 100644
index 0000000..3ae7dbd
--- /dev/null
+++ b/src/rda_python_common/pg_pass.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+#
+##################################################################################
+#
+#     Title: pg_pass
+#    Author: Zaihua Ji, zji@ucar.edu
+#      Date: 2025-10-27
+#            2025-12-02 convert to class PgPassword
+#   Purpose: python script to retrieve passwords for PostgreSQL logins to connect
+#            to a GDEX database from inside a Python application
+#
+#    Github: https://github.com/NCAR/rda-python-common.git
+#
+##################################################################################
+
+import sys
+import re
+import hvac
+from .pg_dbi import PgDBI
+
+class PgPassword(PgDBI):
+
+    def __init__(self):
+        super().__init__()   # initialize parent class
+        self.DBFLDS = {
+            'd' : 'dbname',
+            'c' : 'scname',
+            'h' : 'dbhost',
+            'p' : 'dbport',
+            'u' : 'lnname'
+        }
+        self.DBINFO = {
+            'dbname' : "",
+            'scname' : "",
+            'lnname' : "",
+            'dbhost' : "",
+            'dbport' : 5432
+        }
+        self.dbopt = False
+        self.password = ''
+
+    # read in command line parameters
+    def read_parameters(self):
+        argv = sys.argv[1:]
+        opt = None
+        dohelp = True
+        for arg in argv:
+            if re.match(r'^-\w+$', arg):
+                opt = arg[1:]
+            elif opt:
+                if opt == 'l':
+                    self.PGDBI['BAOURL'] = arg
+                elif opt == 'k':
+                    self.PGDBI['BAOTOKEN'] = arg
+                elif opt in self.DBFLDS:
+                    self.dbopt = True
+                    self.DBINFO[self.DBFLDS[opt]] = arg
+                else:
+                    self.pglog(arg + ": Unknown option", self.LGEREX)
+                dohelp = False
+            else:
+                self.pglog(arg + ": Value provided without option", self.LGEREX)
+        if dohelp:
+            print("Usage: pg_pass [-l OpenBaoURL] [-k TokenName] [-d DBNAME] \\")
+            print("               [-c SCHEMA] [-u USERNAME] [-h DBHOST] [-p DBPORT]")
+            print("   -l OpenBao URL to retrieve passwords")
+            print("   -k OpenBao Token Name to retrieve passwords")
+            print("   -d PostgreSQL Database Name")
+            print("   -c PostgreSQL Schema Name")
+            print("   -u PostgreSQL Login User Name")
+            print("   -h PostgreSQL Server Host Name")
+            print("   -p PostgreSQL Port Number")
+            sys.exit(0)
+
+    # get the pgpassword
+    def read_pgpassword(self):
+        if self.dbopt:
+            self.default_scinfo(self.DBINFO['dbname'], self.DBINFO['scname'], self.DBINFO['dbhost'],
+                                self.DBINFO['lnname'], None, self.DBINFO['dbport'])
+        self.password = self.get_baopassword()
+        if not self.password: self.password = self.get_pg_pass()
+
+# main function to execute this script
+def main():
+    pgpass = PgPassword()
+    pgpass.read_parameters()
+    pgpass.read_pgpassword()
+    print(pgpass.password)
+    sys.exit(0)
+
+# call main() to start program
+if __name__ == "__main__": main()
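main() simply chains the three steps, so the class can also be driven from another script. A usage sketch (assuming the console arguments are already in sys.argv, e.g. -d DBNAME -u USERNAME; with no arguments read_parameters() prints the usage and exits):

    # sketch: retrieve a password programmatically via PgPassword
    from rda_python_common.pg_pass import PgPassword

    pgpass = PgPassword()
    pgpass.read_parameters()    # parses sys.argv
    pgpass.read_pgpassword()    # tries OpenBao first, then get_pg_pass()
    print(pgpass.password)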
diff --git a/src/rda_python_common/pg_sig.py b/src/rda_python_common/pg_sig.py
new file mode 100644
index 0000000..827f5f9
--- /dev/null
+++ b/src/rda_python_common/pg_sig.py
@@ -0,0 +1,879 @@
+#
+###############################################################################
+#
+#     Title : pg_sig.py
+#
+#    Author : Zaihua Ji, zji@ucar.edu
+#      Date : 08/05/2020
+#             2025-01-10 transferred to package rda_python_common from
+#             https://github.com/NCAR/rda-shared-libraries.git
+#             2025-11-20 convert to class PgSIG
+#   Purpose : python library module for starting and controlling daemon processes
+#
+#    Github : https://github.com/NCAR/rda-python-common.git
+#
+###############################################################################
+#
+import os
+import re
+import sys
+import errno
+import signal
+import time
+from contextlib import contextmanager
+from .pg_dbi import PgDBI
+
+class PgSIG(PgDBI):
+
+    def __init__(self):
+        super().__init__()   # initialize parent class
+        self.VUSERS = []   # users allowed to start this daemon
+        self.CPIDS = {}    # allow up to 'mproc' processes at one time for daemon
+        self.CBIDS = {}    # allow up to 'bproc' background processes at one time for each child
+        self.SDUMP = {
+            'DEF' : '/dev/null',
+            'ERR' : '',
+            'OUT' : ''
+        }
+        self.PGSIG = {
+            'QUIT'  : 0,     # 1 if QUIT signal received, quit server if no child
+            'MPROC' : 1,     # default number of multiple processes
+            'BPROC' : 1,     # default number of multiple background processes
+            'ETIME' : 20,    # default error waiting time (in seconds)
+            'WTIME' : 120,   # default waiting time (in seconds)
+            'DTIME' : 600,   # the daemon record refresh time (in seconds)
+            'RTIME' : 2400,  # the web rda config unlocking and unconfigured system down waiting time (in seconds)
+            'CTIME' : 4800,  # the lock cleaning & configured system down waiting time (in seconds)
+            'PPID'  : -1,    # 1 - server, (> 1) - child, 0 - non-daemon mode
+            'PID'   : 0,     # current process ID
+            'DNAME' : '',    # daemon name
+            'DSTR'  : '',    # string for daemon with user login name
+            'MTIME' : 0,     # maximum daemon running time in seconds, 0 for unlimited
+            'STIME' : 0,     # time the daemon is started
+            'STRTM' : '',    # string format of 'STIME'
+        }
+
+    # add users allowed to start this daemon
+    def add_vusers(self, user = None, mores = None):
+        if not user:
+            self.VUSERS = []   # clean all vusers
+        else:
+            self.VUSERS.append(user)
+            if mores: self.VUSERS.extend(mores)
+
+    # validate user for starting this daemon
+    def check_vuser(self, user, aname = None):
+        if user and self.VUSERS:
+            valid = 0
+            for vuser in self.VUSERS:
+                if user == vuser:
+                    valid = 1
+                    break
+            if valid == 0:
+                vuser = ', '.join(self.VUSERS)
+                self.pglog("{}: must be '{}' to run '{}' in Daemon mode".format(user, vuser, aname), self.LGEREX)
+
+    # turn this process into a daemon
+    # aname - application name, or daemon name
+    # uname - user login name who started the application
+    # mproc - upper limit of multiple child processes
+    # wtime - waiting time (in seconds) for next process for the daemon
+    # logon - turn on the logging if true
+    # bproc - multiple background processes if > 1
+    # mtime - maximum running time for the daemon if provided
+    def start_daemon(self, aname, uname, mproc = 1, wtime = 120, logon = 0, bproc = 1, mtime = 0):
+        dstr = "Daemon '{}'{} on {}".format(aname, (" By {}".format(uname) if uname else ''), self.PGLOG['HOSTNAME'])
+        pid = self.check_daemon(aname, uname)
+        if pid:
+            self.pglog("***************** WARNING ***************************\n" +
+                       "** {} is running as PID={}\n".format(dstr, pid) +
+                       "** You need to stop it before starting a new one!\n" +
+                       "*****************************************************", self.WARNLG)
+            self.pglog("{} is already running as PID={}".format(dstr, pid), self.FRCLOG|self.MSGLOG)
+            sys.exit(0)
+        if mproc > 1: self.PGSIG['MPROC'] = mproc
+        if bproc > 1: self.PGSIG['BPROC'] = bproc
+        self.PGSIG['WTIME'] = self.get_wait_time(wtime, 120, "Polling Wait Time")
+        self.PGSIG['MTIME'] = self.get_wait_time(mtime, 0, "Maximum Running Time")
+        pid = self.process_fork(dstr)
+        cpid = pid if pid > 0 else os.getpid()
+        msg = "PID={},PL={},WI={}".format(cpid, self.PGSIG['MPROC'], self.PGSIG['WTIME'])
+        if self.PGSIG['MTIME']: msg += ",MT={}".format(self.PGSIG['MTIME'])
+        logmsg = "{}({}) started".format(dstr, msg)
+        if logon: logmsg += " With Logging On"
+        if pid > 0:
+            self.pglog(logmsg, self.WARNLG)
+            sys.exit(0)
+        os.setsid()
+        os.umask(0)
+        # set up to catch signals in daemon only
+        signal.signal(signal.SIGCHLD, self.clean_dead_child)
+        signal.signal(signal.SIGQUIT, self.signal_catch)
+        signal.signal(signal.SIGUSR1, self.signal_catch)
+        signal.signal(signal.SIGUSR2, self.signal_catch)
+        self.PGSIG['DSTR'] = dstr
+        self.PGSIG['DNAME'] = aname
+        self.PGSIG['STIME'] = int(time.time())
+        self.PGSIG['STRTM'] = self.current_datetime(self.PGSIG['STIME'])
+        self.PGSIG['PPID'] = 1
+        self.PGSIG['PID'] = cpid
+        sys.stdin = open(self.SDUMP['DEF'])
+        self.cmdlog("{} By {}".format(logmsg, self.PGSIG['STRTM']))
+        if logon:
+            self.PGLOG['LOGMASK'] &= ~(self.WARNLG|self.EMLLOG)   # turn off warn/email in daemon
+            self.set_dump()
+        else:
+            self.PGLOG['LOGMASK'] &= ~(self.LGWNEM)   # turn off log/warn/email in daemon
+            self.set_dump(self.SDUMP['DEF'])
+        self.PGLOG['BCKGRND'] = 1   # make sure the background flag is always on
+        self.pgdisconnect(1)   # disconnect database in daemon
+
+    # set dump output files
+    def set_dump(self, default = None):
+        errdump = self.get_environment("ERRDUMP", default)
+        outdump = self.get_environment("OUTDUMP", default)
+        if not errdump:
+            if not self.PGLOG['ERRFILE']:
+                self.PGLOG['ERRFILE'] = re.sub(r'\.log$', '.err', self.PGLOG['LOGFILE'], 1)
+            errdump = "{}/{}".format(self.PGLOG['LOGPATH'], self.PGLOG['ERRFILE'])
+        if errdump != self.SDUMP['ERR']:
+            sys.stderr = open(errdump, 'a')
+            self.SDUMP['ERR'] = errdump
+        if not outdump: outdump = "{}/{}".format(self.PGLOG['LOGPATH'], self.PGLOG['LOGFILE'])
+        if outdump != self.SDUMP['OUT']:
+            sys.stdout = open(outdump, 'a')
+            self.SDUMP['OUT'] = outdump
+
+    # stop daemon and log the ending info
+    def stop_daemon(self, msg):
+        msg = " with " + msg if msg else ''
+        self.PGLOG['LOGMASK'] |= self.MSGLOG   # turn on logging before daemon stops
+        self.pglog("{} Started at {}, Stopped gracefully{} by {}".format(self.PGSIG['DSTR'], self.PGSIG['STRTM'], msg, self.current_datetime()), self.LOGWRN)
+
+    # check if a daemon is running already
+    # aname - application name for the daemon
+    # uname - user login name who started the daemon
+    # return the process id if yes and 0 if not
+    def check_daemon(self, aname, uname = None):
+        if uname:
+            self.check_vuser(uname, aname)
+            pcmd = "ps -u {} -f | grep {} | grep ' 1 '".format(uname, aname)
+            mp = r"^\s*{}\s+(\d+)\s+1\s+".format(uname)
+        else:
+            pcmd = "ps -C {} -f | grep ' 1 '".format(aname)
+            mp = r"^\s*\w+\s+(\d+)\s+1\s+"
+        buf = self.pgsystem(pcmd, self.LOGWRN, 20+1024)
+        if buf:
+            cpid = os.getpid()
+            lines = buf.split('\n')
+            for line in lines:
+                ms = re.match(mp, line)
+                pid = int(ms.group(1)) if ms else 0
+                if pid > 0 and pid != cpid: return pid
+        return 0
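check_daemon() shells out to ps -f, keeps lines whose parent PID is 1, and extracts the PID with the mp pattern. The same match applied to one canned ps line (made-up output):

    import re

    # sketch: pull the PID out of a 'ps -u user -f' line whose PPID is 1
    line = "rdadata  41234     1  0 09:15 ?        00:00:02 rdaserver"
    ms = re.match(r"^\s*{}\s+(\d+)\s+1\s+".format('rdadata'), line)
    print(int(ms.group(1)) if ms else 0)   # 41234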
+
+    # check if an application is running already, other than the current process
+    # aname - application name
+    # uname - user login name who started the application
+    # sargv - argument string
+    # return the process id if yes and 0 if not
+    def check_application(self, aname, uname = None, sargv = None):
+        if uname:
+            self.check_vuser(uname, aname)
+            pcmd = "ps -u {} -f | grep {} | grep -v ' grep '".format(uname, aname)
+            mp = r"^\s*{}\s+(\d+)\s+(\d+)\s+.*{}\S*\s+(.*)$".format(uname, aname)
+        else:
+            pcmd = "ps -C {} -f".format(aname)
+            mp = r"^\s*\w+\s+(\d+)\s+(\d+)\s+.*{}\S*\s+(.*)$".format(aname)
+        buf = self.pgsystem(pcmd, self.LOGWRN, 20+1024)
+        if not buf: return 0
+        cpids = [os.getpid(), os.getppid()]
+        pids = []
+        ppids = []
+        astrs = []
+        lines = buf.split('\n')
+        for line in lines:
+            ms = re.match(mp, line)
+            if not ms: continue
+            pid = int(ms.group(1))
+            ppid = int(ms.group(2))
+            if pid in cpids:
+                if ppid not in cpids: cpids.append(ppid)
+                continue
+            pids.append(pid)
+            ppids.append(ppid)
+            if sargv: astrs.append(ms.group(3))
+        pcnt = len(pids)
+        if not pcnt: return 0
+        i = 0
+        while i < pcnt:
+            pid = pids[i]
+            if pid and pid in cpids:
+                pids[i] = 0
+                ppid = ppids[i]
+                if ppid not in cpids: cpids.append(ppid)
+                i = 0
+            else:
+                i += 1
+        for i in range(pcnt):
+            pid = pids[i]
+            if pid and (not sargv or sargv.find(astrs[i]) > -1): return pid
+        return 0
+
+    # validate that the current process is a single one; quit if not
+    def validate_single_process(self, aname, uname = None, sargv = None, logact = None):
+        if logact is None: logact = self.LOGWRN
+        pid = self.check_application(aname, uname, sargv)
+        if pid:
+            msg = aname
+            if sargv: msg += ' ' + sargv
+            msg += ": already running as PID={} on {}".format(pid, self.PGLOG['HOSTNAME'])
+            if uname: msg += ' By ' + uname
+            self.pglog(msg + ', Quit Now', logact)
+            sys.exit(0)
+
+    # check how many processes are running for an application already
+    # aname - application name
+    # uname - user login name who started the application
+    # sargv - argument string
+    # return the number of processes (excluding the child ones)
+    def check_multiple_application(self, aname, uname = None, sargv = None):
+        if uname:
+            self.check_vuser(uname, aname)
+            pcmd = "ps -u {} -f | grep {} | grep -v ' grep '".format(uname, aname)
+            mp = r"^\s*{}\s+(\d+)\s+(\d+)\s+.*{}\S*\s+(.*)$".format(uname, aname)
+        else:
+            pcmd = "ps -C {} -f".format(aname)
+            mp = r"^\s*\w+\s+(\d+)\s+(\d+)\s+.*{}\S*\s+(.*)$".format(aname)
+        buf = self.pgsystem(pcmd, self.LOGWRN, 20+1024)
+        if not buf: return 0
+        dpids = [os.getpid(), os.getppid()]
+        pids = []
+        ppids = []
+        astrs = []
+        lines = buf.split('\n')
+        for line in lines:
+            ms = re.match(mp, line)
+            if not ms: continue
+            pid = int(ms.group(1))
+            ppid = int(ms.group(2))
+            if pid in dpids:
+                if ppid > 1 and ppid not in dpids: dpids.append(ppid)
+                continue
+            elif ppid in pids:
+                if pid not in dpids: dpids.append(pid)
+                continue
+            pids.append(pid)
+            ppids.append(ppid)
+            if sargv: astrs.append(ms.group(3))
+        pcnt = len(pids)
+        if not pcnt: return 0
+        i = 0
+        while i < pcnt:
+            pid = pids[i]
+            ppid = ppids[i]
+            if pid:
+                if pid in dpids:
+                    if ppid > 1 and ppid not in dpids: dpids.append(ppid)
+                    pids[i] = 0   # clear this slot first, then rescan from the start
+                    i = 0
+                    continue
+                elif ppid in pids:
+                    if pid not in dpids: dpids.append(pid)
+                    pids[i] = 0
+                    i = 0
+                    continue
+            i += 1
+        ccnt = 0
+        for i in range(pcnt):
+            if pids[i] and (not sargv or sargv.find(astrs[i]) > -1): ccnt += 1
+        return ccnt
+
+    # validate whether the running processes have reached the limit for the given app; quit if so
+    def validate_multiple_process(self, aname, plimit, uname = None, sargv = None, logact = None):
+        if logact is None: logact = self.LOGWRN
+        pcnt = self.check_multiple_application(aname, uname, sargv)
+        if pcnt >= plimit:
+            msg = aname
+            if sargv: msg += ' ' + sargv
+            msg += ": already running in {} processes on {}".format(pcnt, self.PGLOG['HOSTNAME'])
+            if uname: msg += ' By ' + uname
+            self.pglog(msg + ', Quit Now', logact)
+            sys.exit(0)
+
+    # fork process
+    # return the defined result from the call of fork
+    def process_fork(self, dstr):
+        for i in range(10):   # try 10 times
+            try:
+                pid = os.fork()
+                return pid
+            except OSError as e:
+                if e.errno == errno.EAGAIN:
+                    time.sleep(5)
+                else:
+                    self.pglog("{}: {}".format(dstr, str(e)), self.LGEREX)
+                    break
+        self.pglog("{}: too many tries (10) for os.fork()".format(dstr), self.LGEREX)
+
+    # process the predefined signals
+    def signal_catch(self, signum, frame):
+        if self.PGSIG['PPID'] == 1:
+            tmp = 'Server'
+        elif self.PGSIG['PPID'] > 1:
+            tmp = 'Child'
+        else:
+            tmp = 'Process'
+        if signum == signal.SIGQUIT:
+            sname = "<{} - signal.SIGQUIT - Quit>".format(signum)
+        elif signum == signal.SIGUSR1:
+            linfo = 'Logging On'
+            if self.PGLOG['LOGMASK']&self.MSGLOG: linfo += ' & Debugging On'
+            sname = "<{} - signal.SIGUSR1 - {}>".format(signum, linfo)
+        elif signum == signal.SIGUSR2:
+            if self.PGLOG['DBGLEVEL']:
+                linfo = 'Logging Off & Debugging Off'
+            else:
+                linfo = 'Logging Off'
+            sname = "<{} - signal.SIGUSR2 - {}>".format(signum, linfo)
+        else:
+            sname = "<{} - Signal Not Supported Yet>".format(signum)
+        dumpon = 1 if self.SDUMP['OUT'] and self.SDUMP['OUT'] != self.SDUMP['DEF'] else 0
+        if not dumpon: self.set_dump()
+        self.pglog("catches {} in {} {}".format(sname, tmp, self.PGSIG['DSTR']), self.LOGWRN|self.FRCLOG)
+        if signum == signal.SIGUSR1:
+            if self.PGLOG['LOGMASK']&self.MSGLOG:
+                self.PGLOG['DBGLEVEL'] = 1000   # a second logon signal turns on debugging
+            else:
+                self.PGLOG['LOGMASK'] |= self.MSGLOG   # turn on logging
+        elif signum == signal.SIGUSR2:
+            self.PGLOG['LOGMASK'] &= ~(self.MSGLOG)   # turn off logging
+            self.PGLOG['DBGLEVEL'] = 0   # turn off debugging
+            self.set_dump(self.SDUMP['DEF'])
+        else:
+            if not dumpon: self.set_dump(self.SDUMP['DEF'])
+            if signum == signal.SIGQUIT: self.PGSIG['QUIT'] = 1
+            if self.PGSIG['PPID'] <= 1 and len(self.CPIDS) > 0:   # pass signal to child processes
+                for pid in self.CPIDS: self.kill_process(pid, signum)
+
+    # wrapper function to call os.kill(), logging caught errors based on logact
+    # return self.SUCCESS on success; self.FAILURE if not
+    def kill_process(self, pid, signum, logact = 0):
+        try:
+            os.kill(pid, signum)
+        except Exception as e:
+            ret = self.FAILURE
+            if logact:
+                if type(signum) is int:
+                    sigstr = str(signum)
+                else:
+                    sigstr = "{}-{}".format(signum.name, int(signum))
+                self.pglog("Error passing signal {} to pid {}: {}".format(sigstr, pid, str(e)), logact)
+        else:
+            ret = self.SUCCESS
+        return ret
+
+    # wait for child processes to finish
+    def clean_dead_child(self, signum, frame):
+        live = 0
+        while True:
+            try:
+                dpid, status = os.waitpid(-1, os.WNOHANG)
+            except ChildProcessError as e:
+                break   # no child processes any more
+            except Exception as e:
+                self.pglog("Error checking child process: {}".format(str(e)), self.ERRLOG)
+                break
+            else:
+                if dpid == 0:
+                    if live > 0: break   # wait twice if a process is still alive
+                    live += 1
+                elif self.PGSIG['PPID'] < 2:
+                    if dpid in self.CPIDS: del self.CPIDS[dpid]
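clean_dead_child() is installed as the SIGCHLD handler: it reaps any finished children with a non-blocking waitpid() so the daemon never accumulates zombies. The core reaping loop in isolation:

    import os

    # sketch: reap finished child processes without blocking
    def reap_children():
        finished = []
        while True:
            try:
                dpid, status = os.waitpid(-1, os.WNOHANG)
            except ChildProcessError:
                break              # no children exist at all
            if dpid == 0:
                break              # remaining children are still running
            finished.append(dpid)  # dpid just exited and was reaped
        return finished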
+
+    # send signal to daemon and exit
+    def signal_daemon(self, sname, aname, uname):
+        dstr = "Daemon '{}'{} on {}".format(aname, ((" By " + uname) if uname else ""), self.PGLOG['HOSTNAME'])
+        pid = self.check_daemon(aname, uname)
+        if pid > 0:
+            dstr += " (PID = {})".format(pid)
+            if re.match(r'^(quit|stop)$', sname, re.I):
+                signum = signal.SIGQUIT
+                msg = "QUIT"
+            elif re.match(r'^(logon|on)$', sname, re.I):
+                signum = signal.SIGUSR1
+                msg = "Logging ON"
+            elif re.match(r'^(logoff|off)$', sname, re.I):
+                signum = signal.SIGUSR2
+                msg = "Logging OFF"
+                self.PGLOG['DBGLEVEL'] = 0
+            else:
+                self.pglog("{}: invalid Signal for {}".format(sname, dstr), self.LGEREX)
+
+            if self.kill_process(pid, signum, self.LOGERR) == self.SUCCESS:
+                self.pglog("{}: signal sent to {}".format(msg, dstr), self.LOGWRN|self.FRCLOG)
+        else:
+            self.pglog(dstr + ": not running currently", self.LOGWRN|self.FRCLOG)
+        sys.exit(0)
+
+    # start a timed child to run the command in case it hangs
+    def timeout_command(self, cmd, logact = None, cmdopt = 4):
+        if logact is None: logact = self.LOGWRN
+        if logact&self.EXITLG: logact &= ~self.EXITLG
+        self.pglog("> " + cmd, logact)
+        if self.start_timeout_child(cmd, logact):
+            self.pgsystem(cmd, logact, cmdopt)
+            sys.exit(0)
+
+    # start a timeout child process
+    # return: 1 - in child, 0 - in parent
+    def start_timeout_child(self, msg, logact = None):
+        if logact is None: logact = self.LOGWRN
+        pid = self.process_fork(msg)
+        if pid == 0:   # in child
+            signal.signal(signal.SIGQUIT, self.signal_catch)   # catch quit signal only
+            self.PGSIG['PPID'] = self.PGSIG['PID']
+            self.PGSIG['PID'] = pid = os.getpid()
+            self.cmdlog("Timeout child to " + msg, time.time(), 0)
+            self.pgdisconnect(0)   # disconnect database in child
+            return 1
+        # in parent
+        for i in range(self.PGLOG['TIMEOUT']):
+            if not self.check_process(pid): break
+            time.sleep(2)
+        if self.check_process(pid):
+            msg += ": timeout({} secs) in CPID {}".format(2*self.PGLOG['TIMEOUT'], pid)
+            pids = self.kill_children(pid, 0)
+            time.sleep(6)
+            if self.kill_process(pid, signal.SIGKILL, self.LOGERR): pids.insert(0, pid)
+            if pids: msg += "\nProcess({}) Killed".format(','.join(map(str, pids)))
+            self.pglog(msg, logact)
+        return 0
+
+    # kill children recursively, starting from the deepest, and return the pids that got killed
+    def kill_children(self, pid, logact = None):
+        if logact is None: logact = self.LOGWRN
+        buf = self.pgsystem("ps --ppid {} -o pid".format(pid), logact, 20)
+        pids = []
+        if buf:
+            lines = buf.split('\n')
+            for line in lines:
+                ms = re.match(r'^\s*(\d+)', line)
+                if not ms: continue
+                cid = int(ms.group(1))
+                if not self.check_process(cid): continue
+                cids = self.kill_children(cid, logact)
+                if cids: pids = cids + pids
+                if self.kill_process(cid, signal.SIGKILL, logact) == self.SUCCESS: pids.insert(0, cid)
+        if logact and len(pids): self.pglog("Process({}) Killed".format(','.join(map(str, pids))), logact)
+        return pids
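start_timeout_child() is a watchdog pattern: fork a worker, poll it for a bounded number of iterations, then kill it (and its descendants) if it is still alive. The skeleton of that pattern without the logging and recursive tree walk:

    import os, signal, time

    # sketch: run work() in a child and kill it if it outlives the timeout
    def run_with_timeout(work, seconds):
        pid = os.fork()
        if pid == 0:        # child: do the work and exit
            work()
            os._exit(0)
        for _ in range(seconds):       # parent: poll once per second
            done, _status = os.waitpid(pid, os.WNOHANG)
            if done: return True
            time.sleep(1)
        os.kill(pid, signal.SIGKILL)   # timed out: kill the child
        os.waitpid(pid, 0)             # reap it
        return False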
+
+    # start a child process
+    # pname - unique process name
+    def start_child(self, pname, logact = None, dowait = 0):
+        if logact is None: logact = self.LOGWRN
+        if self.PGSIG['MPROC'] < 2: return 1   # no need for a child process
+        if logact&self.EXITLG: logact &= ~self.EXITLG
+        if logact&self.MSGLOG: logact |= self.FRCLOG
+        if self.PGSIG['QUIT']:
+            return self.pglog("{} is in QUIT mode, cannot start CPID for {}".format(self.PGSIG['DSTR'], pname), logact)
+        elif len(self.CPIDS) >= self.PGSIG['MPROC']:
+            i = 0
+            while True:
+                pcnt = self.check_child(None, 0, logact)
+                if pcnt < self.PGSIG['MPROC']: break
+                if dowait:
+                    self.show_wait_message(i, "{}-{}: wait any {} child processes".format(self.PGSIG['DSTR'], pname, pcnt), logact, dowait)
+                    i += 1
+                else:
+                    return self.pglog("{}-{}: {} child processes already running at {}".format(self.PGSIG['DSTR'], pname, pcnt, self.current_datetime()), logact)
+        if self.check_child(pname): return -1   # process is running already
+        pid = self.process_fork(self.PGSIG['DSTR'])
+        if pid:
+            self.CPIDS[pid] = pname   # record the child process id
+            self.pglog("{}: starts CPID {} for {}".format(self.PGSIG['DSTR'], pid, pname))
+        else:
+            signal.signal(signal.SIGQUIT, signal.SIG_DFL)   # stop catching QUIT signal in child
+            self.PGLOG['LOGMASK'] &= ~self.WARNLG   # turn off warn in child
+            self.PGSIG['PPID'] = self.PGSIG['PID']
+            self.PGSIG['PID'] = pid = os.getpid()
+            self.PGSIG['MPROC'] = 1   # 1 in child process
+            self.CBIDS = {}   # clear background process info just in case
+            self.PGSIG['DSTR'] += ": CPID {} for {}".format(pid, pname)
+            self.cmdlog("CPID {} for {}".format(pid, pname))
+            self.pgdisconnect(0)   # disconnect database in child
+        return 1   # child started successfully
+
+    # get child process id for given pname
+    def pname2cpid(self, pname):
+        for cpid in self.CPIDS:
+            if self.CPIDS[cpid] == pname: return cpid
+        return 0
+
+    # check one or all child processes if they are still running
+    # pname - unique process name if given
+    # pid - check this specified process id if given
+    # dowait - 0 no wait, 1 wait all done, -1 wait only when all children are running
+    # return the number of running processes if dowait == 0 or 1
+    # return the number of non-running processes if dowait == -1
+    def check_child(self, pname, pid = 0, logact = None, dowait = 0):
+        if logact is None: logact = self.LOGWRN
+        if self.PGSIG['MPROC'] < 2: return 0   # no child process
+        if logact&self.EXITLG: logact &= ~self.EXITLG
+        ccnt = i = 0
+        if dowait < 0: ccnt = 1 if (pid or pname) else self.PGSIG['MPROC']
+        while True:
+            pcnt = 0
+            if not pid and pname: pid = self.pname2cpid(pname)
+            if pid:
+                if self.check_process(pid):   # process is not done yet
+                    if pname:
+                        self.pglog("{}({}): Child still running".format(pname, pid), logact)
+                    else:
+                        self.pglog("{}: Child still running".format(pid), logact)
+                    pcnt = 1
+                elif pid in self.CPIDS:
+                    del self.CPIDS[pid]   # clean the saved info for the process
+            elif not pname:
+                cpids = list(self.CPIDS)
+                for cpid in cpids:
+                    if self.check_process(cpid):   # process is not done yet
+                        pcnt += 1
+                    elif cpid in self.CPIDS:
+                        del self.CPIDS[cpid]
+            if pcnt == 0 or dowait == 0 or pcnt < ccnt: break
+            self.show_wait_message(i, "{}: wait {}/{} child processes".format(self.PGSIG['DSTR'], pcnt, self.PGSIG['MPROC']), logact, dowait)
+            i += 1
+        return (ccnt - pcnt) if ccnt else pcnt
+
+    # start this process in non-daemon mode
+    # aname - application name, or daemon name
+    # cact - short action name
+    # uname - user login name who started the application
+    # mproc - upper limit of multiple child processes
+    # wtime - waiting time (in seconds) for next process
+    def start_none_daemon(self, aname, cact = None, uname = None, mproc = 1, wtime = 120, logon = 1, bproc = 1):
+        dstr = aname
+        if cact: dstr += " for Action " + cact
+        if uname:
+            dstr += " By " + uname
+            self.check_vuser(uname, aname)
+        signal.signal(signal.SIGQUIT, self.signal_catch)   # catch quit signal only
+        signal.signal(signal.SIGCHLD, self.clean_dead_child)
+        self.PGSIG['DSTR'] = dstr
+        self.PGSIG['DNAME'] = aname
+        self.PGSIG['PPID'] = 0
+        self.PGSIG['PID'] = os.getpid()
+        self.PGSIG['MPROC'] = mproc
+        self.PGSIG['BPROC'] = bproc
+        self.PGLOG['CMDTIME'] = self.PGSIG['WTIME'] = self.get_wait_time(wtime, 120, "Polling Wait Time")
+        if self.PGSIG['MPROC'] > 1:
+            self.cmdlog("starts non-daemon {}(ML={},WI={})".format(aname, self.PGSIG['MPROC'], self.PGSIG['WTIME']))
+        if not logon: self.PGLOG['LOGMASK'] &= ~self.MSGLOG   # turn off message logging
+
+    # check if one process id other than the current one is still running
+    # pid - specified process id
+    def check_process(self, pid):
+        buf = self.pgsystem("ps -p {} -o pid".format(pid), self.LGWNEX, 20)
+        if buf:
+            mp = r'^\s*{}$'.format(pid)
+            lines = buf.split('\n')
+            for line in lines:
+                if re.match(mp, line): return 1
+        return 0
+
+    # check a process id on a given host
+    def check_host_pid(self, host, pid, pmsg = None, logact = None):
+        if logact is None: logact = self.LOGWRN
+        cmd = 'rdaps'
+        if host: cmd += " -h " + host
+        cmd += " -p {}".format(pid)
+        buf = self.pgsystem(cmd, logact, 276)   # 4+16+256
+        if not buf: return (-1 if self.PGLOG['SYSERR'] else 0)
+        if pmsg: self.pglog(pmsg, logact&(~self.EXITLG))
+        return 1
+
+    # check if one process id on a given host name is still running, with default timeout
+    # pid - specified process id
+    # ppid - specified parent process id
+    # uname - user login name who started the daemon
+    # host - host name the pid is supposed to be running on
+    # aname - application name
+    # pmsg - process message if given
+    # return 1 if the process is still alive, 0 if it died already, -1 on checking error
+    def check_host_process(self, host, pid, ppid = 0, uname = None, aname = None, pmsg = None, logact = None):
+        if logact is None: logact = self.LOGWRN
+        cmd = "rdaps"
+        if host: cmd += " -h " + host
+        if pid: cmd += " -p {}".format(pid)
+        if ppid: cmd += " -P {}".format(ppid)
+        if uname: cmd += " -u " + uname
+        if aname: cmd += " -a " + aname
+        buf = self.pgsystem(cmd, logact, 276)   # 4+16+256
+        if not buf: return (-1 if self.PGLOG['SYSERR'] else 0)
+        if pmsg: self.pglog(pmsg, logact&(~self.EXITLG))
+        return 1
+
+    # get a single pbs status record via qstat
+    def get_pbs_info(self, qopts, multiple = 0, logact = 0, chkcnt = 1):
+        stat = {}
+        loop = 0
+        buf = None
+        while loop < chkcnt:
+            buf = self.pgsystem("qstat -n -w {}".format(qopts), logact, 16)
+            if buf: break
+            loop += 1
+            time.sleep(6)
+        if not buf: return stat
+        chkt = chkd = 1
+        lines = buf.split('\n')
+        for line in lines:
+            if chkt:
+                if re.match(r'^Job ID', line):
+                    line = re.sub(r'^Job ID', 'JobID', line, 1)
+                    ckeys = re.split(r'\s+', self.pgtrim(line))
+                    ckeys[1] = 'UserName'
+                    ckeys[3] = 'JobName'
+                    ckeys[7] = 'Reqd' + ckeys[7]
+                    ckeys[8] = 'Reqd' + ckeys[8]
+                    ckeys[9] = 'State'
+                    ckeys[10] = 'Elap' + ckeys[10]
+                    ckeys.append('Node')
+                    kcnt = len(ckeys)
+                    if multiple:
+                        for i in range(kcnt):
+                            stat[ckeys[i]] = []
+                    chkt = 0
+            elif chkd:
+                if re.match(r'^-----', line): chkd = 0
+            else:
+                vals = re.split(r'\s+', self.pgtrim(line))
+                vcnt = len(vals)
+                if vcnt == 1:
+                    if multiple:
+                        stat[ckeys[kcnt-1]].append(vals[0])
+                    else:
+                        stat[ckeys[kcnt-1]] = vals[0]
+                        break
+                elif vcnt > 1:
+                    ms = re.match(r'^(\d+)', vals[0])
+                    if ms: vals[0] = ms.group(1)
+                    for i in range(vcnt):
+                        if multiple:
+                            stat[ckeys[i]].append(vals[i])
+                        else:
+                            stat[ckeys[i]] = vals[i]
+                    if vcnt == kcnt: break
+        return stat
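get_pbs_info() turns the qstat -n -w header row into dict keys and zips the following data row onto them. The pairing step on a canned two-line listing (made-up job data):

    import re

    # sketch: pair a qstat header row with a data row into a dict
    header = "JobID    UserName  Queue  JobName"
    row    = "123456   rdadata   main   dsrqst"
    ckeys = re.split(r'\s+', header.strip())
    vals = re.split(r'\s+', row.strip())
    print(dict(zip(ckeys, vals)))
    # {'JobID': '123456', 'UserName': 'rdadata', 'Queue': 'main', 'JobName': 'dsrqst'}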
+
+    # check status of a pbs batch id
+    # bid - specified batch id
+    # return hash of batch status, 0 if cannot check any more
+    def check_pbs_status(self, bid, logact = None):
+        if logact is None: logact = self.LOGWRN
+        stat = {}
+        buf = self.pgsystem("qhist -w -j {}".format(bid), logact, 20)
+        if not buf: return stat
+        chkt = 1
+        lines = buf.split('\n')
+        for line in lines:
+            if chkt:
+                if re.match(r'^Job', line):
+                    line = re.sub(r'^Job ID', 'JobID', line, 1)
+                    line = re.sub(r'Finish Time', 'FinishTime', line, 1)
+                    line = re.sub(r'Req Mem', 'ReqMem', line, 1)
+                    line = re.sub(r'Used Mem\(GB\)', 'UsedMem(GB)', line, 1)
+                    line = re.sub(r'Avg CPU \(%\)', 'AvgCPU(%)', line, 1)
+                    line = re.sub(r'Elapsed \(h\)', 'WallTime(h)', line, 1)
+                    line = re.sub(r'Job Name', 'JobName', line, 1)
+                    ckeys = re.split(r'\s+', self.pgtrim(line))
+                    ckeys[1] = 'UserName'
+                    kcnt = len(ckeys)
+                    chkt = 0
+            else:
+                vals = re.split(r'\s+', self.pgtrim(line))
+                for i in range(kcnt):
+                    stat[ckeys[i]] = vals[i]
+                break
+        return stat
+
+    # check if a pbs batch id is live
+    # bid - specified batch id
+    # return 1 if the job is still live, 0 if it died already, -1 if it cannot be found
+    def check_pbs_process(self, bid, pmsg = None, logact = None):
+        if logact is None: logact = self.LOGWRN
+        stat = self.get_pbs_info(bid, 0, logact)
+        ret = -1
+        if stat:
+            ms = re.match(r'^(B|R|Q|S|H|W|X)$', stat['State'])
+            if ms:
+                if pmsg: pmsg += ", STATE='{}' and returns 1".format(ms.group(1))
+                ret = 1
+            else:
+                if pmsg: pmsg += ", STATE='{}' and returns 0".format(stat['State'])
+                ret = 0
+        elif pmsg:
+            pmsg += ", Process Does Not Exist and returns -1"
+        if pmsg: self.pglog(pmsg, logact&~self.EXITLG)
+        return ret
+
+    # get wait time in seconds
+    def get_wait_time(self, wtime, default, tmsg):
+        if not wtime: wtime = default   # use default time
+        if type(wtime) is int: return wtime
+        if re.match(r'^\d+$', wtime): return int(wtime)
+        ms = re.match(r'^(\d*)([DHMS])$', wtime, re.I)
+        if ms:
+            ret = int(ms.group(1))
+            unit = ms.group(2).upper()
+        else:
+            self.pglog("{}: '{}' NOT in (D,H,M,S)".format(wtime, tmsg), self.LGEREX)
+        if unit != 'S':
+            ret *= 60   # seconds in a minute
+            if unit != 'M':
+                ret *= 60   # minutes in an hour
+                if unit != 'H':
+                    ret *= 24   # hours in a day
+        return ret   # in seconds
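With the nested cascade above, a bare number is taken as seconds while 'M', 'H' and 'D' values are scaled up unit by unit: '2H' becomes 7200 and '1D' becomes 86400. The conversion restated on its own:

    # sketch: convert a count plus unit into seconds, cascading through the units
    def to_seconds(count, unit):
        if unit == 'D': count *= 24              # days -> hours
        if unit in ('D', 'H'): count *= 60       # hours -> minutes
        if unit in ('D', 'H', 'M'): count *= 60  # minutes -> seconds
        return count

    assert to_seconds(2, 'H') == 7200
    assert to_seconds(1, 'D') == 86400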
+
+    # start a background process and record its id; check self.pgsystem() for
+    # valid cmdopt values
+    def start_background(self, cmd, logact = None, cmdopt = 5, dowait = 0):
+        if logact is None: logact = self.LOGWRN
+        if self.PGSIG['BPROC'] < 2: return self.pgsystem(cmd, logact, cmdopt)   # no background
+        act = logact&(~self.EXITLG)
+        if act&self.MSGLOG: act |= self.FRCLOG   # make sure background calls are always logged
+        if len(self.CBIDS) >= self.PGSIG['BPROC']:
+            i = 0
+            while True:
+                bcnt = self.check_background(None, 0, act)
+                if bcnt < self.PGSIG['BPROC']: break
+                if dowait:
+                    self.show_wait_message(i, "{}-{}: wait any {} background calls".format(self.PGSIG['DSTR'], cmd, bcnt), act, dowait)
+                    i += 1
+                else:
+                    return self.pglog("{}-{}: {} background calls already at {}".format(self.PGSIG['DSTR'], cmd, bcnt, self.current_datetime()), act)
+        cmdlog = (act if cmdopt&1 else self.WARNLG)
+        if cmdopt&8:
+            self.cmdlog("starts '{}'".format(cmd), None, cmdlog)
+        else:
+            self.pglog("{}({})-{} >{} &".format(self.PGLOG['HOSTNAME'], os.getpid(), self.current_datetime(), cmd), cmdlog)
+        bckcmd = cmd
+        if cmdopt&2:
+            bckcmd += " >> {}/{}".format(self.PGLOG['LOGPATH'], self.PGLOG['LOGFILE'])
+        if cmdopt&4:
+            if not self.PGLOG['ERRFILE']:
+                self.PGLOG['ERRFILE'] = re.sub(r'\.log$', '.err', self.PGLOG['LOGFILE'], 1)
+            bckcmd += " 2>> {}/{}".format(self.PGLOG['LOGPATH'], self.PGLOG['ERRFILE'])
+        bckcmd += " &"
+        os.system(bckcmd)
+        return self.record_background(cmd, logact)
+
+    # get background process id for given bcmd
+    def bcmd2cbid(self, bcmd):
+        for cbid in self.CBIDS:
+            if self.CBIDS[cbid] == bcmd: return cbid
+        return 0
+
+    # check one or all background processes if they are still running
+    # bid - check this specified background process id if given
+    # return the number of processes that are still running
+    def check_background(self, bcmd, bid = 0, logact = None, dowait = 0):
+        if logact is None: logact = self.LOGWRN
+        if self.PGSIG['BPROC'] < 2: return 0   # no background process
+        if logact&self.EXITLG: logact &= ~self.EXITLG
+        if not bid and bcmd: bid = self.bcmd2cbid(bcmd)
+        bcnt = i = 0
+        while True:
+            if bid:
+                if self.check_process(bid):   # process is not done yet
+                    if bcmd:
+                        self.pglog("{}({}): Background process still running".format(bcmd, bid), logact)
+                    else:
+                        self.pglog("{}: Background process still running".format(bid), logact)
+                    bcnt = 1
+                elif bid in self.CBIDS:
+                    del self.CBIDS[bid]   # clean the saved info for the process
+            elif not bcmd:
+                for bid in list(self.CBIDS):   # copy the keys; entries may be deleted below
+                    if self.check_process(bid):   # process is not done yet
+                        bcnt += 1
+                    else:
+                        del self.CBIDS[bid]
+            if not (bcnt and dowait): break
+            self.show_wait_message(i, "{}: wait {}/{} background processes".format(self.PGSIG['DSTR'], bcnt, self.PGSIG['MPROC']), logact, dowait)
+            i += 1
+            bcnt = 0
+        return bcnt
+
+    # check and record process id for background command; return 1 if successful,
+    # 0 otherwise, or -1 if done already
+    def record_background(self, bcmd, logact = None):
+        if logact is None: logact = self.LOGWRN
+        ms = re.match(r'^(\S+)', bcmd)
+        if ms:
+            aname = ms.group(1)
+        else:
+            aname = bcmd
+        mp = r"^\s*(\S+)\s+(\d+)\s+1\s+.*{}(.*)$".format(aname)
+        pc = "ps -u {},{} -f | grep ' 1 ' | grep {}".format(self.PGLOG['CURUID'], self.PGLOG['GDEXUSER'], aname)
+        for i in range(2):
+            buf = self.pgsystem(pc, logact, 20+1024)
+            if buf:
+                lines = buf.split('\n')
+                for line in lines:
+                    ms = re.match(mp, line)
+                    if not ms: continue
+                    (uid, sbid, acmd) = ms.groups()
+                    bid = int(sbid)
+                    if bid in self.CBIDS: return -1
+                    if uid == self.PGLOG['GDEXUSER']:
+                        acmd = re.sub(r'^\.(pl|py)\s+', '', acmd, 1)
+                        if re.match(r'^{}{}'.format(aname, acmd), bcmd): continue
+                    self.CBIDS[bid] = bcmd
+                    return 1
+            time.sleep(2)
+        return 0
+
+    # sleep for the given period for the daemon; stop if maximum running time is reached
+    def sleep_daemon(self, wtime = 0, mtime = None):
+        if not wtime: wtime = self.PGSIG['WTIME']
+        if mtime is None: mtime = self.PGSIG['MTIME']
+        if mtime > 0:
+            rtime = int(time.time()) - self.PGSIG['STIME']
+            if rtime >= mtime:
+                self.PGSIG['QUIT'] = 1
+                wtime = 0
+        if wtime: time.sleep(wtime)
+        return wtime
+
+    # show wait message every 30 loops and then sleep for PGSIG['WTIME']
+    def show_wait_message(self, loop, msg, logact = None, dowait = 0):
+        if logact is None: logact = self.LOGWRN
+        if loop > 0 and (loop%30) == 0:
+            self.pglog("{} at {}".format(msg, self.current_datetime()), logact)
+        if dowait: time.sleep(self.PGSIG['WTIME'])
+
+    # register a timeout context manager to raise a timeout error
+    @contextmanager
+    def pgtimeout(self, seconds = 0, logact = 0):
+        if not seconds: seconds = self.PGLOG['TIMEOUT']
+        signal.signal(signal.SIGALRM, self.raise_pgtimeout)
+        signal.alarm(seconds)
+        try:
+            yield
+        except TimeoutError as e:
+            pass
+        finally:
+            signal.alarm(0)   # cancel any pending alarm
+            signal.signal(signal.SIGALRM, signal.SIG_IGN)
+
+    # raise a timeout Error
+    @staticmethod
+    def raise_pgtimeout(signum, frame):
+        raise TimeoutError
+
+    # Add a timeout block.
+    def timeout_func(self):
+        with self.pgtimeout(1):
+            print('entering block')
+            time.sleep(10)
+            print('This should never get printed because the line before timed out')
diff --git a/src/rda_python_common/pg_split.py b/src/rda_python_common/pg_split.py
new file mode 100644
index 0000000..1aa715e
--- /dev/null
+++ b/src/rda_python_common/pg_split.py
@@ -0,0 +1,260 @@
+#
+###############################################################################
+#
+#     Title : pg_split.py -- PostgreSQL DataBase Interface for table wfile
+#    Author : Zaihua Ji, zji@ucar.edu
+#      Date : 09/10/2024
+#             2025-01-10 transferred to package rda_python_common from
+#             https://github.com/NCAR/rda-shared-libraries.git
+#             2025-12-01 convert to class PgSplit
+#   Purpose : Python library module to handle query and manipulate table wfile
+#
+#    Github : https://github.com/NCAR/rda-python-common.git
+#
+###############################################################################
+
+import os
+import re
+from os import path as op
+from .pg_dbi import PgDBI
+from .pg_util import PgUtil
+
+class PgSplit(PgDBI):
+
+    def __init__(self):
+        super().__init__()   # initialize parent class
+
+    # compare wfile records between tables wfile and wfile_dNNNNNN,
+    # and return the records that need to be added, modified and deleted
+    @staticmethod
+    def compare_wfile(wfrecs, dsrecs):
+        flds = dsrecs.keys()
+        arecs = {fld : [] for fld in flds}   # one separate list per field; a shared list would mix all fields
+        mrecs = {}
+        drecs = []
+        wfcnt = len(wfrecs['wid'])
+        dscnt = len(dsrecs['wid'])
+        pi = pj = -1
+        i = j = 0
+        while i < wfcnt and j < dscnt:
+            if i > pi:
+                wfrec = PgUtil.onerecord(wfrecs, i)
+                wwid = wfrec['wid']
+                pi = i
+            if j > pj:
+                dsrec = PgUtil.onerecord(dsrecs, j)
+                dwid = dsrec['wid']
+                pj = j
+            if wwid == dwid:
+                mrec = PgSplit.compare_one_record(flds, wfrec, dsrec)
+                if mrec: mrecs[wwid] = mrec
+                i += 1
+                j += 1
+            elif wwid > dwid:
+                drecs.append(dwid)
+                j += 1
+            else:
+                for fld in flds:
+                    arecs[fld].append(wfrec[fld])
+                i += 1
+        if i < wfcnt:
+            for fld in flds:
+                arecs[fld].extend(wfrecs[fld][i:wfcnt])
+        elif j < dscnt:
+            drecs.extend(dsrecs['wid'][j:dscnt])
+        if len(arecs['wid']) == 0: arecs = {}
+        return (arecs, mrecs, drecs)
+
+    # compare column values and return the new one; empty if the same
+    @staticmethod
+    def compare_one_record(flds, wfrec, dsrec):
+        mrec = {}
+        for fld in flds:
+            if wfrec[fld] != dsrec[fld]: mrec[fld] = wfrec[fld]
+        return mrec
+
+    # convert wfile records to wfile_dsid records
+    @staticmethod
+    def wfile2wdsid(wfrecs, wids = None):
+        dsrecs = {}
+        if wfrecs:
+            for fld in wfrecs:
+                if fld == 'dsid': continue
+                dsrecs[fld] = wfrecs[fld]
+            if wids: dsrecs['wid'] = wids
+        return dsrecs
+
+    # trim wfile records
+    @staticmethod
+    def trim_wfile_fields(wfrecs):
+        records = {}
+        if 'wfile' in wfrecs: records['wfile'] = wfrecs['wfile']
+        if 'dsid' in wfrecs: records['dsid'] = wfrecs['dsid']
+        return records
+
+    # check the condition string, and add dsid if needed
+    @staticmethod
+    def get_dsid_condition(dsid, condition):
+        if condition:
+            if re.search(r'(^|\.| )(wid|dsid)\s*=', condition):
+                return condition
+            else:
+                dscnd = "wfile.dsid = '{}' ".format(dsid)
+                if not re.match(r'^\s*(ORDER|GROUP|HAVING|OFFSET|LIMIT)\s', condition, re.I): dscnd += 'AND '
+                return dscnd + condition   # no where clause, append directly
+        else:
+            return "wfile.dsid = '{}'".format(dsid)
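get_dsid_condition() leaves a condition untouched once it already pins wid or dsid, and skips the AND when the condition begins with a clause keyword such as ORDER. A few illustrative calls (assuming the class above is importable as packaged):

    from rda_python_common.pg_split import PgSplit

    print(PgSplit.get_dsid_condition('d010000', ''))
    # wfile.dsid = 'd010000'
    print(PgSplit.get_dsid_condition('d010000', "type = 'D'"))
    # wfile.dsid = 'd010000' AND type = 'D'
    print(PgSplit.get_dsid_condition('d010000', "ORDER BY wfile"))
    # wfile.dsid = 'd010000' ORDER BY wfile
    print(PgSplit.get_dsid_condition('d010000', "wid = 12345"))
    # wid = 12345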
+
+    # insert one record into wfile and/or wfile_dsid
+    def pgadd_wfile(self, dsid, wfrec, logact = None, getid = None):
+        if logact is None: logact = self.LOGERR
+        record = {'wfile' : wfrec['wfile'],
+                  'dsid' : (wfrec['dsid'] if 'dsid' in wfrec else dsid)}
+        wret = self.pgadd('wfile', record, logact, 'wid')
+        if wret:
+            record = self.wfile2wdsid(wfrec, wret)
+            self.pgadd('wfile_' + dsid, record, logact|self.ADDTBL)
+        if logact&self.AUTOID or getid:
+            return wret
+        else:
+            return 1 if wret else 0
+
+    # insert multiple records into wfile and/or wfile_dsid
+    def pgmadd_wfile(self, dsid, wfrecs, logact = None, getid = None):
+        if logact is None: logact = self.LOGERR
+        records = {'wfile' : wfrecs['wfile'],
+                   'dsid' : (wfrecs['dsid'] if 'dsid' in wfrecs else [dsid]*len(wfrecs['wfile']))}
+        wret = self.pgmadd('wfile', records, logact, 'wid')
+        wcnt = wret if isinstance(wret, int) else len(wret)
+        if wcnt:
+            records = self.wfile2wdsid(wfrecs, wret)
+            self.pgmadd('wfile_' + dsid, records, logact|self.ADDTBL)
+        if logact&self.AUTOID or getid:
+            return wret
+        else:
+            return wcnt
+
+    # update one or multiple rows in wfile and/or wfile_dsid
+    # exclude dsid in condition
+    def pgupdt_wfile(self, dsid, wfrec, condition, logact = None):
+        if logact is None: logact = self.LOGERR
+        record = self.trim_wfile_fields(wfrec)
+        if record:
+            wret = self.pgupdt('wfile', record, self.get_dsid_condition(dsid, condition), logact)
+        else:
+            wret = 1
+        if wret:
+            record = self.wfile2wdsid(wfrec)
+            if record: wret = self.pgupdt("wfile_" + dsid, record, condition, logact|self.ADDTBL)
+        return wret
+
+    # update one row in wfile and/or wfile_dsid with dsid change
+    # exclude dsid in condition
+    def pgupdt_wfile_dsid(self, dsid, odsid, wfrec, wid, logact = None):
+        if logact is None: logact = self.LOGERR
+        record = self.trim_wfile_fields(wfrec)
+        cnd = 'wid = {}'.format(wid)
+        if record:
+            wret = self.pgupdt('wfile', record, cnd, logact)
+        else:
+            wret = 1
+        if wret:
+            record = self.wfile2wdsid(wfrec)
+            tname = 'wfile_' + dsid
+            doupdt = True
+            if odsid and odsid != dsid:
+                oname = 'wfile_' + odsid
+                pgrec = self.pgget(oname, '*', cnd, logact|self.ADDTBL)
+                if pgrec:
+                    for fld in record:
+                        pgrec[fld] = record[fld]
+                    wret = self.pgadd(tname, pgrec, logact|self.ADDTBL)
+                    if wret: self.pgdel(oname, cnd, logact)
+                    doupdt = False
+            if doupdt and record:
+                wret = self.pgupdt(tname, record, cnd, logact|self.ADDTBL)
+        return wret
+
+    # delete one or multiple rows in wfile and/or wfile_dsid, and add the record(s) into wfile_delete
+    # exclude dsid in condition
+    def pgdel_wfile(self, dsid, condition, logact = None):
+        if logact is None: logact = self.LOGERR
+        pgrecs = self.pgmget_wfile(dsid, '*', condition, logact|self.ADDTBL)
+        wret = self.pgdel('wfile', self.get_dsid_condition(dsid, condition), logact)
+        if wret: self.pgdel("wfile_" + dsid, condition, logact)
+        if wret and pgrecs: self.pgmadd('wfile_delete', pgrecs, logact)
+        return wret
+
+    # delete one or multiple rows in sfile, and add the record(s) into sfile_delete
+    def pgdel_sfile(self, condition, logact = None):
+        if logact is None: logact = self.LOGERR
+        pgrecs = self.pgmget('sfile', '*', condition, logact)
+        sret = self.pgdel('sfile', condition, logact)
+        if sret and pgrecs: self.pgmadd('sfile_delete', pgrecs, logact)
+        return sret
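+
+    # Usage sketch (illustrative; the dsid and condition are hypothetical).
+    # pgdel_wfile() fetches the matching rows first, deletes them from both
+    # wfile and wfile_<dsid>, then archives the fetched rows in wfile_delete:
+    #
+    #     split = PgSplit()
+    #     split.pgdel_wfile('d010000', "wfile = 'obsolete.nc'")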
+
+    # update one or multiple rows in wfile and/or wfile_dsid for multiple dsids
+    # exclude dsid in condition
+    def pgupdt_wfile_dsids(self, dsid, dsids, brec, bcnd, logact = None):
+        if logact is None: logact = self.LOGERR
+        record = self.trim_wfile_fields(brec)
+        if record:
+            wret = self.pgupdt("wfile", record, bcnd, logact)
+        else:
+            wret = 1
+        if wret:
+            record = self.wfile2wdsid(brec)
+            if record:
+                wret = 0
+                dids = [dsid]
+                if dsids: dids.extend(dsids.split(','))
+                for did in dids:
+                    wret += self.pgupdt("wfile_" + did, record, bcnd, logact|self.ADDTBL)
+        return wret
+
+    # get one record from wfile or wfile_dsid
+    # exclude dsid in fields and condition
+    def pgget_wfile(self, dsid, fields, condition, logact = None):
+        if logact is None: logact = self.LOGERR
+        tname = "wfile_" + dsid
+        flds = fields.replace('wfile.', tname + '.')
+        cnd = condition.replace('wfile.', tname + '.')
+        record = self.pgget(tname, flds, cnd, logact|self.ADDTBL)
+        if record and flds == '*': record['dsid'] = dsid
+        return record
+
+    # get one record from wfile or wfile_dsid joining other tables
+    # exclude dsid in fields and condition
+    def pgget_wfile_join(self, dsid, tjoin, fields, condition, logact = None):
+        if logact is None: logact = self.LOGERR
+        tname = "wfile_" + dsid
+        flds = fields.replace('wfile.', tname + '.')
+        jname = tname + ' ' + tjoin.replace('wfile.', tname + '.')
+        cnd = condition.replace('wfile.', tname + '.')
+        record = self.pgget(jname, flds, cnd, logact|self.ADDTBL)
+        if record and flds == '*': record['dsid'] = dsid
+        return record
+
+    # get multiple records from wfile or wfile_dsid
+    # exclude dsid in fields and condition
+    def pgmget_wfile(self, dsid, fields, condition, logact = None):
+        if logact is None: logact = self.LOGERR
+        tname = "wfile_" + dsid
+        flds = fields.replace('wfile.', tname + '.')
+        cnd = condition.replace('wfile.', tname + '.')
+        records = self.pgmget(tname, flds, cnd, logact|self.ADDTBL)
+        if records and flds == '*': records['dsid'] = [dsid]*len(records['wid'])
+        return records
+
+    # get multiple records from wfile or wfile_dsid joining other tables
+    # exclude dsid in fields and condition
+    def pgmget_wfile_join(self, dsid, tjoin, fields, condition, logact = None):
+        if logact is None: logact = self.LOGERR
+        tname = "wfile_" + dsid
+        flds = fields.replace('wfile.', tname + '.')
+        jname = tname + ' ' + tjoin.replace('wfile.', tname + '.')
+        cnd = condition.replace('wfile.', tname + '.')
+        records = self.pgmget(jname, flds, cnd, logact|self.ADDTBL)
+        if records and flds == '*': records['dsid'] = [dsid]*len(records['wid'])
+        return records
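+
+    # Behavior sketch (illustrative dsid): the getters rewrite any 'wfile.'
+    # qualifier so a generic fields/condition string targets the per-dataset table:
+    #
+    #     split.pgget_wfile('d010000', '*', "wfile.wtype = 'D'")
+    #     # queries table wfile_d010000 with condition "wfile_d010000.wtype = 'D'"
+    #     # and, since fields is '*', puts 'dsid' : 'd010000' back into the record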
diff --git a/src/rda_python_common/pg_util.py b/src/rda_python_common/pg_util.py
new file mode 100644
index 0000000..a674c1f
--- /dev/null
+++ b/src/rda_python_common/pg_util.py
@@ -0,0 +1,1534 @@
+#
+###############################################################################
+#
+# Title : pg_util.py -- module for misc utilities.
+# Author : Zaihua Ji, zji@ucar.edu
+# Date : 07/27/2020
+# 2025-01-10 transferred to package rda_python_common from
+# https://github.com/NCAR/rda-shared-libraries.git
+# 2025-11-20 convert to class PgUtil
+# Purpose : python library module for global misc utilities
+#
+# Github : https://github.com/NCAR/rda-python-common.git
+#
+###############################################################################
+#
+import os
+import re
+import time
+import datetime
+import calendar
+import glob
+from os import path as op
+from .pg_log import PgLOG
+
+class PgUtil(PgLOG):
+
+    def __init__(self):
+        super().__init__()  # initialize parent class
+        self.DATEFMTS = {
+            'C' : '(CC|C)',                        # century
+            'Y' : '(YYYY|YY00|YYY|YY|YEAR|YR|Y)',  # YYY means decade
+            'Q' : '(QQ|Q)',                        # quarter
+            'M' : '(Month|Mon|MM|M)',              # numeric or string month
+            'W' : '(Week|Www|W)',                  # string or numeric weekday
+            'D' : '(DDD|DD|D)',                    # days in year or month
+            'H' : '(HHH|HH|H)',                    # hours in month or day
+            'N' : '(NNNN|NN|N)',                   # minutes in day or hour
+            'S' : '(SSSS|SS|S)'                    # seconds in hour or minute
+        }
+        self.MONTHS = [
+            "january", "february", "march", "april", "may", "june",
+            "july", "august", "september", "october", "november", "december"
+        ]
+        self.MNS = ["jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"]
+        self.WDAYS = ["sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday"]
+        self.WDS = ["sun", "mon", "tue", "wed", "thu", "fri", "sat"]
+        self.MDAYS = [365, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
+
+    # date: optional given date in format of "YYYY-MM-DD"
+    # return weekday: 0 - Sunday, 1 - Monday, ..., 6 - Saturday
+    def get_weekday(self, date = None):
+        if date is None:
+            ct = time.gmtime() if self.PGLOG['GMTZ'] else time.localtime()
+        else:
+            ct = time.strptime(str(date), "%Y-%m-%d")
+        return (ct[6]+1)%7
+
+    # mn: given month string like "Jan" or "January", or numeric number 1 to 12
+    # Return: numeric month if fmt is not given (default); three-character or full month name for given fmt
+    def get_month(self, mn, fmt = None):
+        if not isinstance(mn, int):
+            if re.match(r'^\d+$', mn):
+                mn = int(mn)
+            else:
+                for m in range(12):
+                    if re.match(mn, self.MONTHS[m], re.I):
+                        mn = m + 1
+                        break
+        if fmt and mn > 0 and mn < 13:
+            slen = len(fmt)
+            if slen == 2:
+                smn = "{:02}".format(mn)
+            elif re.match(r'^mon', fmt, re.I):
+                smn = self.MNS[mn-1] if slen == 3 else self.MONTHS[mn-1]
+                if re.match(r'^Mon', fmt):
+                    smn = smn.capitalize()
+                elif re.match(r'^MON', fmt):
+                    smn = smn.upper()
+            else:
+                smn = str(mn)
+            return smn
+        else:
+            return mn
+
+    # wday: given weekday string like "Sun" or "Sunday", or numeric number 0 to 6
+    # Return: numeric weekday if fmt is not given (default); three-character or full week name for given fmt
+    def get_wday(self, wday, fmt = None):
+        if not isinstance(wday, int):
+            if re.match(r'^\d+$', wday):
+                wday = int(wday)
+            else:
+                for w in range(7):
+                    if re.match(wday, self.WDAYS[w], re.I):
+                        wday = w
+                        break
+        if fmt and wday >= 0 and wday <= 6:
+            slen = len(fmt)
+            if slen == 4:
+                swday = self.WDAYS[wday]  # index on wday; 'w' is unbound when wday is passed in as an integer
+                if re.match(r'^We', fmt):
+                    swday = swday.capitalize()
+                elif re.match(r'^WE', fmt):
+                    swday = swday.upper()
+            elif slen == 3:
+                swday = self.WDS[wday]
+                if re.match(r'^Ww', fmt):
+                    swday = swday.capitalize()
+                elif re.match(r'^WW', fmt):
+                    swday = swday.upper()
+            else:
+                swday = str(wday)
+            return swday
+        else:
+            return wday
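+
+    # Behavior sketch (illustrative calls on a PgUtil instance):
+    #
+    #     get_month('March')    -> 3
+    #     get_month(3, 'Mon')   -> 'Mar'
+    #     get_month(3, 'MONTH') -> 'MARCH'
+    #     get_wday('Tuesday')   -> 2
+    #     get_wday(2, 'Www')    -> 'Tue'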
+
+    # file: given file name
+    # Return: type if given file name is a valid online file; '' otherwise
+    @staticmethod
+    def valid_online_file(file, type = None, exists = None):
+        if exists is None or exists:
+            if not op.exists(file): return ''  # file does not exist
+        bname = op.basename(file)
+        if re.match(r'^,.*', bname): return ''  # hidden file
+        if re.search(r'index\.(htm|html|shtml)$', bname, re.I): return ''  # index file
+        if type and type != 'D': return type
+        if re.search(r'\.(doc|php|html|shtml)(\.|$)', bname, re.I): return ''  # file with special extension
+        return 'D'
+
+    # Return: current time string in format of HH:MM:SS (or date and time if getdate is True)
+    def curtime(self, getdate = False):
+        ct = time.gmtime() if self.PGLOG['GMTZ'] else time.localtime()
+        fmt = "%Y-%m-%d %H:%M:%S" if getdate else "%H:%M:%S"
+        return time.strftime(fmt, ct)
+
+    # wrapper function of curtime(True) to get datetime in form of YYYY-MM-DD HH:NN:SS
+    def curdatetime(self):
+        return self.curtime(True)
+
+    # fmt: optional date format, defaults to YYYY-MM-DD
+    # Return: current (date, hour)
+    def curdatehour(self, fmt = None):
+        ct = time.gmtime() if self.PGLOG['GMTZ'] else time.localtime()
+        dt = self.fmtdate(ct[0], ct[1], ct[2], fmt) if fmt else time.strftime("%Y-%m-%d", ct)
+        return [dt, ct[3]]
+
+    # tm: optional time in seconds since the Epoch
+    # Return: current date and time strings
+    def get_date_time(self, tm = None):
+        act = ct = None
+        if tm is None:
+            ct = time.gmtime() if self.PGLOG['GMTZ'] else time.localtime()
+        elif isinstance(tm, str):
+            act = tm.split(' ')
+        elif isinstance(tm, (int, float)):
+            ct = time.localtime(tm)
+        elif isinstance(tm, datetime.datetime):
+            act = str(tm).split(' ')
+        elif isinstance(tm, datetime.date):
+            act = [str(tm), '00:00:00']
+        elif isinstance(tm, datetime.time):
+            act = [None, str(tm)]
+        if ct is None:
+            return act if act else None
+        else:
+            return [time.strftime("%Y-%m-%d", ct), time.strftime("%H:%M:%S", ct)]
+
+    # tm: optional time in seconds since the Epoch
+    # Return: current datetime string
+    def get_datetime(self, tm = None):
+        if tm is None:
+            ct = time.gmtime() if self.PGLOG['GMTZ'] else time.localtime()
+            return time.strftime("%Y-%m-%d %H:%M:%S", ct)
+        elif isinstance(tm, str):
+            return tm
+        elif isinstance(tm, (int, float)):
+            ct = time.localtime(tm)
+            return time.strftime("%Y-%m-%d %H:%M:%S", ct)
+        elif isinstance(tm, datetime.datetime):
+            return str(tm)
+        elif isinstance(tm, datetime.date):
+            return (str(tm) + ' 00:00:00')
+        return tm
+
+    # file: file name; use the current time if no file is given
+    # Return: timestamp string in format of 'YYYYMMDDHHMMSS'
+    def timestamp(self, file = None):
+        if file is None:
+            ct = time.gmtime() if self.PGLOG['GMTZ'] else time.localtime()
+        else:
+            mt = os.stat(file).st_mtime  # file last modified time
+            ct = time.gmtime(mt) if self.PGLOG['GMTZ'] else time.localtime(mt)
+        return time.strftime("%Y%m%d%H%M%S", ct)
+
+    # date: datetime string
+    # check date/time and set to the given default if the date is empty
+    @staticmethod
+    def check_datetime(date, default):
+        if not date: return default
+        if not isinstance(date, str): date = str(date)
+        if re.match(r'^0000', date): return default
+        return date
+
+    # fmt: date format, default to "YYYY-MM-DD"
+    # Return: new formatted current date string
+    def curdate(self, fmt = None):
+        ct = time.gmtime() if self.PGLOG['GMTZ'] else time.localtime()
+        return self.fmtdate(ct[0], ct[1], ct[2], fmt) if fmt else time.strftime("%Y-%m-%d", ct)
+
+    # check given string to identify temporal patterns and their units
+    # (keys defined in self.DATEFMTS)
+    def temporal_pattern_units(self, string, seps):
+        mkeys = ['D', 'Q', 'M', 'C', 'Y', 'H', 'N', 'S']
+        units = {}
+        match = seps[0] + "([^" + seps[1] + "]+)" + seps[1]
+        patterns = re.findall(match, string)
+        for pattern in patterns:
+            # skip generic pattern and current time
+            if re.match(r'^(P\d*|C.+C)$', pattern, re.I): continue
+            for mkey in mkeys:
+                ms = re.findall(self.DATEFMTS[mkey], pattern, re.I)
+                if ms:
+                    if mkey == 'Q':
+                        units[mkey] = 3
+                    elif mkey == 'C':
+                        units[mkey] = 100
+                    else:
+                        units[mkey] = 1
+                    for m in ms:
+                        pattern = pattern.replace(m, '', 1)
+        return units
+
+    # format output for given date and hour
+    def format_datehour(self, date, hour, tofmt = None, fromfmt = None):
+        if date:
+            datehour = self.format_date(str(date), tofmt, fromfmt)
+        elif tofmt:
+            datehour = tofmt
+        else:
+            datehour = ''
+        if hour is not None:
+            if tofmt:
+                fmts = re.findall(self.DATEFMTS['H'], datehour, re.I)
+                for fmt in fmts:
+                    if len(fmt) > 1:
+                        shr = "{:02}".format(int(hour))
+                    else:
+                        shr = str(hour)
+                    datehour = re.sub(fmt, shr, datehour, 1)
+            else:
+                datehour += " {:02}".format(int(hour))
+        return datehour
+
+    # split a date, time or datetime into an array according to
+    # the sep value; str to int for digital values
+    @staticmethod
+    def split_datetime(sdt, sep = r'\D'):
+        if not isinstance(sdt, str): sdt = str(sdt)
+        adt = re.split(sep, sdt)
+        acnt = len(adt)
+        for i in range(acnt):
+            if re.match(r'^\d+$', adt[i]): adt[i] = int(adt[i])
+        return adt
+
+    # cdate: given date in format of fromfmt
+    # tofmt: date format; ex. "Month D, YYYY"
+    # fromfmt: date format, default to YYYY-MM-DD
+    # Return: new formatted date string according to tofmt
+    def format_date(self, cdate, tofmt = None, fromfmt = None):
+        if not cdate: return cdate
+        if not isinstance(cdate, str): cdate = str(cdate)
+        dates = [None, None, None]
+        sep = '|'
+        mns = sep.join(self.MNS)
+        months = sep.join(self.MONTHS)
+        mkeys = ['D', 'M', 'Q', 'Y', 'C', 'H']
+        PATTERNS = [r'(\d\d\d\d)', r'(\d+)', r'(\d\d)',
+                    r'(\d\d\d)', '(' + mns + ')', '(' + months + ')']
+        if not fromfmt:
+            if not tofmt:
+                if re.match(r'^\d\d\d\d-\d\d-\d\d$', cdate): return cdate  # no formatting needed
+            ms = re.match(r'^\d+(\W)\d+(\W)\d+', cdate)
+            if ms:
+                fromfmt = "Y" + ms.group(1) + "M" + ms.group(2) + "D"
+            else:
+                self.pglog(cdate + ": Invalid date, should be in format YYYY-MM-DD", self.LGEREX)
+        pattern = fromfmt
+        fmts = {}
+        formats = {}
+        for mkey in mkeys:
+            ms = re.search(self.DATEFMTS[mkey], pattern, re.I)
+            if ms:
+                fmts[mkey] = ms.group(1)
+                pattern = re.sub(fmts[mkey], '', pattern)
+        cnt = 0
+        for mkey in fmts:
+            fmt = fmts[mkey]
+            i = len(fmt)
+            if mkey == 'D':
+                if i == 4: i = 1
+            elif mkey == 'M':
+                if i == 3: i = 4
+            elif mkey == 'Y':
+                if i == 4: i = 0
+            formats[fromfmt.find(fmt)] = fmt
+            fromfmt = fromfmt.replace(fmt, PATTERNS[i])
+            cnt += 1
+        ms = re.findall(fromfmt, cdate)
+        mcnt = len(ms[0]) if ms else 0
+        i = 0
+        for k in sorted(formats):
+            if i >= mcnt: break
+            fmt = formats[k]
+            val = ms[0][i]
+            if re.match(r'^Y', fmt, re.I):
+                dates[0] = int(val)
+                if len(fmt) == 3: dates[0] *= 10
+            elif re.match(r'^C', fmt, re.I):
+                dates[0] = 100 * int(val)  # year at end of century
+            elif re.match(r'^M', fmt, re.I):
+                if re.match(r'^Mon', fmt, re.I):
+                    dates[1] = self.get_month(val)
+                else:
+                    dates[1] = int(val)
+            elif re.match(r'^Q', fmt, re.I):
+                dates[1] = 3 * int(val)  # month at end of quarter
+            elif re.match(r'^H', fmt, re.I):  # hour
+                dates.append(int(val))
+            else:  # day
+                dates[2] = int(val)
+            i += 1
+        if len(dates) > 3:
+            cdate = self.fmtdatehour(dates[0], dates[1], dates[2], dates[3], tofmt)
+        else:
+            cdate = self.fmtdate(dates[0], dates[1], dates[2], tofmt)
+        return cdate
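+
+    # Conversion sketch (illustrative calls):
+    #
+    #     format_date('2024-03-05', 'Month D, YYYY')            -> 'March 5, 2024'
+    #     format_date('03/05/2024', 'YYYY-MM-DD', 'MM/DD/YYYY') -> '2024-03-05'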
+
+    # yr: year value
+    # mn: month value, 1-12
+    # dy: day of the month
+    # hr: hour of the day
+    # nn: minute of the hour
+    # ss: second of the minute
+    # tofmt: date format, ex. "Month D, YYYY", default to "YYYY-MM-DD HH:NN:SS"
+    # Return: new formatted datetime string
+    def fmtdatetime(self, yr, mn, dy, hr = None, nn = None, ss = None, tofmt = None):
+        if not tofmt: tofmt = "YYYY-MM-DD HH:NN:SS"
+        tms = [ss, nn, hr, dy]
+        fks = ['S', 'N', 'H']
+        ups = [60, 60, 24]
+        # adjust second/minute/hour values out of range
+        for i in range(3):
+            if tms[i] is not None and tms[i+1] is not None:
+                if tms[i] < 0:
+                    while tms[i] < 0:
+                        tms[i] += ups[i]
+                        tms[i+1] -= 1
+                elif tms[i] >= ups[i]:
+                    while tms[i] >= ups[i]:
+                        tms[i] -= ups[i]
+                        tms[i+1] += 1
+        sdt = self.fmtdate(yr, mn, tms[3], tofmt)  # use the adjusted day so an hour carry is not lost
+        # format second/minute/hour values
+        for i in range(3):
+            if tms[i] is not None:
+                ms = re.search(self.DATEFMTS[fks[i]], sdt, re.I)
+                if ms:
+                    fmt = ms.group(1)
+                    if len(fmt) == 2:
+                        sval = "{:02}".format(tms[i])
+                    else:
+                        sval = str(tms[i])  # renamed from 'str' to avoid shadowing the builtin
+                    sdt = re.sub(fmt, sval, sdt, 1)
+        return sdt
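+
+    # Behavior sketch (illustrative values): out-of-range fields carry upward:
+    #
+    #     fmtdatetime(2024, 1, 1, 12, 0, 75)  -> '2024-01-01 12:01:15'
+    #     fmtdatetime(2024, 1, 1, 23, 59, 61) -> '2024-01-02 00:00:01'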
"Month D, YYYY", default to "YYYY-MM-DD" + # Return: new formated date string + def fmtdate(self, yr, mn, dy, tofmt = None): + (y, m, d) = self.adjust_ymd(yr, mn, dy) + if not tofmt or tofmt == 'YYYY-MM-DD': return "{}-{:02}-{:02}".format(y, m, d) + if dy != None: + md = re.search(self.DATEFMTS['D'], tofmt, re.I) + if md: + fmt = md.group(1) # day + slen = len(fmt) + if slen > 2: # days of the year + for i in range(1, m): d += self.MDAYS[i] + sdy = "{:03}".format(d) + elif slen == 2: + sdy = "{:02}".format(d) + else: + sdy = str(d) + tofmt = re.sub(fmt, sdy, tofmt, 1) + if mn != None: + md = re.search(self.DATEFMTS['M'], tofmt, re.I) + if md: + fmt = md.group(1) # month + slen = len(fmt) + if slen == 2: + smn = "{:02}".format(m) + elif re.match(r'^mon', fmt, re.I): + smn = self.MNS[m-1] if slen == 3 else self.MONTHS[m-1] + if re.match(r'^Mo', fmt): + smn = smn.capitalize() + elif re.match(r'^MO', fmt): + smn = smn.upper() + else: + smn = str(m) + tofmt = re.sub(fmt, smn, tofmt, 1) + else: + md = re.search(self.DATEFMTS['Q'], tofmt, re.I) + if md: + fmt = md.group(1) # quarter + m = int((m+2)/3) + smn = "{:02}".format(m) if len(fmt) == 2 else str(m) + tofmt = re.sub(fmt, smn, tofmt, 1) + if yr != None: + md = re.search(self.DATEFMTS['Y'], tofmt, re.I) + if md: + fmt = md.group(1) # year + slen = len(fmt) + if slen == 2: + syr = "{:02}".format(y%100) + elif slen == 3: # decade + if y > 999: y = int(y/10) + syr = "{:03}".format(y) + else: + if re.search(r'^YY00', fmt, re.I): y = 100*int(y/100) # hundred years + syr = "{:04}".format(y) + tofmt = re.sub(fmt, syr, tofmt, 1) + else: + md = re.search(self.DATEFMTS['C'], tofmt, re.I) + if md: + fmt = md.group(1) # century + slen = len(fmt) + if y > 999: + y = 1 + int(y/100) + elif y > 99: + y = 1 + int(yr/10) + syr = "{:02}".format(y) + tofmt = re.sub(fmt, syr, tofmt, 1) + return tofmt + + # format given date and time into standard timestamp + @staticmethod + def join_datetime(sdate, stime): + if not sdate: return None + if not stime: stime = "00:00:00" + if not isinstance(sdate, str): sdate = str(sdate) + if not isinstance(stime, str): stime = str(stime) + if re.match(r'^\d:', stime): stime = '0' + stime + return "{} {}".format(sdate, stime) + fmttime = join_datetime + + # split a date or datetime into an array of [date, time] + @staticmethod + def date_and_time(sdt): + if not sdt: return [None, None] + if not isinstance(sdt, str): sdt = str(sdt) + adt = re.split(' ', sdt) + acnt = len(adt) + if acnt == 1: adt.append('00:00:00') + return adt + + # convert given date/time to unix epoch time; -1 if cannot + @staticmethod + def unixtime(stime): + pt = [0]*9 + if not isinstance(stime, str): stime = str(stime) + ms = re.match(r'^(\d+)-(\d+)-(\d+)', stime) + if ms: + for i in range(3): + pt[i] = int(ms.group(i+1)) + ms = re.search(r'^(\d+):(\d+):(\d+)$', stime) + if ms: + for i in range(3): + pt[i+3] = int(ms.group(i+1)) + pt[8] = -1 + return time.mktime(time.struct_time(pt)) + + # sdate: start date in form of 'YYYY' or 'YYYY-MM' or 'YYYY-MM-DD' + # edate: end date in form of 'YYYY' or 'YYYY-MM' or 'YYYY-MM-DD' + # Return: list of start and end dates in format of YYYY-MM-DD + def daterange(self, sdate, edate): + if sdate: + if not isinstance(sdate, str): sdate = str(sdate) + if not re.search(r'\d+-\d+-\d+', sdate): + ms = re.match(r'^(\W*)(\d+)-(\d+)(\W*)$', sdate) + if ms: + sdate = "{}{}-{}-01{}".format(ms.group(1), ms.group(2), ms.group(3), ms.group(4)) + else: + ms = re.match(r'^(\W*)(\d+)(\W*)$', sdate) + if ms: + sdate = 
"{}{}-01-01{}".format(ms.group(1), ms.group(2), ms.group(3)) + if edate: + if not isinstance(edate, str): edate = str(edate) + if not re.search(r'\d+-\d+-\d+', edate): + ms = re.match(r'^(\W*)(\d+)-(\d+)(\W*)$', edate) + if ms: + edate = "{}{}-{}-01{}".format(ms.group(1), ms.group(2), ms.group(3), ms.group(4)) + edate = self.adddate(edate, 0, 1, -1) + else: + ms = re.match(r'^(\W*)(\d+)(\W*)$', edate) + if ms: + edate = "{}{}-12-31{}".format(ms.group(1), ms.group(2), ms.group(3)) + return [sdate, edate] + + # date to datetime range + @staticmethod + def dtrange(dates): + date = dates[0] + if date: + if not isinstance(date, str): date = str(date) + dates[0] = date + ' 00:00:00' + date = dates[1] + if date: + if not isinstance(date, str): date = str(date) + dates[1] = date + ' 23:59:59' + return dates + + # sdate: starting date in format of 'YYYY-MM-DD' + # edate: ending date + # fmt: period format, ex. "YYYYMon-YYYMon", default to "YYYYMM-YYYYMM" + # Return: a string of formated period + def format_period(self, sdate, edate, fmt = None): + period = '' + if not fmt: + sfmt = efmt = "YYYYMM" + sep = '-' + else: + ms = re.match(r'^(.*)(\s*-\s*)(.*)$', fmt) + if ms: + (sfmt, sep, efmt) = ms.groups() + else: + sfmt = fmt + efmt = None + sep = '' + if sdate: + if not isinstance(sdate, str): sdate = str(sdate) + ms = re.search(r'(\d+)-(\d+)-(\d+)', sdate) + if ms: + (yr, mn, dy) = ms.groups() + period = self.fmtdate(int(yr), int(mn), int(dy), sfmt) + if sep: period += sep + if efmt: + if re.search(r'current', efmt, re.I): + period += efmt + elif edate: + if not isinstance(edate, str): edate = str(edate) + ms = re.search(r'(\d+)-(\d+)-(\d+)', edate) + if ms: + (yr, mn, dy) = ms.groups() + period += self.fmtdate(int(yr), int(mn), int(dy), efmt) + return period + + # dsid: given dataset id in form of dsNNN(.|)N, NNNN.N or [a-z]NNNNNN + # newid: True to format a new dsid; defaults to False for now + # returns a new or old dsid according to the newid option + def format_dataset_id(self, dsid, newid = None, logact = None): + if newid is None: newid = self.PGLOG['NEWDSID'] + if logact is None: logact = self.LGEREX + dsid = str(dsid) + ms = re.match(r'^([a-z])(\d\d\d)(\d\d\d)$', dsid) + if ms: + ids = list(ms.groups()) + if ids[0] not in self.PGLOG['DSIDCHRS']: + if logact: self.pglog("{}: dsid leading character must be '{}'".format(dsid, self.PGLOG['DSIDCHRS']), logact) + return dsid + if newid: return dsid + if ids[2][:2] != '00': + if logact: self.pglog(dsid + ": Cannot convert new dsid to old format", logact) + return dsid + return 'ds{}.{}'.format(ids[1], ids[2][2]) + ms = re.match(r'^ds(\d\d\d)(\.|)(\d)$', dsid, re.I) + if not ms: ms = re.match(r'^(\d\d\d)(\.)(\d)$', dsid) + if ms: + if newid: + return "d{}00{}".format(ms.group(1), ms.group(3)) + else: + return 'ds{}.{}'.format(ms.group(1), ms.group(3)) + if logact: self.pglog(dsid + ": invalid dataset id", logact) + return dsid + + # dsid: given dataset id in form of dsNNN(.|)N, NNNN.N or [a-z]NNNNNN + # newid: True to format a new dsid; defaults to False for now + # returns a new or old metadata dsid according to the newid option + def metadata_dataset_id(self, dsid, newid = None, logact = None): + if newid is None: newid = self.PGLOG['NEWDSID'] + if logact is None: logact = self.LGEREX + ms = re.match(r'^([a-z])(\d\d\d)(\d\d\d)$', dsid) + if ms: + ids = list(ms.groups()) + if ids[0] not in self.PGLOG['DSIDCHRS']: + if logact: self.pglog("{}: dsid leading character must be '{}'".format(dsid, self.PGLOG['DSIDCHRS']), logact) + return dsid + if 
+
+    # dsid: given dataset id in form of dsNNN(.|)N, NNNN.N or [a-z]NNNNNN
+    # newid: True to format a new dsid; defaults to False for now
+    # returns a new or old metadata dsid according to the newid option
+    def metadata_dataset_id(self, dsid, newid = None, logact = None):
+        if newid is None: newid = self.PGLOG['NEWDSID']
+        if logact is None: logact = self.LGEREX
+        ms = re.match(r'^([a-z])(\d\d\d)(\d\d\d)$', dsid)
+        if ms:
+            ids = list(ms.groups())
+            if ids[0] not in self.PGLOG['DSIDCHRS']:
+                if logact: self.pglog("{}: dsid leading character must be '{}'".format(dsid, self.PGLOG['DSIDCHRS']), logact)
+                return dsid
+            if newid: return dsid
+            if ids[2][:2] != '00':
+                if logact: self.pglog(dsid + ": Cannot convert new dsid to old format", logact)
+                return dsid
+            return '{}.{}'.format(ids[1], ids[2][2])
+        ms = re.match(r'^ds(\d\d\d)(\.|)(\d)$', dsid)
+        if not ms: ms = re.match(r'^(\d\d\d)(\.)(\d)$', dsid)
+        if ms:
+            if newid:
+                return "d{}00{}".format(ms.group(1), ms.group(3))
+            else:
+                return '{}.{}'.format(ms.group(1), ms.group(3))
+        if logact: self.pglog(dsid + ": invalid dataset id", logact)
+        return dsid
+
+    # idstr: string holding a dsid in form of dsNNN(.|)N, NNNN.N or [a-z]NNNNNN
+    # and find it according to the flag value O (Old), N (New) or B (Both) formats
+    # returns dsid if found in given id string; None otherwise
+    def find_dataset_id(self, idstr, flag = 'B', logact = 0):
+        if flag in 'NB':
+            ms = re.search(r'(^|\W)(([a-z])\d{6})($|\D)', idstr)
+            if ms and ms.group(3) in self.PGLOG['DSIDCHRS']: return ms.group(2)
+        if flag in 'OB':
+            ms = re.search(r'(^|\W)(ds\d\d\d(\.|)\d)($|\D)', idstr)
+            if not ms: ms = re.search(r'(^|\W)(\d\d\d\.\d)($|\D)', idstr)
+            if ms: return ms.group(2)
+        if logact: self.pglog("{} : No valid dsid found for flag {}".format(idstr, flag), logact)
+        return None
+
+    # find and convert all found dsids according to old/new dsids
+    # for newid = False/True
+    def convert_dataset_ids(self, idstr, newid = None, logact = 0):
+        if newid is None: newid = self.PGLOG['NEWDSID']
+        flag = 'O' if newid else 'N'
+        cnt = 0
+        if idstr:
+            while True:
+                dsid = self.find_dataset_id(idstr, flag = flag)
+                if not dsid: break
+                ndsid = self.format_dataset_id(dsid, newid = newid, logact = logact)
+                if ndsid == dsid: break  # unconvertible id; stop to avoid looping forever
+                idstr = idstr.replace(dsid, ndsid)
+                cnt += 1
+        return (idstr, cnt)
+
+    # records: dict of multiple records,
+    # idx: index of the records to return
+    # Return: a dict of the idx record out of records
+    @staticmethod
+    def onerecord(records, idx):
+        record = {}
+        for fld in records:
+            record[fld] = records[fld][idx]
+        return record
+
+    # records: dict of multiple records,
+    # record: record to add
+    # idx: index of the record to add
+    # Return: add a record to a dict of lists
+    @staticmethod
+    def addrecord(records, record, idx):
+        if records is None: records = {}  # initialize dict of lists structure
+        if not records:
+            for key in record:
+                records[key] = []
+        for key in record:
+            slen = len(records[key])
+            if idx < slen:
+                records[key][idx] = record[key]
+            else:
+                while idx > slen:
+                    records[key].append(None)
+                    slen += 1
+                records[key].append(record[key])
+        return records
+
+    # convert a hash with multiple rows from pgmget() to an array of hashes
+    @staticmethod
+    def hash2array(hrecs, hkeys = None):
+        if not hkeys: hkeys = list(hrecs)
+        acnt = len(hrecs[hkeys[0]]) if hrecs and hkeys[0] in hrecs else 0
+        arecs = [None]*acnt
+        for i in range(acnt):
+            arec = {}
+            for hkey in hkeys: arec[hkey] = hrecs[hkey][i]
+            arecs[i] = arec
+        return arecs
+
+    # convert an array of hashes to a hash with multiple rows for pgmget()
+    @staticmethod
+    def array2hash(arecs, hkeys = None):
+        hrecs = {}
+        acnt = len(arecs) if arecs else 0
+        if acnt > 0:
+            if not hkeys: hkeys = list(arecs[0])
+            for hkey in hkeys:
+                hrecs[hkey] = [None]*acnt
+                for i in range(acnt): hrecs[hkey][i] = arecs[i][hkey]
+        return hrecs
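+
+    # Shape sketch (illustrative data): hash2array()/array2hash() convert between
+    # the column-oriented dict returned by pgmget() and a list of row dicts:
+    #
+    #     hash2array({'wid' : [1, 2], 'wfile' : ['a.nc', 'b.nc']})
+    #         -> [{'wid' : 1, 'wfile' : 'a.nc'}, {'wid' : 2, 'wfile' : 'b.nc'}]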
+
+    # records: dict of multiple records,
+    # opt: 0 - column count,
+    #      1 - row count,
+    #      2 - both
+    # Return: a single number or a list of two depending on the given opt
+    @staticmethod
+    def hashcount(records, opt = 0):
+        ret = [0, 0]
+        if records:
+            clen = len(records)
+            if opt == 0 or opt == 2:
+                ret[0] = clen
+            if opt == 1 or opt == 2:
+                ret[1] = len(next(iter(records.values())))
+        return ret if opt == 2 else ret[opt]
+
+    # adict: dict a
+    # bdict: dict b
+    # default: default value for missing entries
+    # unique: unique join if set
+    # Return: the joined dict records with the default value filled in for missing ones;
+    # for a unique join, a record from bdict is appended only if not already contained in adict
+    @staticmethod
+    def joinhash(adict, bdict, default = None, unique = None):
+        if not bdict: return adict
+        if not adict: return bdict
+        akeys = list(adict.keys())
+        bkeys = list(bdict.keys())
+        acnt = len(adict[akeys[0]])
+        bcnt = len(bdict[bkeys[0]])
+        ckeys = []  # common keys for unique joins
+        # check and assign default value for missing keys in adict
+        for bkey in bkeys:
+            if bkey in akeys:
+                if unique and bkey not in ckeys: ckeys.append(bkey)
+            else:
+                adict[bkey] = [default]*acnt
+        # check and assign default value for missing keys in bdict
+        for akey in akeys:
+            if akey in bkeys:
+                if unique and akey not in ckeys: ckeys.append(akey)
+            else:
+                bdict[akey] = [default]*bcnt
+        if unique:  # append bdict
+            kcnt = len(ckeys)
+            for i in range(bcnt):
+                j = 0
+                while j < acnt:
+                    k = 0
+                    for ckey in ckeys:
+                        if PgUtil.pgcmp(adict[ckey][j], bdict[ckey][i]): break
+                        k += 1
+                    if k >= kcnt: break
+                    j += 1
+                if j >= acnt:
+                    for key in adict:
+                        adict[key].append(bdict[key][i])
+        else:
+            for key in adict:
+                adict[key].extend(bdict[key])
+        return adict
+
+    # lst1: list 1
+    # lst2: list 2
+    # unique: unique join if set
+    # Return: the joined list
+    @staticmethod
+    def joinarray(lst1, lst2, unique = None):
+        if not lst2: return lst1
+        if not lst1: return lst2
+        cnt1 = len(lst1)
+        cnt2 = len(lst2)
+        if unique:
+            for i in range(cnt2):  # iterate over indices; looping over a bare int raises TypeError
+                found = False
+                for j in range(cnt1):
+                    if PgUtil.pgcmp(lst1[j], lst2[i]) == 0:  # append only if no identical item exists
+                        found = True
+                        break
+                if not found:
+                    lst1.append(lst2[i])
+        else:
+            lst1.extend(lst2)
+        return lst1
+
+    # Function: crosshash(ahash, bhash)
+    # Return: the cross-joined hash records
+    @staticmethod
+    def crosshash(ahash, bhash):
+        if not bhash: return ahash
+        if not ahash: return bhash
+        akeys = list(ahash.keys())
+        bkeys = list(bhash.keys())
+        acnt = len(ahash[akeys[0]])
+        bcnt = len(bhash[bkeys[0]])
+        rets = {}
+        for key in akeys: rets[key] = []
+        for key in bkeys: rets[key] = []
+        for i in range(acnt):
+            for j in range(bcnt):
+                for key in akeys: rets[key].append(ahash[key][i])
+                for key in bkeys: rets[key].append(bhash[key][j])
+        return rets
+
+    # strip database and table names from a field name
+    @staticmethod
+    def strip_field(field):
+        ms = re.search(r'\.([^\.]+)$', field)
+        if ms: field = ms.group(1)
+        return field
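+
+    # Behavior sketch for joinhash() (illustrative data): missing columns are
+    # padded with the default value before the rows are appended:
+    #
+    #     joinhash({'a' : [1, 2]}, {'b' : [3]})
+    #         -> {'a' : [1, 2, None], 'b' : [None, None, 3]}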
+
+    # pgrecs: dict obtained from pgmget()
+    # flds: list of single-letter fields to be sorted on
+    # hash: table dict for pre-defined fields
+    # patterns: optional list of temporal patterns for order fields
+    # Return: a sorted dict list
+    def sorthash(self, pgrecs, flds, hash, patterns = None):
+        fcnt = len(flds)  # count of fields to be sorted on
+        # set sorting order, descending (-1) or ascending (1)
+        # get the full field names to be sorted on
+        desc = [1]*fcnt
+        fields = []
+        nums = [1]*fcnt  # initialize each column as numerical
+        for i in range(fcnt):
+            if flds[i].islower(): desc[i] = -1
+            fld = self.strip_field(hash[flds[i].upper()][1])
+            fields.append(fld)
+        count = len(pgrecs[fields[0]])  # row count of pgrecs
+        if count < 2: return pgrecs  # no sorting needed
+        pcnt = len(patterns) if patterns else 0
+        # prepare the dict list for sorting
+        srecs = []
+        for i in range(count):
+            pgrec = self.onerecord(pgrecs, i)
+            rec = []
+            for j in range(fcnt):
+                if j < pcnt and patterns[j]:
+                    # get the temporal part of each value matching the pattern
+                    val = self.format_date(pgrec[fields[j]], "YYYYMMDDHH", patterns[j])
+                else:
+                    # sort on the whole value if no pattern given
+                    val = pgrec[fields[j]]
+                if nums[j]: nums[j] = self.pgnum(val)
+                rec.append(val)
+            rec.append(i)  # add a column to cache the row index
+            srecs.append(rec)
+        srecs = self.quicksort(srecs, 0, count-1, desc, fcnt, nums)
+        # sort pgrecs according to the cached row index column in ordered srecs
+        rets = {}
+        for fld in pgrecs:
+            rets[fld] = []
+        for i in range(count):
+            pgrec = self.onerecord(pgrecs, srecs[i][fcnt])
+            for fld in pgrecs:
+                rets[fld].append(pgrec[fld])
+        return rets
+
+    # Return: the number of days between date1 and date2
+    @staticmethod
+    def diffdate(date1, date2):
+        ut1 = ut2 = 0
+        if date1: ut1 = PgUtil.unixtime(date1)
+        if date2: ut2 = PgUtil.unixtime(date2)
+        return round((ut1 - ut2)/86400)  # 24*60*60
+
+    # Return: the number of seconds between time1 and time2
+    @staticmethod
+    def difftime(time1, time2):
+        ut1 = ut2 = 0
+        if time1: ut1 = PgUtil.unixtime(time1)
+        if time2: ut2 = PgUtil.unixtime(time2)
+        return round(ut1 - ut2)
+    diffdatetime = difftime
+
+    # Return: the number of days between date and '1970-01-01 00:00:00'
+    @staticmethod
+    def get_days(cdate):
+        return PgUtil.diffdate(str(cdate), '1970-01-01')
+
+    # Function: get_month_days(date)
+    # Return: the number of days in given month
+    @staticmethod
+    def get_month_days(cdate):
+        ms = re.match(r'^(\d+)-(\d+)', str(cdate))
+        if ms:
+            yr = int(ms.group(1))
+            mn = int(ms.group(2))
+            return calendar.monthrange(yr, mn)[1]
+        else:
+            return 0
+
+    # Function: validate_date(date)
+    # Return: a date in format of YYYY-MM-DD where all year/month/day values are validated
+    @staticmethod
+    def validate_date(cdate):
+        ms = re.match(r'^(\d+)-(\d+)-(\d+)', str(cdate))
+        if ms:
+            (yr, mn, dy) = (int(m) for m in ms.groups())
+            if yr < 1000:
+                yr += 2000
+            elif yr > 9999:
+                yr %= 10000
+            if mn < 1:
+                mn = 1
+            elif mn > 12:
+                mn = 12
+            md = calendar.monthrange(yr, mn)[1]
+            if dy < 1:
+                dy = 1
+            elif dy > md:
+                dy = md
+            cdate = '{}-{:02d}-{:02d}'.format(yr, mn, dy)
+        return cdate
+
+    # Function: get_date(days)
+    # Return: the date in format of "YYYY-MM-DD" for given number of days
+    # from '1970-01-01 00:00:00'
+    def get_date(self, days):
+        return self.adddate('1970-01-01', 0, 0, int(days))
+
+    # compare date/hour and return the hour difference
+    @staticmethod
+    def diffdatehour(date1, hour1, date2, hour2):
+        if hour1 is None: hour1 = 23
+        if hour2 is None: hour2 = 23
+        return (hour1 - hour2) + 24*PgUtil.diffdate(date1, date2)
+
+    # hour difference between GMT and local time
+    def diffgmthour(self):
+        tg = time.gmtime()
+        tl = time.localtime()
+        dg = self.fmtdate(tg[0], tg[1], tg[2])
+        dl = self.fmtdate(tl[0], tl[1], tl[2])
+        hg = tg[3]
+        hl = tl[3]
+        return self.diffdatehour(dg, hg, dl, hl)
+
+    # compare date and time (if given) and return 1, 0 or -1
+    @staticmethod
+    def cmptime(date1, time1, date2, time2):
+        stime1 = PgUtil.join_datetime(date1, time1)
+        stime2 = PgUtil.join_datetime(date2, time2)
+        return PgUtil.pgcmp(stime1, stime2)
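+
+    # Behavior sketch for validate_date() (illustrative values): out-of-range
+    # month and day fields are clamped to valid values:
+    #
+    #     validate_date('2023-02-30') -> '2023-02-28'
+    #     validate_date('2023-13-05') -> '2023-12-05'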
+
+    # cdate: the original date in format of 'YYYY-MM-DD',
+    # mf: the number of month fractions to add
+    # nf: number of fractions of a month
+    # Return: new date
+    def addmonth(self, cdate, mf, nf = 1):
+        if not mf: return cdate
+        if not nf or nf < 2: return self.adddate(cdate, 0, mf, 0)
+        ms = re.match(r'^(\d+)-(\d+)-(\d+)$', cdate)
+        if ms:
+            (syr, smn, sdy) = ms.groups()
+            yr = int(syr)
+            mn = int(smn)
+            ody = int(sdy)
+            dy = 0  # set to end of previous month
+            ndy = int(30/nf)  # number of days in each fraction
+            while ody > ndy:
+                dy += ndy
+                ody -= ndy
+            dy += mf * ndy
+            if mf > 0:
+                while dy >= 30:
+                    dy -= 30
+                    mn += 1
+            else:
+                while dy < 0:
+                    dy += 30
+                    mn -= 1
+            dy += ody
+            cdate = self.fmtdate(yr, mn, dy)
+        return cdate
+
+    # add yr years & mn months to yearmonth ym in format YYYYMM
+    @staticmethod
+    def addyearmonth(ym, yr, mn):
+        if yr is None: yr = 0
+        if mn is None: mn = 0
+        ms = re.match(r'^(\d\d\d\d)(\d\d)$', ym)
+        if ms:
+            (syr, smn) = ms.groups()
+            yr += int(syr)  # add to the parsed values instead of overwriting the arguments
+            mn += int(smn)
+            if mn < 1:  # month 0 must also roll back into the previous year
+                while mn < 1:
+                    yr -= 1
+                    mn += 12
+            else:
+                while mn > 12:
+                    yr += 1
+                    mn -= 12
+            ym = "{:04}{:02}".format(yr, mn)
+        return ym
+
+    # set the number of days in February for a leap year according to PGLOG['NOLEAP']
+    def set_leap_mdays(self, year):
+        if not self.PGLOG['NOLEAP'] and calendar.isleap(year):
+            self.MDAYS[0] = 366
+            self.MDAYS[2] = 29
+            ret = 1
+        else:
+            self.MDAYS[0] = 365
+            self.MDAYS[2] = 28
+            ret = 0
+        return ret
+
+    # wrap on calendar.isleap()
+    is_leapyear = calendar.isleap
+
+    # return 1 if it is the end of the month
+    def is_end_month(self, yr, mn, dy):
+        self.set_leap_mdays(yr)
+        return 1 if dy == self.MDAYS[mn] else 0
+
+    # adjust the year, month and day values that are out of range
+    def adjust_ymd(self, yr, mn, dy):
+        if yr is None: yr = 1970
+        if mn is None: mn = 1
+        if dy is None: dy = 1
+        while True:
+            if mn > 12:
+                yr += 1
+                mn -= 12
+                continue
+            elif mn < 1:
+                yr -= 1
+                mn += 12
+                continue
+            self.set_leap_mdays(yr)
+            if dy < 1:
+                if dy < -self.MDAYS[0]:
+                    yr -= 1
+                    dy += self.MDAYS[0]
+                else:
+                    mn -= 1
+                    if mn < 1:
+                        yr -= 1
+                        mn += 12
+                    dy += self.MDAYS[mn]
+                continue
+            elif dy > self.MDAYS[mn]:
+                if dy > self.MDAYS[0]:
+                    dy -= self.MDAYS[0]
+                    yr += 1
+                else:
+                    dy -= self.MDAYS[mn]
+                    mn += 1
+                continue
+            break
+        return [yr, mn, dy]
+
+    # cdate: the original date in format of 'YYYY-MM-DD',
+    # yr: the number of years to add/subtract from cdate for positive/negative value,
+    # mn: the number of months to add/subtract from cdate for positive/negative value,
+    # dy: the number of days to add/subtract from cdate for positive/negative value
+    # Return: new date
+    def adddate(self, cdate, yr, mn = 0, dy = 0, tofmt = None):
+        if not cdate: return cdate
+        if not isinstance(cdate, str): cdate = str(cdate)
+        if yr is None:
+            yr = 0
+        elif isinstance(yr, str):
+            yr = int(yr)
+        if mn is None:
+            mn = 0
+        elif isinstance(mn, str):
+            mn = int(mn)
+        if dy is None:
+            dy = 0
+        elif isinstance(dy, str):
+            dy = int(dy)
+        ms = re.search(r'(\d+)-(\d+)-(\d+)', cdate)
+        if not ms: return cdate  # non-standard date format
+        (nyr, nmn, ndy) = (int(m) for m in ms.groups())
+        mend = 0
+        if mn and ndy > 27: mend = self.is_end_month(nyr, nmn, ndy)
+        if yr: nyr += yr
+        if mn:
+            (nyr, nmn, tdy) = self.adjust_ymd(nyr, nmn+mn+1, 0)
+            if mend: ndy = tdy
+        if dy: ndy += dy
+        return self.fmtdate(nyr, nmn, ndy, tofmt)
+    addNoLeapDate = adddate
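+
+    # Behavior sketch for adddate() (illustrative values): end-of-month is
+    # preserved when adding months to a day past the 27th:
+    #
+    #     adddate('2024-01-31', 0, 1)    -> '2024-02-29'
+    #     adddate('2024-02-29', 0, 0, 1) -> '2024-03-01'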
+
+    # add given hours to the initial date and time
+    def addhour(self, sdate, stime, nhour):
+        if nhour and isinstance(nhour, str): nhour = int(nhour)
+        if sdate and not isinstance(sdate, str): sdate = str(sdate)
+        if stime and not isinstance(stime, str): stime = str(stime)
+        if not nhour: return [sdate, stime]
+        hr = dy = 0
+        ms = re.match(r'^(\d+)', stime)
+        if ms:
+            shr = ms.group(1)
+            hr = int(shr) + nhour
+            if hr < 0:
+                while hr < 0:
+                    dy -= 1
+                    hr += 24
+            else:
+                while hr > 23:
+                    dy += 1
+                    hr -= 24
+            shour = "{:02}".format(hr)
+            if shr != shour: stime = re.sub(shr, shour, stime, 1)
+        if dy: sdate = self.adddate(sdate, 0, 0, dy)
+        return [sdate, stime]
+
+    # add given years, months, days and hours to the initial date and hour
+    def adddatehour(self, sdate, nhour, yr, mn, dy, hr = 0):
+        if sdate and not isinstance(sdate, str): sdate = str(sdate)
+        if hr:
+            if nhour is not None:
+                if isinstance(nhour, str): nhour = int(nhour)
+                hr += nhour
+            if hr < 0:
+                while hr < 0:
+                    dy -= 1
+                    hr += 24
+            else:
+                while hr > 23:
+                    dy += 1
+                    hr -= 24
+            if nhour is not None: nhour = hr
+        if yr or mn or dy: sdate = self.adddate(sdate, yr, mn, dy)
+        return [sdate, nhour]
+
+    # add given yyyy, mm, dd, hh, nn, ss to sdatetime
+    # if nf, add fraction of month only
+    def adddatetime(self, sdatetime, yy, mm, dd, hh, nn, ss, nf = 0):
+        if sdatetime and not isinstance(sdatetime, str): sdatetime = str(sdatetime)
+        (sdate, stime) = re.split(' ', sdatetime)
+        if hh or nn or ss: (sdate, stime) = self.addtime(sdate, stime, hh, nn, ss)
+        if nf:
+            sdate = self.addmonth(sdate, mm, nf)
+            mm = 0
+        if yy or mm or dd: sdate = self.adddate(sdate, yy, mm, dd)
+        return "{} {}".format(sdate, stime)
+
+    # add given hours, minutes and seconds to the initial date and time
+    def addtime(self, sdate, stime, h, m, s):
+        if sdate and not isinstance(sdate, str): sdate = str(sdate)
+        if stime and not isinstance(stime, str): stime = str(stime)
+        ups = (60, 60, 24)
+        tms = [0, 0, 0, 0]  # (sec, min, hour, day)
+        if s: tms[0] += s
+        if m: tms[1] += m
+        if h: tms[2] += h
+        if stime:
+            ms = re.match(r'^(\d+):(\d+):(\d+)$', stime)
+            if ms:
+                tms[2] += int(ms.group(1))
+                tms[1] += int(ms.group(2))
+                tms[0] += int(ms.group(3))
+        for i in range(3):
+            if tms[i] < 0:
+                while tms[i] < 0:
+                    tms[i] += ups[i]
+                    tms[i+1] -= 1
+            elif tms[i] >= ups[i]:
+                while tms[i] >= ups[i]:
+                    tms[i] -= ups[i]
+                    tms[i+1] += 1
+        stime = "{:02}:{:02}:{:02}".format(tms[2], tms[1], tms[0])
+        if tms[3]: sdate = self.adddate(sdate, 0, 0, tms[3])
+        return [sdate, stime]
+
+    # add time interval array to datetime
+    # opt = -1 - minus, 0 - begin time, 1 - add (default)
+    def addintervals(self, sdatetime, intv, opt = 1):
+        if not isinstance(sdatetime, str): sdatetime = str(sdatetime)
+        if not intv: return sdatetime
+        tv = [0]*7
+        i = 0
+        for v in intv:
+            tv[i] = v
+            i += 1
+        # assume the given datetime is end of the current interval;
+        # add one second to set it to beginning of the next one
+        if opt == 0: sdatetime = self.adddatetime(sdatetime, 0, 0, 0, 0, 0, 1)
+        if opt < 1:  # negative intervals for minus
+            for i in range(6):
+                if tv[i]: tv[i] = -tv[i]
+        return self.adddatetime(sdatetime, tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6])
+
+    # adjust end date to the specified days for frequency of year/month/week;
+    # end of period if days == 0
+    # nf - number of fractions of a month, for unit of 'M' only
+    def enddate(self, sdate, days, unit, nf = 0):
+        if sdate and not isinstance(sdate, str): sdate = str(sdate)
+        if days and isinstance(days, str): days = int(days)
+        if not (unit and unit in 'YMW'): return sdate
+        if unit == 'Y':
+            ms = re.match(r'^(\d+)', sdate)
+            if ms:
+                yr = int(ms.group(1))
+                if days:
+                    mn = 1
+                    dy = days
+                else:
+                    mn = 12
+                    dy = 31
+                sdate = self.fmtdate(yr, mn, dy)
+        elif unit == 'M':
+            ms = re.match(r'^(\d+)-(\d+)-(\d+)', sdate)
+            if ms:
+                (yr, mn, dy) = (int(m) for m in ms.groups())
+            else:
+                ms = re.match(r'^(\d+)-(\d+)', sdate)
+                if ms:
+                    (yr, mn) = (int(m) for m in ms.groups())
+                    dy = 1
+                else:
+                    return sdate
+            if not nf or nf == 1:
+                nd = days if days else calendar.monthrange(yr, mn)[1]
+                if nd != dy: sdate = self.fmtdate(yr, mn, nd)
+            else:
+                val = int(30/nf)
+                if dy >= 28:
+                    mf = nf
+                else:
+                    mf = int(dy/val)
+                    if (mf*val) < dy: mf += 1
+                if days:
+                    dy = (mf-1)*val + days
+                elif mf < nf:
+                    dy = mf*val
+                else:
+                    mn += 1
+                    dy = 0
+                sdate = self.fmtdate(yr, mn, dy)
+        elif unit == 'W':
+            val = self.get_weekday(sdate)
+            if days != val: sdate = self.adddate(sdate, 0, 0, days-val)
+        return sdate
+
+    # adjust end time to the specified h/n/s for frequency of hour/minute/second
+    def endtime(self, stime, unit):
+        if stime and not isinstance(stime, str): stime = str(stime)
+        if not (unit and unit in 'HNS'): return stime
+        if stime:
+            tm = self.split_datetime(stime)  # default separator splits HH:MM:SS on non-digits
+        else:
+            tm = [0, 0, 0]
+        if unit == 'H':
+            tm[1] = tm[2] = 59
+        elif unit == 'N':
+            tm[2] = 59
+        elif unit != 'S':
+            tm[0] = 23
+            tm[1] = tm[2] = 59
+        return "{:02}:{:02}:{:02}".format(tm[0], tm[1], tm[2])
+
+    # adjust end datetime to the specified value for frequency of year/month/week/day/hour/minute/second
+    def enddatetime(self, sdatetime, unit, days = 0, nf = 0):
+        if sdatetime and not isinstance(sdatetime, str): sdatetime = str(sdatetime)
+        if not (unit and unit in 'YMWDHNS'): return sdatetime
+        (sdate, stime) = re.split(' ', sdatetime)
+        if unit in 'HNS':
+            stime = self.endtime(stime, unit)
+        else:
+            sdate = self.enddate(sdate, days, unit, nf)
+        return "{} {}".format(sdate, stime)
+
+    # get the column string length dynamically
+    @staticmethod
+    def get_column_length(colname, values):
+        clen = len(colname) if colname else 2  # initial column length as the length of the column title
+        for val in values:
+            if val is None: continue
+            sval = str(val)
+            if sval and not re.search(r'\n', sval):
+                slen = len(sval)
+                if slen > clen: clen = slen
+        return clen
+
+    # Function: hour2time()
+    # Return: time string in format of date HH:MM:SS
+    @staticmethod
+    def hour2time(sdate, nhour, endtime = 0):
+        if sdate and not isinstance(sdate, str): sdate = str(sdate)
+        stime = "{:02}:".format(nhour)
+        if endtime:
+            stime += "59:59"
+        else:
+            stime += "00:00"
+        if sdate:
+            return "{} {}".format(sdate, stime)
+        else:
+            return stime
+
+    # Function: time2hour()
+    # Return: list of date and hour
+    @staticmethod
+    def time2hour(stime):
+        sdate = nhour = None
+        times = stime.split(' ')
+        if len(times) == 2:
+            sdate = times[0]
+            stime = times[1]
+        ms = re.match(r'^(\d+)', stime)
+        if ms: nhour = int(ms.group(1))
+        return [sdate, nhour]
+
+    # get all the column widths
+    @staticmethod
+    def all_column_widths(pgrecs, flds, tdict):
+        colcnt = len(flds)
+        lens = [0]*colcnt
+        for i in range(colcnt):
+            fld = flds[i]
+            if fld not in tdict: continue
+            field = PgUtil.strip_field(tdict[fld][1])
+            lens[i] = PgUtil.get_column_length(None, pgrecs[field])
+        return lens
+
+    # check a given value; return 1 if numeric, 0 otherwise
+    @staticmethod
+    def pgnum(val):
+        if not isinstance(val, str): val = str(val)
+        ms = re.match(r'^\-{0,1}(\d+|\d+\.\d*|\d*\.\d+)([eE]\-{0,1}\d+)*$', val)
+        return 1 if ms else 0
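+
+    # Behavior sketch for pgnum() (illustrative values):
+    #
+    #     pgnum('-12.5e3') -> 1
+    #     pgnum('.75')     -> 1
+    #     pgnum('12a')     -> 0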
+
+    # Function: pgcmp(val1, val2)
+    # Return: 0 if both are empty or the two values are identical; -1 if val1 < val2; otherwise 1
+    @staticmethod
+    def pgcmp(val1, val2, ignorecase = 0, num = 0):
+        if val1 is None:
+            if val2 is None:
+                return 0
+            else:
+                return -1
+        elif val2 is None:
+            return 1
+        typ1 = type(val1)
+        typ2 = type(val2)
+        if typ1 != typ2:
+            if num:
+                if typ1 is str:
+                    typ1 = int
+                    val1 = int(val1)
+                if typ2 is str:
+                    typ2 = int
+                    val2 = int(val2)
+            else:
+                if typ1 != str:
+                    typ1 = str
+                    val1 = str(val1)
+                if typ2 != str:
+                    typ2 = str
+                    val2 = str(val2)
+        if typ1 is str:
+            if num:
+                if PgUtil.pgnum(val1) and PgUtil.pgnum(val2):
+                    val1 = int(val1)
+                    val2 = int(val2)
+            elif ignorecase:
+                val1 = val1.lower()
+                val2 = val2.lower()
+        if val1 > val2:
+            return 1
+        elif val1 < val2:
+            return -1
+        else:
+            return 0
+
+    # infiles: initial file list
+    # Return: final file list with all the subdirectories expanded
+    @staticmethod
+    def recursive_files(infiles):
+        ofiles = []
+        for file in infiles:
+            if op.isdir(file):
+                ofiles.extend(PgUtil.recursive_files(glob.glob(file + "/*")))
+            else:
+                ofiles.append(file)
+        return ofiles
+
+    # lidx: lower index limit (including)
+    # hidx: higher index limit (excluding)
+    # key: string value to be searched
+    # list: a sorted list where the key is searched
+    # Return: index if found; -1 otherwise
+    @staticmethod
+    def asearch(lidx, hidx, key, list):
+        ret = -1
+        if (hidx - lidx) < 11:  # use linear search for less than 11 items
+            for midx in range(lidx, hidx):
+                if key == list[midx]:
+                    ret = midx
+                    break
+        else:
+            midx = int((lidx + hidx)/2)  # integer midpoint; a float cannot index a list
+            if key == list[midx]:
+                ret = midx
+            elif key < list[midx]:
+                ret = PgUtil.asearch(lidx, midx, key, list)
+            else:
+                ret = PgUtil.asearch(midx + 1, hidx, key, list)
+        return ret
+
+    # lidx: lower index limit (including)
+    # hidx: higher index limit (excluding)
+    # key: string value to be searched
+    # list: a sorted list of patterns the key is searched against
+    # Return: index if found; -1 otherwise
+    @staticmethod
+    def psearch(lidx, hidx, key, list):
+        ret = -1
+        if (hidx - lidx) < 11:  # use linear search for less than 11 items
+            for midx in range(lidx, hidx):
+                if re.search(list[midx], key):
+                    ret = midx
+                    break
+        else:
+            midx = int((lidx + hidx)/2)
+            if re.search(list[midx], key):
+                ret = midx
+            elif key < list[midx]:
+                ret = PgUtil.psearch(lidx, midx, key, list)
+            else:
+                ret = PgUtil.psearch(midx + 1, hidx, key, list)
+        return ret
+
+    # quicksort for pattern
+    @staticmethod
+    def quicksort(srecs, lo, hi, desc, cnt, nums = None):
+        i = lo
+        j = hi
+        mrec = srecs[int((lo+hi)/2)]
+        while True:
+            while PgUtil.cmp_records(srecs[i], mrec, desc, cnt, nums) < 0: i += 1
+            while PgUtil.cmp_records(srecs[j], mrec, desc, cnt, nums) > 0: j -= 1
+            if i <= j:
+                if i < j:
+                    tmp = srecs[i]
+                    srecs[i] = srecs[j]
+                    srecs[j] = tmp
+                i += 1
+                j -= 1
+            if i > j: break
+        # recursion
+        if lo < j: srecs = PgUtil.quicksort(srecs, lo, j, desc, cnt, nums)
+        if i < hi: srecs = PgUtil.quicksort(srecs, i, hi, desc, cnt, nums)
+        return srecs
+
+    # compare two arrays
+    @staticmethod
+    def cmp_records(arec, brec, desc, cnt, nums):
+        for i in range(cnt):
+            num = nums[i] if nums else 0
+            ret = PgUtil.pgcmp(arec[i], brec[i], 0, num)
+            if ret != 0:
+                return (ret*desc[i])
+        return 0  # identical records
+
+    # format one floating point value
+    @staticmethod
+    def format_float_value(val, precision = 2):
+        units = ('B', 'KB', 'MB', 'GB', 'TB', 'PB')
+        if val is None:
+            return ''
+        elif not isinstance(val, int):
+            val = int(val)
+        idx = 0
+        while val >= 1000 and idx < 5:
+            val /= 1000
+            idx += 1
+        return "{:.{}f}{}".format(val, precision, units[idx])
+
+    # check whether a file is an ASCII text one;
+    # return 1 if yes, 0 if not, or -1 if the file is not checkable
+    @staticmethod
+    def is_text_file(fname, blocksize = 256, threshold = 0.1):
+        # File doesn't exist or is not a regular file
+        if not op.exists(fname) or not op.isfile(fname): return -1
+        if op.getsize(fname) == 0: return 1  # Empty files are considered text
+        try:
+            buffer = None
+            with open(fname, 'rb') as f:
+                buffer = f.read(blocksize)
+            # Check for null bytes (a strong indicator of a binary file)
+            if not buffer or b'\0' in buffer: return 0
+            text_characters = (
+                b'\t\n\r\f\v' +          # Whitespace characters
+                bytes(range(32, 127))    # Printable ASCII characters
+            )
+            non_text_count = 0
+            for byte in buffer:
+                if byte not in text_characters:
+                    non_text_count += 1  # Count non-text characters
+            # If a significant portion of the buffer consists of non-text characters,
+            # it's likely a binary file.
+            return 1 if (non_text_count/len(buffer)) < threshold else 0
+        except IOError:
+            return -1  # Handle cases where the file cannot be opened or read
diff --git a/test/test_common.py b/test/test_common.py
index 06053f7..4d9fe98 100644
--- a/test/test_common.py
+++ b/test/test_common.py
@@ -14,3 +14,14 @@ def test_common():
     import rda_python_common.PgSIG
     import rda_python_common.PgSplit
     import rda_python_common.PgUtil
+    import rda_python_common.pgpassword
+    import rda_python_common.pg_cmd
+    import rda_python_common.pg_dbi
+    import rda_python_common.pg_file
+    import rda_python_common.pg_lock
+    import rda_python_common.pg_log
+    import rda_python_common.pg_opt
+    import rda_python_common.pg_sig
+    import rda_python_common.pg_split
+    import rda_python_common.pg_util
+    import rda_python_common.pg_pass