diff --git a/.github/workflows/blank.yml b/.github/workflows/blank.yml
deleted file mode 100644
index 01502b1..0000000
--- a/.github/workflows/blank.yml
+++ /dev/null
@@ -1,36 +0,0 @@
-# This is a basic workflow to help you get started with Actions
-
-name: CI
-
-# Controls when the workflow will run
-on:
-  # Triggers the workflow on push or pull request events but only for the "main" branch
-  push:
-    branches: [ "main" ]
-  pull_request:
-    branches: [ "main" ]
-
-  # Allows you to run this workflow manually from the Actions tab
-  workflow_dispatch:
-
-# A workflow run is made up of one or more jobs that can run sequentially or in parallel
-jobs:
-  # This workflow contains a single job called "build"
-  build:
-    # The type of runner that the job will run on
-    runs-on: ubuntu-latest
-
-    # Steps represent a sequence of tasks that will be executed as part of the job
-    steps:
-      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
-      - uses: actions/checkout@v4
-
-      # Runs a single command using the runners shell
-      - name: Run a one-line script
-        run: echo Hello, world!
-
-      # Runs a set of commands using the runners shell
-      - name: Run a multi-line script
-        run: |
-          echo Add other actions to build,
-          echo test, and deploy your project.
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 0000000..1d51a22
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,68 @@
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+# GitHub recommends pinning actions to a commit SHA.
+# To get a newer version, you will need to update the SHA.
+# You can also reference a tag or branch, but the action may change without warning.
+
+name: Upload Python Package
+
+on:
+  release:
+    types: [published]
+
+permissions:
+  contents: read
+
+jobs:
+  release-build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+
+      - name: Build release distributions
+        run: |
+          # NOTE: put your own distribution build steps here.
+          python -m pip install build
+          python -m build
+
+      - name: Upload distributions
+        uses: actions/upload-artifact@v4
+        with:
+          name: release-dists
+          path: dist/
+
+  pypi-publish:
+    runs-on: ubuntu-latest
+
+    needs:
+      - release-build
+
+    permissions:
+      # IMPORTANT: this permission is mandatory for trusted publishing
+      id-token: write
+
+    # Dedicated environments with protections for publishing are strongly recommended.
+    environment:
+      name: pypi
+      # OPTIONAL: uncomment and update to include your PyPI project URL in the deployment status:
+      url: https://pypi.org/project/rda-python-common/
+
+    steps:
+      - name: Retrieve release distributions
+        uses: actions/download-artifact@v4
+        with:
+          name: release-dists
+          path: dist/
+
+      - name: Publish release distributions to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          packages-dir: dist/
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
new file mode 100644
index 0000000..0efb7c5
--- /dev/null
+++ b/.github/workflows/python-app.yml
@@ -0,0 +1,36 @@
+# This workflow will install Python dependencies, run tests and lint with a single version of Python
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: Python application
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+
+permissions:
+  contents: read
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python 3.9
+      uses: actions/setup-python@v3
+      with:
+        python-version: "3.9"
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install flake8 pytest
+        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..d2c03f1
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 National Center for Atmospheric Research
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
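The release-build job above can be rehearsed locally before tagging a release. A minimal sketch of its build step, assuming a checkout containing a valid pyproject.toml (the `build` package is the same tool the workflow installs):

    # rough local equivalent of the "Build release distributions" step
    import subprocess, sys
    subprocess.run([sys.executable, "-m", "pip", "install", "build"], check=True)
    subprocess.run([sys.executable, "-m", "build"], check=True)   # writes sdist and wheel to dist/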
diff --git a/PgDBI.py b/PgDBI.py
new file mode 100644
index 0000000..dee4e23
--- /dev/null
+++ b/PgDBI.py
@@ -0,0 +1,2225 @@
+#
+###############################################################################
+#
+#     Title : PgDBI.py -- PostgreSQL DataBase Interface
+#    Author : Zaihua Ji, zji@ucar.edu
+#      Date : 06/07/2022
+#   Purpose : Python library module to query and manipulate a PostgreSQL database
+#
+# Work File : $DSSHOME/lib/python/PgDBI.py
+#    Github : https://github.com/NCAR/rda-shared-libraries.git
+#
+###############################################################################
+
+import os
+import re
+import time
+from datetime import datetime
+import psycopg2 as PgSQL
+from psycopg2.extras import execute_values
+from psycopg2.extras import execute_batch
+from os import path as op
+import PgLOG
+
+pgdb = None      # reference to a connected database object
+curtran = 0      # 0 - no transaction, 1 - in transaction
+NMISSES = []     # array of missing userno values
+LMISSES = []     # array of missing lognames
+TABLES = {}      # cached table field information
+SEQUENCES = {}   # cached table sequence field names
+SPECIALIST = {}  # cached specialist info per dsid
+SYSDOWN = {}
+PGDBI = {}
+ADDTBLS = []
+PGSIGNS = ['!', '<', '>', '<>']
+CHCODE = 1042
+
+# hard coded db ports for dbnames
+DBPORTS = {
+    'default' : 0   # skip default port number 5432
+}
+
+# hard coded db names for given schema names
+DBNAMES = {
+    'ivaddb' : 'ivaddb',
+    'cntldb' : 'ivaddb',
+    'cdmsdb' : 'ivaddb',
+    'ispddb' : 'ispddb',
+    'obsua'  : 'upadb',
+    'default' : 'rdadb',
+}
+
+# hard coded socket paths for machine_dbnames
+DBSOCKS = {
+    'default' : '',
+}
+
+# home path for checking db on an alternate host
+VIEWHOMES = {
+    'default' : PgLOG.PGLOG['DSSDBHM']
+}
+
+# add more to the list if used for names
+PGRES = ['end', 'window']
+
+#
+# PostgreSQL-specific query timestamp formats
+#
+fmtyr = lambda fn: "extract(year from {})::int".format(fn)
+fmtqt = lambda fn: "extract(quarter from {})::int".format(fn)
+fmtmn = lambda fn: "extract(month from {})::int".format(fn)
+fmtdt = lambda fn: "date({})".format(fn)
+fmtym = lambda fn: "to_char({}, 'yyyy-mm')".format(fn)
+fmthr = lambda fn: "extract(hour from {})::int".format(fn)
+
+#
+# set environments and defaults
+#
+def SETPGDBI(name, value):
+    PGDBI[name] = PgLOG.get_environment(name, value)
+
+SETPGDBI('CDHOST', 'rda-db.ucar.edu')   # common domain name of db host on master server
+SETPGDBI('DEFDB', 'rdadb')
+SETPGDBI('DEFSC', 'dssdb')
+SETPGDBI('DEFHOST', PgLOG.PGLOG['PSQLHOST'])
+SETPGDBI('DEFPORT', 0)
+SETPGDBI('DEFSOCK', '')
+SETPGDBI('DBNAME', PGDBI['DEFDB'])
+SETPGDBI('SCNAME', PGDBI['DEFSC'])
+SETPGDBI('LNNAME', PGDBI['DEFSC'])
+SETPGDBI('PWNAME', PGDBI['DEFSC'])
+SETPGDBI('DBHOST', (os.environ['DSSDBHOST'] if os.environ.get('DSSDBHOST') else PGDBI['DEFHOST']))
+SETPGDBI('DBPORT', 0)
+SETPGDBI('ERRLOG', PgLOG.LOGERR)   # default error logact
+SETPGDBI('EXITLG', PgLOG.LGEREX)   # default exit logact
+SETPGDBI('DBSOCK', '')
+SETPGDBI('DATADIR', PgLOG.PGLOG['DSDHOME'])
+SETPGDBI('BCKPATH', PgLOG.PGLOG['DSSDBHM'] + "/backup")
+SETPGDBI('SQLPATH', PgLOG.PGLOG['DSSDBHM'] + "/sql")
+SETPGDBI('VWNAME', PGDBI['DEFSC'])
+SETPGDBI('VWPORT', 0)
+SETPGDBI('VWSOCK', '')
+
+PGDBI['DBSHOST'] = PgLOG.get_short_host(PGDBI['DBHOST'])
+PGDBI['DEFSHOST'] = PgLOG.get_short_host(PGDBI['DEFHOST'])
+PGDBI['VWHOST'] = PgLOG.PGLOG['PVIEWHOST']
+PGDBI['VWSHOST'] = PgLOG.get_short_host(PGDBI['VWHOST'])
+PGDBI['VWHOME'] = (VIEWHOMES[PgLOG.PGLOG['HOSTNAME']] if PgLOG.PGLOG['HOSTNAME'] in VIEWHOMES else VIEWHOMES['default'])
+PGDBI['SCPATH'] = None      # additional schema path for setting search_path
+PGDBI['VHSET'] = 0
+PGDBI['PGSIZE'] = 1000      # number of records per page_size
+PGDBI['MTRANS'] = 5000      # max number of changes in one transaction
+PGDBI['MAXICNT'] = 12000000 # maximum number of records in each table
+
+#
+# create a pgddl command string for
+# table name (tname), prefix (pre) and suffix (suf)
+#
+def get_pgddl_command(tname, pre = None, suf = None):
+
+    ms = re.match(r'^(.+)\.(.+)$', tname)
+    if ms:
+        scname = ms.group(1)
+        tname = ms.group(2)
+    else:
+        scname = PGDBI['SCNAME']
+    xy = ''
+    if suf: xy += ' -x ' + suf
+    if pre: xy += ' -y ' + pre
+    return "pgddl {} -aa -h {} -d {} -c {} -u {}{}".format(tname, PGDBI['DBHOST'], PGDBI['DBNAME'], scname, PGDBI['LNNAME'], xy)
+
+#
+# set default connection to the dssdb PostgreSQL server
+#
+def dssdb_dbname():
+    default_scinfo(PGDBI['DEFDB'], PGDBI['DEFSC'], PgLOG.PGLOG['PSQLHOST'])
+
+dssdb_scname = dssdb_dbname
+
+#
+# set default connection to the obsua PostgreSQL server
+#
+def obsua_dbname():
+    default_scinfo('upadb', 'obsua', "rda-pgdb-03.ucar.edu")
+
+obsua_scname = obsua_dbname
+
+#
+# set default connection to the ivaddb PostgreSQL server
+#
+def ivaddb_dbname():
+    default_scinfo('ivaddb', 'ivaddb', "rda-pgdb-03.ucar.edu")
+
+ivaddb_scname = ivaddb_dbname
+
+#
+# set default connection to the ispddb PostgreSQL server
+#
+def ispddb_dbname():
+    default_scinfo('ispddb', 'ispddb', "rda-pgdb-03.ucar.edu")
+
+ispddb_scname = ispddb_dbname
+
+#
+# set a default schema with hard coded info
+#
+def default_dbinfo(scname = None, dbhost = None, lnname = None, pwname = None, dbport = None, socket = None):
+
+    return default_scinfo(get_dbname(scname), scname, dbhost, lnname, pwname, dbport, socket)
+
+#
+# set default database/schema with hard coded info
+#
+def default_scinfo(dbname = None, scname = None, dbhost = None, lnname = None, pwname = None, dbport = None, socket = None):
+
+    if not dbname: dbname = PGDBI['DEFDB']
+    if not scname: scname = PGDBI['DEFSC']
+    if not dbhost: dbhost = PGDBI['DEFHOST']
+    if dbport is None: dbport = PGDBI['DEFPORT']
+    if socket is None: socket = PGDBI['DEFSOCK']
+
+    set_scname(dbname, scname, lnname, pwname, dbhost, dbport, socket)
+
+#
+# get the database sock file name of a given dbname for local connections
+#
+def get_dbsock(dbname):
+
+    return (DBSOCKS[dbname] if dbname in DBSOCKS else DBSOCKS['default'])
+
+#
+# get the database port number of a given dbname for remote connections
+#
+def get_dbport(dbname):
+
+    return (DBPORTS[dbname] if dbname in DBPORTS else DBPORTS['default'])
+
+#
+# get the database name of a given schema name for remote connections
+#
+def get_dbname(scname):
+
+    if scname:
+        if scname in DBNAMES: return DBNAMES[scname]
+        return DBNAMES['default']
+    return None
+
+#
+# set connection for viewing database information
+#
+def view_dbinfo(scname = None, lnname = None, pwname = None):
+
+    return view_scinfo(get_dbname(scname), scname, lnname, pwname)
+
+#
+# set connection for viewing database/schema information
+#
+def view_scinfo(dbname = None, scname = None, lnname = None, pwname = None):
+
+    if not dbname: dbname = PGDBI['DEFDB']
+    if not scname: scname = PGDBI['DEFSC']
+
+    set_scname(dbname, scname, lnname, pwname, PgLOG.PGLOG['PVIEWHOST'], PGDBI['VWPORT'])
+
+#
+# set connection for a given scname
+#
+def set_dbname(scname = None, lnname = None, pwname = None, dbhost = None, dbport = None, socket = None):
+
+    if not scname: scname = PGDBI['DEFSC']
+    return set_scname(get_dbname(scname), scname, lnname, pwname, dbhost, dbport, socket)
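A short usage sketch of the connection-target helpers above, assuming a configured PgLOG environment; `set_dbname` falls back to the dssdb defaults for omitted arguments:

    import PgDBI
    PgDBI.ivaddb_dbname()      # point subsequent queries at the ivaddb database/schema
    PgDBI.set_dbname('obsua')  # selects the upadb database via the DBNAMES map (host unchanged)
    PgDBI.dssdb_dbname()       # back to the default rdadb/dssdb connection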
+#
+# set connection for given database & schema names
+#
+def set_scname(dbname = None, scname = None, lnname = None, pwname = None, dbhost = None, dbport = None, socket = None):
+
+    global pgdb
+    changed = 0
+
+    if dbname and dbname != PGDBI['DBNAME']:
+        PGDBI['DBNAME'] = dbname
+        changed = 1
+    if scname and scname != PGDBI['SCNAME']:
+        PGDBI['PWNAME'] = PGDBI['LNNAME'] = PGDBI['SCNAME'] = scname
+        changed = 1
+    if lnname and lnname != PGDBI['LNNAME']:
+        PGDBI['PWNAME'] = PGDBI['LNNAME'] = lnname
+        changed = 1
+    if pwname and pwname != PGDBI['PWNAME']:
+        PGDBI['PWNAME'] = pwname
+        changed = 1
+    if dbhost and dbhost != PGDBI['DBHOST']:
+        PGDBI['DBHOST'] = dbhost
+        PGDBI['DBSHOST'] = PgLOG.get_short_host(dbhost)
+        changed = 1
+    if PGDBI['DBSHOST'] == PgLOG.PGLOG['HOSTNAME']:
+        if socket is None: socket = get_dbsock(dbname)
+        if socket != PGDBI['DBSOCK']:
+            PGDBI['DBSOCK'] = socket
+            changed = 1
+    else:
+        if not dbport: dbport = get_dbport(dbname)
+        if dbport != PGDBI['DBPORT']:
+            PGDBI['DBPORT'] = dbport
+            changed = 1
+
+    if changed and pgdb is not None: pgdisconnect(1)
+
+#
+# start a database transaction; exit if it fails
+#
+def starttran():
+
+    global curtran
+    global pgdb
+
+    if curtran == 1: endtran()   # try to end the previous transaction
+    if not pgdb:
+        pgconnect(0, 0, False)
+    else:
+        try:
+            pgdb.isolation_level
+        except PgSQL.OperationalError:
+            pgconnect(0, 0, False)
+        if pgdb.closed:
+            pgconnect(0, 0, False)
+        elif pgdb.autocommit:
+            pgdb.autocommit = False
+    curtran = 1
+
+#
+# end a transaction with changes committed; exit if it fails
+#
+def endtran(autocommit = True):
+
+    global curtran
+    global pgdb
+    if curtran and pgdb:
+        if not pgdb.closed: pgdb.commit()
+        pgdb.autocommit = autocommit
+        curtran = 0 if autocommit else 1
+
+#
+# end a transaction without committing changes; exit if it fails
+#
+def aborttran(autocommit = True):
+
+    global curtran
+    global pgdb
+    if curtran and pgdb:
+        if not pgdb.closed: pgdb.rollback()
+        pgdb.autocommit = autocommit
+        curtran = 0 if autocommit else 1
+
+#
+# record an error message in the dscheck record and clear the lock
+#
+def record_dscheck_error(errmsg):
+
+    cnd = PgLOG.PGLOG['DSCHECK']['chkcnd']
+    if PgLOG.PGLOG['NOQUIT']: PgLOG.PGLOG['NOQUIT'] = 0
+    dflags = PgLOG.PGLOG['DSCHECK']['dflags']
+
+    pgrec = pgget("dscheck", "mcount, tcount, lockhost, pid", cnd, PgLOG.LGEREX)
+    if not pgrec: return 0
+    if not pgrec['pid'] and not pgrec['lockhost']: return 0
+    (chost, cpid) = PgLOG.current_process_info()
+    if pgrec['pid'] != cpid or pgrec['lockhost'] != chost: return 0
+
+    # update the dscheck record only if it is still locked by the current process
+    record = {}
+    record['chktime'] = int(time.time())
+    record['status'] = "E"
+    record['pid'] = 0   # release lock
+    if dflags:
+        record['dflags'] = dflags
+        record['mcount'] = pgrec['mcount'] + 1
+    else:
+        record['dflags'] = ''
+
+    if errmsg:
+        errmsg = PgLOG.break_long_string(errmsg, 512, None, 50, None, 50, 25)
+        if pgrec['tcount'] > 1: errmsg = "Try {}: {}".format(pgrec['tcount'], errmsg)
+        record['errmsg'] = errmsg
+
+    return pgupdt("dscheck", record, cnd, PGDBI['ERRLOG'])
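A minimal transaction sketch built from the helpers above; `starttran` turns autocommit off, and `endtran`/`aborttran` commit or roll back (the UPDATE statement is a hypothetical placeholder):

    import PgDBI
    PgDBI.starttran()
    try:
        PgDBI.pgexec("UPDATE dataset SET ...")   # hypothetical statement
        PgDBI.endtran()      # commit and restore autocommit
    except Exception:
        PgDBI.aborttran()    # roll back on any failure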
+#
+# local function to log a query error
+#
+def qelog(dberror, sleep, sqlstr, vals, pgcnt, logact = PGDBI['ERRLOG']):
+
+    retry = " Sleep {}(sec) & ".format(sleep) if sleep else " "
+    if sqlstr:
+        if sqlstr.find("Retry ") == 0:
+            retry += "the {} ".format(PgLOG.int2order(pgcnt+1))
+        elif sleep:
+            retry += "the {} Retry: \n".format(PgLOG.int2order(pgcnt+1))
+        elif pgcnt:
+            retry = " Error the {} Retry: \n".format(PgLOG.int2order(pgcnt))
+        else:
+            retry = "\n"
+        sqlstr = retry + sqlstr
+    else:
+        sqlstr = ''
+
+    if vals: sqlstr += " with values: " + str(vals)
+
+    if dberror: sqlstr = "{}\n{}".format(dberror, sqlstr)
+    if logact&PgLOG.EXITLG and PgLOG.PGLOG['DSCHECK']: record_dscheck_error(sqlstr)
+    PgLOG.pglog(sqlstr, logact)
+    if sleep: time.sleep(sleep)
+
+    return PgLOG.FAILURE   # reached if PgLOG.pglog() does not exit
+
+#
+# try to add a new table upon a table-does-not-exist error
+#
+def try_add_table(dberror, logact):
+
+    ms = re.match(r'^42P01 ERROR: relation "(.+)" does not exist', dberror)
+    if ms:
+        tname = ms.group(1)
+        add_new_table(tname, logact = logact)
+
+#
+# add a table for a given table name
+#
+def add_a_table(tname, logact):
+
+    add_new_table(tname, logact = logact)
+
+#
+# add a new table for a given table name
+#
+def add_new_table(tname, pre = None, suf = None, logact = 0):
+
+    if pre:
+        tbname = '{}_{}'.format(pre, tname)
+    elif suf:
+        tbname = '{}_{}'.format(tname, suf)
+    else:
+        tbname = tname
+    if tbname in ADDTBLS: return
+
+    PgLOG.pgsystem(get_pgddl_command(tname, pre, suf), logact)
+    ADDTBLS.append(tbname)
+
+#
+# validate a table for given table name (tname), prefix (pre) and suffix (suf),
+# and add it if not existing
+#
+def valid_table(tname, pre = None, suf = None, logact = 0):
+
+    if pre:
+        tbname = '{}_{}'.format(pre, tname)
+    elif suf:
+        tbname = '{}_{}'.format(tname, suf)
+    else:
+        tbname = tname
+    if tbname in ADDTBLS: return tbname
+
+    if not pgcheck(tbname, logact): PgLOG.pgsystem(get_pgddl_command(tname, pre, suf), logact)
+    ADDTBLS.append(tbname)
+    return tbname
+
+#
+# check a database error and decide whether the failed action should be retried
+#
+def check_dberror(pgerr, pgcnt, sqlstr, ary, logact = PGDBI['ERRLOG']):
+
+    ret = PgLOG.FAILURE
+
+    pgcode = pgerr.pgcode
+    pgerror = pgerr.pgerror
+    dberror = "{} {}".format(pgcode, pgerror) if pgcode and pgerror else str(pgerr)
+    if pgcnt < PgLOG.PGLOG['DBRETRY']:
+        if not pgcode:
+            if PGDBI['DBNAME'] == PGDBI['DEFDB'] and PGDBI['DBSHOST'] != PGDBI['DEFSHOST']:
+                default_dbinfo()
+                qelog(dberror, 0, "Retry Connecting to {} on {}".format(PGDBI['DBNAME'], PGDBI['DBHOST']), ary, pgcnt, PgLOG.MSGLOG)
+            else:
+                qelog(dberror, 5+5*pgcnt, "Retry Connecting", ary, pgcnt, PgLOG.LOGWRN)
+            return PgLOG.SUCCESS
+        elif re.match(r'^(08|57)', pgcode):
+            qelog(dberror, 0, "Retry Connecting", ary, pgcnt, PgLOG.LOGWRN)
+            pgconnect(1, pgcnt + 1)
+            return (PgLOG.FAILURE if not pgdb else PgLOG.SUCCESS)
+        elif re.match(r'^55', pgcode):   # try to lock again
+            qelog(dberror, 10, "Retry Locking", ary, pgcnt, PgLOG.LOGWRN)
+            return PgLOG.SUCCESS
+        elif pgcode == '25P02':   # roll back the aborted transaction
+            qelog(dberror, 0, "Rollback transaction", ary, pgcnt, PgLOG.LOGWRN)
+            pgdb.rollback()
+            return PgLOG.SUCCESS
+        elif pgcode == '42P01' and logact&PgLOG.ADDTBL:   # try to add the missing table
+            qelog(dberror, 0, "Retry after adding a table", ary, pgcnt, PgLOG.LOGWRN)
+            try_add_table(dberror, logact)
+            return PgLOG.SUCCESS
+
+    if logact&PgLOG.DOLOCK and pgcode and re.match(r'^55\w\w\w$', pgcode):
+        logact &= ~PgLOG.EXITLG   # no exit for lock errors
+    return qelog(dberror, 0, sqlstr, ary, pgcnt, logact)
+#
+# return a dict holding the psql batch-mode command and its output file name
+#
+def pgbatch(sqlfile, foreground = 0):
+
+#    if(PGDBI['VWHOST'] and PGDBI['VWHOME'] and
+#       PGDBI['DBSHOST'] == PGDBI['VWSHOST'] and PGDBI['SCNAME'] == PGDBI['VWNAME']):
+#       slave = "/{}/{}.slave".format(PGDBI['VWHOME'], PGDBI['VWHOST'])
+#       if not op.exists(slave): default_scname()
+
+    dbhost = 'localhost' if PGDBI['DBSHOST'] == PgLOG.PGLOG['HOSTNAME'] else PGDBI['DBHOST']
+    options = "-h {} -p {}".format(dbhost, PGDBI['DBPORT'])
+    os.environ['PGPASSWORD'] = PGDBI['PWNAME']
+    options += " -U {} {}".format(PGDBI['LNNAME'], PGDBI['DBNAME'])
+
+    if not sqlfile: return options
+
+    if foreground:
+        batch = "psql {} < {} |".format(options, sqlfile)
+    else:
+        batch = {'out' : sqlfile}
+        if re.search(r'\.sql$', batch['out']):
+            batch['out'] = re.sub(r'\.sql$', '.out', batch['out'])
+        else:
+            batch['out'] += ".out"
+        batch['cmd'] = "psql {} < {} > {} 2>&1".format(options, sqlfile, batch['out'])
+
+    return batch
+
+#
+# open a connection to the database and return the connection object; None on error
+# force a reconnect if reconnect > 0
+#
+def pgconnect(reconnect = 0, pgcnt = 0, autocommit = True):
+
+    global pgdb
+
+    if pgdb:
+        if reconnect and not pgdb.closed: return pgdb   # no need to reconnect
+    elif reconnect:
+        reconnect = 0   # initial connection
+
+#    if PGDBI['VWHOST'] and PGDBI['VWHOME'] and PGDBI['DBSHOST'] == PGDBI['VWSHOST'] and PGDBI['SCNAME'] == PGDBI['VWNAME']:
+#       slave = "/{}/{}.slave".format(PGDBI['VWHOME'], PGDBI['VWHOST'])
+#       if not op.exists(slave): default_scname()
+
+    while True:
+        config = {'database' : PGDBI['DBNAME'],
+                  'user' : PGDBI['LNNAME'],
+                  'password' : PGDBI['PWNAME']}
+        if PGDBI['DBSHOST'] == PgLOG.PGLOG['HOSTNAME']:
+            config['host'] = 'localhost'
+        else:
+            config['host'] = PGDBI['DBHOST'] if PGDBI['DBHOST'] else PGDBI['CDHOST']
+            if not PGDBI['DBPORT']: PGDBI['DBPORT'] = get_dbport(PGDBI['DBNAME'])
+            if PGDBI['DBPORT']: config['port'] = PGDBI['DBPORT']
+
+        sqlstr = "psycopg2.connect(**{})".format(config)
+        if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, sqlstr)
+        try:
+            PgLOG.PGLOG['PGDBBUF'] = pgdb = PgSQL.connect(**config)
+            if reconnect: PgLOG.pglog("{} Reconnected at {}".format(sqlstr, PgLOG.current_datetime()), PgLOG.MSGLOG|PgLOG.FRCLOG)
+            if autocommit: pgdb.autocommit = autocommit
+            return pgdb
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, None, PGDBI['EXITLG']): return PgLOG.FAILURE
+        pgcnt += 1
+
+#
+# return a PostgreSQL cursor upon success
+#
+def pgcursor():
+
+    global pgdb
+    pgcur = None
+
+    if not pgdb:
+        pgconnect()
+        if not pgdb: return PgLOG.FAILURE
+
+    pgcnt = 0
+    while True:
+        try:
+            pgcur = pgdb.cursor()
+            spath = "SET search_path = '{}'".format(PGDBI['SCNAME'])
+            if PGDBI['SCPATH'] and PGDBI['SCPATH'] != PGDBI['SCNAME']:
+                spath += ", '{}'".format(PGDBI['SCPATH'])
+            pgcur.execute(spath)
+        except PgSQL.Error as pgerr:
+            if pgcnt == 0 and pgdb.closed:
+                pgconnect(1)
+            elif not check_dberror(pgerr, pgcnt, '', None, PGDBI['EXITLG']):
+                return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    return pgcur
+
+#
+# disconnect from the database
+#
+def pgdisconnect(stopit = 1):
+
+    global pgdb
+    if pgdb:
+        if stopit: pgdb.close()
+        PgLOG.PGLOG['PGDBBUF'] = pgdb = None
+#
+# gather table field default information as a dict with field names as keys
+# and default values as values;
+# the whole table information is cached in a dict with table names as keys
+#
+def pgtable(tablename, logact = PGDBI['ERRLOG']):
+
+    if tablename in TABLES: return TABLES[tablename].copy()   # cached already
+    intms = r'^(smallint|bigint|integer)$'
+    fields = "column_name col, data_type typ, is_nullable nil, column_default def"
+    condition = table_condition(tablename)
+    pgcnt = 0
+    while True:
+        pgrecs = pgmget('information_schema.columns', fields, condition, logact)
+        cnt = len(pgrecs['col']) if pgrecs else 0
+        if cnt: break
+        if pgcnt == 0 and logact&PgLOG.ADDTBL:
+            add_new_table(tablename, logact = logact)
+        else:
+            return PgLOG.pglog(tablename + ": Table does not exist", logact)
+        pgcnt += 1
+
+    pgdefs = {}
+    for i in range(cnt):
+        name = pgrecs['col'][i]
+        isint = re.match(intms, pgrecs['typ'][i])
+        dflt = pgrecs['def'][i]
+        if dflt != None:
+            if re.match(r'^nextval\(', dflt):
+                dflt = 0
+            else:
+                dflt = check_default_value(dflt, isint)
+        elif pgrecs['nil'][i] == 'YES':
+            dflt = None
+        elif isint:
+            dflt = 0
+        else:
+            dflt = ''
+        pgdefs[name] = dflt
+
+    TABLES[tablename] = pgdefs.copy()
+    return pgdefs
+
+#
+# get the sequence field name for a given table name
+#
+def pgsequence(tablename, logact = PGDBI['ERRLOG']):
+
+    if tablename in SEQUENCES: return SEQUENCES[tablename]   # cached already
+    condition = table_condition(tablename) + " AND column_default LIKE 'nextval(%'"
+    pgrec = pgget('information_schema.columns', 'column_name', condition, logact)
+    seqname = pgrec['column_name'] if pgrec else None
+    SEQUENCES[tablename] = seqname
+
+    return seqname
+
+#
+# check/normalize a default value for integer & string fields
+#
+def check_default_value(dflt, isint):
+
+    if isint:
+        ms = re.match(r"^'{0,1}(\d+)", dflt)
+        if ms: dflt = int(ms.group(1))
+    elif dflt[0] == "'":
+        ms = re.match(r"^(.+)::", dflt)
+        if ms: dflt = ms.group(1)
+    elif dflt != 'NULL':
+        dflt = "'{}'".format(dflt)
+    return dflt
+
+#
+# local function: prepare an INSERT statement for pgadd()/pgmadd() for given table and
+# field names, honoring the options for per-row placeholders and returning a sequence id
+#
+def prepare_insert(tablename, fields, multi = True, getid = None):
+
+    strfld = pgnames(fields, '.', ',')
+    if multi:
+        strplc = "(" + ','.join(['%s']*len(fields)) + ")"
+    else:
+        strplc = '%s'
+    sqlstr = "INSERT INTO {} ({}) VALUES {}".format(tablename, strfld, strplc)
+    if getid: sqlstr += " RETURNING " + getid
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, sqlstr)
+
+    return sqlstr
+
+#
+# local function: fill default values for a single record
+#
+def prepare_default(tablename, record, logact = 0):
+
+    table = pgtable(tablename, logact)
+
+    for fld in record:
+        val = record[fld]
+        if val is None:
+            vlen = 0
+        elif isinstance(val, str):
+            vlen = len(val)
+        else:
+            vlen = 1
+        if vlen == 0: record[fld] = table[fld]
+
+#
+# local function: fill default values for multiple records
+#
+def prepare_defaults(tablename, records, logact = 0):
+
+    table = pgtable(tablename, logact)
+
+    for fld in records:
+        vals = records[fld]
+        vcnt = len(vals)
+        for i in range(vcnt):
+            if vals[i] is None:
+                vlen = 0
+            elif isinstance(vals[i], str):
+                vlen = len(vals[i])
+            else:
+                vlen = 1
+            if vlen == 0: records[fld][i] = table[fld]
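For illustration, the SQL text `prepare_insert` generates for a two-field insert, assuming `pgnames` (defined elsewhere in this module) simply comma-joins non-reserved field names; the `%s` placeholders are filled by psycopg2 at execute time:

    PgDBI.prepare_insert('dssgrp', ['logname', 'lstname'])
    # -> "INSERT INTO dssgrp (logname,lstname) VALUES (%s,%s)"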
+#
+# insert one record into tablename
+# tablename: add a record to one table name per call
+# record: dict with field names as keys and field values as values
+# return PgLOG.SUCCESS or PgLOG.FAILURE
+#
+def pgadd(tablename, record, logact = PGDBI['ERRLOG'], getid = None):
+
+    global curtran
+    if not record: return PgLOG.pglog("Nothing to add to " + tablename, logact)
+    if logact&PgLOG.DODFLT: prepare_default(tablename, record, logact)
+    if logact&PgLOG.AUTOID and not getid: getid = pgsequence(tablename, logact)
+    sqlstr = prepare_insert(tablename, list(record), True, getid)
+    values = tuple(record.values())
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "Insert: " + str(values))
+
+    ret = acnt = pgcnt = 0
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            pgcur.execute(sqlstr, values)
+            acnt = 1
+            if getid:
+                ret = pgcur.fetchone()[0]
+            else:
+                ret = PgLOG.SUCCESS
+            pgcur.close()
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, values, logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "pgadd: 1 record added to " + tablename + ", return " + str(ret))
+    if logact&PgLOG.ENDLCK:
+        endtran()
+    elif curtran:
+        curtran += acnt
+        if curtran > PGDBI['MTRANS']: starttran()
+
+    return ret
+
+#
+# insert multiple records into tablename
+# tablename: add records to one table name per call
+# records: dict with field names as keys and a list of field values for each key
+# return PgLOG.SUCCESS or PgLOG.FAILURE
+#
+def pgmadd(tablename, records, logact = PGDBI['ERRLOG'], getid = None):
+
+    global curtran
+    if not records: return PgLOG.pglog("Nothing to insert into table " + tablename, logact)
+    if logact&PgLOG.DODFLT: prepare_defaults(tablename, records, logact)
+    if logact&PgLOG.AUTOID and not getid: getid = pgsequence(tablename, logact)
+    multi = True if getid else False
+    sqlstr = prepare_insert(tablename, list(records), multi, getid)
+
+    v = records.values()
+    values = list(zip(*v))
+    cntrow = len(values)
+    ids = [] if getid else None
+
+    if PgLOG.PGLOG['DBGLEVEL']:
+        for row in values: PgLOG.pgdbg(1000, "Insert: " + str(row))
+
+    count = pgcnt = 0
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+
+        if getid:
+            while count < cntrow:
+                record = values[count]
+                try:
+                    pgcur.execute(sqlstr, record)
+                    ids.append(pgcur.fetchone()[0])
+                    count += 1
+                except PgSQL.Error as pgerr:
+                    if not check_dberror(pgerr, pgcnt, sqlstr, record, logact): return PgLOG.FAILURE
+                    break
+        else:
+            try:
+                execute_values(pgcur, sqlstr, values, page_size=PGDBI['PGSIZE'])
+                count = cntrow
+            except PgSQL.Error as pgerr:
+                if not check_dberror(pgerr, pgcnt, sqlstr, values[0], logact): return PgLOG.FAILURE
+        if count >= cntrow: break
+        pgcnt += 1
+
+    pgcur.close()
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "pgmadd: {} of {} record(s) added to {}".format(count, cntrow, tablename))
+
+    if logact&PgLOG.ENDLCK:
+        endtran()
+    elif curtran:
+        curtran += count
+        if curtran > PGDBI['MTRANS']: starttran()
+
+    return (ids if ids else count)
+
+#
+# local function: prepare a SELECT statement for pgget() and pgmget()
+#
+def prepare_select(tablenames, fields = None, condition = None, cndflds = None, logact = 0):
+
+    sqlstr = ''
+    if tablenames:
+        if fields:
+            sqlstr = "SELECT " + fields
+        else:
+            sqlstr = "SELECT count(*) cntrec"
+
+        sqlstr += " FROM " + tablenames
+        if condition:
+            if re.match(r'^\s*(ORDER|GROUP|HAVING|OFFSET|LIMIT)\s', condition, re.I):
+                sqlstr += " " + condition   # no where clause, append directly
+            else:
+                sqlstr += " WHERE " + condition
+        elif cndflds:
+            sep = 'WHERE'
+            for fld in cndflds:
+                sqlstr += " {} {}=%s".format(sep, fld)
+                sep = 'AND'
+        if logact&PgLOG.DOLOCK:
+            starttran()
+            sqlstr += " FOR UPDATE"
+    elif fields:
+        sqlstr = "SELECT " + fields
+    elif condition:
+        sqlstr = condition
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, sqlstr)
+
+    return sqlstr
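A minimal usage sketch of the two insert entry points above, assuming a hypothetical table `dssgrp` with the fields shown; `AUTOID` asks for the new sequence id back:

    import PgDBI, PgLOG
    uid = PgDBI.pgadd('dssgrp', {'logname': 'abc', 'lstname': 'Doe'},
                      PgLOG.LOGERR|PgLOG.AUTOID)        # one row; returns the new id
    cnt = PgDBI.pgmadd('dssgrp', {'logname': ['a', 'b'],
                                  'lstname': ['X', 'Y']})  # column-wise lists of equal length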
+#
+# tablenames: comma delimited string of one or more tables (more than one for joining)
+# fields: comma delimited string of one or more field names
+# condition: query condition for the where clause
+# return a dict with field names as keys upon success
+#
+def pgget(tablenames, fields, condition = None, logact = 0):
+
+    if not logact: logact = PGDBI['ERRLOG']
+    if fields and condition and not re.search(r'limit 1$', condition, re.I): condition += " LIMIT 1"
+    sqlstr = prepare_select(tablenames, fields, condition, None, logact)
+    if fields and not re.search(r'(^|\s)limit 1($|\s)', sqlstr, re.I): sqlstr += " LIMIT 1"
+    ucname = True if logact&PgLOG.UCNAME else False
+    pgcnt = 0
+    record = {}
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            pgcur.execute(sqlstr)
+            vals = pgcur.fetchone()
+            if vals:
+                colcnt = len(pgcur.description)
+                for i in range(colcnt):
+                    col = pgcur.description[i]
+                    colname = col[0].upper() if ucname else col[0]
+                    val = vals[i]
+                    if col[1] == CHCODE and val and val[-1] == ' ': val = val.rstrip()
+                    record[colname] = val
+            pgcur.close()
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, None, logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    if record and tablenames and not fields:
+        if PgLOG.PGLOG['DBGLEVEL']:
+            PgLOG.pgdbg(1000, "pgget: {} record(s) found from {}".format(record['cntrec'], tablenames))
+        return record['cntrec']
+    elif PgLOG.PGLOG['DBGLEVEL']:
+        cnt = 1 if record else 0
+        PgLOG.pgdbg(1000, "pgget: {} record retrieved from {}".format(cnt, tablenames))
+
+    return record
+
+#
+# tablenames: comma delimited string of one or more tables (more than one for joining)
+# fields: comma delimited string of one or more field names
+# condition: query condition for the where clause
+# return a dict with field names as keys upon success; the values for each field name
+# are in a list. All lists are the same length, with missing values set to None
+#
+def pgmget(tablenames, fields, condition = None, logact = PGDBI['ERRLOG']):
+
+    sqlstr = prepare_select(tablenames, fields, condition, None, logact)
+    ucname = True if logact&PgLOG.UCNAME else False
+    count = pgcnt = 0
+    records = {}
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            pgcur.execute(sqlstr)
+            rowvals = pgcur.fetchall()
+            if rowvals:
+                colcnt = len(pgcur.description)
+                count = len(rowvals)
+                colvals = list(zip(*rowvals))
+                for i in range(colcnt):
+                    col = pgcur.description[i]
+                    colname = col[0].upper() if ucname else col[0]
+                    vals = list(colvals[i])
+                    if col[1] == CHCODE:
+                        for j in range(count):
+                            if vals[j] and vals[j][-1] == ' ': vals[j] = vals[j].rstrip()
+                    records[colname] = vals
+            pgcur.close()
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, None, logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    if PgLOG.PGLOG['DBGLEVEL']:
+        PgLOG.pgdbg(1000, "pgmget: {} record(s) retrieved from {}".format(count, tablenames))
+
+    return records
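A short usage sketch of the two query entry points above; the table and condition are hypothetical, and passing `None` for fields turns `pgget` into a count query:

    rec = PgDBI.pgget('dataset', 'dsid, title', "use_rdadb = 'Y'")    # one row as a dict
    cnt = PgDBI.pgget('dataset', None, "use_rdadb = 'Y'")             # count(*) as an int
    recs = PgDBI.pgmget('dataset', 'dsid, title', "use_rdadb = 'Y'")  # dict of column lists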
+#
+# tablenames: comma delimited string of one or more tables
+# fields: comma delimited string of one or more field names
+# cnddict: condition dict of field names : values
+# return a dict (field names : values) upon success
+#
+# retrieve one record from tablenames for the condition dict
+#
+def pghget(tablenames, fields, cnddict, logact = PGDBI['ERRLOG']):
+
+    if not tablenames: return PgLOG.pglog("Missing table name to query", logact)
+    if not fields: return PgLOG.pglog("Nothing to query " + tablenames, logact)
+    if not cnddict: return PgLOG.pglog("Missing condition dict values to query " + tablenames, logact)
+    sqlstr = prepare_select(tablenames, fields, None, list(cnddict), logact)
+    if fields and not re.search(r'limit 1$', sqlstr, re.I): sqlstr += " LIMIT 1"
+    ucname = True if logact&PgLOG.UCNAME else False
+
+    values = tuple(cnddict.values())
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "Query from {} for {}".format(tablenames, values))
+
+    pgcnt = 0
+    record = {}
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            pgcur.execute(sqlstr, values)
+            vals = pgcur.fetchone()
+            if vals:
+                colcnt = len(pgcur.description)
+                for i in range(colcnt):
+                    col = pgcur.description[i]
+                    colname = col[0].upper() if ucname else col[0]
+                    val = vals[i]
+                    if col[1] == CHCODE and val and val[-1] == ' ': val = val.rstrip()
+                    record[colname] = val
+            pgcur.close()
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, values, logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    if record and tablenames and not fields:
+        if PgLOG.PGLOG['DBGLEVEL']:
+            PgLOG.pgdbg(1000, "pghget: {} record(s) found from {}".format(record['cntrec'], tablenames))
+        return record['cntrec']
+    elif PgLOG.PGLOG['DBGLEVEL']:
+        cnt = 1 if record else 0
+        PgLOG.pgdbg(1000, "pghget: {} record retrieved from {}".format(cnt, tablenames))
+
+    return record
+
+#
+# tablenames: comma delimited string of one or more tables
+# fields: comma delimited string of one or more field names
+# cnddicts: condition dict of field names : value lists
+# return a dict (field names : value lists) upon success
+#
+# retrieve multiple records from tablenames for the condition dict
+#
+def pgmhget(tablenames, fields, cnddicts, logact = PGDBI['ERRLOG']):
+
+    if not tablenames: return PgLOG.pglog("Missing table name to query", logact)
+    if not fields: return PgLOG.pglog("Nothing to query " + tablenames, logact)
+    if not cnddicts: return PgLOG.pglog("Missing condition dict values to query " + tablenames, logact)
+    sqlstr = prepare_select(tablenames, fields, None, list(cnddicts), logact)
+    ucname = True if logact&PgLOG.UCNAME else False
+
+    v = cnddicts.values()
+    values = list(zip(*v))
+    cndcnt = len(values)
+
+    if PgLOG.PGLOG['DBGLEVEL']:
+        for row in values:
+            PgLOG.pgdbg(1000, "Query from {} for {}".format(tablenames, row))
+
+    colcnt = ccnt = count = pgcnt = 0
+    cols = []
+    chrs = []
+    records = {}
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        while ccnt < cndcnt:
+            cndvals = values[ccnt]
+            try:
+                pgcur.execute(sqlstr, cndvals)
+                ccnt += 1
+                rowvals = pgcur.fetchall()
+                if rowvals:
+                    if colcnt == 0:
+                        for col in pgcur.description:
+                            colname = col[0].upper() if ucname else col[0]
+                            if col[1] == CHCODE: chrs.append(colname)
+                            cols.append(colname)
+                            records[colname] = []
+                        colcnt = len(cols)
+                    rcnt = len(rowvals)
+                    count += rcnt
+                    colvals = list(zip(*rowvals))
+                    for i in range(colcnt):
+                        vals = list(colvals[i])
+                        colname = cols[i]
+                        if chrs and colname in chrs:
+                            for j in range(rcnt):
+                                if vals[j] and vals[j][-1] == ' ': vals[j] = vals[j].rstrip()
+                        records[colname].extend(vals)
+            except PgSQL.Error as pgerr:
+                if not check_dberror(pgerr, pgcnt, sqlstr, cndvals, logact): return PgLOG.FAILURE
+                break
+        if ccnt >= cndcnt: break
+        pgcnt += 1
+    pgcur.close()
+
+    if PgLOG.PGLOG['DBGLEVEL']:
+        PgLOG.pgdbg(1000, "pgmhget: {} record(s) retrieved from {}".format(count, tablenames))
+
+    return records
+
+#
+# local function: prepare an UPDATE statement for pgupdt, pghupdt and pgmupdt
+#
+def prepare_update(tablename, fields, condition = None, cndflds = None):
+
+    strset = []
+    # build the set string
+    for fld in fields:
+        strset.append("{}=%s".format(pgname(fld, '.')))
+    strflds = ",".join(strset)
+
+    # build the condition string
+    if not condition:
+        cndset = []
+        for fld in cndflds:
+            cndset.append("{}=%s".format(pgname(fld, '.')))
+        condition = " AND ".join(cndset)
+
+    sqlstr = "UPDATE {} SET {} WHERE {}".format(tablename, strflds, condition)
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, sqlstr)
+
+    return sqlstr
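For illustration, the SQL text `prepare_update` generates, assuming `pgname` (defined elsewhere in this module) returns a non-reserved field name unchanged:

    PgDBI.prepare_update('dssgrp', ['lstname'], None, ['logname'])
    # -> "UPDATE dssgrp SET lstname=%s WHERE logname=%s"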
+#
+# update one or multiple rows in tablename
+# tablename: update one table name per call
+# record: dict of field names : values
+# condition: update condition for the where clause
+# return number of rows updated upon success
+#
+def pgupdt(tablename, record, condition, logact = PGDBI['ERRLOG']):
+
+    global curtran
+    if not record: PgLOG.pglog("Nothing to update in " + tablename, logact)
+    if not condition or isinstance(condition, int): PgLOG.pglog("Missing condition to update " + tablename, logact)
+    sqlstr = prepare_update(tablename, list(record), condition)
+    if logact&PgLOG.DODFLT: prepare_default(tablename, record, logact)
+
+    values = tuple(record.values())
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "Update {} for {}".format(tablename, values))
+
+    ucnt = pgcnt = 0
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            pgcur.execute(sqlstr, values)
+            ucnt = pgcur.rowcount
+            pgcur.close()
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, values, logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "pgupdt: {} record(s) updated in {}".format(ucnt, tablename))
+    if logact&PgLOG.ENDLCK:
+        endtran()
+    elif curtran:
+        curtran += ucnt
+        if curtran > PGDBI['MTRANS']: starttran()
+
+    return ucnt
+
+#
+# update one or multiple records in tablename
+# tablename: update one table name per call
+# record: update values, dict of field names : values
+# cnddict: condition dict of field names : values
+# return number of records updated upon success
+#
+def pghupdt(tablename, record, cnddict, logact = PGDBI['ERRLOG']):
+
+    global curtran
+    if not record: PgLOG.pglog("Nothing to update in " + tablename, logact)
+    if not cnddict or isinstance(cnddict, int): PgLOG.pglog("Missing condition to update " + tablename, logact)
+    if logact&PgLOG.DODFLT: prepare_default(tablename, record, logact)
+    sqlstr = prepare_update(tablename, list(record), None, list(cnddict))
+
+    values = tuple(record.values()) + tuple(cnddict.values())
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "Update {} for {}".format(tablename, values))
+
+    ucnt = pgcnt = 0
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            pgcur.execute(sqlstr, values)
+            ucnt = pgcur.rowcount
+            pgcur.close()
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, values, logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "pghupdt: {} record(s) updated in {}".format(ucnt, tablename))
+    if logact&PgLOG.ENDLCK:
+        endtran()
+    elif curtran:
+        curtran += ucnt
+        if curtran > PGDBI['MTRANS']: starttran()
+
+    return ucnt
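A minimal usage sketch of the two update entry points above (hypothetical table and values); both return the affected row count:

    n = PgDBI.pgupdt('dssgrp', {'lstname': 'Doe'}, "logname = 'abc'")   # string condition
    n = PgDBI.pghupdt('dssgrp', {'lstname': 'Doe'}, {'logname': 'abc'}) # condition dict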
+#
+# update multiple records in tablename
+# tablename: update one table name per call
+# records: update values, dict of field names : value lists
+# cnddicts: condition dict of field names : value lists
+# return number of records updated upon success
+#
+def pgmupdt(tablename, records, cnddicts, logact = PGDBI['ERRLOG']):
+
+    global curtran
+    if not records: PgLOG.pglog("Nothing to update in " + tablename, logact)
+    if not cnddicts or isinstance(cnddicts, int): PgLOG.pglog("Missing condition to update " + tablename, logact)
+    if logact&PgLOG.DODFLT: prepare_defaults(tablename, records, logact)
+    sqlstr = prepare_update(tablename, list(records), None, list(cnddicts))
+
+    fldvals = tuple(records.values())
+    cntrow = len(fldvals[0])
+    cndvals = tuple(cnddicts.values())
+    cntcnd = len(cndvals[0])
+    if cntcnd != cntrow: return PgLOG.pglog("Field/Condition value counts mismatch {}/{} to update {}".format(cntrow, cntcnd, tablename), logact)
+    v = fldvals + cndvals
+    values = list(zip(*v))
+
+    if PgLOG.PGLOG['DBGLEVEL']:
+        for row in values: PgLOG.pgdbg(1000, "Update {} for {}".format(tablename, row))
+
+    ucnt = pgcnt = 0
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            execute_batch(pgcur, sqlstr, values, page_size=PGDBI['PGSIZE'])
+            ucnt = cntrow
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, values[0], logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    pgcur.close()
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "pgmupdt: {}/{} record(s) updated in {}".format(ucnt, cntrow, tablename))
+    if logact&PgLOG.ENDLCK:
+        endtran()
+    elif curtran:
+        curtran += ucnt
+        if curtran > PGDBI['MTRANS']: starttran()
+
+    return ucnt
+
+#
+# local function: prepare a DELETE statement for pgdel, pghdel and pgmdel
+#
+def prepare_delete(tablename, condition = None, cndflds = None):
+
+    # build the condition string
+    if not condition:
+        cndset = []
+        for fld in cndflds:
+            cndset.append("{}=%s".format(fld))
+        condition = " AND ".join(cndset)
+
+    sqlstr = "DELETE FROM {} WHERE {}".format(tablename, condition)
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, sqlstr)
+
+    return sqlstr
+
+#
+# delete one or multiple records in tablename according to condition
+# tablename: delete from one table name per call
+# condition: delete condition for the where clause
+# return number of records deleted upon success
+#
+def pgdel(tablename, condition, logact = PGDBI['ERRLOG']):
+
+    global curtran
+    if not condition or isinstance(condition, int): PgLOG.pglog("Missing condition to delete from " + tablename, logact)
+    sqlstr = prepare_delete(tablename, condition)
+
+    dcnt = pgcnt = 0
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            pgcur.execute(sqlstr)
+            dcnt = pgcur.rowcount
+            pgcur.close()
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, None, logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "pgdel: {} record(s) deleted from {}".format(dcnt, tablename))
+    if logact&PgLOG.ENDLCK:
+        endtran()
+    elif curtran:
+        curtran += dcnt
+        if curtran > PGDBI['MTRANS']: starttran()
+
+    return dcnt
+
+#
+# delete one or multiple records in tablename according to condition
+# tablename: delete from one table name per call
+# cnddict: delete condition dict of field names : values
+# return number of records deleted upon success
+#
+def pghdel(tablename, cnddict, logact = PGDBI['ERRLOG']):
+
+    global curtran
+    if not cnddict or isinstance(cnddict, int): PgLOG.pglog("Missing condition dict to delete from " + tablename, logact)
+    sqlstr = prepare_delete(tablename, None, list(cnddict))
+
+    values = tuple(cnddict.values())
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "Delete from {} for {}".format(tablename, values))
+
+    dcnt = pgcnt = 0
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            pgcur.execute(sqlstr, values)
+            dcnt = pgcur.rowcount
+            pgcur.close()
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, values, logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "pghdel: {} record(s) deleted from {}".format(dcnt, tablename))
+    if logact&PgLOG.ENDLCK:
+        endtran()
+    elif curtran:
+        curtran += dcnt
+        if curtran > PGDBI['MTRANS']: starttran()
+
+    return dcnt
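A minimal usage sketch of the delete entry points above (hypothetical table and values); both return the number of rows removed:

    n = PgDBI.pgdel('dssgrp', "logname = 'abc'")    # string condition
    n = PgDBI.pghdel('dssgrp', {'logname': 'abc'})  # condition dict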
+#
+# delete multiple records in tablename according to condition
+# tablename: delete from one table name per call
+# cnddicts: delete condition dict of field names : value lists
+# return number of records deleted upon success
+#
+def pgmdel(tablename, cnddicts, logact = PGDBI['ERRLOG']):
+
+    global curtran
+    if not cnddicts or isinstance(cnddicts, int): PgLOG.pglog("Missing condition dict to delete from " + tablename, logact)
+    sqlstr = prepare_delete(tablename, None, list(cnddicts))
+
+    v = cnddicts.values()
+    values = list(zip(*v))
+    if PgLOG.PGLOG['DBGLEVEL']:
+        for row in values:
+            PgLOG.pgdbg(1000, "Delete from {} for {}".format(tablename, row))
+
+    dcnt = pgcnt = 0
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            execute_batch(pgcur, sqlstr, values, page_size=PGDBI['PGSIZE'])
+            dcnt = len(values)
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, values[0], logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    pgcur.close()
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "pgmdel: {} record(s) deleted from {}".format(dcnt, tablename))
+    if logact&PgLOG.ENDLCK:
+        endtran()
+    elif curtran:
+        curtran += dcnt
+        if curtran > PGDBI['MTRANS']: starttran()
+
+    return dcnt
+
+#
+# sqlstr: a complete sql string
+# return number of records affected upon success
+#
+def pgexec(sqlstr, logact = PGDBI['ERRLOG']):
+
+    global curtran
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(100, sqlstr)
+
+    ret = pgcnt = 0
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            pgcur.execute(sqlstr)
+            ret = pgcur.rowcount
+            pgcur.close()
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, None, logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "pgexec: {} record(s) affected for {}".format(ret, sqlstr))
+    if logact&PgLOG.ENDLCK:
+        endtran()
+    elif curtran:
+        curtran += ret
+        if curtran > PGDBI['MTRANS']: starttran()
+
+    return ret
+
+#
+# tablename: name of the temporary table to create
+# fromtable: table name the data is gathered from
+# fields: comma delimited string of field names to copy
+# condition: query condition for the where clause
+# return number of records created upon success
+#
+def pgtemp(tablename, fromtable, fields, condition = None, logact = 0):
+
+    sqlstr = "CREATE TEMPORARY TABLE {} AS SELECT {} FROM {}".format(tablename, fields, fromtable)
+    if condition: sqlstr += " WHERE " + condition
+
+    return pgexec(sqlstr, logact)
+
+#
+# get the condition for a given table name when accessing information_schema
+#
+def table_condition(tablename):
+
+    ms = re.match(r'(.+)\.(.+)', tablename)
+    if ms:
+        scname = ms.group(1)
+        tbname = ms.group(2)
+    else:
+        scname = PGDBI['SCNAME']
+        tbname = tablename
+
+    return "table_schema = '{}' AND table_name = '{}'".format(scname, tbname)
+
+#
+# check if a given table name exists or not
+# tablename: one table name to check
+#
+def pgcheck(tablename, logact = 0):
+
+    condition = table_condition(tablename)
+
+    ret = pgget('information_schema.tables', None, condition, logact)
+    return (PgLOG.SUCCESS if ret else PgLOG.FAILURE)
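A short sketch combining `pgcheck` and `pgtemp`; the table and field names are hypothetical:

    if PgDBI.pgcheck('wfile'):   # source table exists in the current schema
        PgDBI.pgtemp('tmp_files', 'wfile', 'wid, wfile', "dsid = 'd083002'")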
+#
+# group of functions to check parent records and add an empty one if missing;
+# return user.uid upon success, 0 otherwise
+#
+def check_user_uid(userno, date = None):
+
+    if not userno: return 0
+    if type(userno) is str: userno = int(userno)
+
+    if date is None:
+        datecond = "until_date IS NULL"
+        date = 'today'
+    else:
+        datecond = "(start_date IS NULL OR start_date <= '{}') AND (until_date IS NULL OR until_date >= '{}')".format(date, date)
+
+    pgrec = pgget("dssdb.user", "uid", "userno = {} AND {}".format(userno, datecond), PGDBI['ERRLOG'])
+    if pgrec: return pgrec['uid']
+
+    if userno not in NMISSES:
+        PgLOG.pglog("{}: Scientist ID NOT on file for {}".format(userno, date), PgLOG.LGWNEM)
+        NMISSES.append(userno)
+
+    # check again if the user is on file with a different date range
+    pgrec = pgget("dssdb.user", "uid", "userno = {}".format(userno), PGDBI['ERRLOG'])
+    if pgrec: return pgrec['uid']
+
+    pgrec = ucar_user_info(userno)
+    if not pgrec: pgrec = {'userno' : userno, 'stat_flag' : 'M'}
+    uid = pgadd("dssdb.user", pgrec, (PGDBI['EXITLG']|PgLOG.AUTOID))
+    if uid: PgLOG.pglog("{}: Scientist ID Added as user.uid = {}".format(userno, uid), PgLOG.LGWNEM)
+
+    return uid
+
+#
+# return user.uid upon success, 0 otherwise
+#
+def get_user_uid(logname, date = None):
+
+    if not logname: return 0
+    if not date:
+        date = 'today'
+        datecond = "until_date IS NULL"
+    else:
+        datecond = "(start_date IS NULL OR start_date <= '{}') AND (until_date IS NULL OR until_date >= '{}')".format(date, date)
+
+    pgrec = pgget("dssdb.user", "uid", "logname = '{}' AND {}".format(logname, datecond), PGDBI['ERRLOG'])
+    if pgrec: return pgrec['uid']
+
+    if logname not in LMISSES:
+        PgLOG.pglog("{}: UCAR Login Name NOT on file for {}".format(logname, date), PgLOG.LGWNEM)
+        LMISSES.append(logname)
+
+    # check again if the user is on file with a different date range
+    pgrec = pgget("dssdb.user", "uid", "logname = '{}'".format(logname), PGDBI['ERRLOG'])
+    if pgrec: return pgrec['uid']
+
+    pgrec = ucar_user_info(0, logname)
+    if not pgrec: pgrec = {'logname' : logname, 'stat_flag' : 'M'}
+    uid = pgadd("dssdb.user", pgrec, (PGDBI['EXITLG']|PgLOG.AUTOID))
+    if uid: PgLOG.pglog("{}: UCAR Login Name Added as user.uid = {}".format(logname, uid), PgLOG.LGWNEM)
+
+    return uid
+#
+# get ucar user info for given userno (scientist number) or logname (UCAR login)
+#
+def ucar_user_info(userno, logname = None):
+
+    MATCH = {
+        'upid' : "upid",
+        'uid' : "userno",
+        'username' : "logname",
+        'lastName' : "lstname",
+        'firstName' : "fstname",
+        'active' : "stat_flag",
+        'internalOrg' : "division",
+        'externalOrg' : "org_name",
+        'country' : "country",
+        'forwardEmail' : "email",
+        'email' : "ucaremail",
+        'phone' : "phoneno"
+    }
+
+    buf = PgLOG.pgsystem("pgperson " + ("-uid {}".format(userno) if userno else "-username {}".format(logname)), PgLOG.LOGWRN, 20)
+    if not buf: return None
+
+    pgrec = {}
+    for line in buf.split('\n'):
+        ms = re.match(r'^(.+)<=>(.*)$', line)
+        if ms:
+            (key, val) = ms.groups()
+            if key in MATCH:
+                if key == 'upid' and 'upid' in pgrec: break   # get one record only
+                pgrec[MATCH[key]] = val
+
+    if not pgrec: return None
+
+    if userno:
+        pgrec['userno'] = userno
+    elif pgrec['userno']:
+        pgrec['userno'] = userno = int(pgrec['userno'])
+    if pgrec['upid']: pgrec['upid'] = int(pgrec['upid'])
+    if pgrec['stat_flag']: pgrec['stat_flag'] = 'A' if pgrec['stat_flag'] == '1' else 'C'
+    if pgrec['email'] and re.search(r'(@|\.)ucar\.edu$', pgrec['email'], re.I):
+        pgrec['email'] = pgrec['ucaremail']
+        pgrec['org_name'] = 'NCAR'
+    country = pgrec['country'] if 'country' in pgrec else None
+    pgrec['country'] = set_country_code(pgrec['email'], country)
+    if pgrec['division']:
+        val = "NCAR"
+    else:
+        val = None
+    pgrec['org_type'] = get_org_type(val, pgrec['email'])
+
+    buf = PgLOG.pgsystem("pgusername {}".format(pgrec['logname']), PgLOG.LOGWRN, 20)
+    if not buf: return pgrec
+
+    for line in buf.split('\n'):
+        ms = re.match(r'^(.+)<=>(.*)$', line)
+        if ms:
+            (key, val) = ms.groups()
+            if key == 'startDate':
+                m = re.match(r'^(\d+-\d+-\d+)\s', val)
+                if m:
+                    pgrec['start_date'] = m.group(1)
+                else:
+                    pgrec['start_date'] = val
+
+            if key == 'endDate':
+                m = re.match(r'^(\d+-\d+-\d+)\s', val)
+                if m:
+                    pgrec['until_date'] = m.group(1)
+                else:
+                    pgrec['until_date'] = val
+
+    return pgrec
+
+#
+# set country code for a given country name or email address
+#
+def set_country_code(email, country = None):
+
+    codes = {
+        'CHINA' : "P.R.CHINA",
+        'ENGLAND' : "UNITED.KINGDOM",
+        'FR' : "FRANCE",
+        'KOREA' : "SOUTH.KOREA",
+        'USSR' : "RUSSIA",
+        'US' : "UNITED.STATES",
+        'U.S.A.' : "UNITED.STATES"
+    }
+
+    if country:
+        country = country.upper()
+        ms = re.match(r'^(\w+)\s(\w+)$', country)
+        if ms:
+            country = ms.group(1) + '.' + ms.group(2)
+        elif country in codes:
+            country = codes[country]
+    else:
+        country = email_to_country(email)
+
+    return country
+
+# return wuser.wuid upon success, 0 otherwise
+def check_wuser_wuid(email, date = None):
+
+    if not email: return 0
+    emcond = "email = '{}'".format(email)
+    if not date:
+        date = 'today'
+        datecond = "until_date IS NULL"
+    else:
+        datecond = "(start_date IS NULL OR start_date <= '{}') AND (until_date IS NULL OR until_date >= '{}')".format(date, date)
+
+    pgrec = pgget("wuser", "wuid", "{} AND {}".format(emcond, datecond), PGDBI['ERRLOG'])
+    if pgrec: return pgrec['wuid']
+
+    # check again if the user is on file with a different date range
+    pgrec = pgget("wuser", "wuid", emcond, PgLOG.LOGERR)
+    if pgrec: return pgrec['wuid']
+
+    # now add one in
+    record = {'email' : email}
+    # check again if a ruser record is on file
+    pgrec = pgget("ruser", "*", emcond + " AND end_date IS NULL", PGDBI['ERRLOG'])
+    if not pgrec: pgrec = pgget("ruser", "*", emcond, PGDBI['ERRLOG'])
+
+    if pgrec:
+        record['ruid'] = pgrec['id']
+        record['fstname'] = pgrec['fname']
+        record['lstname'] = pgrec['lname']
+        record['country'] = pgrec['country']
+        record['org_type'] = get_org_type(pgrec['org_type'], pgrec['email'])
+        record['start_date'] = str(pgrec['rdate'])
+        if pgrec['end_date']:
+            record['until_date'] = str(pgrec['end_date'])
+            record['stat_flag'] = 'C'
+        else:
+            record['stat_flag'] = 'A'
+
+        if pgrec['title']: record['utitle'] = pgrec['title']
+        if pgrec['mname']: record['midinit'] = pgrec['mname'][0]
+        if pgrec['org']: record['org_name'] = pgrec['org']
+    else:
+        record['stat_flag'] = 'M'
+        record['org_type'] = get_org_type('', email)
+        record['country'] = email_to_country(email)
+
+    wuid = pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
+    if wuid:
+        if pgrec:
+            PgLOG.pglog("{}({}, {}) Added as wuid({})".format(email, pgrec['lname'], pgrec['fname'], wuid), PgLOG.LGWNEM)
+        else:
+            PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
+        return wuid
+
+    return 0
+
+# return wuser.wuid upon success, 0 otherwise
+def check_cdp_wuser(username):
+
+    pgrec = pgget("wuser", "wuid", "cdpname = '{}'".format(username), PGDBI['EXITLG'])
+    if pgrec: return pgrec['wuid']
+
+    # missing wuser record; add one in
+    pgrec = get_cdp_user(None, None, username)
+    if not pgrec:
+        if username not in LMISSES:
+            PgLOG.pglog("Missing CDP User '{}'".format(username), PgLOG.LGWNEM)
+            LMISSES.append(username)
+        return 0
+
+    idrec = pgget("wuser", "wuid", "email = '{}'".format(pgrec['email']), PGDBI['EXITLG'])
+    wuid = idrec['wuid'] if idrec else 0
+    if wuid > 0:
+        idrec = {}
+        idrec['cdpid'] = pgrec['cdpid']
+        idrec['cdpname'] = pgrec['cdpname']
+        pgupdt("wuser", idrec, "wuid = {}".format(wuid), PGDBI['EXITLG'])
+    else:
+        pgrec['stat_flag'] = 'A'
+        pgrec['org_type'] = get_org_type(pgrec['org_type'], pgrec['email'])
+        pgrec['country'] = email_to_country(pgrec['email'])
+        wuid = pgadd("wuser", pgrec, PGDBI['EXITLG']|PgLOG.AUTOID)
+        if wuid > 0:
+            PgLOG.pglog("CDP User {} added as wuid = {} in RDADB".format(username, wuid), PgLOG.LGWNEM)
+
+    return wuid
+#
+# for a given email address, get the long country name
+#
+def email_to_country(email):
+
+    ms = re.search(r'\.(\w\w)$', email)
+    if ms:
+        pgrec = pgget("countries", "token", "domain_id = '{}'".format(ms.group(1)), PGDBI['EXITLG'])
+        if pgrec: return pgrec['token']
+    elif re.search(r'\.(gov|edu|mil|org|com|net)$', email):
+        return "UNITED.STATES"
+
+    return "UNKNOWN"
+
+#
+# bump the version number in table dataset for a given dataset
+#
+def reset_rdadb_version(dsid):
+
+    pgexec("UPDATE dataset SET version = version + 1 WHERE dsid = '{}'".format(dsid), PGDBI['ERRLOG'])
+
+#
+# check the use_rdadb flag in table dataset for a given dataset and given values
+#
+def use_rdadb(dsid, logact = 0, vals = None):
+
+    ret = ''   # default to empty in case the dataset is not in RDADB
+    if dsid:
+        pgrec = pgget("dataset", "use_rdadb", "dsid = '{}'".format(dsid), PGDBI['EXITLG'])
+        if pgrec:
+            ret = 'N'   # default to 'N' if the dataset record is in RDADB already
+            if pgrec['use_rdadb']:
+                if not vals: vals = "IPYMW"   # default to Internal; Publishable; Yes RDADB
+                if vals.find(pgrec['use_rdadb']) > -1:
+                    ret = pgrec['use_rdadb']
+        elif logact:
+            PgLOG.pglog("Dataset '{}' is not in RDADB!".format(dsid), logact)
+
+    return ret
+
+#
+# fld: field name for the query condition
+# vals: list of values
+# isstr: 1 for string values, which require quotes and support wildcards
+# noand: 1 to skip the leading ' AND ' in the condition
+# return a condition string for the given field
+#
+def get_field_condition(fld, vals, isstr = 0, noand = 0):
+
+    cnd = wcnd = negative = ''
+    sign = "="
+    logic = " OR "
+    count = len(vals) if vals else 0
+    if count == 0: return ''
+    ncnt = scnt = wcnt = cnt = 0
+    for i in range(count):
+        val = vals[i]
+        if val is None or (i > 0 and val == vals[i-1]): continue
+        if i == 0 and val == PGSIGNS[0]:
+            negative = "NOT "
+            logic = " AND "
+            continue
+        if scnt == 0 and isinstance(val, str):
+            ms = re.match(r'^({})$'.format('|'.join(PGSIGNS[1:])), val)
+            if ms:
+                osign = sign = ms.group(1)
+                scnt += 1
+                if sign == "<>":
+                    scnt += 1
+                    sign = negative + "BETWEEN"
+                elif negative:
+                    sign = "<=" if (sign == ">") else ">="
+                continue
+        if isstr:
+            if not isinstance(val, str): val = str(val)
+            if sign == "=":
+                if not val:
+                    ncnt += 1   # found a null string
+                elif val.find('%') > -1:
+                    sign = negative + "LIKE"
+                elif re.search(r'[\[\(\?\.]', val):
+                    sign = negative + "SIMILAR TO"
+            if val.find("'") != 0:
+                val = "'{}'".format(val)
+        elif isinstance(val, str):
+            if val.find('.') > -1:
+                val = float(val)
+            else:
+                val = int(val)
+        if sign == "=":
+            if cnt > 0: cnd += ", "
+            cnd += str(val)
+            cnt += 1
+        else:
+            if sign == "AND":
+                wcnd += " {} {}".format(sign, val)
+            else:
+                if wcnt > 0: wcnd += logic
+                wcnd += "{} {} {}".format(fld, sign, val)
+                wcnt += 1
+            if re.search(r'BETWEEN$', sign):
+                sign = "AND"
+            else:
+                sign = "="
+                scnt = 0
+
+    if scnt > 0:
+        s = 's' if scnt > 1 else ''
+        PgLOG.pglog("Need {} value{} after sign '{}'".format(scnt, s, osign), PgLOG.LGEREX)
+    if wcnt > 1: wcnd = "({})".format(wcnd)
+    if cnt > 0:
+        if cnt > 1:
+            cnd = "{} {}IN ({})".format(fld, negative, cnd)
+        else:
+            cnd = "{} {} {}".format(fld, ("<>" if negative else "="), cnd)
+        if ncnt > 0:
+            ncnd = "{} IS {}NULL".format(fld, negative)
+            cnd = "({}{}{})".format(cnd, logic, ncnd)
+        if wcnt > 0: cnd = "({}{}{})".format(cnd, logic, wcnd)
+    elif wcnt > 0:
+        cnd = wcnd
+    if cnd and not noand: cnd = " AND " + cnd
+
+    return cnd
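A worked example based on a reading of the logic above, with hypothetical dsid values; plain string values are quoted and collapsed into an IN list, with the leading " AND " kept unless noand is set:

    PgDBI.get_field_condition('dsid', ['d083002', 'd084001'], 1)
    # -> " AND dsid IN ('d083002', 'd084001')"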
+ cnd = "{} {} {}".format(fld, ("<>" if negative else "="), cnd) + if ncnt > 0: + ncnd = "{} IS {}NULL".format(fld, negative) + cnd = "({}{}{})".format(cnd, logic, ncnd) + if wcnt > 0: cnd = "({}{}{})".format(cnd, logic, wcnd) + elif wcnt > 0: + cnd = wcnd + if cnd and not noand: cnd = " AND " + cnd + + return cnd + +# +# build up fieldname string for given or default condition +# +def fieldname_string(fnames, dnames = None, anames = None, wflds = None): + + if not fnames: + fnames = dnames # include default fields names + elif re.match(r'^all$', fnames, re.I): + fnames = anames # include all field names + + if not wflds: return fnames + + for wfld in wflds: + if not wfld or fnames.find(wfld) > -1: continue # empty field, or included already + if wfld == "Q": + pos = fnames.find("R") # request name + elif wfld == "Y": + pos = fnames.find("X") # parent group name + elif wfld == "G": + pos = fnames.find("I") # group name + else: + pos = -1 # prepend other with-field names + + if pos == -1: + fnames = wfld + fnames # prepend with-field + else: + fnames = fnames[0:pos] + wfld + fnames[pos:] # insert with-field + + return fnames + +# +# Function get_group_field_path(gindex: group index +# dsid: dataset id +# field: path field name: webpath or savedpath) +# go through group tree upward to find a none-empty path, return it or null +# +def get_group_field_path(gindex, dsid, field): + + if gindex: + pgrec = pgget("dsgroup", "pindex, {}".format(field), + "dsid = '{}' AND gindex = {}".format(dsid, gindex), PGDBI['EXITLG']) + else: + pgrec = pgget("dataset", field, + "dsid = '{}'".format(dsid), PGDBI['EXITLG']) + if pgrec: + if pgrec[field]: + return pgrec[field] + elif gindex: + return get_group_field_path(pgrec['pindex'], dsid, field) + else: + return None + +# +# get the specialist info for a given dataset +# +def get_specialist(dsid, logact = PGDBI['ERRLOG']): + + if dsid in SPECIALIST: return SPECIALIST['dsid'] + + pgrec = pgget("dsowner, dssgrp", "specialist, lstname, fstname", + "specialist = logname AND dsid = '{}' AND priority = 1".format(dsid), logact) + if pgrec: + if pgrec['specialist'] == "datahelp" or pgrec['specialist'] == "dss": + pgrec['lstname'] = "Help" + pgrec['fstname'] = "Data" + else: + pgrec['specialist'] = "datahelp" + pgrec['lstname'] = "Help" + pgrec['fstname'] = "Data" + + SPECIALIST['dsid'] = pgrec # cache specialist info for dsowner of dsid + return pgrec + +# +# build customized email from get_email() +# +def build_customized_email(table, field, condition, subject, logact = 0): + + msg = PgLOG.get_email() + + if not msg: return PgLOG.FAILURE + + sender = PgLOG.PGLOG['CURUID'] + "@ucar.edu" + receiver = PgLOG.PGLOG['EMLADDR'] if PgLOG.PGLOG['EMLADDR'] else (PgLOG.PGLOG['CURUID'] + "@ucar.edu") + if receiver.find(sender) < 0: PgLOG.add_carbon_copy(sender, 1) + ebuf = "From: {}\nTo: {}\n".format(sender, receiver) + if PgLOG.PGLOG['CCDADDR']: ebuf += "Cc: {}\n".format(PgLOG.PGLOG['CCDADDR']) + if not subject: subject = "Message from {}-{}".format(PgLOG.PGLOG['HOSTNAME'], PgLOG.get_command()) + ebuf += "Subject: {}!\n\n{}\n".format(subject, msg) + + estat = cache_customized_email(table, field, condition, ebuf, logact) + if estat and logact: + PgLOG.pglog("Email {} cached to '{}.{}' for {}, Subject: {}".format(receiver, table, field, condition, subject), logact) + + return estat + +# +# email: full user email address +# +# get user real name from table ruser for a given email address +# opts == 1 : include email +# opts == 2 : include org_type +# opts == 4 : include country +# 
+#
+# email: full user email address
+#
+# get user real name from table ruser for a given email address
+# opts == 1 : include email
+# opts == 2 : include org_type
+# opts == 4 : include country
+# opts == 8 : include valid_email
+# opts == 16 : include org
+#
+def get_ruser_names(email, opts = 0, date = None):
+
+    fields = "lname lstname, fname fstname"
+
+    if opts&1: fields += ", email"
+    if opts&2: fields += ", org_type"
+    if opts&4: fields += ", country"
+    if opts&8: fields += ", valid_email"
+    if opts&16: fields += ", org"
+
+    if date:
+        datecond = "rdate <= '{}' AND (end_date IS NULL OR end_date >= '{}')".format(date, date)
+    else:
+        datecond = "end_date IS NULL"
+        date = time.strftime("%Y-%m-%d", (time.gmtime() if PgLOG.PGLOG['GMTZ'] else time.localtime()))
+    emcnd = "email = '{}'".format(email)
+    pgrec = pgget("ruser", fields, "{} AND {}".format(emcnd, datecond), PgLOG.LGEREX)
+    if not pgrec:   # missing user record; check further
+        PgLOG.pglog("{}: email not in ruser for {}".format(email, date), PgLOG.LOGWRN)
+        # check again if a user is on file with a different date range
+        pgrec = pgget("ruser", fields, emcnd, PgLOG.LGEREX)
+        if not pgrec and pgget("dssdb.user", '', emcnd):
+            fields = "lstname, fstname"
+            if opts&1: fields += ", email"
+            if opts&2: fields += ", org_type"
+            if opts&4: fields += ", country"
+            if opts&8: fields += ", email valid_email"
+            if opts&16: fields += ", org_name org"
+            pgrec = pgget("dssdb.user", fields, emcnd, PgLOG.LGEREX)
+
+    if pgrec and pgrec['lstname']:
+        pgrec['name'] = (pgrec['fstname'].capitalize() + ' ') if pgrec['fstname'] else ''
+        pgrec['name'] += pgrec['lstname'].capitalize()
+    else:
+        if not pgrec: pgrec = {}
+        pgrec['name'] = email.split('@')[0]
+        if opts&1: pgrec['email'] = email
+
+    return pgrec
+
+#
+# cache a customized email for sending later
+#
+def cache_customized_email(table, field, condition, emlmsg, logact = 0):
+
+    pgrec = {field: emlmsg}
+    if pgupdt(table, pgrec, condition, logact|PgLOG.ERRLOG):
+        if logact: PgLOG.pglog("Email cached to '{}.{}' for {}".format(table, field, condition), logact&(~PgLOG.EXITLG))
+        return PgLOG.SUCCESS
+    else:
+        msg = "cache email to '{}.{}' for {}".format(table, field, condition)
+        PgLOG.pglog("Error {}, try to send directly now".format(msg), logact|PgLOG.ERRLOG)
+        return PgLOG.send_customized_email(msg, emlmsg, logact)
+
+#
+# otype: user organization type
+# email: user email address
+#
+# return: organization type, like DSS, NCAR, UNIV ...
+#
+def get_org_type(otype, email):
+
+    if not otype: otype = "OTHER"
+    if email:
+        ms = re.search(r'(@|\.)ucar\.edu$', email)
+        if ms:
+            mc = ms.group(1)
+            if otype == 'UCAR' or otype == 'OTHER': otype = 'NCAR'
+            if otype == 'NCAR' and mc == '@':
+                ms = re.match(r'^(.+)@', email)
+                if ms and pgget("dssgrp", "", "logname = '{}'".format(ms.group(1))): otype = 'DSS'
+        else:
+            ms = re.search(r'\.(mil|org|gov|edu|com|net)(\.\w\w|$)', email)
+            if ms:
+                otype = ms.group(1).upper()
+                if otype == 'EDU': otype = "UNIV"
+
+    return otype
+
+#
+# join values and handle the null values
+#
+def join_values(vstr, vals):
+
+    if vstr:
+        vstr += "\n"
+    elif vstr is None:
+        vstr = ''
+
+    return "{}Value{}({})".format(vstr, ('s' if len(vals) > 1 else ''), ', '.join(map(str, vals)))
+
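+#
+# Illustrative sketch (an editorial addition, not part of the original patch);
+# the email addresses are hypothetical:
+#
+# get_org_type(None, 'jdoe@colostate.edu')   # 'UNIV'
+# get_org_type('OTHER', 'jdoe@ucar.edu')     # 'NCAR', or 'DSS' if 'jdoe' is in table dssgrp
+# get_org_type('GOV', 'jdoe@noaa.gov')       # 'GOV'
+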
+#
+# check table hostname to find the system down times; cache the result for 10 minutes
+#
+def get_system_downs(hostname, logact = 0):
+
+    curtime = int(time.time())
+    newhost = 0
+
+    if hostname not in SYSDOWN:
+        SYSDOWN[hostname] = {}
+        newhost = 1
+    if newhost or (curtime - SYSDOWN[hostname]['chktime']) > 600:
+        SYSDOWN[hostname]['chktime'] = curtime
+        SYSDOWN[hostname]['start'] = 0
+        SYSDOWN[hostname]['end'] = 0
+        SYSDOWN[hostname]['active'] = 1
+        SYSDOWN[hostname]['path'] = None
+
+        pgrec = pgget('hostname', 'service, domain, downstart, downend',
+                      "hostname = '{}'".format(hostname), logact)
+        if pgrec:
+            if pgrec['service'] == 'N':
+                SYSDOWN[hostname]['start'] = curtime
+                SYSDOWN[hostname]['active'] = 0
+            else:
+                start = int(datetime.timestamp(pgrec['downstart'])) if pgrec['downstart'] else 0
+                end = int(datetime.timestamp(pgrec['downend'])) if pgrec['downend'] else 0
+                if start > 0 and (end == 0 or end > curtime):
+                    SYSDOWN[hostname]['start'] = start
+                    SYSDOWN[hostname]['end'] = end
+                if pgrec['service'] == 'S' and pgrec['domain'] and re.match(r'^/', pgrec['domain']):
+                    SYSDOWN[hostname]['path'] = pgrec['domain']
+
+    SYSDOWN[hostname]['curtime'] = curtime
+
+    return SYSDOWN[hostname]
+
+#
+# return seconds for how long the system will continue to be down
+#
+def system_down_time(hostname, offset, logact = 0):
+
+    down = get_system_downs(hostname, logact)
+    if down['start'] and down['curtime'] >= (down['start'] - offset):
+        if not down['end']:
+            if PgLOG.PGLOG['PGBATCH'] == PgLOG.PGLOG['PBSNAME']:
+                return PgLOG.PGLOG['PBSTIME']
+        elif down['curtime'] <= down['end']:
+            return (down['end'] - down['curtime'])
+
+    return 0   # the system is not down
+
+#
+# return a string message if the system is down
+#
+def system_down_message(hostname, path, offset, logact = 0):
+
+    down = get_system_downs(hostname, logact)
+    msg = None
+    if down['start'] and down['curtime'] >= (down['start'] - offset):
+        match = match_down_path(path, down['path'])
+        if match:
+            msg = "{}{}:".format(hostname, ('-' + path) if match > 0 else '')
+            if not down['active']:
+                msg += " Not in Service"
+            else:
+                msg += " Planned down, started at " + PgLOG.current_datetime(down['start'])
+                if not down['end']:
+                    msg += " And no end time specified"
+                elif down['curtime'] <= down['end']:
+                    msg += " And will end by " + PgLOG.current_datetime(down['end'])
+
+    return msg
+
+#
+# return 1 if the given path matches the down paths, 0 if not; -1 if they cannot be compared
+#
+def match_down_path(path, dpaths):
+
+    if not (path and dpaths): return -1
+
+    paths = re.split(':', dpaths)
+
+    for p in paths:
+        if re.match(r'^{}'.format(p), path): return 1
+
+    return 0
+
+# validate that the login user is in the DECS group;
+# check all nodes if skpdsg is false, otherwise check only non-DSG nodes
+def validate_decs_group(cmdname, logname, skpdsg):
+
+    if skpdsg and PgLOG.PGLOG['DSGHOSTS'] and re.search(r'(^|:){}'.format(PgLOG.PGLOG['HOSTNAME']), PgLOG.PGLOG['DSGHOSTS']): return
+    if not logname: logname = PgLOG.PGLOG['CURUID']
+
+    if not pgget("dssgrp", '', "logname = '{}'".format(logname), PgLOG.LGEREX):
+        PgLOG.pglog("{}: Must be in DECS Group to run '{}' on {}".format(logname, cmdname, PgLOG.PGLOG['HOSTNAME']), PgLOG.LGEREX)
+
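+#
+# Illustrative usage sketch (an editorial addition, not part of the original
+# patch): waiting out a recorded downtime; the hostname, path and the
+# 300-second offset are hypothetical.
+#
+def _example_wait_for_host(hostname):
+
+    dtime = system_down_time(hostname, 300, PgLOG.LOGWRN)
+    if dtime > 0: time.sleep(dtime)   # sleep through the recorded down period
+    return system_down_message(hostname, '/data', 300, PgLOG.LOGWRN)
+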
+#
+# add an allusage record into a yearly table; create a new yearly table if it does not exist
+# year -- year to identify the yearly table, evaluated if missing
+# records -- dict holding one or multiple records.
+#    Dict keys: email -- user email address,
+#               org_type -- organization type
+#               country -- country code
+#               dsid -- dataset ID
+#               date -- date data accessed
+#               time -- time data accessed
+#               quarter -- quarter of the year data accessed
+#               size -- bytes of data accessed
+#               method -- delivery methods: MSS,Web,Ftp,Tape,Cd,Disk,Paper,cArt,Micro
+#               source -- usage source flag: W - wusage, O - ordusage
+#               midx -- refer to mbr2loc.midx if not 0
+#               ip -- user IP address
+#               region -- user region name; for example, Colorado
+#
+# isarray -- if true, multiple records are provided via arrays for each dict key
+# docheck -- if 1, check and add only if the record is not on file
+# docheck -- if 2, check and add if the record is not on file, and update if it exists
+# docheck -- if 4, check and add if the record is not on file, and update if it exists,
+#            also checking NULL email values
+#
+def add_yearly_allusage(year, records, isarray = 0, docheck = 0):
+
+    acnt = 0
+    if not year:
+        ms = re.match(r'^(\d\d\d\d)', str(records['date'][0] if isarray else records['date']))
+        if ms: year = ms.group(1)
+    tname = "allusage_{}".format(year)
+    if isarray:
+        cnt = len(records['email'])
+        if 'quarter' not in records: records['quarter'] = [0]*cnt
+        for i in range(cnt):
+            if not records['quarter'][i]:
+                ms = re.search(r'-(\d+)-', str(records['date'][i]))
+                if ms: records['quarter'][i] = int((int(ms.group(1))-1)/3)+1
+        if docheck:
+            for i in range(cnt):
+                record = {}
+                for key in records:
+                    record[key] = records[key][i]
+                cnd = "email = '{}' AND dsid = '{}' AND method = '{}' AND date = '{}' AND time = '{}'".format(
+                    record['email'], record['dsid'], record['method'], record['date'], record['time'])
+                pgrec = pgget(tname, 'aidx', cnd, PgLOG.LOGERR|PgLOG.ADDTBL)
+                if docheck == 4 and not pgrec:
+                    cnd = "email IS NULL AND dsid = '{}' AND method = '{}' AND date = '{}' AND time = '{}'".format(
+                        record['dsid'], record['method'], record['date'], record['time'])
+                    pgrec = pgget(tname, 'aidx', cnd, PgLOG.LOGERR|PgLOG.ADDTBL)
+                if pgrec:
+                    if docheck > 1: acnt += pgupdt(tname, record, "aidx = {}".format(pgrec['aidx']), PgLOG.LGEREX)
+                else:
+                    acnt += pgadd(tname, record, PgLOG.LGEREX|PgLOG.ADDTBL)
+        else:
+            acnt = pgmadd(tname, records, PgLOG.LGEREX|PgLOG.ADDTBL)
+    else:
+        record = records
+        if not ('quarter' in record and record['quarter']):
+            ms = re.search(r'-(\d+)-', str(record['date']))
+            if ms: record['quarter'] = int((int(ms.group(1))-1)/3)+1
+        if docheck:
+            cnd = "email = '{}' AND dsid = '{}' AND method = '{}' AND date = '{}' AND time = '{}'".format(
+                record['email'], record['dsid'], record['method'], record['date'], record['time'])
+            pgrec = pgget(tname, 'aidx', cnd, PgLOG.LOGERR|PgLOG.ADDTBL)
+            if docheck == 4 and not pgrec:
+                cnd = "email IS NULL AND dsid = '{}' AND method = '{}' AND date = '{}' AND time = '{}'".format(
+                    record['dsid'], record['method'], record['date'], record['time'])
+                pgrec = pgget(tname, 'aidx', cnd, PgLOG.LOGERR|PgLOG.ADDTBL)
+            if pgrec:
+                if docheck > 1: acnt = pgupdt(tname, record, "aidx = {}".format(pgrec['aidx']), PgLOG.LGEREX)
+                return acnt
+        acnt = pgadd(tname, record, PgLOG.LGEREX|PgLOG.ADDTBL)
+
+    return acnt
+
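+#
+# Illustrative usage sketch (an editorial addition, not part of the original
+# patch): adding a single usage record; the field values are hypothetical.
+# The year and quarter are evaluated from record['date'] when omitted.
+#
+def _example_add_allusage():
+
+    record = {'email' : 'jdoe@univ.edu', 'org_type' : 'UNIV', 'country' : 'UNITED.STATES',
+              'dsid' : 'ds540.0', 'date' : '2023-02-15', 'time' : '10:20:30',
+              'size' : 123456789, 'method' : 'WEB', 'source' : 'W'}
+    return add_yearly_allusage(None, record, 0, 1)
+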
+#
+# add a wusage record into a yearly table; create a new yearly table if it does not exist
+# year -- year to identify the yearly table, evaluated if missing
+# records -- dict holding one or multiple records.
+#    Dict keys: wid - reference to wfile.wid
+#               wuid_read - reference to wuser.wuid, 0 if missing email
+#               dsid - reference to dataset.dsid at the time of read
+#               date_read - date file read
+#               time_read - time file read
+#               quarter - quarter of the year data accessed
+#               size_read - bytes of data read
+#               method - download methods: WEB, CURL, MGET and FTP
+#               locflag - location flag: Glade or Object
+#               ip - IP address
+#
+# isarray -- if true, multiple records are provided via arrays for each dict key
+#
+def add_yearly_wusage(year, records, isarray = 0):
+
+    acnt = 0
+    if not year:
+        ms = re.match(r'^(\d\d\d\d)', str(records['date_read'][0] if isarray else records['date_read']))
+        if ms: year = ms.group(1)
+    tname = "wusage_{}".format(year)
+    if isarray:
+        if 'quarter' not in records:
+            cnt = len(records['wid'])
+            records['quarter'] = [0]*cnt
+            for i in range(cnt):
+                ms = re.search(r'-(\d+)-', str(records['date_read'][i]))
+                if ms: records['quarter'][i] = (int((int(ms.group(1))-1)/3)+1)
+        acnt = pgmadd(tname, records, PgLOG.LGEREX|PgLOG.ADDTBL)
+    else:
+        record = records
+        if 'quarter' not in record:
+            ms = re.search(r'-(\d+)-', str(record['date_read']))
+            if ms: record['quarter'] = (int((int(ms.group(1))-1)/3)+1)
+        acnt = pgadd(tname, record, PgLOG.LGEREX|PgLOG.ADDTBL)
+
+    return acnt
+
+#
+# double quote an array of single or sign-delimited strings
+#
+def pgnames(ary, sign = None, joinstr = None):
+
+    pgary = []
+    for a in ary:
+        pgary.append(pgname(a, sign))
+
+    if joinstr is None:
+        return pgary
+    else:
+        return joinstr.join(pgary)
+
+#
+# double quote a single or sign-delimited string
+#
+def pgname(str, sign = None):
+
+    if sign:
+        nstr = ''
+        names = str.split(sign[0])
+        for name in names:
+            if nstr: nstr += sign[0]
+            nstr += pgname(name, sign[1:])
+    else:
+        nstr = str.strip()
+        if nstr and nstr.find('"') < 0:
+            if not re.match(r'^[a-z_][a-z0-9_]*$', nstr) or nstr in PGRES:
+                nstr = '"{}"'.format(nstr)
+
+    return nstr
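+
+#
+# Illustrative sketch (an editorial addition, not part of the original patch):
+# pgname()/pgnames() only quote identifiers that need it:
+#
+# pgname('end')                            # '"end"', since 'end' is in PGRES
+# pgname('Wfile')                          # '"Wfile"', mixed case needs quoting
+# pgnames(['window', 'wid'], None, ', ')   # '"window", wid'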
+""" + +import logging +import logging.handlers +import os, sys + +from rda_ispd_python.ispddb import FillISPD + +#========================================================================================= +def main(args): + + add_inventory = args.addinventory + lead_uid = args.leaduid + check_existing = args.checkexisting + + fill_ispd = FillISPD(add_inventory=add_inventory, lead_uid=lead_uid, check_existing=check_existing) + fill_ispd.initialize_db() + fill_ispd.get_input_files(args.files) + fill_ispd.initialize_indices() + fill_ispd.fill_ispd_data() + fill_ispd.close_db() + +#========================================================================================= +def configure_log(**kwargs): + """ Congigure logging """ + logpath = '/glade/scratch/tcram/logs/ispd/' + file = os.path.basename(__file__) + logfile = '{}/{}.log'.format(logpath, os.path.splitext(file)[0]) + + if 'loglevel' in kwargs: + loglevel = kwargs['loglevel'] + else: + loglevel = 'info' + + level = getattr(logging, loglevel.upper()) + format = '%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(message)s' + logging.basicConfig(filename=logfile, level=level, format=format) + + return + +#========================================================================================= +def parse_opts(): + """ Parse command line arguments """ + import argparse + import textwrap + + desc = "Read ISPD records from pre-processed ASCII data files and store information in ISPDDB." + epilog = textwrap.dedent('''\ + Example: + - Read the ISPD records from ispd_v4_1950-01.txt and store the information in ISPDDB: + fill_ispddb.py -i -e ispd_v4_1950-01.txt + ''') + + parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, description=desc, epilog=textwrap.dedent(epilog)) + parser.add_argument('files', nargs="+", help="Input ISPD file names (ASCII format). A minimum of one file name is required.") + parser.add_argument('-i', '--addinventory', action="store_true", default="False", help='Add daily counting records into inventory table.') + parser.add_argument('-u', '--leaduid', action="store_true", default="False", help='Standalone attachment records with leading 6-character UID.') + parser.add_argument('-e', '--checkexisting', action="store_true", default="False", help='Check for existing record before adding record to DB.') + parser.add_argument('-l', '--loglevel', default="info", choices=['debug', 'info', 'warning', 'error', 'critical'], help='Set the logging level. 
+#=========================================================================================
+def parse_opts():
+    """ Parse command line arguments """
+    import argparse
+    import textwrap
+
+    desc = "Read ISPD records from pre-processed ASCII data files and store information in ISPDDB."
+    epilog = textwrap.dedent('''\
+    Example:
+      - Read the ISPD records from ispd_v4_1950-01.txt and store the information in ISPDDB:
+          fill_ispddb.py -i -e ispd_v4_1950-01.txt
+    ''')
+
+    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, description=desc, epilog=textwrap.dedent(epilog))
+    parser.add_argument('files', nargs="+", help="Input ISPD file names (ASCII format). A minimum of one file name is required.")
+    parser.add_argument('-i', '--addinventory', action="store_true", default=False, help='Add daily counting records into inventory table.')
+    parser.add_argument('-u', '--leaduid', action="store_true", default=False, help='Standalone attachment records with leading 6-character UID.')
+    parser.add_argument('-e', '--checkexisting', action="store_true", default=False, help='Check for existing record before adding record to DB.')
+    parser.add_argument('-l', '--loglevel', default="info", choices=['debug', 'info', 'warning', 'error', 'critical'], help='Set the logging level. Default = info.')
+
+    if len(sys.argv) == 1:
+        parser.print_help()
+        sys.exit(1)
+
+    args = parser.parse_args(sys.argv[1:])
+
+    return args
+
+#=========================================================================================
+
+if __name__ == "__main__":
+    args = parse_opts()
+    configure_log(loglevel=args.loglevel)
+    logger = logging.getLogger(__name__)
+    main(args)
\ No newline at end of file
diff --git a/src/rda_python_common/PgCMD.py b/src/rda_python_common/PgCMD.py
new file mode 100644
index 0000000..225eb0f
--- /dev/null
+++ b/src/rda_python_common/PgCMD.py
@@ -0,0 +1,599 @@
+#
+###############################################################################
+#
+# Title : PgCMD.py
+# Author : Zaihua Ji, zji@ucar.edu
+# Date : 08/25/2020
+# Purpose : python library module for functions to record commands for delayed
+#           mode or command recovery
+#
+# Work File : $DSSHOME/lib/python/PgCMD.py
+# Github : https://github.com/NCAR/rda-shared-libraries.git
+#
+###############################################################################
+#
+import os
+import re
+import sys
+import time
+import PgLOG
+import PgSIG
+import PgUtil
+import PgLock
+import PgDBI
+
+# cached dscheck info
+DSCHK = {}
+BOPTIONS = {"hostname" : None, "qoptions" : None, "modules" : None, "environments" : None}
+BFIELDS = ', '.join(BOPTIONS)
+
+TRYLMTS = {
+    'dsquasar' : 3,
+    'dsarch' : 2,
+    'default' : 1
+}
+
+DLYPTN = r'(^|\s)-(d|BP|BatchProcess|DelayedMode)(\s|$)'
+DLYOPT = {
+    'dsarch' : ' -d',
+    'dsupdt' : ' -d',
+    'dsrqst' : ' -d'
+}
+#
+# params: dict array holding option values
+# opt: 2 - each value of the dict array is a list; otherwise 1
+# addhost: 1 to add the host name too
+# initially set the Batch options passed in from the command line
+#
+def set_batch_options(params, opt, addhost = 0):
+
+    if 'QS' in params: BOPTIONS['qoptions'] = (params['QS'][0] if opt == 2 else params['QS'])
+    if 'MO' in params: BOPTIONS['modules'] = (params['MO'][0] if opt == 2 else params['MO'])
+    if 'EV' in params: BOPTIONS['environments'] = (params['EV'][0] if opt == 2 else params['EV'])
+    if addhost and 'HN' in params: BOPTIONS['hostname'] = (params['HN'][0] if opt == 2 else params['HN'])
+
+#
+# boptions: dict array holding batch options
+# refresh: 1 to clean the previously cached global batch options
+# checkkey: 1 to check and validate against the pre-defined fields
+#
+# fill Batch options recorded in RDADB
+#
+def fill_batch_options(boptions, refresh = 0, checkkey = 0):
+
+    if refresh:
+        for bkey in BOPTIONS:
+            BOPTIONS[bkey] = None   # clean the hash before filling it up
+
+    if not boptions: return
+    for bkey in boptions:
+        if not checkkey or bkey in BOPTIONS:
+            BOPTIONS[bkey] = boptions[bkey]
+
+#
+# bkey: batch option field name
+# bval: batch option value
+# override: 1 to override an existing option
+#
+# fill a single Batch option
+#
+def set_one_boption(bkey, bval, override = 0):
+
+    if bval:
+        if override or not (bkey in BOPTIONS and BOPTIONS[bkey]): BOPTIONS[bkey] = bval
+    elif override and bkey in BOPTIONS and BOPTIONS[bkey]:
+        BOPTIONS[bkey] = None
+
+#
+# fill the passed-in dict record with the pre-saved batch options
+#
+def get_batch_options(pgrec = None):
+
+    record = {}
+    for bkey in BOPTIONS:
+        if pgrec and bkey in pgrec and pgrec[bkey]:
+            record[bkey] = pgrec[bkey]
+        elif BOPTIONS[bkey]:
+            record[bkey] = BOPTIONS[bkey]
+
+    return record
+
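+#
+# Illustrative usage sketch (an editorial addition, not part of the original
+# patch): caching command-line batch options and merging them with a record;
+# the option values are hypothetical.
+#
+def _example_batch_options():
+
+    set_batch_options({'QS' : ['walltime=3600'], 'MO' : ['conda']}, 2)
+    return get_batch_options({'hostname' : 'casper'})
+    # {'hostname' : 'casper', 'qoptions' : 'walltime=3600', 'modules' : 'conda'}
+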
+#
+# return the delayed-mode option to append to the argv string for a specified cmd
+#
+def append_delayed_mode(cmd, argv):
+
+    if cmd in DLYOPT and not re.search(DLYPTN, argv, re.I):
+        return DLYOPT[cmd]
+    else:
+        return ''
+
+#
+# check the given doptions and cmd, and return the try limit and specified hosts
+#
+def get_delay_options(doptions, cmd):
+
+    mcount = 0
+    hosts = None
+
+    if doptions:
+        for bval in doptions:
+            if re.match(r'^(\d+)$', bval):
+                mcount = int(bval)
+                if mcount > 99: mcount = 99
+            else:
+                hosts = bval
+
+    if mcount == 0: mcount = get_try_limit(cmd)
+    if hosts: set_one_boption('hostname', hosts, 1)
+
+    return (mcount, hosts)
+
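+#
+# Illustrative sketch (an editorial addition, not part of the original patch):
+# the '-d' flag is appended only when the command supports delayed mode and
+# the option is not already present:
+#
+# append_delayed_mode('dsarch', 'AW ds540.0 -WF test.txt')   # ' -d'
+# append_delayed_mode('dsarch', 'AW ds540.0 -d')             # '', flag already there
+# append_delayed_mode('dsstat', 'ds540.0')                   # '', no delayed mode support
+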
+#
+# find an existing dscheck record from the cached command argument; create and initialize one if none exists
+#
+def init_dscheck(oindex, otype, cmd, dsid, action, workdir = None, specialist = None, doptions = None, logact = 0):
+
+    cidx = 0
+    argv = PgLOG.argv_to_string(sys.argv[1:], 0, "Process in Delayed Mode")
+    argextra = None
+
+    if not logact: logact = PgLOG.LGEREX
+    if not workdir: workdir = os.getcwd()
+    if not specialist: specialist = PgLOG.PGLOG['CURUID']
+
+    (mcount, hosts) = get_delay_options(doptions, cmd)
+
+    if len(argv) > 100:
+        argextra = argv[100:]
+        argv = argv[0:100]
+
+    bck = PgLOG.PGLOG['BCKGRND']
+    PgLOG.PGLOG['BCKGRND'] = 0
+    cinfo = "{}-{}-Chk".format(PgLOG.PGLOG['HOSTNAME'], PgLOG.current_datetime())
+    pgrec = get_dscheck(cmd, argv, workdir, specialist, argextra, logact)
+    if pgrec:   # found existing dscheck record
+        cidx = pgrec['cindex']
+        cmsg = "{}{}: {} batch process ".format(cinfo, cidx, get_command_info(pgrec))
+        cidx = PgLock.lock_dscheck(cidx, 1, PgLOG.LOGWRN)
+        if cidx < 0:
+            PgLOG.pglog(cmsg + "is Running, No restart", PgLOG.LOGWRN)
+            sys.exit(0)
+        if cidx > 0:
+            if not hosts and pgrec['hostname']:
+                hosts = pgrec['hostname']
+                set_one_boption('hostname', hosts, 0)
+            if mcount: pgrec['mcount'] = mcount
+            DSCHK['chkcnd'] = "cindex = {}".format(cidx)
+            if(pgrec['status'] == 'D' or pgrec['fcount'] and pgrec['dcount'] >= pgrec['fcount'] or
+               pgrec['tcount'] > pgrec['mcount'] or not pgrec['pid'] and pgrec['tcount'] == pgrec['mcount']):
+                PgLOG.pglog("{}is {}".format(cmsg, ('Done' if pgrec['status'] == 'D' else 'Finished')), PgLOG.LOGWRN)
+                PgLock.lock_dscheck(cidx, 0, logact)
+                sys.exit(0)
+
+    if not cidx:   # add new dscheck record
+        record = {}
+        if hosts and re.match(r'^(ds\d|\d)\d\d.\d$', hosts):
+            PgLOG.pglog(hosts + ": Cannot pass DSID for hostname to submit batch process", PgLOG.LGEREX)
+        if oindex: set_command_control(oindex, otype, cmd, logact)
+        record['oindex'] = oindex
+        record['dsid'] = dsid
+        record['action'] = action
+        record['otype'] = otype
+        (record['date'], record['time']) = PgUtil.get_date_time()
+        record['command'] = cmd
+        record['argv'] = argv
+        if mcount > 0: record['mcount'] = mcount
+        record['specialist'] = specialist
+        record['workdir'] = workdir
+        if argextra: record['argextra'] = argextra
+        record.update(get_batch_options())
+        cidx = PgDBI.pgadd("dscheck", record, logact|PgLOG.AUTOID)
+        if cidx:
+            cmsg = "{}{}: {} Adds a new check".format(cinfo, cidx, get_command_info(record))
+            PgLOG.pglog(cmsg, PgLOG.LOGWRN)
+            sys.exit(0)
+
+    (chost, cpid) = PgLOG.current_process_info()
+    (rhost, rpid) = PgLOG.current_process_info(1)
+
+    if not check_command_specialist_host(hosts, chost, specialist, cmd, action, PgLOG.LOGERR):
+        PgLock.lock_dscheck(cidx, 0, logact)
+        sys.exit(1)
+
+    record = {}
+    record['status'] = "R"
+    if mcount > 0: record['mcount'] = mcount
+    record['bid'] = (cpid if PgLOG.PGLOG['CURBID'] else 0)
+    if pgrec['stttime'] and pgrec['chktime'] > pgrec['stttime']:
+        (record['ttltime'], record['quetime']) = get_dscheck_runtime(pgrec)
+    record['chktime'] = record['stttime'] = int(time.time())
+    if not pgrec['subtime']: record['subtime'] = record['stttime']
+    if dsid and not pgrec['dsid']: record['dsid'] = dsid
+    if action and not pgrec['action']: record['action'] = action
+    if oindex and not pgrec['oindex']: record['oindex'] = oindex
+    if otype and not pgrec['otype']: record['otype'] = otype
+    if argv and not pgrec['argv']: record['argv'] = argv
+    record['runhost'] = rhost
+    if pgrec['command'] == "dsrqst" and pgrec['oindex']:
+        (record['fcount'], record['dcount'], record['size']) = get_dsrqst_counts(pgrec, logact)
+    PgDBI.pgupdt("dscheck", record, DSCHK['chkcnd'], logact)
+
+    DSCHK['dcount'] = pgrec['dcount']
+    DSCHK['fcount'] = pgrec['fcount']
+    DSCHK['size'] = pgrec['size']
+    DSCHK['cindex'] = cidx
+    DSCHK['dflags'] = pgrec['dflags']
+    PgLOG.PGLOG['DSCHECK'] = DSCHK   # add global access link
+    if not PgLOG.PGLOG['BCKGRND']: PgLOG.PGLOG['BCKGRND'] = 1   # turn off screen output if not yet
+    tcnt = pgrec['tcount']
+    if not pgrec['pid']: tcnt += 1
+    tstr = "the {} run".format(PgLOG.int2order(tcnt)) if tcnt > 1 else "running"
+    pstr = "{}<{}>".format(chost, cpid)
+    if rhost != chost: pstr += "/{}<{}>".format(rhost, rpid)
+    PgLOG.pglog("{}Starts {} ({})".format(cmsg, tstr, pstr), PgLOG.LOGWRN)
+    PgLOG.PGLOG['BCKGRND'] = bck
+
+    return cidx
+
+#
+# check and validate that the current host is configured for the specialist
+#
+def check_command_specialist_host(hosts, chost, specialist, cmd, act = 0, logact = 0):
+
+    if cmd == 'dsrqst' and act == 'PR':
+        mflag = 'G'
+    else:
+        cnd = "command = '{}' AND specialist = '{}' AND hostname = '{}'".format(cmd, specialist, chost)
+        pgrec = PgDBI.pgget("dsdaemon", 'matchhost', cnd, logact)
+        mflag = (pgrec['matchhost'] if pgrec else 'G')
+
+    return PgLOG.check_process_host(hosts, chost, mflag, "{}-{}".format(specialist, cmd), logact)
+
+#
+# set command control info
+#
+def set_command_control(oindex, otype, cmd, logact = 0):
+
+    if not oindex: return
+
+    pgctl = None
+    if cmd == "dsrqst":
+        if otype == 'P':
+            pgrec = PgDBI.pgget("ptrqst", "rindex", "pindex = {}".format(oindex), logact)
+            if pgrec: pgctl = get_partition_control(pgrec, None, None, logact)
+        else:
+            pgrec = PgDBI.pgget("dsrqst", "dsid, gindex, cindex, rqsttype", "rindex = {}".format(oindex), logact)
+            if pgrec: pgctl = get_dsrqst_control(pgrec, logact)
+    elif cmd == "dsupdt":
+        if otype == 'L':
+            pgrec = PgDBI.pgget("dlupdt", "cindex", "lindex = {}".format(oindex), logact)
+            if not (pgrec and pgrec['cindex']): return
+            oindex = pgrec['cindex']
+        pgctl = PgDBI.pgget("dcupdt", BFIELDS, "cindex = {}".format(oindex), logact)
+    if pgctl:
+        for bkey in pgctl:
+            set_one_boption(bkey, pgctl[bkey], 0)
+
+#
+# get dsrqst control info
+#
+def get_dsrqst_control(pgrqst, logact = 0):
+
+    cflds = BFIELDS
+    if 'ptcount' in pgrqst and pgrqst['ptcount'] == 0: cflds += ", ptlimit, ptsize"
+    if pgrqst['cindex']:
+        pgctl = PgDBI.pgget("rcrqst", cflds, "cindex = {}".format(pgrqst['cindex']), logact)
+    else:
+        pgctl = None
+    if not pgctl:
+        gcnd = "dsid = '{}' AND gindex = ".format(pgrqst['dsid'])
+        tcnd = " AND rqsttype = '{}'".format(pgrqst['rqsttype'])
+        gindex = pgrqst['gindex']
+        while True:
+            pgctl = PgDBI.pgget("rcrqst", cflds, "{}{}{}".format(gcnd, gindex, tcnd), logact)
+            if pgctl or not gindex: break
+            pgctl = PgDBI.pgget("dsgroup", "pindex", "{}{}".format(gcnd, gindex), logact)
+            if not pgctl: break
+            gindex = pgctl['pindex']
+
+    return pgctl
+
+#
+# get dsrqst partition control info
+#
+def get_partition_control(pgpart, pgrqst = None, pgctl = None, logact = 0):
+
+    if not pgctl:
+        if not pgrqst and pgpart['rindex']:
+            pgrqst = PgDBI.pgget("dsrqst", "dsid, gindex, cindex, rqsttype", "rindex = {}".format(pgpart['rindex']), logact)
+        if pgrqst: pgctl = get_dsrqst_control(pgrqst, logact)
+
+    return pgctl
+
+#
+# build up the dynamic options
+#
+def get_dynamic_options(cmd, oindex, otype):
+
+    if oindex: cmd += " {}".format(oindex)
+    if otype: cmd += ' ' + otype
+    ret = options = ''
+    for loop in range(3):
+        ret = PgLOG.pgsystem(cmd, PgLOG.LOGWRN, 279)   # 1+2+4+16+256
+        if loop < 2 and PgLOG.PGLOG['SYSERR'] and 'Connection timed out' in PgLOG.PGLOG['SYSERR']:
+            time.sleep(PgSIG.PGSIG['ETIME'])
+        else:
+            break
+    if ret:
+        ret = ret.strip()
+        ms = re.match(r'^(-.+)/(-.+)$', ret)
+        if ms:
+            options = ms.group(1) if otype == 'R' else ms.group(2)
+        elif re.match(r'^(-.+)$', ret):
+            options = ret
+    if not options:
+        if ret: PgLOG.PGLOG['SYSERR'] += ret
+        PgLOG.PGLOG['SYSERR'] += " for {}".format(cmd)
+
+    return options
+
+#
+# retrieve a dscheck record for the provided cmd, argv and other conditions
+#
+def get_dscheck(cmd, argv, workdir, specialist, argextra = None, logact = 0):
+
+    cnd = "command = '{}' AND specialist = '{}' AND argv = '{}'".format(cmd, specialist, argv)
+    pgrecs = PgDBI.pgmget("dscheck", "*", cnd, logact)
+    cnt = len(pgrecs['cindex']) if pgrecs else 0
+    if cnt == 0 and cmd in DLYOPT:
+        ms = re.match(r'^(.+){}$'.format(DLYOPT[cmd]), argv)
+        if ms:
+            argv = ms.group(1)
+            cnt = 1
+        elif not argextra:
+            dopt = append_delayed_mode(cmd, argv)
+            if dopt:
+                argv += dopt
+                cnt = 1
+        if cnt:
+            cnd = "command = '{}' AND specialist = '{}' AND argv = '{}'".format(cmd, specialist, argv)
+            pgrecs = PgDBI.pgmget("dscheck", "*", cnd, logact)
+            cnt = len(pgrecs['cindex']) if pgrecs else 0
+
+    for i in range(cnt):
+        pgrec = PgUtil.onerecord(pgrecs, i)
+        if pgrec['workdir'] and PgUtil.pgcmp(workdir, pgrec['workdir']): continue
+        if PgUtil.pgcmp(argextra, pgrec['argextra']): continue
+        return pgrec
+
+    return None
+
PgDBI.pgupdt("dschkhist", record, chkcnd, logact) + else: + stat = PgDBI.pgadd("dschkhist", record, logact) + if stat: + cmsg = "{} cleaned as '{}' at {} on {}".format(get_command_info(pgrec), record['status'], PgLOG.current_datetime(), PgLOG.PGLOG['HOSTNAME']) + PgLOG.pglog("Chk{}: {}".format(pgrec['cindex'], cmsg), PgLOG.LOGWRN|PgLOG.FRCLOG) + stat = PgDBI.pgdel("dscheck", chkcnd, logact) + if record['status'] == "E" and 'errmsg' in record: + PgLOG.pglog("Chk{}: {} Exits with Error\n{}".format(pgrec['cindex'], get_command_info(pgrec), record['errmsg']), logact) + + return stat + +# +# get dsrqst fcount and dcount +# +def get_dsrqst_counts(pgchk, logact = 0): + + fcount = pgchk['fcount'] + dcount = pgchk['dcount'] + size = pgchk['size'] + + if pgchk['otype'] == 'P': + table = 'ptrqst' + cnd = "pindex = {}".format(pgchk['oindex']) + fields = "fcount" + else: + table = 'dsrqst' + cnd = "rindex = {}".format(pgchk['oindex']) + fields = "fcount, pcount, size_input, size_request" + pgrec = PgDBI.pgget(table, fields, cnd, logact) + if pgrec: + fcnt = pgrec['fcount'] + else: + fcnt = 0 + pgrec = {'fcount' : 0} + if not fcnt: fcnt = PgDBI.pgget("wfrqst", "", cnd, logact) + if fcnt and fcount != fcnt: fcount = fcnt + if fcount: + if 'pcount' in pgrec and pgrec['pcount']: + dcnt = pgrec['pcount'] + else: + dcnt = PgDBI.pgget("wfrqst", "", cnd + " AND status = 'O'", logact) + if dcnt and dcnt != dcount: dcount = dcnt + if not size: + if 'size_input' in pgrec and pgrec['size_input']: + if size != pgrec['size_input']: size = pgrec['size_input'] + elif 'size_request' in pgrec and pgrec['size_request']: + if size != pgrec['size_request']: size = pgrec['size_request'] + elif fcnt: # evaluate total size only if file count is set in request/partition record + pgrec = PgDBI.pgget("wfrqst", "sum(size) data_size", cnd, logact) + if pgrec and pgrec['data_size']: size = pgrec['data_size'] + + return (fcount, dcount, size) + +# +# set dscheck fcount +# +def set_dscheck_fcount(count, logact = 0): + + record = {'fcount' : count, 'chktime' : int(time.time())} + PgDBI.pgupdt("dscheck", record, DSCHK['chkcnd'], logact) + DSCHK['fcount'] = count + + return DSCHK['dcount'] # return Done count + +# +# set dscheck dcount +# +def set_dscheck_dcount(count, size, logact = 0): + + record = {'dcount' : count, 'size' : size, 'chktime' : int(time.time())} + PgDBI.pgupdt("dscheck", record, DSCHK['chkcnd'], logact) + DSCHK['dcount'] = count + DSCHK['size'] = size + + return DSCHK['dcount'] # return Done count + +# +# add dscheck dcount +# +def add_dscheck_dcount(count, size, logact = 0): + + record = {} + if count: + DSCHK['dcount'] += count + record['dcount'] = DSCHK['dcount'] + if size: + DSCHK['size'] += size + record['size'] = DSCHK['size'] + record['chktime'] = int(time.time()) + PgDBI.pgupdt("dscheck", record, DSCHK['chkcnd'], logact) + + return DSCHK['dcount'] # return Done count + +# +# set dscheck source information +# +def set_dscheck_attribute(fname, value, logact = 0): + + record = {} + if value: record[fname] = value + record['chktime'] = int(time.time()) + PgDBI.pgupdt("dscheck", record, DSCHK['chkcnd'], logact) + +# +# update dscheck status +# +def record_dscheck_status(stat, logact = 0): + + pgrec = PgDBI.pgget("dscheck", "lockhost, pid", DSCHK['chkcnd'], logact) + if not pgrec: return 0 + if not (pgrec['pid'] and pgrec['lockhost']): return 0 + (chost, cpid) = PgLOG.current_process_info() + if pgrec['pid'] != cpid or pgrec['lockhost'] != chost: return 0 + + # update dscheck status only if it is still locked by the 
+#
+# update dscheck status
+#
+def record_dscheck_status(stat, logact = 0):
+
+    pgrec = PgDBI.pgget("dscheck", "lockhost, pid", DSCHK['chkcnd'], logact)
+    if not pgrec: return 0
+    if not (pgrec['pid'] and pgrec['lockhost']): return 0
+    (chost, cpid) = PgLOG.current_process_info()
+    if pgrec['pid'] != cpid or pgrec['lockhost'] != chost: return 0
+
+    # update dscheck status only if it is still locked by the current process
+    record = {'status' : stat, 'chktime' : int(time.time()), 'pid' : 0}
+    return PgDBI.pgupdt("dscheck", record, DSCHK['chkcnd'], logact)
+
+#
+# get the number of tries to execute for a given cmd under dscheck control
+#
+def get_try_limit(cmd):
+
+    return (TRYLMTS[cmd] if cmd in TRYLMTS else TRYLMTS['default'])
+
+#
+# get the execution time for a dscheck command
+#
+def get_dscheck_runtime(pgrec, current = 0):
+
+    ttime = (0 if current else pgrec['ttltime'])
+    qtime = (0 if current else pgrec['quetime'])
+
+#    if pgrec['bid'] and PgLOG.PGLOG['CURBID']:
+#        if PgLOG.PGLOG['PGBATCH'] == PgLOG.PGLOG['SLMNAME']:
+#            stat = PgSIG.check_slurm_status(pgrec['bid'], PgLOG.LOGERR)
+#            if stat:
+#                if stat['PEND']: qtime += stat['PEND']
+#                if stat['TOTAL']: ttime += stat['TOTAL']
+#            return (ttime, qtime)
+
+    if pgrec['subtime']:
+        ttime += (pgrec['chktime'] - pgrec['subtime'])
+        if pgrec['stttime']: qtime += (pgrec['stttime'] - pgrec['subtime'])
+
+    return (ttime, qtime)
+
+#
+# retrieve a command string from a given dscheck record
+#
+def get_command_info(pgrec):
+
+    if pgrec['oindex']:
+        if pgrec['command'] == "dsupdt":
+            cinfo = "UC{}".format(pgrec['oindex'])
+        elif pgrec['command'] == "dsrqst":
+            if pgrec['otype'] == "P":
+                cinfo = "RPT{}".format(pgrec['oindex'])
+            else:
+                cinfo = "Rqst{}".format(pgrec['oindex'])
+        else:
+            cinfo = "{}-{}".format(pgrec['command'], pgrec['oindex'])
+    else:
+        cinfo = pgrec['command']
+    if pgrec['dsid']: cinfo += " " + pgrec['dsid']
+    if pgrec['action']: cinfo += " " + pgrec['action']
+    cinfo += " of " + pgrec['specialist']
+
+    return cinfo
+
+#
+# change the dscheck original command information
+#
+def change_dscheck_oinfo(oidx, otype, nidx, ntype):
+
+    cnd = "oindex = {} AND otype = '{}'".format(oidx, otype)
+    pgchk = PgDBI.pgget('dscheck', 'cindex, oindex, otype', cnd, PgLOG.LGEREX)
+    if not pgchk: return 0   # missing dscheck record to change
+
+    record = {}
+    DSCHK['oindex'] = record['oindex'] = nidx
+    DSCHK['otype'] = record['otype'] = ntype
+    cnd = "cindex = {}".format(pgchk['cindex'])
+    return PgDBI.pgupdt('dscheck', record, cnd, PgLOG.LGEREX)
diff --git a/src/rda_python_common/PgDBI.py b/src/rda_python_common/PgDBI.py
new file mode 100644
index 0000000..dee4e23
--- /dev/null
+++ b/src/rda_python_common/PgDBI.py
@@ -0,0 +1,2225 @@
+#
+###############################################################################
+#
+# Title : PgDBI.py -- PostgreSQL DataBase Interface
+# Author : Zaihua Ji, zji@ucar.edu
+# Date : 06/07/2022
+# Purpose : Python library module to query and manipulate a PostgreSQL database
+#
+# Work File : $DSSHOME/lib/python/PgDBI.py
+# Github : https://github.com/NCAR/rda-shared-libraries.git
+#
+###############################################################################
+
+import os
+import re
+import time
+from datetime import datetime
+import psycopg2 as PgSQL
+from psycopg2.extras import execute_values
+from psycopg2.extras import execute_batch
+from os import path as op
+import PgLOG
+
+pgdb = None       # reference to a connected database object
+curtran = 0       # 0 - no transaction, 1 - in transaction
+NMISSES = []      # array of missing userno
+LMISSES = []      # array of missing logname
+TABLES = {}       # record table field information
+SEQUENCES = {}    # record table sequence field names
+SPECIALIST = {}   # dict references to specialist info of dsids
+SYSDOWN = {}
+PGDBI = {}
+ADDTBLS = []
+PGSIGNS = ['!', '<', '>', '<>']
+CHCODE = 1042
+
+# hard coded db ports for dbnames
+DBPORTS = {
+    'default' : 0   # skip default port number 5432
+}
+
+# hard coded db names for given schema names
+DBNAMES = {
+    'ivaddb' : 'ivaddb',
+    'cntldb' : 'ivaddb',
+    'cdmsdb' : 'ivaddb',
+    'ispddb' : 'ispddb',
+    'obsua' : 'upadb',
+    'default' : 'rdadb',
+}
+
+# hard coded socket paths for machine_dbnames
+DBSOCKS = {
+    'default' : '',
+}
+
+# home path for checking db on an alternate host
+VIEWHOMES = {
+    'default' : PgLOG.PGLOG['DSSDBHM']
+}
+
+# add more to the list if used for names
+PGRES = ['end', 'window']
+
+#
+# PostgreSQL-specific query timestamp formats
+#
+fmtyr = lambda fn: "extract(year from {})::int".format(fn)
+fmtqt = lambda fn: "extract(quarter from {})::int".format(fn)
+fmtmn = lambda fn: "extract(month from {})::int".format(fn)
+fmtdt = lambda fn: "date({})".format(fn)
+fmtym = lambda fn: "to_char({}, 'yyyy-mm')".format(fn)
+fmthr = lambda fn: "extract(hour from {})::int".format(fn)
+
+#
+# set environments and defaults
+#
+def SETPGDBI(name, value):
+    PGDBI[name] = PgLOG.get_environment(name, value)
+
+SETPGDBI('CDHOST', 'rda-db.ucar.edu')   # common domain db host for the master server
+SETPGDBI('DEFDB', 'rdadb')
+SETPGDBI("DEFSC", 'dssdb')
+SETPGDBI('DEFHOST', PgLOG.PGLOG['PSQLHOST'])
+SETPGDBI("DEFPORT", 0)
+SETPGDBI("DEFSOCK", '')
+SETPGDBI("DBNAME", PGDBI['DEFDB'])
+SETPGDBI("SCNAME", PGDBI['DEFSC'])
+SETPGDBI("LNNAME", PGDBI['DEFSC'])
+SETPGDBI("PWNAME", PGDBI['DEFSC'])
+SETPGDBI("DBHOST", (os.environ['DSSDBHOST'] if os.environ.get('DSSDBHOST') else PGDBI['DEFHOST']))
+SETPGDBI("DBPORT", 0)
+SETPGDBI("ERRLOG", PgLOG.LOGERR)   # default error logact
+SETPGDBI("EXITLG", PgLOG.LGEREX)   # default exit logact
+SETPGDBI("DBSOCK", '')
+SETPGDBI("DATADIR", PgLOG.PGLOG['DSDHOME'])
+SETPGDBI("BCKPATH", PgLOG.PGLOG['DSSDBHM'] + "/backup")
+SETPGDBI("SQLPATH", PgLOG.PGLOG['DSSDBHM'] + "/sql")
+SETPGDBI("VWNAME", PGDBI['DEFSC'])
+SETPGDBI("VWPORT", 0)
+SETPGDBI("VWSOCK", '')
+
+PGDBI['DBSHOST'] = PgLOG.get_short_host(PGDBI['DBHOST'])
+PGDBI['DEFSHOST'] = PgLOG.get_short_host(PGDBI['DEFHOST'])
+PGDBI['VWHOST'] = PgLOG.PGLOG['PVIEWHOST']
+PGDBI['VWSHOST'] = PgLOG.get_short_host(PGDBI['VWHOST'])
+PGDBI['VWHOME'] = (VIEWHOMES[PgLOG.PGLOG['HOSTNAME']] if PgLOG.PGLOG['HOSTNAME'] in VIEWHOMES else VIEWHOMES['default'])
+PGDBI['SCPATH'] = None   # additional schema path for set search_path
+PGDBI['VHSET'] = 0
+PGDBI['PGSIZE'] = 1000   # number of records for page_size
+PGDBI['MTRANS'] = 5000   # max number of changes in one transaction
+PGDBI['MAXICNT'] = 12000000   # maximum number of records in each table
+
+#
+# create a pgddl command string with
+# table name (tname), prefix (pre) and suffix (suf)
+#
+def get_pgddl_command(tname, pre = None, suf = None):
+
+    ms = re.match(r'^(.+)\.(.+)$', tname)
+    if ms:
+        scname = ms.group(1)
+        tname = ms.group(2)
+    else:
+        scname = PGDBI['SCNAME']
+    xy = ''
+    if suf: xy += ' -x ' + suf
+    if pre: xy += ' -y ' + pre
+    return "pgddl {} -aa -h {} -d {} -c {} -u {}{}".format(tname, PGDBI['DBHOST'], PGDBI['DBNAME'], scname, PGDBI['LNNAME'], xy)
+
+#
+# set default connection for dssdb PostgreSQL Server
+#
+def dssdb_dbname():
+    default_scinfo(PGDBI['DEFDB'], PGDBI['DEFSC'], PgLOG.PGLOG['PSQLHOST'])
+
+dssdb_scname = dssdb_dbname
+
+#
+# set default connection for obsua PostgreSQL Server
+#
+def obsua_dbname():
+    default_scinfo('upadb', 'obsua', "rda-pgdb-03.ucar.edu")
+
+obsua_scname = obsua_dbname
+
+#
+# set default connection for ivaddb PostgreSQL Server
+#
+def ivaddb_dbname():
+    default_scinfo('ivaddb', 'ivaddb', "rda-pgdb-03.ucar.edu")
+
+ivaddb_scname = ivaddb_dbname
+
+#
+# set default connection for ispddb PostgreSQL Server
+#
+def ispddb_dbname():
+    default_scinfo('ispddb', 'ispddb', "rda-pgdb-03.ucar.edu")
+
+ispddb_scname = ispddb_dbname
+
+#
+# set default schema info with hard-coded info
+#
+def default_dbinfo(scname = None, dbhost = None, lnname = None, pwname = None, dbport = None, socket = None):
+
+    return default_scinfo(get_dbname(scname), scname, dbhost, lnname, pwname, dbport, socket)
+
+#
+# set default database/schema info with hard-coded info
+#
+def default_scinfo(dbname = None, scname = None, dbhost = None, lnname = None, pwname = None, dbport = None, socket = None):
+
+    if not dbname: dbname = PGDBI['DEFDB']
+    if not scname: scname = PGDBI['DEFSC']
+    if not dbhost: dbhost = PGDBI['DEFHOST']
+    if dbport is None: dbport = PGDBI['DEFPORT']
+    if socket is None: socket = PGDBI['DEFSOCK']
+
+    set_scname(dbname, scname, lnname, pwname, dbhost, dbport, socket)
+
+#
+# get the database sock file name of a given dbname for local connections
+#
+def get_dbsock(dbname):
+
+    return (DBSOCKS[dbname] if dbname in DBSOCKS else DBSOCKS['default'])
+
+#
+# get the database port number of a given dbname for remote connections
+#
+def get_dbport(dbname):
+
+    return (DBPORTS[dbname] if dbname in DBPORTS else DBPORTS['default'])
+
+#
+# get the database name of a given schema name for remote connections
+#
+def get_dbname(scname):
+
+    if scname:
+        if scname in DBNAMES: return DBNAMES[scname]
+        return DBNAMES['default']
+    return None
+
+#
+# set connection for viewing database information
+#
+def view_dbinfo(scname = None, lnname = None, pwname = None):
+
+    return view_scinfo(get_dbname(scname), scname, lnname, pwname)
+
+#
+# set connection for viewing database/schema information
+#
+def view_scinfo(dbname = None, scname = None, lnname = None, pwname = None):
+
+    if not dbname: dbname = PGDBI['DEFDB']
+    if not scname: scname = PGDBI['DEFSC']
+
+    set_scname(dbname, scname, lnname, pwname, PgLOG.PGLOG['PVIEWHOST'], PGDBI['VWPORT'])
+
+#
+# set connection for a given scname
+#
+def set_dbname(scname = None, lnname = None, pwname = None, dbhost = None, dbport = None, socket = None):
+
+    if not scname: scname = PGDBI['DEFSC']
+    return set_scname(get_dbname(scname), scname, lnname, pwname, dbhost, dbport, socket)
+
+#
+# set connection for given database & schema names
+#
+def set_scname(dbname = None, scname = None, lnname = None, pwname = None, dbhost = None, dbport = None, socket = None):
+
+    global pgdb
+    changed = 0
+
+    if dbname and dbname != PGDBI['DBNAME']:
+        PGDBI['DBNAME'] = dbname
+        changed = 1
+    if scname and scname != PGDBI['SCNAME']:
+        PGDBI['PWNAME'] = PGDBI['LNNAME'] = PGDBI['SCNAME'] = scname
+        changed = 1
+    if lnname and lnname != PGDBI['LNNAME']:
+        PGDBI['PWNAME'] = PGDBI['LNNAME'] = lnname
+        changed = 1
+    if pwname and pwname != PGDBI['PWNAME']:
+        PGDBI['PWNAME'] = pwname
+        changed = 1
+    if dbhost and dbhost != PGDBI['DBHOST']:
+        PGDBI['DBHOST'] = dbhost
+        PGDBI['DBSHOST'] = PgLOG.get_short_host(dbhost)
+        changed = 1
+    if PGDBI['DBSHOST'] == PgLOG.PGLOG['HOSTNAME']:
+        if socket is None: socket = get_dbsock(dbname)
+        if socket != PGDBI['DBSOCK']:
+            PGDBI['DBSOCK'] = socket
+            changed = 1
+    else:
+        if not dbport: dbport = get_dbport(dbname)
+        if dbport != PGDBI['DBPORT']:
+            PGDBI['DBPORT'] = dbport
+            changed = 1
+
+    if changed and pgdb is not None: pgdisconnect(1)
+
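+#
+# Illustrative usage sketch (an editorial addition, not part of the original
+# patch): switching schemas forces a reconnect on the next query.
+#
+def _example_switch_schema():
+
+    ispddb_dbname()   # point the module at the ispddb database/schema
+    dssdb_dbname()    # and back to the default dssdb schema
+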
+#
+# start a database transaction and exit if it fails
+#
+def starttran():
+
+    global curtran
+    global pgdb
+
+    if curtran == 1: endtran()   # try to end the previous transaction
+    if not pgdb:
+        pgconnect(0, 0, False)
+    else:
+        try:
+            pgdb.isolation_level
+        except PgSQL.OperationalError as e:
+            pgconnect(0, 0, False)
+        if pgdb.closed:
+            pgconnect(0, 0, False)
+        elif pgdb.autocommit:
+            pgdb.autocommit = False
+    curtran = 1
+
+#
+# end a transaction with changes committed and exit if it fails
+#
+def endtran(autocommit = True):
+
+    global curtran
+    global pgdb
+    if curtran and pgdb:
+        if not pgdb.closed: pgdb.commit()
+        pgdb.autocommit = autocommit
+    curtran = 0 if autocommit else 1
+
+#
+# end a transaction without committing the changes and exit inside if it fails
+#
+def aborttran(autocommit = True):
+
+    global curtran
+    global pgdb
+    if curtran and pgdb:
+        if not pgdb.closed: pgdb.rollback()
+        pgdb.autocommit = autocommit
+    curtran = 0 if autocommit else 1
+
+#
+# record an error message to the dscheck record and clean the lock
+#
+def record_dscheck_error(errmsg):
+
+    cnd = PgLOG.PGLOG['DSCHECK']['chkcnd']
+    if PgLOG.PGLOG['NOQUIT']: PgLOG.PGLOG['NOQUIT'] = 0
+    dflags = PgLOG.PGLOG['DSCHECK']['dflags']
+
+    pgrec = pgget("dscheck", "mcount, tcount, lockhost, pid", cnd, PgLOG.LGEREX)
+    if not pgrec: return 0
+    if not pgrec['pid'] and not pgrec['lockhost']: return 0
+    (chost, cpid) = PgLOG.current_process_info()
+    if pgrec['pid'] != cpid or pgrec['lockhost'] != chost: return 0
+
+    # update dscheck record only if it is still locked by the current process
+    record = {}
+    record['chktime'] = int(time.time())
+    record['status'] = "E"
+    record['pid'] = 0   # release lock
+    if dflags:
+        record['dflags'] = dflags
+        record['mcount'] = pgrec['mcount'] + 1
+    else:
+        record['dflags'] = ''
+
+    if errmsg:
+        errmsg = PgLOG.break_long_string(errmsg, 512, None, 50, None, 50, 25)
+        if pgrec['tcount'] > 1: errmsg = "Try {}: {}".format(pgrec['tcount'], errmsg)
+        record['errmsg'] = errmsg
+
+    return pgupdt("dscheck", record, cnd, PGDBI['ERRLOG'])
+
+#
+# local function to log a query error
+#
+def qelog(dberror, sleep, sqlstr, vals, pgcnt, logact = PGDBI['ERRLOG']):
+
+    retry = " Sleep {}(sec) & ".format(sleep) if sleep else " "
+    if sqlstr:
+        if sqlstr.find("Retry ") == 0:
+            retry += "the {} ".format(PgLOG.int2order(pgcnt+1))
+        elif sleep:
+            retry += "the {} Retry: \n".format(PgLOG.int2order(pgcnt+1))
+        elif pgcnt:
+            retry = " Error the {} Retry: \n".format(PgLOG.int2order(pgcnt))
+        else:
+            retry = "\n"
+        sqlstr = retry + sqlstr
+    else:
+        sqlstr = ''
+
+    if vals: sqlstr += " with values: " + str(vals)
+
+    if dberror: sqlstr = "{}\n{}".format(dberror, sqlstr)
+    if logact&PgLOG.EXITLG and PgLOG.PGLOG['DSCHECK']: record_dscheck_error(sqlstr)
+    PgLOG.pglog(sqlstr, logact)
+    if sleep: time.sleep(sleep)
+
+    return PgLOG.FAILURE   # if not exiting in PgLOG.pglog()
+
+#
+# try to add a new table according to a 'relation does not exist' error
+#
+def try_add_table(dberror, logact):
+
+    ms = re.match(r'^42P01 ERROR: relation "(.+)" does not exist', dberror)
+    if ms:
+        tname = ms.group(1)
+        add_new_table(tname, logact = logact)
+
+#
+# add a table for a given table name
+#
+def add_a_table(tname, logact):
+
+    add_new_table(tname, logact = logact)
+
+#
+# add a new table for a given table name
+#
+def add_new_table(tname, pre = None, suf = None, logact = 0):
+
+    if pre:
+        tbname = '{}_{}'.format(pre, tname)
+    elif suf:
+        tbname = '{}_{}'.format(tname, suf)
+    else:
+        tbname = tname
+    if tbname in ADDTBLS: return
+
+    PgLOG.pgsystem(get_pgddl_command(tname, pre, suf), logact)
+    ADDTBLS.append(tbname)
+
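+#
+# Illustrative usage sketch (an editorial addition, not part of the original
+# patch): grouping bulk inserts into one explicit transaction; the yearly
+# table name and records are hypothetical.
+#
+def _example_transaction(records):
+
+    starttran()
+    cnt = pgmadd("allusage_2023", records, PGDBI['ERRLOG'])
+    if cnt:
+        endtran()     # commit all inserts at once
+    else:
+        aborttran()   # roll back on failure
+    return cnt
+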
+#
+# validate a table for a given table name (tname), prefix (pre) and suffix (suf),
+# and add it if it does not exist
+#
+def valid_table(tname, pre = None, suf = None, logact = 0):
+
+    if pre:
+        tbname = '{}_{}'.format(pre, tname)
+    elif suf:
+        tbname = '{}_{}'.format(tname, suf)
+    else:
+        tbname = tname
+    if tbname in ADDTBLS: return tbname
+
+    if not pgcheck(tbname, logact): PgLOG.pgsystem(get_pgddl_command(tname, pre, suf), logact)
+    ADDTBLS.append(tbname)
+    return tbname
+
+#
+# check a database error and decide whether the failed query should be retried
+#
+def check_dberror(pgerr, pgcnt, sqlstr, ary, logact = PGDBI['ERRLOG']):
+
+    ret = PgLOG.FAILURE
+
+    pgcode = pgerr.pgcode
+    pgerror = pgerr.pgerror
+    dberror = "{} {}".format(pgcode, pgerror) if pgcode and pgerror else str(pgerr)
+    if pgcnt < PgLOG.PGLOG['DBRETRY']:
+        if not pgcode:
+            if PGDBI['DBNAME'] == PGDBI['DEFDB'] and PGDBI['DBSHOST'] != PGDBI['DEFSHOST']:
+                default_dbinfo()
+                qelog(dberror, 0, "Retry Connecting to {} on {}".format(PGDBI['DBNAME'], PGDBI['DBHOST']), ary, pgcnt, PgLOG.MSGLOG)
+            else:
+                qelog(dberror, 5+5*pgcnt, "Retry Connecting", ary, pgcnt, PgLOG.LOGWRN)
+            return PgLOG.SUCCESS
+        elif re.match(r'^(08|57)', pgcode):
+            qelog(dberror, 0, "Retry Connecting", ary, pgcnt, PgLOG.LOGWRN)
+            pgconnect(1, pgcnt + 1)
+            return (PgLOG.FAILURE if not pgdb else PgLOG.SUCCESS)
+        elif re.match(r'^55', pgcode):   # try to lock again
+            qelog(dberror, 10, "Retry Locking", ary, pgcnt, PgLOG.LOGWRN)
+            return PgLOG.SUCCESS
+        elif pgcode == '25P02':   # aborted transaction; roll it back and retry
+            qelog(dberror, 0, "Rollback transaction", ary, pgcnt, PgLOG.LOGWRN)
+            pgdb.rollback()
+            return PgLOG.SUCCESS
+        elif pgcode == '42P01' and logact&PgLOG.ADDTBL:   # try to add the missing table
+            qelog(dberror, 0, "Retry after adding a table", ary, pgcnt, PgLOG.LOGWRN)
+            try_add_table(dberror, logact)
+            return PgLOG.SUCCESS
+
+    if logact&PgLOG.DOLOCK and pgcode and re.match(r'^55\w\w\w$', pgcode):
+        logact &= ~PgLOG.EXITLG   # no exit for lock error
+    return qelog(dberror, 0, sqlstr, ary, pgcnt, logact)
+
+#
+# return a dict holding the PostgreSQL batch mode command and output file name
+#
+def pgbatch(sqlfile, foreground = 0):
+
+#    if(PGDBI['VWHOST'] and PGDBI['VWHOME'] and
+#       PGDBI['DBSHOST'] == PGDBI['VWSHOST'] and PGDBI['SCNAME'] == PGDBI['VWNAME']):
+#        slave = "/{}/{}.slave".format(PGDBI['VWHOME'], PGDBI['VWHOST'])
+#        if not op.exists(slave): default_scname()
+
+    dbhost = 'localhost' if PGDBI['DBSHOST'] == PgLOG.PGLOG['HOSTNAME'] else PGDBI['DBHOST']
+    options = "-h {} -p {}".format(dbhost, PGDBI['DBPORT'])
+    os.environ['PGPASSWORD'] = PGDBI['PWNAME']
+    options += " -U {} {}".format(PGDBI['LNNAME'], PGDBI['DBNAME'])
+
+    if not sqlfile: return options
+
+    if foreground:
+        batch = "psql {} < {} |".format(options, sqlfile)
+    else:
+        batch = {'out' : sqlfile}
+        if re.search(r'\.sql$', batch['out']):
+            batch['out'] = re.sub(r'\.sql$', '.out', batch['out'])
+        else:
+            batch['out'] += ".out"
+
+        batch['cmd'] = "psql {} < {} > {} 2>&1".format(options, sqlfile, batch['out'])
+
+    return batch
+
+#
+# start a connection to the database and return a DBI object; None if error
+# force a reconnect if reconnect > 0
+#
+def pgconnect(reconnect = 0, pgcnt = 0, autocommit = True):
+
+    global pgdb
+
+    if pgdb:
+        if reconnect and not pgdb.closed: return pgdb   # no need to reconnect
+    elif reconnect:
+        reconnect = 0   # initial connection
+
+#    if PGDBI['VWHOST'] and PGDBI['VWHOME'] and PGDBI['DBSHOST'] == PGDBI['VWSHOST'] and PGDBI['SCNAME'] == PGDBI['VWNAME']:
+#        slave = "/{}/{}.slave".format(PGDBI['VWHOME'], PGDBI['VWHOST'])
+#        if not op.exists(slave): default_scname()
+
+    while True:
+        config = {'database' : PGDBI['DBNAME'],
+                  'user' : PGDBI['LNNAME'],
+                  'password' : PGDBI['PWNAME']}
+        if PGDBI['DBSHOST'] == PgLOG.PGLOG['HOSTNAME']:
+            config['host'] = 'localhost'
+        else:
+            config['host'] = PGDBI['DBHOST'] if PGDBI['DBHOST'] else PGDBI['CDHOST']
+            if not PGDBI['DBPORT']: PGDBI['DBPORT'] = get_dbport(PGDBI['DBNAME'])
+            if PGDBI['DBPORT']: config['port'] = PGDBI['DBPORT']
+
+        sqlstr = "psycopg2.connect(**{})".format(config)
+        if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, sqlstr)
+        try:
+            PgLOG.PGLOG['PGDBBUF'] = pgdb = PgSQL.connect(**config)
+            if reconnect: PgLOG.pglog("{} Reconnected at {}".format(sqlstr, PgLOG.current_datetime()), PgLOG.MSGLOG|PgLOG.FRCLOG)
+            if autocommit: pgdb.autocommit = autocommit
+            return pgdb
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, None, PGDBI['EXITLG']): return PgLOG.FAILURE
+            pgcnt += 1
+
+#
+# return a PostgreSQL cursor upon success
+#
+def pgcursor():
+
+    global pgdb
+    pgcur = None
+
+    if not pgdb:
+        pgconnect()
+        if not pgdb: return PgLOG.FAILURE
+
+    pgcnt = 0
+    while True:
+        try:
+            pgcur = pgdb.cursor()
+            spath = "SET search_path = '{}'".format(PGDBI['SCNAME'])
+            if PGDBI['SCPATH'] and PGDBI['SCPATH'] != PGDBI['SCNAME']:
+                spath += ", '{}'".format(PGDBI['SCPATH'])
+            pgcur.execute(spath)
+        except PgSQL.Error as pgerr:
+            if pgcnt == 0 and pgdb.closed:
+                pgconnect(1)
+            elif not check_dberror(pgerr, pgcnt, '', None, PGDBI['EXITLG']):
+                return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    return pgcur
+
+#
+# disconnect from the database
+#
+def pgdisconnect(stopit = 1):
+
+    global pgdb
+    if pgdb:
+        if stopit: pgdb.close()
+        PgLOG.PGLOG['PGDBBUF'] = pgdb = None
+
+#
+# gather table field default information as a dict with field names as keys
+# and default values as values;
+# the whole table information is cached to a dict with table names as keys
+#
+def pgtable(tablename, logact = PGDBI['ERRLOG']):
+
+    if tablename in TABLES: return TABLES[tablename].copy()   # cached already
+    intms = r'^(smallint|bigint|integer)$'
+    fields = "column_name col, data_type typ, is_nullable nil, column_default def"
+    condition = table_condition(tablename)
+    pgcnt = 0
+    while True:
+        pgrecs = pgmget('information_schema.columns', fields, condition, logact)
+        cnt = len(pgrecs['col']) if pgrecs else 0
+        if cnt: break
+        if pgcnt == 0 and logact&PgLOG.ADDTBL:
+            add_new_table(tablename, logact = logact)
+        else:
+            return PgLOG.pglog(tablename + ": Table does not exist", logact)
+        pgcnt += 1
+
+    pgdefs = {}
+    for i in range(cnt):
+        name = pgrecs['col'][i]
+        isint = re.match(intms, pgrecs['typ'][i])
+        dflt = pgrecs['def'][i]
+        if dflt is not None:
+            if re.match(r'^nextval\(', dflt):
+                dflt = 0
+            else:
+                dflt = check_default_value(dflt, isint)
+        elif pgrecs['nil'][i] == 'YES':
+            dflt = None
+        elif isint:
+            dflt = 0
+        else:
+            dflt = ''
+        pgdefs[name] = dflt
+
+    TABLES[tablename] = pgdefs.copy()
+    return pgdefs
+
+#
+# get the sequence field name for a given table name
+#
+def pgsequence(tablename, logact = PGDBI['ERRLOG']):
+
+    if tablename in SEQUENCES: return SEQUENCES[tablename]   # cached already
+    condition = table_condition(tablename) + " AND column_default LIKE 'nextval(%'"
+    pgrec = pgget('information_schema.columns', 'column_name', condition, logact)
+    seqname = pgrec['column_name'] if pgrec else None
+    SEQUENCES[tablename] = seqname
+
+    return seqname
+
+#
+# check the default value for integer & string
+#
+def check_default_value(dflt, isint):
+
+    if isint:
+        ms = re.match(r"^'{0,1}(\d+)", dflt)
+        if ms: dflt = int(ms.group(1))
+    elif dflt[0] == "'":
+        ms = re.match(r"^(.+)::", dflt)
+        if ms: dflt = ms.group(1)
+    elif dflt != 'NULL':
+        dflt = "'{}'".format(dflt)
+    return dflt
+
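+#
+# Illustrative sketch (an editorial addition, not part of the original patch):
+# pgtable() returns cached per-field defaults and pgsequence() the
+# auto-increment field name, e.g. for the dscheck table used by PgCMD:
+#
+# defaults = pgtable('dscheck')   # {'cindex' : 0, 'command' : '', ...}
+# pgsequence('dscheck')           # 'cindex'
+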
+#
+# check default value for integer & string
+#
+def check_default_value(dflt, isint):
+
+    if isint:
+        ms = re.match(r"^'{0,1}(\d+)", dflt)
+        if ms: dflt = int(ms.group(1))
+    elif dflt[0] == "'":
+        ms = re.match(r"^(.+)::", dflt)
+        if ms: dflt = ms.group(1)
+    elif dflt != 'NULL':
+        dflt = "'{}'".format(dflt)
+    return dflt
+
+#
+# local function: prepare an INSERT statement for pgadd()/pgmadd() for the given
+# table and field names, according to the options of multiple placeholders and
+# returning sequence id
+#
+def prepare_insert(tablename, fields, multi = True, getid = None):
+
+    strfld = pgnames(fields, '.', ',')
+    if multi:
+        strplc = "(" + ','.join(['%s']*len(fields)) + ")"
+    else:
+        strplc = '%s'   # single placeholder for psycopg2 execute_values()
+    sqlstr = "INSERT INTO {} ({}) VALUES {}".format(tablename, strfld, strplc)
+    if getid: sqlstr += " RETURNING " + getid
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, sqlstr)
+
+    return sqlstr
+
+#
+# local function: prepare default values for a single record
+#
+def prepare_default(tablename, record, logact = 0):
+
+    table = pgtable(tablename, logact)
+
+    for fld in record:
+        val = record[fld]
+        if val is None:
+            vlen = 0
+        elif isinstance(val, str):
+            vlen = len(val)
+        else:
+            vlen = 1
+        if vlen == 0: record[fld] = table[fld]
+
+#
+# local function: prepare default values for multiple records
+#
+def prepare_defaults(tablename, records, logact = 0):
+
+    table = pgtable(tablename, logact)
+
+    for fld in records:
+        vals = records[fld]
+        vcnt = len(vals)
+        for i in range(vcnt):
+            if vals[i] is None:
+                vlen = 0
+            elif isinstance(vals[i], str):
+                vlen = len(vals[i])
+            else:
+                vlen = 1
+            if vlen == 0: records[fld][i] = table[fld]
+
+#
+# insert one record into tablename
+# tablename: add a record for one table name each call
+# record: dict with keys as field names and values as field values
+# return PgLOG.SUCCESS or PgLOG.FAILURE; with getid (or PgLOG.AUTOID) return the
+# new sequence id instead
+#
+def pgadd(tablename, record, logact = PGDBI['ERRLOG'], getid = None):
+
+    global curtran
+    if not record: return PgLOG.pglog("Nothing to add to " + tablename, logact)
+    if logact&PgLOG.DODFLT: prepare_default(tablename, record, logact)
+    if logact&PgLOG.AUTOID and not getid: getid = pgsequence(tablename, logact)
+    sqlstr = prepare_insert(tablename, list(record), True, getid)
+    values = tuple(record.values())
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "Insert: " + str(values))
+
+    ret = acnt = pgcnt = 0
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            pgcur.execute(sqlstr, values)
+            acnt = 1
+            if getid:
+                ret = pgcur.fetchone()[0]
+            else:
+                ret = PgLOG.SUCCESS
+            pgcur.close()
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, values, logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "pgadd: 1 record added to " + tablename + ", return " + str(ret))
+    if logact&PgLOG.ENDLCK:
+        endtran()
+    elif curtran:
+        curtran += acnt
+        if curtran > PGDBI['MTRANS']: starttran()
+
+    return ret
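+#
+# Usage sketch (illustrative only, not part of this module; the field values are
+# hypothetical): insert one record and get the auto-generated sequence id back
+# via PgLOG.AUTOID:
+#
+#    record = {'logname' : 'jdoe', 'stat_flag' : 'A'}
+#    uid = pgadd('dssdb.user', record, PGDBI['ERRLOG']|PgLOG.AUTOID)
+#    if uid: PgLOG.pglog("added user.uid = {}".format(uid), PgLOG.LOGWRN)
+#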
+#
+# insert multiple records into tablename
+# tablename: add records for one table name each call
+# records: dict with field names as keys and each value being a list of field values
+# return PgLOG.SUCCESS or PgLOG.FAILURE
+#
+def pgmadd(tablename, records, logact = PGDBI['ERRLOG'], getid = None):
+
+    global curtran
+    if not records: return PgLOG.pglog("Nothing to insert to table " + tablename, logact)
+    if logact&PgLOG.DODFLT: prepare_defaults(tablename, records, logact)
+    if logact&PgLOG.AUTOID and not getid: getid = pgsequence(tablename, logact)
+    multi = True if getid else False
+    sqlstr = prepare_insert(tablename, list(records), multi, getid)
+
+    v = records.values()
+    values = list(zip(*v))
+    cntrow = len(values)
+    ids = [] if getid else None
+
+    if PgLOG.PGLOG['DBGLEVEL']:
+        for row in values: PgLOG.pgdbg(1000, "Insert: " + str(row))
+
+    count = pgcnt = 0
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+
+        if getid:
+            while count < cntrow:
+                record = values[count]
+                try:
+                    pgcur.execute(sqlstr, record)
+                    ids.append(pgcur.fetchone()[0])
+                    count += 1
+                except PgSQL.Error as pgerr:
+                    if not check_dberror(pgerr, pgcnt, sqlstr, record, logact): return PgLOG.FAILURE
+                    break
+        else:
+            try:
+                execute_values(pgcur, sqlstr, values, page_size=PGDBI['PGSIZE'])
+                count = cntrow
+            except PgSQL.Error as pgerr:
+                if not check_dberror(pgerr, pgcnt, sqlstr, values[0], logact): return PgLOG.FAILURE
+        if count >= cntrow: break
+        pgcnt += 1
+
+    pgcur.close()
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "pgmadd: {} of {} record(s) added to {}".format(count, cntrow, tablename))
+
+    if logact&PgLOG.ENDLCK:
+        endtran()
+    elif curtran:
+        curtran += count
+        if curtran > PGDBI['MTRANS']: starttran()
+
+    return (ids if ids else count)
+
+#
+# local function: prepare a SELECT statement for pgget() and pgmget()
+#
+def prepare_select(tablenames, fields = None, condition = None, cndflds = None, logact = 0):
+
+    sqlstr = ''
+    if tablenames:
+        if fields:
+            sqlstr = "SELECT " + fields
+        else:
+            sqlstr = "SELECT count(*) cntrec"
+
+        sqlstr += " FROM " + tablenames
+        if condition:
+            if re.match(r'^\s*(ORDER|GROUP|HAVING|OFFSET|LIMIT)\s', condition, re.I):
+                sqlstr += " " + condition   # no where clause, append directly
+            else:
+                sqlstr += " WHERE " + condition
+        elif cndflds:
+            sep = 'WHERE'
+            for fld in cndflds:
+                sqlstr += " {} {}=%s".format(sep, fld)
+                sep = 'AND'
+        if logact&PgLOG.DOLOCK:
+            starttran()
+            sqlstr += " FOR UPDATE"
+    elif fields:
+        sqlstr = "SELECT " + fields
+    elif condition:
+        sqlstr = condition
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, sqlstr)
+
+    return sqlstr
+
+#
+# tablenames: comma-delimited string of one or more tables, more than one table for joining
+# fields: comma-delimited string of one or more field names
+# condition: query conditions for the where clause
+# return a dict with keys as field names upon success; with no fields given,
+# return the record count instead
+#
+def pgget(tablenames, fields, condition = None, logact = 0):
+
+    if not logact: logact = PGDBI['ERRLOG']
+    if fields and condition and not re.search(r'limit 1$', condition, re.I): condition += " LIMIT 1"
+    sqlstr = prepare_select(tablenames, fields, condition, None, logact)
+    if fields and not re.search(r'(^|\s)limit 1($|\s)', sqlstr, re.I): sqlstr += " LIMIT 1"
+    ucname = True if logact&PgLOG.UCNAME else False
+    pgcnt = 0
+    record = {}
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            pgcur.execute(sqlstr)
+            vals = pgcur.fetchone()
+            if vals:
+                colcnt = len(pgcur.description)
+                for i in range(colcnt):
+                    col = pgcur.description[i]
+                    colname = col[0].upper() if ucname else col[0]
+                    val = vals[i]
+                    if col[1] == CHCODE and val and val[-1] == ' ': val = val.rstrip()
+                    record[colname] = val
+            pgcur.close()
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, None, logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    if record and tablenames and not fields:
+        if PgLOG.PGLOG['DBGLEVEL']:
+            PgLOG.pgdbg(1000, "pgget: {} record(s) found from {}".format(record['cntrec'], tablenames))
+        return record['cntrec']
+    elif PgLOG.PGLOG['DBGLEVEL']:
+        cnt = 1 if record else 0
+        PgLOG.pgdbg(1000, "pgget: {} record retrieved from {}".format(cnt, tablenames))
+
+    return record
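+#
+# Usage sketch (illustrative only, not part of this module; conditions are
+# hypothetical): without fields pgget() returns a record count, with fields it
+# returns a single record as a dict:
+#
+#    cnt = pgget('dssdb.user', None, "stat_flag = 'A'")
+#    pgrec = pgget('dssdb.user', 'uid, email', "userno = 12345")
+#    if pgrec: print(pgrec['email'])
+#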
+#
+# tablenames: comma-delimited string of one or more tables, more than one table for joining
+# fields: comma-delimited string of one or more field names
+# condition: query conditions for the where clause
+# return a dict with keys as field names upon success; values for each field name
+# are in a list, and all lists are the same length with missing values set to None
+#
+def pgmget(tablenames, fields, condition = None, logact = PGDBI['ERRLOG']):
+
+    sqlstr = prepare_select(tablenames, fields, condition, None, logact)
+    ucname = True if logact&PgLOG.UCNAME else False
+    count = pgcnt = 0
+    records = {}
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            pgcur.execute(sqlstr)
+            rowvals = pgcur.fetchall()
+            if rowvals:
+                colcnt = len(pgcur.description)
+                count = len(rowvals)
+                colvals = list(zip(*rowvals))
+                for i in range(colcnt):
+                    col = pgcur.description[i]
+                    colname = col[0].upper() if ucname else col[0]
+                    vals = list(colvals[i])
+                    if col[1] == CHCODE:
+                        for j in range(count):
+                            if vals[j] and vals[j][-1] == ' ': vals[j] = vals[j].rstrip()
+                    records[colname] = vals
+            pgcur.close()
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, None, logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    if PgLOG.PGLOG['DBGLEVEL']:
+        PgLOG.pgdbg(1000, "pgmget: {} record(s) retrieved from {}".format(count, tablenames))
+
+    return records
+
+#
+# tablenames: comma-delimited string of one or more tables
+# fields: comma-delimited string of one or more field names
+# cnddict: condition dict with field names : values
+# return a dict (field names : values) upon success
+#
+# retrieve one record from tablenames for the condition dict
+#
+def pghget(tablenames, fields, cnddict, logact = PGDBI['ERRLOG']):
+
+    if not tablenames: return PgLOG.pglog("Missing table name to query", logact)
+    if not fields: return PgLOG.pglog("Nothing to query " + tablenames, logact)
+    if not cnddict: return PgLOG.pglog("Missing condition dict values to query " + tablenames, logact)
+    sqlstr = prepare_select(tablenames, fields, None, list(cnddict), logact)
+    if fields and not re.search(r'limit 1$', sqlstr, re.I): sqlstr += " LIMIT 1"
+    ucname = True if logact&PgLOG.UCNAME else False
+
+    values = tuple(cnddict.values())
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "Query from {} for {}".format(tablenames, values))
+
+    pgcnt = 0
+    record = {}
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            pgcur.execute(sqlstr, values)
+            vals = pgcur.fetchone()
+            if vals:
+                colcnt = len(pgcur.description)
+                for i in range(colcnt):
+                    col = pgcur.description[i]
+                    colname = col[0].upper() if ucname else col[0]
+                    val = vals[i]
+                    if col[1] == CHCODE and val and val[-1] == ' ': val = val.rstrip()
+                    record[colname] = val
+            pgcur.close()
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, values, logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    if record and tablenames and not fields:
+        if PgLOG.PGLOG['DBGLEVEL']:
+            PgLOG.pgdbg(1000, "pghget: {} record(s) found from {}".format(record['cntrec'], tablenames))
+        return record['cntrec']
+    elif PgLOG.PGLOG['DBGLEVEL']:
+        cnt = 1 if record else 0
+        PgLOG.pgdbg(1000, "pghget: {} record retrieved from {}".format(cnt, tablenames))
+
+    return record
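+#
+# Usage sketch (illustrative only, not part of this module): pgmget() returns
+# parallel value lists keyed by field name; pghget() takes the condition as a
+# dict so values are passed as query parameters:
+#
+#    pgrecs = pgmget('dssdb.user', 'uid, email', "stat_flag = 'A'")
+#    cnt = len(pgrecs['uid']) if pgrecs else 0
+#    pgrec = pghget('dssdb.user', 'uid, email', {'userno' : 12345})
+#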
+#
+# tablenames: comma-delimited string of one or more tables
+# fields: comma-delimited string of one or more field names
+# cnddicts: condition dict with field names : value lists
+# return a dict (field names : value lists) upon success
+#
+# retrieve multiple records from tablenames for the condition dict
+#
+def pgmhget(tablenames, fields, cnddicts, logact = PGDBI['ERRLOG']):
+
+    if not tablenames: return PgLOG.pglog("Missing table name to query", logact)
+    if not fields: return PgLOG.pglog("Nothing to query " + tablenames, logact)
+    if not cnddicts: return PgLOG.pglog("Missing condition dict values to query " + tablenames, logact)
+    sqlstr = prepare_select(tablenames, fields, None, list(cnddicts), logact)
+    ucname = True if logact&PgLOG.UCNAME else False
+
+    v = cnddicts.values()
+    values = list(zip(*v))
+    cndcnt = len(values)
+
+    if PgLOG.PGLOG['DBGLEVEL']:
+        for row in values:
+            PgLOG.pgdbg(1000, "Query from {} for {}".format(tablenames, row))
+
+    colcnt = ccnt = count = pgcnt = 0
+    cols = []
+    chrs = []
+    records = {}
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        while ccnt < cndcnt:
+            cndvals = values[ccnt]
+            try:
+                pgcur.execute(sqlstr, cndvals)
+                ccnt += 1
+                rowvals = pgcur.fetchall()
+                if rowvals:
+                    if colcnt == 0:
+                        for col in pgcur.description:
+                            colname = col[0].upper() if ucname else col[0]
+                            if col[1] == CHCODE: chrs.append(colname)
+                            cols.append(colname)
+                            records[colname] = []
+                        colcnt = len(cols)
+                    rcnt = len(rowvals)
+                    count += rcnt
+                    colvals = list(zip(*rowvals))
+                    for i in range(colcnt):
+                        vals = list(colvals[i])
+                        colname = cols[i]
+                        if chrs and colname in chrs:
+                            for j in range(rcnt):
+                                if vals[j] and vals[j][-1] == ' ': vals[j] = vals[j].rstrip()
+                        records[colname].extend(vals)
+            except PgSQL.Error as pgerr:
+                if not check_dberror(pgerr, pgcnt, sqlstr, cndvals, logact): return PgLOG.FAILURE
+                break
+        if ccnt >= cndcnt: break
+        pgcnt += 1
+    pgcur.close()
+
+    if PgLOG.PGLOG['DBGLEVEL']:
+        PgLOG.pgdbg(1000, "pgmhget: {} record(s) retrieved from {}".format(count, tablenames))
+
+    return records
+
+#
+# local function: prepare an UPDATE statement for pgupdt(), pghupdt() and pgmupdt()
+#
+def prepare_update(tablename, fields, condition = None, cndflds = None):
+
+    strset = []
+    # build the set string
+    for fld in fields:
+        strset.append("{}=%s".format(pgname(fld, '.')))
+    strflds = ",".join(strset)
+
+    # build the condition string
+    if not condition:
+        cndset = []
+        for fld in cndflds:
+            cndset.append("{}=%s".format(pgname(fld, '.')))
+        condition = " AND ".join(cndset)
+
+    sqlstr = "UPDATE {} SET {} WHERE {}".format(tablename, strflds, condition)
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, sqlstr)
+
+    return sqlstr
+
+#
+# update one or multiple rows in tablename
+# tablename: update for one table name each call
+# record: dict with field names : values
+# condition: update conditions for the where clause
+# return number of rows updated upon success
+#
+def pgupdt(tablename, record, condition, logact = PGDBI['ERRLOG']):
+
+    global curtran
+    if not record: return PgLOG.pglog("Nothing to update in " + tablename, logact)
+    if not condition or isinstance(condition, int): return PgLOG.pglog("Missing condition to update " + tablename, logact)
+    sqlstr = prepare_update(tablename, list(record), condition)
+    if logact&PgLOG.DODFLT: prepare_default(tablename, record, logact)
+
+    values = tuple(record.values())
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "Update {} for {}".format(tablename, values))
+
+    ucnt = pgcnt = 0
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            pgcur.execute(sqlstr, values)
+            ucnt = pgcur.rowcount
+            pgcur.close()
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, values, logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "pgupdt: {} record(s) updated in {}".format(ucnt, tablename))
+    if logact&PgLOG.ENDLCK:
+        endtran()
+    elif curtran:
+        curtran += ucnt
+        if curtran > PGDBI['MTRANS']: starttran()
+
+    return ucnt
+
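+#
+# Usage sketch (illustrative only, not part of this module):
+#
+#    cnt = pgupdt('dssdb.user', {'stat_flag' : 'C'}, "userno = 12345")
+#    if cnt: PgLOG.pglog("{} record(s) updated".format(cnt), PgLOG.LOGWRN)
+#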
+#
+# update one or multiple records in tablename
+# tablename: update for one table name each call
+# record: update values, dict with field names : values
+# cnddict: condition dict with field names : values
+# return number of records updated upon success
+#
+def pghupdt(tablename, record, cnddict, logact = PGDBI['ERRLOG']):
+
+    global curtran
+    if not record: return PgLOG.pglog("Nothing to update in " + tablename, logact)
+    if not cnddict or isinstance(cnddict, int): return PgLOG.pglog("Missing condition to update " + tablename, logact)
+    if logact&PgLOG.DODFLT: prepare_default(tablename, record, logact)
+    sqlstr = prepare_update(tablename, list(record), None, list(cnddict))
+
+    values = tuple(record.values()) + tuple(cnddict.values())
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "Update {} for {}".format(tablename, values))
+
+    ucnt = pgcnt = 0
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            pgcur.execute(sqlstr, values)
+            ucnt = pgcur.rowcount
+            pgcur.close()
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, values, logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "pghupdt: {} record(s) updated in {}".format(ucnt, tablename))
+    if logact&PgLOG.ENDLCK:
+        endtran()
+    elif curtran:
+        curtran += ucnt
+        if curtran > PGDBI['MTRANS']: starttran()
+
+    return ucnt
+
+#
+# update multiple records in tablename
+# tablename: update for one table name each call
+# records: update values, dict with field names : value lists
+# cnddicts: condition dict with field names : value lists
+# return number of records updated upon success
+#
+def pgmupdt(tablename, records, cnddicts, logact = PGDBI['ERRLOG']):
+
+    global curtran
+    if not records: return PgLOG.pglog("Nothing to update in " + tablename, logact)
+    if not cnddicts or isinstance(cnddicts, int): return PgLOG.pglog("Missing condition to update " + tablename, logact)
+    if logact&PgLOG.DODFLT: prepare_defaults(tablename, records, logact)
+    sqlstr = prepare_update(tablename, list(records), None, list(cnddicts))
+
+    fldvals = tuple(records.values())
+    cntrow = len(fldvals[0])
+    cndvals = tuple(cnddicts.values())
+    cntcnd = len(cndvals[0])
+    if cntcnd != cntrow: return PgLOG.pglog("Field/Condition value counts mismatch {}/{} to update {}".format(cntrow, cntcnd, tablename), logact)
+    v = fldvals + cndvals
+    values = list(zip(*v))
+
+    if PgLOG.PGLOG['DBGLEVEL']:
+        for row in values: PgLOG.pgdbg(1000, "Update {} for {}".format(tablename, row))
+
+    ucnt = pgcnt = 0
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            execute_batch(pgcur, sqlstr, values, page_size=PGDBI['PGSIZE'])
+            ucnt = cntrow
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, values[0], logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    pgcur.close()
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "pgmupdt: {}/{} record(s) updated in {}".format(ucnt, cntrow, tablename))
+    if logact&PgLOG.ENDLCK:
+        endtran()
+    elif curtran:
+        curtran += ucnt
+        if curtran > PGDBI['MTRANS']: starttran()
+
+    return ucnt
+#
+# local function: prepare a DELETE statement for pgdel(), pghdel() and pgmdel()
+#
+def prepare_delete(tablename, condition = None, cndflds = None):
+
+    # build the condition string
+    if not condition:
+        cndset = []
+        for fld in cndflds:
+            cndset.append("{}=%s".format(fld))
+        condition = " AND ".join(cndset)
+
+    sqlstr = "DELETE FROM {} WHERE {}".format(tablename, condition)
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, sqlstr)
+
+    return sqlstr
+
+#
+# delete one or multiple records in tablename according to condition
+# tablename: delete for one table name each call
+# condition: delete conditions for the where clause
+# return number of records deleted upon success
+#
+def pgdel(tablename, condition, logact = PGDBI['ERRLOG']):
+
+    global curtran
+    if not condition or isinstance(condition, int): return PgLOG.pglog("Missing condition to delete from " + tablename, logact)
+    sqlstr = prepare_delete(tablename, condition)
+
+    dcnt = pgcnt = 0
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            pgcur.execute(sqlstr)
+            dcnt = pgcur.rowcount
+            pgcur.close()
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, None, logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "pgdel: {} record(s) deleted from {}".format(dcnt, tablename))
+    if logact&PgLOG.ENDLCK:
+        endtran()
+    elif curtran:
+        curtran += dcnt
+        if curtran > PGDBI['MTRANS']: starttran()
+
+    return dcnt
+
+#
+# delete one or multiple records in tablename according to the condition dict
+# tablename: delete for one table name each call
+# cnddict: delete condition dict with field names : values
+# return number of records deleted upon success
+#
+def pghdel(tablename, cnddict, logact = PGDBI['ERRLOG']):
+
+    global curtran
+    if not cnddict or isinstance(cnddict, int): return PgLOG.pglog("Missing condition dict to delete from " + tablename, logact)
+    sqlstr = prepare_delete(tablename, None, list(cnddict))
+
+    values = tuple(cnddict.values())
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "Delete from {} for {}".format(tablename, values))
+
+    dcnt = pgcnt = 0
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            pgcur.execute(sqlstr, values)
+            dcnt = pgcur.rowcount
+            pgcur.close()
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, values, logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "pghdel: {} record(s) deleted from {}".format(dcnt, tablename))
+    if logact&PgLOG.ENDLCK:
+        endtran()
+    elif curtran:
+        curtran += dcnt
+        if curtran > PGDBI['MTRANS']: starttran()
+
+    return dcnt
+
+#
+# delete multiple records in tablename according to the condition dict
+# tablename: delete for one table name each call
+# cnddicts: delete condition dict with field names : value lists
+# return number of records deleted upon success
+#
+def pgmdel(tablename, cnddicts, logact = PGDBI['ERRLOG']):
+
+    global curtran
+    if not cnddicts or isinstance(cnddicts, int): return PgLOG.pglog("Missing condition dict to delete from " + tablename, logact)
+    sqlstr = prepare_delete(tablename, None, list(cnddicts))
+
+    v = cnddicts.values()
+    values = list(zip(*v))
+    if PgLOG.PGLOG['DBGLEVEL']:
+        for row in values:
+            PgLOG.pgdbg(1000, "Delete from {} for {}".format(tablename, row))
+
+    dcnt = pgcnt = 0
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            execute_batch(pgcur, sqlstr, values, page_size=PGDBI['PGSIZE'])
+            dcnt = len(values)
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, values[0], logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    pgcur.close()
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "pgmdel: {} record(s) deleted from {}".format(dcnt, tablename))
+    if logact&PgLOG.ENDLCK:
+        endtran()
+    elif curtran:
+        curtran += dcnt
+        if curtran > PGDBI['MTRANS']: starttran()
+
+    return dcnt
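+#
+# Usage sketch (illustrative only, not part of this module): pghdel() builds the
+# where clause from the condition dict and passes the values as parameters:
+#
+#    cnt = pghdel('dssdb.user', {'userno' : 12345})
+#    if cnt: PgLOG.pglog("{} record(s) deleted".format(cnt), PgLOG.LOGWRN)
+#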
+#
+# sqlstr: a complete sql string
+# return number of records affected upon success
+#
+def pgexec(sqlstr, logact = PGDBI['ERRLOG']):
+
+    global curtran
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(100, sqlstr)
+
+    ret = pgcnt = 0
+    while True:
+        pgcur = pgcursor()
+        if not pgcur: return PgLOG.FAILURE
+        try:
+            pgcur.execute(sqlstr)
+            ret = pgcur.rowcount
+            pgcur.close()
+        except PgSQL.Error as pgerr:
+            if not check_dberror(pgerr, pgcnt, sqlstr, None, logact): return PgLOG.FAILURE
+        else:
+            break
+        pgcnt += 1
+
+    if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, "pgexec: {} record(s) affected for {}".format(ret, sqlstr))
+    if logact&PgLOG.ENDLCK:
+        endtran()
+    elif curtran:
+        curtran += ret
+        if curtran > PGDBI['MTRANS']: starttran()
+
+    return ret
+
+#
+# tablename: name of the temporary table to create
+# fromtable: table name to gather data from
+# fields: comma-delimited string of field names to select
+# condition: query conditions for the where clause
+# return number of records created upon success
+#
+def pgtemp(tablename, fromtable, fields, condition = None, logact = 0):
+
+    sqlstr = "CREATE TEMPORARY TABLE {} AS SELECT {} FROM {}".format(tablename, fields, fromtable)
+    if condition: sqlstr += " WHERE " + condition
+
+    return pgexec(sqlstr, logact)
+
+#
+# get the condition for a given table name for accessing information_schema
+#
+def table_condition(tablename):
+
+    ms = re.match(r'(.+)\.(.+)', tablename)
+    if ms:
+        scname = ms.group(1)
+        tbname = ms.group(2)
+    else:
+        scname = PGDBI['SCNAME']
+        tbname = tablename
+
+    return "table_schema = '{}' AND table_name = '{}'".format(scname, tbname)
+
+#
+# check if a given table name exists or not
+# tablename: one table name to check
+#
+def pgcheck(tablename, logact = 0):
+
+    condition = table_condition(tablename)
+
+    ret = pgget('information_schema.tables', None, condition, logact)
+    return (PgLOG.SUCCESS if ret else PgLOG.FAILURE)
+
+#
+# group of functions to check parent records and add an empty one if missing
+# return user.uid upon success, 0 otherwise
+#
+def check_user_uid(userno, date = None):
+
+    if not userno: return 0
+    if isinstance(userno, str): userno = int(userno)
+
+    if date is None:
+        datecond = "until_date IS NULL"
+        date = 'today'
+    else:
+        datecond = "(start_date IS NULL OR start_date <= '{}') AND (until_date IS NULL OR until_date >= '{}')".format(date, date)
+
+    pgrec = pgget("dssdb.user", "uid", "userno = {} AND {}".format(userno, datecond), PGDBI['ERRLOG'])
+    if pgrec: return pgrec['uid']
+
+    if userno not in NMISSES:
+        PgLOG.pglog("{}: Scientist ID NOT on file for {}".format(userno, date), PgLOG.LGWNEM)
+        NMISSES.append(userno)
+
+    # check again if the user is on file with a different date range
+    pgrec = pgget("dssdb.user", "uid", "userno = {}".format(userno), PGDBI['ERRLOG'])
+    if pgrec: return pgrec['uid']
+
+    pgrec = ucar_user_info(userno)
+    if not pgrec: pgrec = {'userno' : userno, 'stat_flag' : 'M'}
+    uid = pgadd("dssdb.user", pgrec, (PGDBI['EXITLG']|PgLOG.AUTOID))
+    if uid: PgLOG.pglog("{}: Scientist ID Added as user.uid = {}".format(userno, uid), PgLOG.LGWNEM)
+
+    return uid
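+#
+# Usage sketch (illustrative only, not part of this module; the scientist
+# number is hypothetical): look up, or auto-register, a user by scientist number:
+#
+#    uid = check_user_uid(12345, '2023-07-01')
+#    if uid: PgLOG.pglog("user.uid = {}".format(uid), PgLOG.LOGWRN)
+#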
{}".format(logname, date), PgLOG.LGWNEM) + LMISSES.append(logname) + + # check again if a user is on file with different date range + pgrec = pgget("dssdb.user", "uid", "logname = '{}'".format(logname), PGDBI['ERRLOG']) + if pgrec: return pgrec['uid'] + + pgrec = ucar_user_info(0, logname) + if not pgrec: pgrec = {'logname' : logname, 'stat_flag' : 'M'} + uid = pgadd("dssdb.user", pgrec, (PGDBI['EXITLG']|PgLOG.AUTOID)) + if uid: PgLOG.pglog("{}: UCAR Login Name Added as user.uid = {}".format(logname, uid), PgLOG.LGWNEM) + + return uid + +# +# get ucar user info for given userno (scientist number) or logname (Ucar login) +# +def ucar_user_info(userno, logname = None): + + MATCH = { + 'upid' : "upid", + 'uid' : "userno", + 'username' : "logname", + 'lastName' : "lstname", + 'firstName' : "fstname", + 'active' : "stat_flag", + 'internalOrg' : "division", + 'externalOrg' : "org_name", + 'country' : "country", + 'forwardEmail' : "email", + 'email' : "ucaremail", + 'phone' : "phoneno" + } + + buf = PgLOG.pgsystem("pgperson " + ("-uid {}".format(userno) if userno else "-username {}".format(logname)), PgLOG.LOGWRN, 20) + if not buf: return None + + pgrec = {} + for line in buf.split('\n'): + ms = re.match(r'^(.+)<=>(.*)$', line) + if ms: + (key, val) = ms.groups() + if key in MATCH: + if key == 'upid' and 'upid' in pgrec: break # get one record only + pgrec[MATCH[key]] = val + + if not pgrec: return None + + if userno: + pgrec['userno'] = userno + elif pgrec['userno']: + pgrec['userno'] = userno = int(pgrec['userno']) + if pgrec['upid']: pgrec['upid'] = int(pgrec['upid']) + if pgrec['stat_flag']: pgrec['stat_flag'] = 'A' if pgrec['stat_flag'] == '1' else 'C' + if pgrec['email'] and re.search(r'(@|\.)ucar\.edu$', pgrec['email'], re.I): + pgrec['email'] = pgrec['ucaremail'] + pgrec['org_name'] = 'NCAR' + country = pgrec['country'] if 'country' in pgrec else None + pgrec['country'] = set_country_code(pgrec['email'], country) + if pgrec['division']: + val = "NCAR" + else: + val = None + pgrec['org_type'] = get_org_type(val, pgrec['email']) + + buf = PgLOG.pgsystem("pgusername {}".format(pgrec['logname']), PgLOG.LOGWRN, 20) + if not buf: return pgrec + + for line in buf.split('\n'): + ms = re.match(r'^(.+)<=>(.*)$', line) + if ms: + (key, val) = ms.groups() + if key == 'startDate': + m = re.match(r'^(\d+-\d+-\d+)\s', val) + if m: + pgrec['start_date'] = m.group(1) + else: + pgrec['start_date'] = val + + if key == 'endDate': + m = re.match(r'^(\d+-\d+-\d+)\s', val) + if m: + pgrec['until_date'] = m.group(1) + else: + pgrec['until_date'] = val + + return pgrec + +# +# set country code for given coutry name or email address +# +def set_country_code(email, country = None): + + codes = { + 'CHINA' : "P.R.CHINA", + 'ENGLAND' : "UNITED.KINGDOM", + 'FR' : "FRANCE", + 'KOREA' : "SOUTH.KOREA", + 'USSR' : "RUSSIA", + 'US' : "UNITED.STATES", + 'U.S.A.' : "UNITED.STATES" + } + + if country: + country = country.upper() + ms = re.match(r'^(\w+)\s(\w+)$', country) + if ms: + country = ms.group(1) + '.' 
+#
+# set the country code for a given country name or email address
+#
+def set_country_code(email, country = None):
+
+    codes = {
+        'CHINA'   : "P.R.CHINA",
+        'ENGLAND' : "UNITED.KINGDOM",
+        'FR'      : "FRANCE",
+        'KOREA'   : "SOUTH.KOREA",
+        'USSR'    : "RUSSIA",
+        'US'      : "UNITED.STATES",
+        'U.S.A.'  : "UNITED.STATES"
+    }
+
+    if country:
+        country = country.upper()
+        ms = re.match(r'^(\w+)\s(\w+)$', country)
+        if ms:
+            country = ms.group(1) + '.' + ms.group(2)
+        elif country in codes:
+            country = codes[country]
+    else:
+        country = email_to_country(email)
+
+    return country
+
+# return wuser.wuid upon success, 0 otherwise
+def check_wuser_wuid(email, date = None):
+
+    if not email: return 0
+    emcond = "email = '{}'".format(email)
+    if not date:
+        date = 'today'
+        datecond = "until_date IS NULL"
+    else:
+        datecond = "(start_date IS NULL OR start_date <= '{}') AND (until_date IS NULL OR until_date >= '{}')".format(date, date)
+
+    pgrec = pgget("wuser", "wuid", "{} AND {}".format(emcond, datecond), PGDBI['ERRLOG'])
+    if pgrec: return pgrec['wuid']
+
+    # check again if the user is on file with a different date range
+    pgrec = pgget("wuser", "wuid", emcond, PgLOG.LOGERR)
+    if pgrec: return pgrec['wuid']
+
+    # now add one in
+    record = {'email' : email}
+    # check again if a ruser is on file
+    pgrec = pgget("ruser", "*", emcond + " AND end_date IS NULL", PGDBI['ERRLOG'])
+    if not pgrec: pgrec = pgget("ruser", "*", emcond, PGDBI['ERRLOG'])
+
+    if pgrec:
+        record['ruid'] = pgrec['id']
+        record['fstname'] = pgrec['fname']
+        record['lstname'] = pgrec['lname']
+        record['country'] = pgrec['country']
+        record['org_type'] = get_org_type(pgrec['org_type'], pgrec['email'])
+        record['start_date'] = str(pgrec['rdate'])
+        if pgrec['end_date']:
+            record['until_date'] = str(pgrec['end_date'])
+            record['stat_flag'] = 'C'
+        else:
+            record['stat_flag'] = 'A'
+
+        if pgrec['title']: record['utitle'] = pgrec['title']
+        if pgrec['mname']: record['midinit'] = pgrec['mname'][0]
+        if pgrec['org']: record['org_name'] = pgrec['org']
+    else:
+        record['stat_flag'] = 'M'
+        record['org_type'] = get_org_type('', email)
+        record['country'] = email_to_country(email)
+
+    wuid = pgadd("wuser", record, PgLOG.LOGERR|PgLOG.AUTOID)
+    if wuid:
+        if pgrec:
+            PgLOG.pglog("{}({}, {}) Added as wuid({})".format(email, pgrec['lname'], pgrec['fname'], wuid), PgLOG.LGWNEM)
+        else:
+            PgLOG.pglog("{} Added as wuid({})".format(email, wuid), PgLOG.LGWNEM)
+        return wuid
+
+    return 0
+
+# return wuser.wuid upon success, 0 otherwise
+def check_cdp_wuser(username):
+
+    pgrec = pgget("wuser", "wuid", "cdpname = '{}'".format(username), PGDBI['EXITLG'])
+    if pgrec: return pgrec['wuid']
+
+    # missing wuser record; add one in
+    pgrec = get_cdp_user(None, None, username)
+    if not pgrec:
+        if username not in LMISSES:
+            PgLOG.pglog("Missing CDP User '{}'".format(username), PgLOG.LGWNEM)
+            LMISSES.append(username)
+        return 0
+
+    idrec = pgget("wuser", "wuid", "email = '{}'".format(pgrec['email']), PGDBI['EXITLG'])
+    wuid = idrec['wuid'] if idrec else 0
+    if wuid > 0:
+        idrec = {}
+        idrec['cdpid'] = pgrec['cdpid']
+        idrec['cdpname'] = pgrec['cdpname']
+        pgupdt("wuser", idrec, "wuid = {}".format(wuid), PGDBI['EXITLG'])
+    else:
+        pgrec['stat_flag'] = 'A'
+        pgrec['org_type'] = get_org_type(pgrec['org_type'], pgrec['email'])
+        pgrec['country'] = email_to_country(pgrec['email'])
+        wuid = pgadd("wuser", pgrec, PGDBI['EXITLG']|PgLOG.AUTOID)
+        if wuid > 0:
+            PgLOG.pglog("CDP User {} added as wuid = {} in RDADB".format(username, wuid), PgLOG.LGWNEM)
+
+    return wuid
+
+#
+# for a given email, get the long country name
+#
+def email_to_country(email):
+
+    ms = re.search(r'\.(\w\w)$', email)
+    if ms:
+        pgrec = pgget("countries", "token", "domain_id = '{}'".format(ms.group(1)), PGDBI['EXITLG'])
+        if pgrec: return pgrec['token']
+    elif re.search(r'\.(gov|edu|mil|org|com|net)$', email):
+        return "UNITED.STATES"
+
+    return "UNKNOWN"   # fall through, including unmatched two-letter domains
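+#
+# Usage sketch (illustrative only, not part of this module; results depend on
+# the contents of table countries):
+#
+#    set_country_code('jdoe@univ.edu', 'US')    # 'US' maps to 'UNITED.STATES'
+#    email_to_country('jdoe@ec.gc.ca')          # looks up domain 'ca' in table countries
+#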
+#
+# increase the version number in table dataset for a given dataset
+#
+def reset_rdadb_version(dsid):
+
+    pgexec("UPDATE dataset SET version = version + 1 WHERE dsid = '{}'".format(dsid), PGDBI['ERRLOG'])
+
+#
+# check the use_rdadb flag in table dataset for a given dataset and given values
+#
+def use_rdadb(dsid, logact = 0, vals = None):
+
+    ret = ''   # default to empty in case the dataset is not in RDADB
+    if dsid:
+        pgrec = pgget("dataset", "use_rdadb", "dsid = '{}'".format(dsid), PGDBI['EXITLG'])
+        if pgrec:
+            ret = 'N'   # default to 'N' if the dataset record is in RDADB already
+            if pgrec['use_rdadb']:
+                if not vals: vals = "IPYMW"   # default to Internal; Publishable; Yes RDADB
+                if vals.find(pgrec['use_rdadb']) > -1:
+                    ret = pgrec['use_rdadb']
+        elif logact:
+            PgLOG.pglog("Dataset '{}' is not in RDADB!".format(dsid), logact)
+
+    return ret
+
+#
+# fld: field name for the query condition
+# vals: array of values
+# isstr: 1 for string values, which require quotes and support wildcards
+# noand: 1 for skipping the leading ' AND ' of the condition
+# return a condition string for a given field
+#
+def get_field_condition(fld, vals, isstr = 0, noand = 0):
+
+    cnd = wcnd = negative = ''
+    sign = "="
+    logic = " OR "
+    count = len(vals) if vals else 0
+    if count == 0: return ''
+    ncnt = scnt = wcnt = cnt = 0
+    for i in range(count):
+        val = vals[i]
+        if val is None or (i > 0 and val == vals[i-1]): continue
+        if i == 0 and val == PGSIGNS[0]:
+            negative = "NOT "
+            logic = " AND "
+            continue
+        if scnt == 0 and isinstance(val, str):
+            ms = re.match(r'^({})$'.format('|'.join(PGSIGNS[1:])), val)
+            if ms:
+                osign = sign = ms.group(1)
+                scnt += 1
+                if sign == "<>":
+                    scnt += 1
+                    sign = negative + "BETWEEN"
+                elif negative:
+                    sign = "<=" if (sign == ">") else ">="
+                continue
+        if isstr:
+            if not isinstance(val, str): val = str(val)
+            if sign == "=":
+                if not val:
+                    ncnt += 1   # found null string
+                elif val.find('%') > -1:
+                    sign = negative + "LIKE"
+                elif re.search(r'[\[\(\?\.]', val):
+                    sign = negative + "SIMILAR TO"
+            if val.find("'") != 0:
+                val = "'{}'".format(val)
+        elif isinstance(val, str):
+            if val.find('.') > -1:
+                val = float(val)
+            else:
+                val = int(val)
+        if sign == "=":
+            if cnt > 0: cnd += ", "
+            cnd += str(val)
+            cnt += 1
+        else:
+            if sign == "AND":
+                wcnd += " {} {}".format(sign, val)
+            else:
+                if wcnt > 0: wcnd += logic
+                wcnd += "{} {} {}".format(fld, sign, val)
+                wcnt += 1
+            if re.search(r'BETWEEN$', sign):
+                sign = "AND"
+            else:
+                sign = "="
+                scnt = 0
+
+    if scnt > 0:
+        s = 's' if scnt > 1 else ''
+        PgLOG.pglog("Need {} value{} after sign '{}'".format(scnt, s, osign), PgLOG.LGEREX)
+    if wcnt > 1: wcnd = "({})".format(wcnd)
+    if cnt > 0:
+        if cnt > 1:
+            cnd = "{} {}IN ({})".format(fld, negative, cnd)
+        else:
+            cnd = "{} {} {}".format(fld, ("<>" if negative else "="), cnd)
+        if ncnt > 0:
+            ncnd = "{} IS {}NULL".format(fld, negative)
+            cnd = "({}{}{})".format(cnd, logic, ncnd)
+        if wcnt > 0: cnd = "({}{}{})".format(cnd, logic, wcnd)
+    elif wcnt > 0:
+        cnd = wcnd
+    if cnd and not noand: cnd = " AND " + cnd
+
+    return cnd
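+#
+# Usage sketch (illustrative only, not part of this module; dsid values are
+# hypothetical): build WHERE fragments from value lists:
+#
+#    get_field_condition('dsid', ['d999000', 'd999001'], 1, 1)
+#    #   -> "dsid IN ('d999000', 'd999001')"
+#    get_field_condition('email', ['%.edu'], 1, 1)
+#    #   -> "email LIKE '%.edu'"
+#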
+#
+# build up the field-name string for given or default conditions
+#
+def fieldname_string(fnames, dnames = None, anames = None, wflds = None):
+
+    if not fnames:
+        fnames = dnames   # include default field names
+    elif re.match(r'^all$', fnames, re.I):
+        fnames = anames   # include all field names
+
+    if not wflds: return fnames
+
+    for wfld in wflds:
+        if not wfld or fnames.find(wfld) > -1: continue   # empty field, or included already
+        if wfld == "Q":
+            pos = fnames.find("R")   # request name
+        elif wfld == "Y":
+            pos = fnames.find("X")   # parent group name
+        elif wfld == "G":
+            pos = fnames.find("I")   # group name
+        else:
+            pos = -1   # prepend other with-field names
+
+        if pos == -1:
+            fnames = wfld + fnames   # prepend with-field
+        else:
+            fnames = fnames[0:pos] + wfld + fnames[pos:]   # insert with-field
+
+    return fnames
+
+#
+# Function get_group_field_path(gindex: group index,
+#                               dsid: dataset id,
+#                               field: path field name, webpath or savedpath)
+# go up through the group tree to find a non-empty path; return it or None
+#
+def get_group_field_path(gindex, dsid, field):
+
+    if gindex:
+        pgrec = pgget("dsgroup", "pindex, {}".format(field),
+                      "dsid = '{}' AND gindex = {}".format(dsid, gindex), PGDBI['EXITLG'])
+    else:
+        pgrec = pgget("dataset", field,
+                      "dsid = '{}'".format(dsid), PGDBI['EXITLG'])
+    if pgrec:
+        if pgrec[field]:
+            return pgrec[field]
+        elif gindex:
+            return get_group_field_path(pgrec['pindex'], dsid, field)
+        else:
+            return None
+
+#
+# get the specialist info for a given dataset
+#
+def get_specialist(dsid, logact = PGDBI['ERRLOG']):
+
+    if dsid in SPECIALIST: return SPECIALIST[dsid]
+
+    pgrec = pgget("dsowner, dssgrp", "specialist, lstname, fstname",
+                  "specialist = logname AND dsid = '{}' AND priority = 1".format(dsid), logact)
+    if pgrec:
+        if pgrec['specialist'] == "datahelp" or pgrec['specialist'] == "dss":
+            pgrec['lstname'] = "Help"
+            pgrec['fstname'] = "Data"
+    else:
+        pgrec = {'specialist' : "datahelp", 'lstname' : "Help", 'fstname' : "Data"}
+
+    SPECIALIST[dsid] = pgrec   # cache specialist info for dsowner of dsid
+    return pgrec
+
+#
+# build a customized email from PgLOG.get_email()
+#
+def build_customized_email(table, field, condition, subject, logact = 0):
+
+    msg = PgLOG.get_email()
+
+    if not msg: return PgLOG.FAILURE
+
+    sender = PgLOG.PGLOG['CURUID'] + "@ucar.edu"
+    receiver = PgLOG.PGLOG['EMLADDR'] if PgLOG.PGLOG['EMLADDR'] else (PgLOG.PGLOG['CURUID'] + "@ucar.edu")
+    if receiver.find(sender) < 0: PgLOG.add_carbon_copy(sender, 1)
+    ebuf = "From: {}\nTo: {}\n".format(sender, receiver)
+    if PgLOG.PGLOG['CCDADDR']: ebuf += "Cc: {}\n".format(PgLOG.PGLOG['CCDADDR'])
+    if not subject: subject = "Message from {}-{}".format(PgLOG.PGLOG['HOSTNAME'], PgLOG.get_command())
+    ebuf += "Subject: {}!\n\n{}\n".format(subject, msg)
+
+    estat = cache_customized_email(table, field, condition, ebuf, logact)
+    if estat and logact:
+        PgLOG.pglog("Email {} cached to '{}.{}' for {}, Subject: {}".format(receiver, table, field, condition, subject), logact)
+
+    return estat
+#
+# email: full user email address
+#
+# get the user real name from table ruser for a given email address
+# opts == 1 : include email
+# opts == 2 : include org_type
+# opts == 4 : include country
+# opts == 8 : include valid_email
+# opts == 16 : include org
+#
+def get_ruser_names(email, opts = 0, date = None):
+
+    fields = "lname lstname, fname fstname"
+
+    if opts&1: fields += ", email"
+    if opts&2: fields += ", org_type"
+    if opts&4: fields += ", country"
+    if opts&8: fields += ", valid_email"
+    if opts&16: fields += ", org"
+
+    if date:
+        datecond = "rdate <= '{}' AND (end_date IS NULL OR end_date >= '{}')".format(date, date)
+    else:
+        datecond = "end_date IS NULL"
+        date = time.strftime("%Y-%m-%d", (time.gmtime() if PgLOG.PGLOG['GMTZ'] else time.localtime()))
+    emcnd = "email = '{}'".format(email)
+    pgrec = pgget("ruser", fields, "{} AND {}".format(emcnd, datecond), PgLOG.LGEREX)
+    if not pgrec:   # missing ruser record for the given date range
+        PgLOG.pglog("{}: email not in ruser for {}".format(email, date), PgLOG.LOGWRN)
+        # check again if the user is on file with a different date range
+        pgrec = pgget("ruser", fields, emcnd, PgLOG.LGEREX)
+        if not pgrec and pgget("dssdb.user", '', emcnd):
+            fields = "lstname, fstname"
+            if opts&1: fields += ", email"
+            if opts&2: fields += ", org_type"
+            if opts&4: fields += ", country"
+            if opts&8: fields += ", email valid_email"
+            if opts&16: fields += ", org_name org"
+            pgrec = pgget("dssdb.user", fields, emcnd, PgLOG.LGEREX)
+
+    if pgrec and pgrec['lstname']:
+        pgrec['name'] = (pgrec['fstname'].capitalize() + ' ') if pgrec['fstname'] else ''
+        pgrec['name'] += pgrec['lstname'].capitalize()
+    else:
+        if not pgrec: pgrec = {}
+        pgrec['name'] = email.split('@')[0]
+        if opts&1: pgrec['email'] = email
+
+    return pgrec
+
+#
+# cache a customized email for sending it later
+#
+def cache_customized_email(table, field, condition, emlmsg, logact = 0):
+
+    pgrec = {field: emlmsg}
+    if pgupdt(table, pgrec, condition, logact|PgLOG.ERRLOG):
+        if logact: PgLOG.pglog("Email cached to '{}.{}' for {}".format(table, field, condition), logact&(~PgLOG.EXITLG))
+        return PgLOG.SUCCESS
+    else:
+        msg = "cache email to '{}.{}' for {}".format(table, field, condition)
+        PgLOG.pglog("Error {}, try to send directly now".format(msg), logact|PgLOG.ERRLOG)
+        return PgLOG.send_customized_email(msg, emlmsg, logact)
+
+#
+# otype: user organization type
+# email: user email address
+#
+# return: organization type like DSS, NCAR, UNIV...
+#
+def get_org_type(otype, email):
+
+    if not otype: otype = "OTHER"
+    if email:
+        ms = re.search(r'(@|\.)ucar\.edu$', email)
+        if ms:
+            mc = ms.group(1)
+            if otype == 'UCAR' or otype == 'OTHER': otype = 'NCAR'
+            if otype == 'NCAR' and mc == '@':
+                ms = re.match(r'^(.+)@', email)
+                if ms and pgget("dssgrp", "", "logname = '{}'".format(ms.group(1))): otype = 'DSS'
+        else:
+            ms = re.search(r'\.(mil|org|gov|edu|com|net)(\.\w\w|$)', email)
+            if ms:
+                otype = ms.group(1).upper()
+                if otype == 'EDU': otype = "UNIV"
+
+    return otype
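+#
+# Usage sketch (illustrative only, not part of this module; 'DSS' is returned
+# for ucar.edu logins found in table dssgrp):
+#
+#    get_org_type('', 'jdoe@univ.edu')    # -> 'UNIV'
+#    get_org_type('', 'jdoe@ucar.edu')    # -> 'NCAR' (or 'DSS' for DECS members)
+#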
+#
+# join values and handle the null values
+#
+def join_values(vstr, vals):
+
+    if vstr:
+        vstr += "\n"
+    elif vstr is None:
+        vstr = ''
+
+    return "{}Value{}({})".format(vstr, ('s' if len(vals) > 1 else ''), ', '.join(map(str, vals)))
+
+#
+# check table hostname to find the system down times; cache the result for 10 minutes
+#
+def get_system_downs(hostname, logact = 0):
+
+    curtime = int(time.time())
+    newhost = 0
+
+    if hostname not in SYSDOWN:
+        SYSDOWN[hostname] = {}
+        newhost = 1
+    if newhost or (curtime - SYSDOWN[hostname]['chktime']) > 600:
+        SYSDOWN[hostname]['chktime'] = curtime
+        SYSDOWN[hostname]['start'] = 0
+        SYSDOWN[hostname]['end'] = 0
+        SYSDOWN[hostname]['active'] = 1
+        SYSDOWN[hostname]['path'] = None
+
+        pgrec = pgget('hostname', 'service, domain, downstart, downend',
+                      "hostname = '{}'".format(hostname), logact)
+        if pgrec:
+            if pgrec['service'] == 'N':
+                SYSDOWN[hostname]['start'] = curtime
+                SYSDOWN[hostname]['active'] = 0
+            else:
+                start = int(datetime.timestamp(pgrec['downstart'])) if pgrec['downstart'] else 0
+                end = int(datetime.timestamp(pgrec['downend'])) if pgrec['downend'] else 0
+                if start > 0 and (end == 0 or end > curtime):
+                    SYSDOWN[hostname]['start'] = start
+                    SYSDOWN[hostname]['end'] = end
+            if pgrec['service'] == 'S' and pgrec['domain'] and re.match(r'^/', pgrec['domain']):
+                SYSDOWN[hostname]['path'] = pgrec['domain']
+
+    SYSDOWN[hostname]['curtime'] = curtime
+
+    return SYSDOWN[hostname]
+
+#
+# return seconds for how long the system will continue to be down
+#
+def system_down_time(hostname, offset, logact = 0):
+
+    down = get_system_downs(hostname, logact)
+    if down['start'] and down['curtime'] >= (down['start'] - offset):
+        if not down['end']:
+            if PgLOG.PGLOG['PGBATCH'] == PgLOG.PGLOG['PBSNAME']:
+                return PgLOG.PGLOG['PBSTIME']
+        elif down['curtime'] <= down['end']:
+            return (down['end'] - down['curtime'])
+
+    return 0   # the system is not down
+
+#
+# return a string message if the system is down
+#
+def system_down_message(hostname, path, offset, logact = 0):
+
+    down = get_system_downs(hostname, logact)
+    msg = None
+    if down['start'] and down['curtime'] >= (down['start'] - offset):
+        match = match_down_path(path, down['path'])
+        if match:
+            msg = "{}{}:".format(hostname, ('-' + path) if match > 0 else '')
+            if not down['active']:
+                msg += " Not in Service"
+            else:
+                msg += " Planned down, started at " + PgLOG.current_datetime(down['start'])
+                if not down['end']:
+                    msg += " And no end time specified"
+                elif down['curtime'] <= down['end']:
+                    msg += " And will end by " + PgLOG.current_datetime(down['end'])
+
+    return msg
+
+#
+# return 1 if the given path matches the daemon paths, 0 if not; -1 if they cannot be compared
+#
+def match_down_path(path, dpaths):
+
+    if not (path and dpaths): return -1
+
+    paths = re.split(':', dpaths)
+
+    for p in paths:
+        if re.match(r'^{}'.format(p), path): return 1
+
+    return 0
+
+# validate that the login user is in the DECS group;
+# check all nodes if skpdsg is false, otherwise skip the check on DSG nodes
+def validate_decs_group(cmdname, logname, skpdsg):
+
+    if skpdsg and PgLOG.PGLOG['DSGHOSTS'] and re.search(r'(^|:){}'.format(PgLOG.PGLOG['HOSTNAME']), PgLOG.PGLOG['DSGHOSTS']): return
+    if not logname: logname = PgLOG.PGLOG['CURUID']
+
+    if not pgget("dssgrp", '', "logname = '{}'".format(logname), PgLOG.LGEREX):
+        PgLOG.pglog("{}: Must be in DECS Group to run '{}' on {}".format(logname, cmdname, PgLOG.PGLOG['HOSTNAME']), PgLOG.LGEREX)
+#
+# add an allusage record into a yearly table; create a new yearly table if it does not exist
+# year -- year to identify the yearly table, evaluated from the date if missing
+# records -- dict to hold one or multiple records.
+#    Dict keys: email -- user email address
+#               org_type -- organization type
+#               country -- country code
+#               dsid -- dataset ID
+#               date -- date data accessed
+#               time -- time data accessed
+#               quarter -- quarter of the year data accessed
+#               size -- bytes of data accessed
+#               method -- delivery methods: MSS,Web,Ftp,Tape,Cd,Disk,Paper,cArt,Micro
+#               source -- usage source flag: W - wusage, O - ordusage
+#               midx -- refer to mbr2loc.midx if not 0
+#               ip -- user IP address
+#               region -- user region name; for example, Colorado
+#
+# isarray -- if true, multiple records provided via arrays for each dict key
+# docheck -- if 1, check and add only if the record is not on file
+# docheck -- if 2, check and add if the record is not on file, and update if it exists
+# docheck -- if 4, same as 2, but also checking NULL email values
+#
+def add_yearly_allusage(year, records, isarray = 0, docheck = 0):
+
+    acnt = 0
+    if not year:
+        ms = re.match(r'^(\d\d\d\d)', str(records['date'][0] if isarray else records['date']))
+        if ms: year = ms.group(1)
+    tname = "allusage_{}".format(year)
+    if isarray:
+        cnt = len(records['email'])
+        if 'quarter' not in records: records['quarter'] = [0]*cnt
+        for i in range(cnt):
+            if not records['quarter'][i]:
+                ms = re.search(r'-(\d+)-', str(records['date'][i]))
+                if ms: records['quarter'][i] = int((int(ms.group(1))-1)/3)+1
+        if docheck:
+            for i in range(cnt):
+                record = {}
+                for key in records:
+                    record[key] = records[key][i]
+                cnd = "email = '{}' AND dsid = '{}' AND method = '{}' AND date = '{}' AND time = '{}'".format(
+                      record['email'], record['dsid'], record['method'], record['date'], record['time'])
+                pgrec = pgget(tname, 'aidx', cnd, PgLOG.LOGERR|PgLOG.ADDTBL)
+                if docheck == 4 and not pgrec:
+                    cnd = "email IS NULL AND dsid = '{}' AND method = '{}' AND date = '{}' AND time = '{}'".format(
+                          record['dsid'], record['method'], record['date'], record['time'])
+                    pgrec = pgget(tname, 'aidx', cnd, PgLOG.LOGERR|PgLOG.ADDTBL)
+                if pgrec:
+                    if docheck > 1: acnt += pgupdt(tname, record, "aidx = {}".format(pgrec['aidx']), PgLOG.LGEREX)
+                else:
+                    acnt += pgadd(tname, record, PgLOG.LGEREX|PgLOG.ADDTBL)
+        else:
+            acnt = pgmadd(tname, records, PgLOG.LGEREX|PgLOG.ADDTBL)
+    else:
+        record = records
+        if not ('quarter' in record and record['quarter']):
+            ms = re.search(r'-(\d+)-', str(record['date']))
+            if ms: record['quarter'] = int((int(ms.group(1))-1)/3)+1
+        if docheck:
+            cnd = "email = '{}' AND dsid = '{}' AND method = '{}' AND date = '{}' AND time = '{}'".format(
+                  record['email'], record['dsid'], record['method'], record['date'], record['time'])
+            pgrec = pgget(tname, 'aidx', cnd, PgLOG.LOGERR|PgLOG.ADDTBL)
+            if docheck == 4 and not pgrec:
+                cnd = "email IS NULL AND dsid = '{}' AND method = '{}' AND date = '{}' AND time = '{}'".format(
+                      record['dsid'], record['method'], record['date'], record['time'])
+                pgrec = pgget(tname, 'aidx', cnd, PgLOG.LOGERR|PgLOG.ADDTBL)
+            if pgrec:
+                if docheck > 1: acnt = pgupdt(tname, record, "aidx = {}".format(pgrec['aidx']), PgLOG.LGEREX)
+                return acnt
+        acnt = pgadd(tname, record, PgLOG.LGEREX|PgLOG.ADDTBL)
+
+    return acnt
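+#
+# Usage sketch (illustrative only, not part of this module; all field values are
+# hypothetical): the yearly table name is derived from the date:
+#
+#    record = {'email' : 'jdoe@univ.edu', 'org_type' : 'UNIV', 'country' : 'UNITED.STATES',
+#              'dsid' : 'd999000', 'date' : '2023-07-01', 'time' : '12:00:00',
+#              'size' : 1024, 'method' : 'WEB', 'source' : 'W'}
+#    add_yearly_allusage(None, record)    # lands in table allusage_2023
+#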
+#
+# add a wusage record into a yearly table; create a new yearly table if it does not exist
+# year -- year to identify the yearly table, evaluated from the date if missing
+# records -- dict to hold one or multiple records.
+#    Dict keys: wid - reference to wfile.wid
+#               wuid_read - reference to wuser.wuid, 0 if missing email
+#               dsid - reference to dataset.dsid at the time of read
+#               date_read - date file read
+#               time_read - time file read
+#               quarter - quarter of the year data accessed
+#               size_read - bytes of data read
+#               method - download methods: WEB, CURL, MGET and FTP
+#               locflag - location flag: Glade or Object
+#               ip - IP address
+#
+# isarray -- if true, multiple records provided via arrays for each dict key
+#
+def add_yearly_wusage(year, records, isarray = 0):
+
+    acnt = 0
+    if not year:
+        ms = re.match(r'^(\d\d\d\d)', str(records['date_read'][0] if isarray else records['date_read']))
+        if ms: year = ms.group(1)
+    tname = "wusage_{}".format(year)
+    if isarray:
+        if 'quarter' not in records:
+            cnt = len(records['wid'])
+            records['quarter'] = [0]*cnt
+            for i in range(cnt):
+                ms = re.search(r'-(\d+)-', str(records['date_read'][i]))
+                if ms: records['quarter'][i] = (int((int(ms.group(1))-1)/3)+1)
+        acnt = pgmadd(tname, records, PgLOG.LGEREX|PgLOG.ADDTBL)
+    else:
+        record = records
+        if 'quarter' not in record:
+            ms = re.search(r'-(\d+)-', str(record['date_read']))
+            if ms: record['quarter'] = (int((int(ms.group(1))-1)/3)+1)
+        acnt = pgadd(tname, record, PgLOG.LGEREX|PgLOG.ADDTBL)
+
+    return acnt
+
+#
+# double quote an array of single or sign-delimited strings
+#
+def pgnames(ary, sign = None, joinstr = None):
+
+    pgary = []
+    for a in ary:
+        pgary.append(pgname(a, sign))
+
+    if joinstr is None:
+        return pgary
+    else:
+        return joinstr.join(pgary)
+
+#
+# double quote a single or sign-delimited string
+#
+def pgname(str, sign = None):
+
+    if sign:
+        nstr = ''
+        names = str.split(sign[0])
+        for name in names:
+            if nstr: nstr += sign[0]
+            nstr += pgname(name, sign[1:])
+    else:
+        nstr = str.strip()
+        if nstr and nstr.find('"') < 0:
+            if not re.match(r'^[a-z_][a-z0-9_]*$', nstr) or nstr in PGRES:
+                nstr = '"{}"'.format(nstr)
+
+    return nstr
diff --git a/src/rda_python_common/PgFile.py b/src/rda_python_common/PgFile.py
new file mode 100644
index 0000000..7053ed9
--- /dev/null
+++ b/src/rda_python_common/PgFile.py
@@ -0,0 +1,3014 @@
+#
+###############################################################################
+#
+#     Title : PgFile.py
+#    Author : Zaihua Ji, zji@ucar.edu
+#      Date : 08/05/2020
+#   Purpose : python library module to copy, move and delete data files locally
+#             and remotely
+#
+# Work File : $DSSHOME/lib/python/PgFile.py
+#    Github : https://github.com/NCAR/rda-shared-libraries.git
+#
+###############################################################################
+#
+import sys
+import os
+from os import path as op
+import pwd
+import grp
+import stat
+import re
+import time
+import glob
+import json
+import PgLOG
+import PgUtil
+import PgSIG
+import PgDBI
+
+CMDBTH = (0x0033)   # return both stdout and stderr, 16 + 32 + 2 + 1
+RETBTH = (0x0030)   # return both stdout and stderr, 16 + 32
+CMDRET = (0x0110)   # return stdout and save error, 16 + 256
+CMDERR = (0x0101)   # display command and save error, 1 + 256
+CMDGLB = (0x0313)   # return stdout and save error for globus, 1+2+16+256+512
+
+PGCMPS = {
+# extension  Compress        Uncompress       ArchiveFormat
+    'Z'   : ['compress -f', 'uncompress -f', 'Z'],
+    'zip' : ['zip',         'unzip',         'ZIP'],
+    'gz'  : ['gzip',        'gunzip',        'GZ'],
+    'xz'  : ['xz',          'unxz',          'XZ'],
+    'bz2' : ['bzip2',       'bunzip2',       'BZ2']
+}
+CMPSTR = '|'.join(PGCMPS)
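+#
+# Usage sketch (illustrative only, not part of this module): the compression
+# commands are looked up by file extension, e.g.
+#
+#    PGCMPS['gz'][0]    # -> 'gzip'   (compress)
+#    PGCMPS['gz'][1]    # -> 'gunzip' (uncompress)
+#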
+PGTARS = {
+# extension     Packing       Unpacking    ArchiveFormat
+    'tar'     : ['tar -cvf',  'tar -xvf',  'TAR'],
+    'tar.Z'   : ['tar -Zcvf', 'tar -xvf',  'TAR.Z'],
+    'zip'     : ['zip -v',    'unzip -v',  'ZIP'],
+    'tgz'     : ['tar -zcvf', 'tar -xvf',  'TGZ'],
+    'tar.gz'  : ['tar -zcvf', 'tar -xvf',  'TAR.GZ'],
+    'txz'     : ['tar -cvJf', 'tar -xvf',  'TXZ'],
+    'tar.xz'  : ['tar -cvJf', 'tar -xvf',  'TAR.XZ'],
+    'tbz2'    : ['tar -cvjf', 'tar -xvf',  'TBZ2'],
+    'tar.bz2' : ['tar -cvjf', 'tar -xvf',  'TAR.BZ2']
+}
+
+TARSTR = '|'.join(PGTARS)
+DELDIRS = {}
+TASKIDS = {}   # cache unfinished globus task ids
+MD5CMD = 'md5sum'
+SHA512CMD = 'sha512sum'
+LHOST = "localhost"
+OHOST = PgLOG.PGLOG['OBJCTSTR']
+BHOST = PgLOG.PGLOG['BACKUPNM']
+DHOST = PgLOG.PGLOG['DRDATANM']
+OBJCTCMD = "isd_s3_cli"
+BACKCMD = "dsglobus"
+
+HLIMIT = 0   # HTAR file count limit
+BLIMIT = 2   # minimum backup tar file size in DB
+DIRLVLS = 0
+
+# record how many errors happened while working with HPSS, local or remote machines
+ECNTS = {'D' : 0, 'H' : 0, 'L' : 0, 'R' : 0, 'O' : 0, 'B' : 0}
+# upper limits for how many consecutive errors are allowed
+ELMTS = {'D' : 20, 'H' : 20, 'L' : 20, 'R' : 20, 'O' : 10, 'B' : 10}
+
+# down-storage hostnames & paths
+DHOSTS = {
+    'G' : PgLOG.PGLOG['GPFSNAME'],
+    'O' : OHOST,
+    'B' : BHOST,
+    'D' : DHOST
+}
+
+DPATHS = {
+    'G' : PgLOG.PGLOG['DSSDATA'],
+    'O' : PgLOG.PGLOG['OBJCTBKT'],
+    'B' : '/' + PgLOG.PGLOG['DEFDSID'],   # backup globus endpoint
+    'D' : '/' + PgLOG.PGLOG['DEFDSID']    # disaster recovery globus endpoint
+}
+
+QSTATS = {
+    'A' : 'ACTIVE',
+    'I' : 'INACTIVE',
+    'S' : 'SUCCEEDED',
+    'F' : 'FAILED',
+}
+
+QPOINTS = {
+    'L' : 'rda-glade',
+    'B' : 'rda-quasar',
+    'D' : 'rda-quasar-drdata'
+}
+
+QHOSTS = {
+    'rda-glade' : LHOST,
+    'rda-quasar' : BHOST,
+    'rda-quasar-drdata' : DHOST
+}
+
+ENDPOINTS = {
+    'rda-glade' : "NCAR RDA GLADE",
+    'rda-quasar' : "NCAR RDA Quasar",
+    'rda-quasar-drdata' : "NCAR RDA Quasar DRDATA"
+}
+
+#
+# reset the upper limit for a specified error type
+#
+def reset_error_limit(etype, lmt):
+
+    ELMTS[etype] = lmt
+
+#
+# wrapper of PgLOG.pglog() to show an error without a fatal exit on the first
+# call so the same action can be retried
+#
+def errlog(msg, etype, retry = 0, logact = 0):
+
+    bckgrnd = PgLOG.PGLOG['BCKGRND']
+    logact |= PgLOG.ERRLOG
+    if not retry:
+        if msg and not re.search(r'\n$', msg): msg += "\n"
+        msg += "[The same execution will be retried in {} Seconds]".format(PgSIG.PGSIG['ETIME'])
+        PgLOG.PGLOG['BCKGRND'] = 1
+        logact &= ~(PgLOG.EMEROL|PgLOG.EXITLG)
+    elif ELMTS[etype]:
+        ECNTS[etype] += 1
+        if ECNTS[etype] >= ELMTS[etype]:
+            logact |= PgLOG.EXITLG
+            ECNTS[etype] = 0
+
+    if PgLOG.PGLOG['DSCHECK'] and logact&PgLOG.EXITLG: PgDBI.record_dscheck_error(msg)
+    PgLOG.pglog(msg, logact)
+    PgLOG.PGLOG['BCKGRND'] = bckgrnd
+    if not retry: time.sleep(PgSIG.PGSIG['ETIME'])
+
+    return PgLOG.FAILURE
+#
+# Copy a file from one host (including the local host) to another host (including
+# the local host), excluding remote-host to remote-host copies; copying in
+# background is permitted
+#
+# tofile   - target file name
+# fromfile - source file name
+# tohost   - target host name, defaults to LHOST
+# fromhost - original host name, defaults to LHOST
+#
+# Return 1 if successful, 0 if failed with the error message generated in
+# PgLOG.pgsystem() cached in PgLOG.PGLOG['SYSERR']
+#
+def copy_rda_file(tofile, fromfile, tohost = LHOST, fromhost = LHOST, logact = 0):
+
+    thost = strip_host_name(tohost)
+    fhost = strip_host_name(fromhost)
+
+    if PgUtil.pgcmp(thost, fhost, 1) == 0:
+        if PgUtil.pgcmp(thost, LHOST, 1) == 0:
+            return local_copy_local(tofile, fromfile, logact)
+    elif PgUtil.pgcmp(fhost, LHOST, 1) == 0:
+        if PgUtil.pgcmp(thost, OHOST, 1) == 0:
+            return local_copy_object(tofile, fromfile, None, None, logact)
+        elif PgUtil.pgcmp(thost, BHOST, 1) == 0:
+            return local_copy_backup(tofile, fromfile, QPOINTS['B'], logact)
+        elif PgUtil.pgcmp(thost, DHOST, 1) == 0:
+            return local_copy_backup(tofile, fromfile, QPOINTS['D'], logact)
+        else:
+            return local_copy_remote(tofile, fromfile, tohost, logact)
+    elif PgUtil.pgcmp(thost, LHOST, 1) == 0:
+        if PgUtil.pgcmp(fhost, OHOST, 1) == 0:
+            return object_copy_local(tofile, fromfile, None, logact)
+        elif PgUtil.pgcmp(fhost, BHOST, 1) == 0:
+            return backup_copy_local(tofile, fromfile, QPOINTS['B'], logact)
+        elif PgUtil.pgcmp(fhost, DHOST, 1) == 0:
+            return backup_copy_local(tofile, fromfile, QPOINTS['D'], logact)
+        else:
+            return remote_copy_local(tofile, fromfile, fromhost)
+
+    return errlog("{}-{}->{}-{}: Cannot copy file".format(fhost, fromfile, thost, tofile), 'O', 1, PgLOG.LGEREX)
+
+#
+# Copy a file locally
+#
+# tofile   - target file name
+# fromfile - source file name
+#
+def local_copy_local(tofile, fromfile, logact = 0):
+
+    finfo = check_local_file(fromfile, 0, logact)
+    if not finfo:
+        if finfo != None: return PgLOG.FAILURE
+        return lmsg(fromfile, "{} to copy to {}".format(PgLOG.PGLOG['MISSFILE'], tofile), logact)
+
+    target = tofile
+    ms = re.match(r'^(.+)/$', tofile)
+    if ms:
+        dir = ms.group(1)
+        tofile += op.basename(fromfile)
+    else:
+        dir = get_local_dirname(tofile)
+
+    if not make_local_directory(dir, logact): return PgLOG.FAILURE
+
+    cmd = "cp -{} {} {}".format(('f' if finfo['isfile'] else "rf"), fromfile, target)
+    reset = loop = 0
+    while (loop - reset) < 2:
+        info = None
+        PgLOG.PGLOG['ERR2STD'] = ['are the same file']
+        ret = PgLOG.pgsystem(cmd, logact, CMDERR)
+        PgLOG.PGLOG['ERR2STD'] = []
+        if ret:
+            info = check_local_file(tofile, 143, logact)   # 1+2+4+8+128
+            if info:
+                if not info['isfile']:
+                    set_local_mode(tofile, 0, 0, info['mode'], info['logname'], logact)
+                    return PgLOG.SUCCESS
+                elif info['data_size'] == finfo['data_size']:
+                    set_local_mode(tofile, 1, 0, info['mode'], info['logname'], logact)
+                    return PgLOG.SUCCESS
+            elif info != None:
+                break
+
+        if PgLOG.PGLOG['SYSERR']:
+            errmsg = PgLOG.PGLOG['SYSERR']
+        else:
+            errmsg = "Error of '{}': Miss target file {}".format(cmd, tofile)
+        errlog(errmsg, 'L', (loop - reset), logact)
+        if loop == 0: reset = reset_local_info(tofile, info, logact)
+        loop += 1
+
+    return PgLOG.FAILURE
+
+#
+# Copy a local file to a remote host
+#
+# tofile   - target file name
+# fromfile - source file name
+# host     - remote host name
+#
+def local_copy_remote(tofile, fromfile, host, logact = 0):
+
+    finfo = check_local_file(fromfile, 0, logact)
+    if not finfo:
+        if finfo != None: return PgLOG.FAILURE
+        return lmsg(fromfile, "{} to copy to {}-{}".format(PgLOG.PGLOG['MISSFILE'], host, tofile), logact)
+
+    target = tofile
+    ms = re.match(r'^(.+)/$', tofile)
+    if ms:
+        dir = ms.group(1)
+        tofile += op.basename(fromfile)
+    else:
+        dir = op.dirname(tofile)
+
+    if not make_remote_directory(dir, host, logact): return PgLOG.FAILURE
+
+    cmd = PgLOG.get_sync_command(host)
+    cmd += " {} {}".format(fromfile, target)
+    for loop in range(2):
+        if PgLOG.pgsystem(cmd, logact, CMDERR):
+            info = check_remote_file(tofile, host, 0, logact)
+            if info:
+                if not finfo['isfile']:
+                    set_remote_mode(tofile, 0, host, PgLOG.PGLOG['EXECMODE'])
+                    return PgLOG.SUCCESS
+                elif info['data_size'] == finfo['data_size']:
+                    set_remote_mode(tofile, 1, host, PgLOG.PGLOG['FILEMODE'])
+                    return PgLOG.SUCCESS
+            elif info != None:
+                break
+
+        errlog(PgLOG.PGLOG['SYSERR'], 'R', loop, logact)
+
+    return PgLOG.FAILURE
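+#
+# Usage sketch (illustrative only, not part of this module; paths are
+# hypothetical): the host arguments pick the copy route automatically:
+#
+#    copy_rda_file('/data/d999000/new.nc', '/tmp/new.nc')                   # local to local
+#    copy_rda_file('/d999000/back.tar', '/data/d999000/back.tar', BHOST)    # local to Quasar backup
+#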
store +# +# tofile - target file name +# fromfile - source file name +# bucket - bucket name on Object store +# meta - reference to metadata hash +# +def local_copy_object(tofile, fromfile, bucket = None, meta = None, logact = 0): + + if not bucket: bucket = PgLOG.PGLOG['OBJCTBKT'] + if meta is None: meta = {} + if 'user' not in meta: meta['user'] = PgLOG.PGLOG['CURUID'] + if 'group' not in meta: meta['group'] = PgLOG.PGLOG['RDAGRP'] + uinfo = json.dumps(meta) + + finfo = check_local_file(fromfile, 0, logact) + if not finfo: + if finfo != None: return PgLOG.FAILURE + return lmsg(fromfile, "{} to copy to {}-{}".format(PgLOG.PGLOG['MISSFILE'], OHOST, tofile), logact) + + if not logact&PgLOG.OVRIDE: + tinfo = check_object_file(tofile, bucket, 0, logact) + if tinfo and tinfo['data_size'] > 0: + return PgLOG.pglog("{}-{}-{}: file exists already".format(OHOST, bucket, tofile), logact) + + cmd = "{} ul -lf {} -b {} -k {} -md '{}'".format(OBJCTCMD, fromfile, bucket, tofile, uinfo) + for loop in range(2): + buf = PgLOG.pgsystem(cmd, logact, CMDBTH) + tinfo = check_object_file(tofile, bucket, 0, logact) + if tinfo: + if tinfo['data_size'] == finfo['data_size']: + return PgLOG.SUCCESS + elif tinfo != None: + break + + errlog("Error Execute: {}\n{}".format(cmd, buf), 'O', loop, logact) + + return PgLOG.FAILURE + +# +# Copy multiple files from a Globus endpoint to another +# tofiles - target file name list, echo name leading with /dsnnn.n/ on Quasar and +# leading with /data/ or /decsdata/ on local glade disk +# fromfiles - source file name list, the same format as the tofiles +# topoint - target endpoint name, 'rda-glade', 'rda-quasar' or 'rda-quasar-drdata' +# frompoint - source endpoint name, the same choices as the topoint +# +def quasar_multiple_trasnfer(tofiles, fromfiles, topoint, frompoint, logact = 0): + + ret = PgLOG.FAILURE + qstr = '{"action":"transfer","label":"%s","verify_checksum":true,' % ENDPOINTS[topoint] +# qstr = '{"action":"transfer","label":"%s",' % ENDPOINTS[topoint] + qstr += '"source_endpoint":"%s","destination_endpoint":"%s","files":[\n' % (frompoint, topoint) + fcnt = len(fromfiles) + bstr = '' + for i in range(fcnt): + qstr += '%s{"source_file":"%s","destination_file":"%s"}' % (bstr, fromfiles[i], tofiles[i]) + if i == 0: bstr = ',\n' + qstr += ']}' + + task = submit_globus_task(BACKCMD, topoint, logact, qstr) + if task['stat'] == 'S': + ret = PgLOG.SUCCESS + elif task['stat'] == 'A': + TASKIDS["{}-{}".format(topoint, tofiles[0])] = task['id'] + ret = PgLOG.FINISH + + return ret + +# +# Copy a file from a Globus endpoint to another + +# tofile - target file name, leading with /dsnnn.n/ on Quasar and +# leading with /data/ or /decsdata/ on local glade disk +# fromfile - source file, the same format as the tofile +# topoint - target endpoint name, 'rda-glade', 'rda-quasar' or 'rda-quasar-drdata' +# frompoint - source endpoint name, the same choices as the topoint +# +def endpoint_copy_endpoint(tofile, fromfile, topoint, frompoint, logact = 0): + + ret = PgLOG.FAILURE + finfo = check_globus_file(fromfile, frompoint, 0, logact) + if not finfo: + if finfo != None: return ret + return lmsg(fromfile, "{} to copy {} file to {}-{}".format(PgLOG.PGLOG['MISSFILE'], frompoint, topoint, tofile), logact) + + if not logact&PgLOG.OVRIDE: + tinfo = check_globus_file(tofile, topoint, 0, logact) + if tinfo and tinfo['data_size'] > 0: + return PgLOG.pglog("{}-{}: file exists already".format(topoint, tofile), logact) + + cmd = "{} -t -vc -se {} -de {} -sf {} -df {}".format(BACKCMD, frompoint, 
topoint, fromfile, tofile)
+    task = submit_globus_task(cmd, topoint, logact)
+    if task['stat'] == 'S':
+        ret = PgLOG.SUCCESS
+    elif task['stat'] == 'A':
+        TASKIDS["{}-{}".format(topoint, tofile)] = task['id']
+        ret = PgLOG.FINISH
+
+    return ret
+
+#
+# submit a globus task and return a task id
+#
+def submit_globus_task(cmd, endpoint, logact = 0, qstr = None):
+
+    task = {'id' : None, 'stat' : 'U'}
+    loop = reset = 0
+    while (loop-reset) < 2:
+        buf = PgLOG.pgsystem(cmd, logact, CMDGLB, qstr)
+        syserr = PgLOG.PGLOG['SYSERR']
+        if buf and buf.find('a task has been created') > -1:
+            ms = re.search(r'Task ID:\s+(\S+)', buf)
+            if ms:
+                task['id'] = ms.group(1)
+                lp = 0
+                while lp < 2:
+                    task['stat'] = check_globus_status(task['id'], endpoint, logact)
+                    if task['stat'] == 'S': break
+                    time.sleep(PgSIG.PGSIG['ETIME'])
+                    lp += 1
+        if task['stat'] == 'S' or task['stat'] == 'A': break
+        if task['stat'] == 'F' and not syserr: break
+
+        errmsg = "Error Execute: " + cmd
+        if qstr: errmsg += " with stdin:\n" + qstr
+        if syserr:
+            errmsg += "\n" + syserr
+            (hstat, msg) = host_down_status('', QHOSTS[endpoint], 1, logact)
+            if hstat: errmsg += "\n" + msg
+        errlog(errmsg, 'B', (loop - reset), logact)
+        if loop == 0 and syserr and syserr.find('This user has too many pending jobs') > -1: reset = 1
+        loop += 1
+
+    if task['stat'] == 'S' or task['stat'] == 'A': ECNTS['B'] = 0   # reset error count
+    return task
+
+#
+# check the Globus transfer status for a given taskid; cancel the task
+# if PgLOG.NOWAIT is present and Details is neither OK nor Queued
+#
+def check_globus_status(taskid, endpoint = None, logact = 0):
+
+    ret = 'U'
+    if not taskid: return ret
+    if not endpoint: endpoint = PgLOG.PGLOG['BACKUPEP']
+    mp = r'Status:\s+({})'.format('|'.join(QSTATS.values()))
+    cmd = "{} -gt --task-id {}".format(BACKCMD, taskid)
+    astats = ['OK', 'Queued']
+
+    for loop in range(2):
+        buf = PgLOG.pgsystem(cmd, logact, CMDRET)
+        if buf:
+            ms = re.search(mp, buf)
+            if ms:
+                ret = ms.group(1)[0]
+                if ret == 'A':
+                    ms = re.search(r'Details:\s+(\S+)', buf)
+                    if ms:
+                        detail = ms.group(1)
+                        if detail not in astats:
+                            if logact&PgLOG.NOWAIT:
+                                errmsg = "{}: Cancel Task due to {}:\n{}".format(taskid, detail, buf)
+                                errlog(errmsg, 'B', 1, logact)
+                                ccmd = "{} -ct --task-id {}".format(BACKCMD, taskid)
+                                PgLOG.pgsystem(ccmd, logact, 7)
+                            else:
+                                time.sleep(PgSIG.PGSIG['ETIME'])
+                                continue
+                break
+
+        errmsg = "Error Execute: " + cmd
+        if PgLOG.PGLOG['SYSERR']:
+            errmsg += "\n" + PgLOG.PGLOG['SYSERR']
+            (hstat, msg) = host_down_status('', QHOSTS[endpoint], 1, logact)
+            if hstat: errmsg += "\n" + msg
+        errlog(errmsg, 'B', loop, logact)
+
+    if ret == 'S' or ret == 'A': ECNTS['B'] = 0   # reset error count
+    return ret
+
+#
+# return SUCCESS if the Globus transfer is done; FAILURE otherwise
+#
+def check_globus_finished(tofile, topoint, logact = 0):
+
+    ret = PgLOG.SUCCESS
+    ckey = "{}-{}".format(topoint, tofile)
+    if ckey in TASKIDS:
+        taskid = TASKIDS[ckey]
+    else:
+        errlog(ckey + ": Miss Task ID to check Status", 'B', 1, logact)
+        return PgLOG.FAILURE
+
+    lp = 0
+    if logact&PgLOG.NOWAIT:
+        act = logact&(~PgLOG.NOWAIT)
+        lps = 2
+    else:
+        act = logact
+        lps = 0
+
+    while True:
+        stat = check_globus_status(taskid, topoint, act)
+        if stat == 'A':
+            if lps:
+                lp += 1
+                if lp > lps: act = logact
+            time.sleep(PgSIG.PGSIG['ETIME'])
+        else:
+            if stat == 'S':
+                del TASKIDS[ckey]
+            else:
+                status = QSTATS[stat] if stat in QSTATS else 'UNKNOWN'
+                errlog("{}: Status '{}' for Task {}".format(ckey, status, taskid), 'B', 1, logact)
+                ret = PgLOG.FAILURE
+            break
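+    # (the loop above polls check_globus_status every PgSIG.PGSIG['ETIME'] seconds; when
+    # PgLOG.NOWAIT is set, the first lps polls run with NOWAIT stripped before act reverts
+    # to the original logact so a stuck task can be cancelled)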
+
+    return ret
+
+#
+# Copy a local file to the Quasar backup tape system
+#
+# tofile   - target file name, leading with /dsnnn.n/
+# fromfile - source file name, leading with /data/ or /decsdata/
+# endpoint - endpoint name on the Quasar Backup Server
+#
+def local_copy_backup(tofile, fromfile, endpoint = None, logact = 0):
+
+    if not endpoint: endpoint = PgLOG.PGLOG['BACKUPEP']
+    return endpoint_copy_endpoint(tofile, fromfile, endpoint, 'rda-glade', logact)
+
+#
+# Copy a Quasar backup file to the local Globus endpoint
+#
+# tofile   - target file name, leading with /data/ or /decsdata/
+# fromfile - source file name, leading with /dsnnn.n/
+# endpoint - endpoint name on the Quasar Backup Server
+#
+def backup_copy_local(tofile, fromfile, endpoint = None, logact = 0):
+
+    if not endpoint: endpoint = PgLOG.PGLOG['BACKUPEP']
+    return endpoint_copy_endpoint(tofile, fromfile, 'rda-glade', endpoint, logact)
+
+#
+# Copy a remote file to local
+#
+# tofile   - target file name
+# fromfile - source file name
+# host     - remote host name
+#
+def remote_copy_local(tofile, fromfile, host, logact = 0):
+
+    cmd = PgLOG.get_sync_command(host)
+    finfo = check_remote_file(fromfile, host, 0, logact)
+    if not finfo:
+        if finfo != None: return PgLOG.FAILURE
+        return errlog("{}-{}: {} to copy to {}".format(host, fromfile, PgLOG.PGLOG['MISSFILE'], tofile), 'R', 1, logact)
+
+    target = tofile
+    ms = re.match(r'^(.+)/$', tofile)
+    if ms:
+        dir = ms.group(1)
+        tofile += op.basename(fromfile)
+    else:
+        dir = get_local_dirname(tofile)
+
+    if not make_local_directory(dir, logact): return PgLOG.FAILURE
+
+    cmd += " -g {} {}".format(fromfile, target)
+    info = None
+    loop = reset = 0
+    while (loop-reset) < 2:
+        if PgLOG.pgsystem(cmd, logact, CMDERR):
+            info = check_local_file(tofile, 143, logact)   # 1+2+4+8+128
+            if info:
+                if not info['isfile']:
+                    set_local_mode(tofile, 0, PgLOG.PGLOG['EXECMODE'])
+                    return PgLOG.SUCCESS
+                elif info['data_size'] == finfo['data_size']:
+                    set_local_mode(tofile, 1, PgLOG.PGLOG['FILEMODE'])
+                    return PgLOG.SUCCESS
+            elif info != None:
+                break
+
+        errlog(PgLOG.PGLOG['SYSERR'], 'L', (loop - reset), logact)
+        if loop == 0: reset = reset_local_info(tofile, info, logact)
+        loop += 1
+
+    return PgLOG.FAILURE
+
+#
+# Copy an object file to local
+#
+# tofile   - target file name
+# fromfile - source file name
+# bucket   - bucket name on the Object store
+#
+def object_copy_local(tofile, fromfile, bucket = None, logact = 0):
+
+    ret = PgLOG.FAILURE
+    if not bucket: bucket = PgLOG.PGLOG['OBJCTBKT']
+    finfo = check_object_file(fromfile, bucket, 0, logact)
+    if not finfo:
+        if finfo != None: return ret
+        return lmsg(fromfile, "{}-{} to copy to {}".format(OHOST, PgLOG.PGLOG['MISSFILE'], tofile), logact)
+
+    cmd = "{} go -k {} -b {}".format(OBJCTCMD, fromfile, bucket)
+    fromname = op.basename(fromfile)
+    toname = op.basename(tofile)
+    if toname == tofile:
+        dir = odir = None
+    else:
+        dir = op.dirname(tofile)
+        odir = change_local_directory(dir, logact)
+    loop = reset = 0
+    while (loop-reset) < 2:
+        buf = PgLOG.pgsystem(cmd, logact, CMDBTH)
+        info = check_local_file(fromname, 143, logact)   # 1+2+4+8+128
+        if info:
+            if info['data_size'] == finfo['data_size']:
+                set_local_mode(fromname, info['isfile'], 0, info['mode'], info['logname'], logact)
+                if toname == fromname or move_local_file(toname, fromname, logact):
+                    ret = PgLOG.SUCCESS
+                    break
+        elif info != None:
+            break
+
+        errlog("Error Execute: {}\n{}".format(cmd, buf), 'L', (loop - reset), logact)
+        if loop == 0: reset = reset_local_info(tofile, info, logact)
+        loop += 1
+    if odir and odir != dir:
+        change_local_directory(odir, logact)
+
+    return ret
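+
+# (A hedged usage sketch of the object-store helpers above, using hypothetical key and
+# bucket names; OBJCTCMD must be available on the search path:
+#    object_copy_local('/tmp/a.nc', 'ds084.1/a.nc')    # download an object to /tmp/a.nc
+#    local_copy_object('ds084.1/b.nc', '/tmp/b.nc'))   # upload with default user/group metadata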
+
+#
+# Copy a remote file to the object store
+#
+# tofile   - target object file name
+# fromfile - source remote file name
+# host     - remote host name
+# bucket   - bucket name on the Object store
+# meta     - reference to a metadata hash
+#
+def remote_copy_object(tofile, fromfile, host, bucket = None, meta = None, logact = 0):
+
+    if is_local_host(host): return local_copy_object(tofile, fromfile, bucket, meta, logact)
+
+    locfile = "{}/{}".format(PgLOG.PGLOG['TMPPATH'], op.basename(tofile))
+    ret = remote_copy_local(locfile, fromfile, host, logact)
+    if ret:
+        ret = local_copy_object(tofile, locfile, bucket, meta, logact)
+        delete_local_file(locfile, logact)
+
+    return ret
+
+#
+# Copy an object file to a remote host
+#
+# tofile   - target remote file name
+# fromfile - source object file name
+# host     - remote host name
+# bucket   - bucket name on the Object store
+#
+def object_copy_remote(tofile, fromfile, host, bucket = None, logact = 0):
+
+    if is_local_host(host): return object_copy_local(tofile, fromfile, bucket, logact)
+
+    locfile = "{}/{}".format(PgLOG.PGLOG['TMPPATH'], op.basename(tofile))
+    ret = object_copy_local(locfile, fromfile, bucket, logact)
+    if ret:
+        ret = local_copy_remote(tofile, locfile, host, logact)
+        delete_local_file(locfile, logact)
+
+    return ret
+
+#
+# Delete a file/directory on a given host name (including the local host); no background
+# process for deleting
+#
+# file - file name to be deleted
+# host - host name the file is on, default to LHOST
+#
+# Return 1 if successful, 0 if failed; the error message generated in PgLOG.pgsystem()
+# is cached in PgLOG.PGLOG['SYSERR']
+#
+def delete_rda_file(file, host, logact = 0):
+
+    shost = strip_host_name(host)
+    if PgUtil.pgcmp(shost, LHOST, 1) == 0:
+        return delete_local_file(file, logact)
+    elif PgUtil.pgcmp(shost, OHOST, 1) == 0:
+        return delete_object_file(file, None, logact)
+    else:
+        return delete_remote_file(file, host, logact)
+
+#
+# Delete a local file/directory
+#
+def delete_local_file(file, logact = 0):
+
+    info = check_local_file(file, 0, logact)
+    if not info: return PgLOG.FAILURE
+    cmd = "rm -rf " + file
+    loop = reset = 0
+    while (loop-reset) < 2:
+        if PgLOG.pgsystem(cmd, logact, CMDERR):
+            info = check_local_file(file, 14, logact)
+            if info is None:
+                if DIRLVLS: record_delete_directory(op.dirname(file), LHOST)
+                return PgLOG.SUCCESS
+            elif not info:
+                break   # error checking file
+
+        errlog(PgLOG.PGLOG['SYSERR'], 'L', (loop - reset), logact)
+        if loop == 0: reset = reset_local_info(file, info, logact)
+        loop += 1
+
+    return PgLOG.FAILURE
+
+#
+# Delete a file/directory on a remote host
+#
+def delete_remote_file(file, host, logact = 0):
+
+    if not check_remote_file(file, host, 0, logact): return PgLOG.FAILURE
+
+    cmd = PgLOG.get_sync_command(host)
+
+    for loop in range(2):
+        if PgLOG.pgsystem("{} -d {}".format(cmd, file), logact, CMDERR):
+            if DIRLVLS: record_delete_directory(op.dirname(file), host)
+            return PgLOG.SUCCESS
+
+        errlog(PgLOG.PGLOG['SYSERR'], 'R', loop, logact)
+
+    return PgLOG.FAILURE
+
+#
+# Delete a file on the object store
+#
+def delete_object_file(file, bucket = None, logact = 0):
+
+    if not bucket: bucket = PgLOG.PGLOG['OBJCTBKT']
+    for loop in range(2):
+        list = object_glob(file, bucket, 0, logact)
+        if not list: return PgLOG.FAILURE
+        errmsg = None
+        for key in list:
+            cmd = "{} dl {} -b {}".format(OBJCTCMD, key, bucket)
+            if not PgLOG.pgsystem(cmd, logact, CMDERR):
+                errmsg = PgLOG.PGLOG['SYSERR']
+                break
+
+        list = 
object_glob(file, bucket, 0, logact) + if not list: return PgLOG.SUCCESS + if errmsg: errlog(errmsg, 'O', loop, logact) + + return PgLOG.FAILURE + +# +# Delete a backup file on Quasar Server +# +def delete_backup_file(file, endpoint = None, logact = 0): + + if not endpoint: endpoint = PgLOG.PGLOG['BACKUPEP'] + info = check_backup_file(file, endpoint, 0, logact) + if not info: return PgLOG.FAILURE + + cmd = "{} -d -ep {} -tf {}".format(BACKCMD, endpoint, file) + task = submit_globus_task(cmd, endpoint, logact) + if task['stat'] == 'S': + return PgLOG.SUCCESS + elif task['stat'] == 'A': + TASKIDS["{}-{}".format(endpoint, file)] = task['id'] + return PgLOG.FINISH + + return PgLOG.FAILURE + +# +# reset local file/directory information to make them writable for PgLOG.PGLOG['RDAUSER'] +# file - file name (mandatory) +# info - gathered file info with option 14, None means file not exists +# +def reset_local_info(file, info = None, logact = 0): + + ret = 0 + if info: + if info['isfile']: + ret += reset_local_file(file, info, logact) + dir = get_local_dirname(file) + info = check_local_file(dir, 14, logact) + else: + dir = file + else: + dir = get_local_dirname(file) + info = check_local_file(dir, 14, logact) + + if info: ret += reset_local_directory(dir, info, logact) + + return 1 if ret else 0 + +# +# reset local directory group/mode +# +def reset_local_directory(dir, info = None, logact = 0): + + ret = 0 + if not (info and 'mode' in info and 'group' in info and 'logname' in info): + info = check_local_file(dir, 14, logact) + if info: + if info['mode'] and info['mode'] != 0o775: + ret += set_local_mode(dir, 0, 0o775, info['mode'], info['logname'], logact) + if info['group'] and PgLOG.PGLOG['RDAGRP'] != info['group']: + ret += change_local_group(dir, PgLOG.PGLOG['RDAGRP'], info['group'], info['logname'], logact) + + return 1 if ret else 0 + +# +# reset local file group/mode +# +def reset_local_file(file, info = None, logact = 0): + + ret = 0 + if not (info and 'mode' in info and 'group' in info and 'logname' in info): + info = check_local_file(file, 14, logact) + if info: + if info['mode'] != 0o664: + ret += set_local_mode(file, 1, 0o664, info['mode'], info['logname'], logact) + if PgLOG.PGLOG['RDAGRP'] != info['group']: + ret += change_local_group(file, PgLOG.PGLOG['RDAGRP'], info['group'], info['logname'], logact) + + return ret + +# +# Move file locally or remotely on the same host no background process for moving +# +# tofile - target file name +# fromfile - original file name +# host - host name the file is moved on, default to LHOST +# +# Return PgLOG.SUCCESS if successful PgLOG.FAILURE otherwise +# +def move_rda_file(tofile, fromfile, host, logact = 0): + + shost = strip_host_name(host) + if PgUtil.pgcmp(shost, LHOST, 1) == 0: + return move_local_file(tofile, fromfile, logact) + elif PgUtil.pgcmp(shost, OHOST, 1) == 0: + return move_object_file(tofile, fromfile, None, None, logact) + else: + return move_remote_file(tofile, fromfile, host, logact) + +# +# Move a file locally +# +# tofile - target file name +# fromfile - source file name +# +def move_local_file(tofile, fromfile, logact = 0): + + dir = get_local_dirname(tofile) + info = check_local_file(fromfile, 0, logact) + tinfo = check_local_file(tofile, 0, logact) + if not info: + if info != None: return PgLOG.FAILURE + if tinfo: + PgLOG.pglog("{}: Moved to {} already".format(fromfile, tofile), PgLOG.LOGWRN) + return PgLOG.SUCCESS + else: + return errlog("{}: {} to move".format(fromfile, PgLOG.PGLOG['MISSFILE']), 'L', 1, logact) + if 
tinfo:
+        if tinfo['data_size'] > 0 and not logact&PgLOG.OVRIDE:
+            return errlog("{}: File exists, cannot move {} to it".format(tofile, fromfile), 'L', 1, logact)
+    elif tinfo != None:
+        return PgLOG.FAILURE
+
+    if not make_local_directory(dir, logact): return PgLOG.FAILURE
+
+    cmd = "mv {} {}".format(fromfile, tofile)
+    loop = reset = 0
+    while (loop-reset) < 2:
+        if PgLOG.pgsystem(cmd, logact, CMDERR):
+            if DIRLVLS: record_delete_directory(op.dirname(fromfile), LHOST)
+            return PgLOG.SUCCESS
+
+        errlog(PgLOG.PGLOG['SYSERR'], 'L', (loop - reset), logact)
+        if loop == 0: reset = reset_local_info(tofile, info, logact)
+        loop += 1
+
+    return PgLOG.FAILURE
+
+#
+# Move a remote file on the same host
+#
+# tofile   - target file name
+# fromfile - original file name
+# host     - remote host name
+# locfile  - local copy of tofile
+#
+def move_remote_file(tofile, fromfile, host, logact = 0):
+
+    if is_local_host(host): return move_local_file(tofile, fromfile, logact)
+
+    ret = PgLOG.FAILURE
+    dir = op.dirname(tofile)
+    info = check_remote_file(fromfile, host, 0, logact)
+    tinfo = check_remote_file(tofile, host, 0, logact)
+    if not info:
+        if info != None: return PgLOG.FAILURE
+        if tinfo:
+            PgLOG.pglog("{}-{}: Moved to {} already".format(host, fromfile, tofile), PgLOG.LOGWRN)
+            return PgLOG.SUCCESS
+        else:
+            return errlog("{}-{}: {} to move".format(host, fromfile, PgLOG.PGLOG['MISSFILE']), 'R', 1, logact)
+    if tinfo:
+        if tinfo['data_size'] > 0 and not logact&PgLOG.OVRIDE:
+            return errlog("{}-{}: File exists, cannot move {} to it".format(host, tofile, fromfile), 'R', 1, logact)
+    elif tinfo != None:
+        return PgLOG.FAILURE
+
+    if make_remote_directory(dir, host, logact):
+        locfile = "{}/{}".format(PgLOG.PGLOG['TMPPATH'], op.basename(tofile))
+        if remote_copy_local(locfile, fromfile, host, logact):
+            ret = local_copy_remote(tofile, locfile, host, logact)
+            delete_local_file(locfile, logact)
+        if ret:
+            ret = delete_remote_file(fromfile, host, logact)
+            if DIRLVLS: record_delete_directory(op.dirname(fromfile), host)
+
+    return ret
+
+#
+# Move an object file on the Object Store
+#
+# tofile     - target file name
+# fromfile   - original file name
+# tobucket   - target bucket name
+# frombucket - original bucket name
+#
+def move_object_file(tofile, fromfile, tobucket, frombucket, logact = 0):
+
+    ret = PgLOG.FAILURE
+    if not tobucket: tobucket = PgLOG.PGLOG['OBJCTBKT']
+    if not frombucket: frombucket = tobucket
+    finfo = check_object_file(fromfile, frombucket, 0, logact)
+    tinfo = check_object_file(tofile, tobucket, 0, logact)
+    if not finfo:
+        if finfo != None: return PgLOG.FAILURE
+        if tinfo:
+            PgLOG.pglog("{}-{}: Moved to {}-{} already".format(frombucket, fromfile, tobucket, tofile), PgLOG.LOGWRN)
+            return PgLOG.SUCCESS
+        else:
+            return errlog("{}-{}: {} to move".format(frombucket, fromfile, PgLOG.PGLOG['MISSFILE']), 'R', 1, logact)
+    if tinfo:
+        if tinfo['data_size'] > 0 and not logact&PgLOG.OVRIDE:
+            return errlog("{}-{}: Object File exists, cannot move {}-{} to it".format(tobucket, tofile, frombucket, fromfile), 'R', 1, logact)
+    elif tinfo != None:
+        return PgLOG.FAILURE
+
+    cmd = "{} mv -b {} -db {} -k {} -dk {}".format(OBJCTCMD, frombucket, tobucket, fromfile, tofile)
+    ucmd = "{} gm -k {} -b {}".format(OBJCTCMD, fromfile, frombucket)
+    ubuf = PgLOG.pgsystem(ucmd, PgLOG.LOGWRN, CMDRET)
+    if ubuf and re.match(r'^\{', ubuf): cmd += " -md '{}'".format(ubuf)
+
+    for loop in range(2):
+        buf = PgLOG.pgsystem(cmd, logact, CMDBTH)
+        tinfo = check_object_file(tofile, tobucket, 0, logact)
+        if tinfo:
+            if 
tinfo['data_size'] == finfo['data_size']: + return PgLOG.SUCCESS + elif tinfo != None: + break + + errlog("Error Execute: {}\n{}".format(cmd, buf), 'O', loop, logact) + + return PgLOG.FAILURE + +# +# Move an object path on Object Store and all the file keys under it +# +# topath - target path name +# frompath - original path name +# tobucket - target bucket name +# frombucket - original bucket name +# +def move_object_path(topath, frompath, tobucket, frombucket, logact = 0): + + ret = PgLOG.FAILURE + if not tobucket: tobucket = PgLOG.PGLOG['OBJCTBKT'] + if not frombucket: frombucket = tobucket + fcnt = check_object_path(frompath, frombucket, logact) + tcnt = check_object_path(topath, tobucket, logact) + if not fcnt: + if fcnt == None: return PgLOG.FAILURE + if tcnt: + PgLOG.pglog("{}-{}: Moved to {}-{} already".format(frombucket, frompath, tobucket, topath), PgLOG.LOGWRN) + return PgLOG.SUCCESS + else: + return errlog("{}-{}: {} to move".format(frombucket, frompath, PgLOG.PGLOG['MISSFILE']), 'R', 1, logact) + + cmd = "{} mv -b {} -db {} -k {} -dk {}".format(OBJCTCMD, frombucket, tobucket, frompath, topath) + + for loop in range(2): + buf = PgLOG.pgsystem(cmd, logact, CMDBTH) + fcnt = check_object_path(frompath, frombucket, logact) + if not fcnt: return PgLOG.SUCCESS + errlog("Error Execute: {}\n{}".format(cmd, buf), 'O', loop, logact) + + return PgLOG.FAILURE + +# +# Move a backup file on Quasar Server +# +# tofile - target file name +# fromfile - source file name +# endpoint - Globus endpoint +# +def move_backup_file(tofile, fromfile, endpoint = None, logact = 0): + + ret = PgLOG.FAILURE + if not endpoint: endpoint = PgLOG.PGLOG['BACKUPEP'] + finfo = check_backup_file(fromfile, endpoint, 0, logact) + tinfo = check_backup_file(tofile, endpoint, 0, logact) + if not finfo: + if finfo != None: return ret + if tinfo: + PgLOG.pglog("{}: Moved to {} already".format(fromfile, tofile), PgLOG.LOGWRN) + return PgLOG.SUCCESS + else: + return errlog("{}: {} to move".format(fromfile, PgLOG.PGLOG['MISSFILE']), 'B', 1, logact) + + if tinfo: + if tinfo['data_size'] > 0 and not logact&PgLOG.OVRIDE: + return errlog("{}: File exists, cannot move {} to it".format(tofile, fromfile), 'B', 1, logact) + elif tinfo != None: + return ret + + cmd = "{} --rename -ep {} --oldpath {} --newpath {}".format(BACKCMD, endpoint, fromfile, tofile) + loop = 0 + while loop < 2: + buf = PgLOG.pgsystem(cmd, logact, CMDRET) + syserr = PgLOG.PGLOG['SYSERR'] + if buf: + if buf.find('File or directory renamed successfully') > -1: + ret = PgLOG.SUCCESS + break + if syserr: + if syserr.find("No such file or directory") > -1: + if make_backup_directory(op.dirname(tofile), endpoint, logact): continue + errmsg = "Error Execute: {}\n{}".format(cmd, syserr) + (hstat, msg) = host_down_status('', QHOSTS[endpoint], 1, logact) + if hstat: errmsg += "\n" + msg + errlog(errmsg, 'B', loop, logact) + loop += 1 + + if ret == PgLOG.SUCCESS: ECNTS['B'] = 0 # reset error count + return ret + +# +# Make a directory on a given host name (including local host) +# +# dir - directory path to be made +# host - host name the directory on, default to LHOST +# +# Return PgLOG.SUCCESS(1) if successful or PgLOG.FAILURE(0) if failed +# +def make_rda_directory(dir, host, logact = 0): + + if not dir: return PgLOG.SUCCESS + shost = strip_host_name(host) + if PgUtil.pgcmp(shost, LHOST, 1) == 0: + return make_local_directory(dir, logact) + else: + return make_remote_directory(dir, host, logact) + +# +# Make a local directory +# +# dir - directory path to be made +# 
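+# (make_local_directory recurses through make_one_local_directory so every missing level
+# is created with PgLOG.PGLOG['EXECMODE'] and can be permission-reset on failure; a rough
+# sketch of the net effect, modulo those details, is:
+#    os.makedirs(dir, PgLOG.PGLOG['EXECMODE'], exist_ok = True)
+# )
+#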
+def make_local_directory(dir, logact = 0):
+
+    return make_one_local_directory(dir, None, logact)
+
+#
+# Make a local directory recursively
+#
+def make_one_local_directory(dir, odir = None, logact = 0):
+
+    if not dir or op.isdir(dir): return PgLOG.SUCCESS
+    if op.isfile(dir): return errlog(dir + ": is file, cannot make directory", 'L', 1, logact)
+
+    if not odir: odir = dir
+    if is_root_directory(dir, 'L', LHOST, "make directory " + odir, logact): return PgLOG.FAILURE
+    if not make_one_local_directory(op.dirname(dir), odir, logact): return PgLOG.FAILURE
+
+    loop = reset = 0
+    while (loop-reset) < 2:
+        try:
+            os.mkdir(dir, PgLOG.PGLOG['EXECMODE'])
+        except Exception as e:
+            errmsg = str(e)
+            if errmsg.find('File exists') > -1: return PgLOG.SUCCESS
+            errlog(errmsg, 'L', (loop - reset), logact)
+            if loop == 0: reset = reset_local_info(dir, None, logact)
+            loop += 1
+        else:
+            return PgLOG.SUCCESS
+
+    return PgLOG.FAILURE
+
+#
+# Make a directory on a remote host
+#
+# dir  - directory path to be made
+# host - host name the directory is on
+#
+def make_remote_directory(dir, host, logact = 0):
+
+    return make_one_remote_directory(dir, None, host, logact)
+
+def make_one_remote_directory(dir, odir, host, logact = 0):
+
+    info = check_remote_file(dir, host, 0, logact)
+    if info:
+        if info['isfile']: return errlog("{}-{}: is file, cannot make directory".format(host, dir), 'R', 1, logact)
+        return PgLOG.SUCCESS
+    elif info != None:
+        return PgLOG.FAILURE
+
+    if not odir: odir = dir
+    if is_root_directory(dir, 'R', host, "make directory {} on {}".format(odir, host), logact): return PgLOG.FAILURE
+
+    if make_one_remote_directory(op.dirname(dir), odir, host, logact):
+        if PgLOG.pgsystem("{} {} {}".format(PgLOG.get_sync_command(host), PgLOG.PGLOG['TMPSYNC'], dir), logact, 5):
+            set_remote_mode(dir, 0, host, PgLOG.PGLOG['EXECMODE'])
+            return PgLOG.SUCCESS
+
+    return PgLOG.FAILURE
+
+#
+# Make a quasar directory
+#
+# dir - directory path to be made
+#
+def make_backup_directory(dir, endpoint, logact = 0):
+
+    return make_one_backup_directory(dir, None, endpoint, logact)
+
+#
+# Make a quasar directory recursively
+#
+def make_one_backup_directory(dir, odir, endpoint = None, logact = 0):
+
+    if not dir or dir == '/': return PgLOG.SUCCESS
+    if not endpoint: endpoint = PgLOG.PGLOG['BACKUPEP']
+    ret = PgLOG.FAILURE
+    info = check_backup_file(dir, endpoint, 0, logact)
+    if info:
+        if info['isfile']: return errlog("{}-{}: is file, cannot make backup directory".format(endpoint, dir), 'B', 1, logact)
+        return PgLOG.SUCCESS
+    elif info != None:
+        return PgLOG.FAILURE
+
+    if not odir: odir = dir
+    if not make_one_backup_directory(op.dirname(dir), odir, endpoint, logact): return PgLOG.FAILURE
+
+    cmd = "{} --mkdir -ep {} -p {}".format(BACKCMD, endpoint, dir)
+    for loop in range(2):
+        buf = PgLOG.pgsystem(cmd, logact, CMDRET)
+        syserr = PgLOG.PGLOG['SYSERR']
+        if buf:
+            if (buf.find('The directory was created successfully') > -1 or
+                buf.find("Path '{}' already exists".format(dir)) > -1):
+                ret = PgLOG.SUCCESS
+                break
+        if syserr:
+            if syserr.find("No such file or directory") > -1:
+                ret = make_one_backup_directory(op.dirname(dir), odir, endpoint, logact)
+                if ret == PgLOG.SUCCESS or loop: break
+                time.sleep(PgSIG.PGSIG['ETIME'])
+            else:
+                errmsg = "Error Execute: {}\n{}".format(cmd, syserr)
+                (hstat, msg) = host_down_status('', QHOSTS[endpoint], 1, logact)
+                if hstat: errmsg += "\n" + msg
+                errlog(errmsg, 'B', loop, logact)
+
+    if ret == PgLOG.SUCCESS: ECNTS['B'] = 0   # reset error count
+    return ret
+
+#
+# check and return 1 if a root directory
+#
+def is_root_directory(dir, etype, host = None, action = None, logact = 0):
+
+    ret = cnt = 0
+
+    if etype == 'H':
+        ms = re.match(r'^({})(.*)$'.format(PgLOG.PGLOG['ALLROOTS']), dir)
+        if ms:
+            m2 = ms.group(2)
+            if not m2 or m2 == '/': ret = 1
+        else:
+            cnt = 2
+    elif re.match(r'^{}'.format(PgLOG.PGLOG['DSSDATA']), dir):
+        ms = re.match(r'^({})(.*)$'.format(PgLOG.PGLOG['GPFSROOTS']), dir)
+        if ms:
+            m2 = ms.group(2)
+            if not m2 or m2 == '/': ret = 1
+        else:
+            cnt = 4
+    else:
+        ms = re.match(r'^({})(.*)$'.format(PgLOG.PGLOG['HOMEROOTS']), dir)
+        if ms:
+            m2 = ms.group(2)
+            if not m2 or m2 == '/': ret = 1
+        else:
+            cnt = 2
+
+    if cnt and re.match(r'^(/[^/]+){0,%d}(/*)$' % cnt, dir):
+        ret = 1
+
+    if ret and action:
+        cnt = 0
+        errmsg = "{}: Cannot {} from {}".format(dir, action, PgLOG.PGLOG['HOSTNAME'])
+        (hstat, msg) = host_down_status(dir, host, 0, logact)
+        if hstat: errmsg += "\n" + msg
+        errlog(errmsg, etype, 1, logact|PgLOG.ERRLOG)
+
+    return ret
+
+#
+# set mode for a given directory/file on a given host (including the local host)
+#
+def set_rda_mode(file, isfile, host, nmode = None, omode = None, logname = None, logact = 0):
+
+    shost = strip_host_name(host)
+    if PgUtil.pgcmp(shost, LHOST, 1) == 0:
+        return set_local_mode(file, isfile, nmode, omode, logname, logact)
+    else:
+        return set_remote_mode(file, isfile, host, nmode, omode, logact)
+
+#
+# set mode for a given local directory or file
+#
+def set_local_mode(file, isfile = 1, nmode = 0, omode = 0, logname = None, logact = 0):
+
+    if not nmode: nmode = (PgLOG.PGLOG['FILEMODE'] if isfile else PgLOG.PGLOG['EXECMODE'])
+    if not (omode and logname):
+        info = check_local_file(file, 6)
+        if not info:
+            if info != None: return PgLOG.FAILURE
+            return lmsg(file, "{} to set mode({})".format(PgLOG.PGLOG['MISSFILE'], PgLOG.int2base(nmode, 8)), logact)
+        omode = info['mode']
+        logname = info['logname']
+
+    if nmode == omode: return PgLOG.SUCCESS
+
+    try:
+        os.chmod(file, nmode)
+    except Exception as e:
+        return errlog(str(e), 'L', 1, logact)
+
+    return PgLOG.SUCCESS
+
+#
+# set mode for a given directory or file on a remote host
+#
+def set_remote_mode(file, isfile, host, nmode = 0, omode = 0, logact = 0):
+
+    if not nmode: nmode = (PgLOG.PGLOG['FILEMODE'] if isfile else PgLOG.PGLOG['EXECMODE'])
+    if not omode:
+        info = check_remote_file(file, host, 6)
+        if not info:
+            if info != None: return PgLOG.FAILURE
+            return errlog("{}-{}: {} to set mode({})".format(host, file, PgLOG.PGLOG['MISSFILE'], PgLOG.int2base(nmode, 8)), 'R', 1, logact)
+        omode = info['mode']
+
+    if nmode == omode: return PgLOG.SUCCESS
+    return PgLOG.pgsystem("{} -m {} {}".format(PgLOG.get_sync_command(host), PgLOG.int2base(nmode, 8), file), logact, 5)
+
+#
+# change group for a given local directory or file
+#
+def change_local_group(file, ngrp = None, ogrp = None, logname = None, logact = 0):
+
+    if not ngrp:
+        ngid = PgLOG.PGLOG['RDAGID']
+    else:
+        ngid = grp.getgrnam(ngrp).gr_gid
+    if logact and logact&PgLOG.EXITLG: logact &= ~PgLOG.EXITLG
+    if not (ogrp and logname):
+        info = check_local_file(file, 10, logact)
+        if not info:
+            if info != None: return PgLOG.FAILURE
+            return errlog("{}: {} to change group({})".format(file, PgLOG.PGLOG['MISSFILE'], ngrp), 'L', 1, logact)
+        ogid = info['gid']
+        ouid = info['uid']
+    else:
+        ouid = pwd.getpwnam(logname).pw_uid
+        ogid = pwd.getpwnam(logname).pw_gid
+
+    if ngid == ogid: return PgLOG.SUCCESS
+
+    try:
+        os.chown(file, ouid, ngid)
+    except Exception as e:
+        return errlog(str(e), 'L', 1, logact)
+
+    return PgLOG.SUCCESS
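+
+# (A hedged usage sketch, assuming the PgLOG defaults are loaded: restore the standard
+# file permissions and the RDA group on a freshly copied file, with a hypothetical path:
+#    set_local_mode('/data/ds540.0/a.tar', 1)
+#    change_local_group('/data/ds540.0/a.tar'))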
+
+#
+# Check if a given path on a specified host, or the host itself, is down
+#
+# path:   path name to be checked
+# host:   host name the file is on, default to LHOST
+# chkopt: 1 - do a file/path check, 0 - do not
+#
+# Return an array of 2 (hstat, msg)
+#   hstat: 0 if the system is up and accessible,
+#          1 - host is down,
+#          2 - path not accessible,
+#          negative values for a planned system down
+#   msg:   None if hstat == 0,
+#          a non-empty string holding the system-down message if hstat != 0
+#
+def host_down_status(path, host, chkopt = 0, logact = 0):
+
+    shost = strip_host_name(host)
+    hstat = 0
+    rets = [0, None]
+
+    msg = hostname = None
+
+    if PgUtil.pgcmp(shost, LHOST, 1) == 0:
+        if not path or (chkopt and check_local_file(path)): return rets
+        msg = path + ": is not accessible"
+        flag = "L"
+        if re.match(r'^(/{}/|{})'.format(PgLOG.PGLOG['GPFSNAME'], PgLOG.PGLOG['DSSDATA']), path):
+            hstat = 1
+            hostname = PgLOG.PGLOG['GPFSNAME']
+        else:
+            hstat = 2
+    elif PgUtil.pgcmp(shost, PgLOG.PGLOG['GPFSNAME'], 1) == 0:
+        if not path or (chkopt and check_local_file(path)): return rets
+        msg = path + ": is not accessible"
+        flag = "L"
+        hstat = 1
+        hostname = PgLOG.PGLOG['GPFSNAME']
+    elif PgUtil.pgcmp(shost, BHOST, 1) == 0:
+        if path:
+            hstat = 2
+        else:
+            hstat = 1
+            path = DPATHS['B']
+
+        if chkopt and check_backup_file(path, QPOINTS['B']): return rets
+        hostname = BHOST
+        msg = "{}-{}: is not accessible".format(hostname, path)
+        flag = "B"
+    elif PgUtil.pgcmp(shost, DHOST, 1) == 0:
+        if path:
+            hstat = 2
+        else:
+            hstat = 1
+            path = DPATHS['D']
+
+        if chkopt and check_backup_file(path, QPOINTS['D']): return rets
+        hostname = DHOST
+        msg = "{}-{}: is not accessible".format(hostname, path)
+        flag = "D"
+    elif PgUtil.pgcmp(shost, OHOST, 1) == 0:
+        if path:
+            hstat = 2
+        else:
+            hstat = 1
+            path = PgLOG.PGLOG['OBJCTBKT']
+
+        if chkopt and check_object_file(path): return rets
+
+        hostname = OHOST
+        msg = "{}-{}: is not accessible".format(hostname, path)
+        flag = "O"
+    elif PgUtil.pgcmp(shost, PgLOG.PGLOG['PGBATCH'], 1):
+        if path and chkopt and check_remote_file(path, host): return rets
+        estat = ping_remote_host(host)
+        if estat:
+            hstat = 1
+            hostname = host
+        else:
+            if not path: return rets
+            if re.match(r'^/{}/'.format(PgLOG.PGLOG['GPFSNAME']), path):
+                hstat = 1
+                hostname = PgLOG.PGLOG['GPFSNAME']
+            else:
+                hstat = 2
+                hostname = host
+
+        flag = "R"
+        msg = "{}-{}: is not accessible".format(host, path)
+    elif PgLOG.get_host(1) == PgLOG.PGLOG['PGBATCH']:   # local host is a batch node
+        if not path or (chkopt and check_local_file(path)): return rets
+        msg = path + ": is not accessible"
+        flag = "L"
+        if re.match(r'^(/{}/|{})'.format(PgLOG.PGLOG['GPFSNAME'], PgLOG.PGLOG['DSSDATA']), path):
+            hstat = 1
+            hostname = PgLOG.PGLOG['GPFSNAME']
+        else:
+            hstat = 2
+
+    if msg: msg += " at the moment; checked on " + PgLOG.PGLOG['HOSTNAME']
+
+    if hostname:
+        estat = PgDBI.system_down_message(hostname, path, 0, logact)
+        if estat:
+            hstat = -hstat
+            msg += "\n" + estat
+
+    if logact and (chkopt or hstat < 0): errlog(msg, flag, 1, logact)
+
+    return (hstat, msg)
+
+#
+# Check if a given path on a specified host is down or not
+#
+# path: path name to be checked
+# host: host name the file is on, default to LHOST
+#
+# Return errmsg if not accessible and None otherwise
+#
+def check_host_down(path, host, logact = 0):
+
+    (hstat, msg) = host_down_status(path, host, 1, logact)
+
+    return msg if hstat else None
+
+#
+# Check if a given service name is accessible from a specified host
+#
+# sname: service name to be checked
+# fhost: from host name to connect to the service, default to LHOST
+#
+# reset the service flag to A or I accordingly
+#
+# Return None if accessible, an error message if not, and -1 if it cannot be checked
+#
+def check_service_accessibilty(sname, fhost = None, logact = 0):
+
+    if not fhost: fhost = PgLOG.PGLOG['HOSTNAME']
+    pgrec = PgDBI.pgget("dsservice", "*", "service = '{}' AND hostname = '{}'".format(sname, fhost), logact)
+    if not pgrec:
+        PgLOG.pglog("dsservice: Access {} from {} is not defined in RDA Configuration".format(sname, fhost), logact)
+        return -1
+
+    path = sname if (pgrec['flag'] == "H" or pgrec['flag'] == "G") else None
+    (hstat, msg) = host_down_status(path, fhost, 1, logact)
+
+    return msg if hstat else None
+
+#
+# check if this host is a local host for the given host name
+#
+def is_local_host(host):
+
+    host = strip_host_name(host)
+    if host == LHOST or PgLOG.valid_batch_host(host): return 1
+
+    return 0
+
+#
+# check and return an action string on a node other than the local one
+#
+def local_host_action(host, action, info, logact = 0):
+
+    if is_local_host(host): return 1
+    if not logact: return 0
+
+    if host == "partition":
+        msg = "for individual partition"
+    elif host == "rda_config":
+        msg = "via https://rda.ucar.edu/internal/rda_pg_config"
+    elif host in PgLOG.BCHCMDS:
+        msg = "on a {} Node".format(host)
+    else:
+        msg = "on " + host
+
+    return PgLOG.pglog("{}: Cannot {}, try {}".format(info, action, msg), logact)
+
+#
+# ping a given remote host name
+#
+# return None if the system is up, an error message if not
+#
+def ping_remote_host(host):
+
+    while True:
+        buf = PgLOG.pgsystem("ping -c 3 " + host, PgLOG.LOGWRN, CMDRET)
+        if buf:
+            ms = re.search(r'3 packets transmitted, (\d)', buf)
+            if ms:
+                if int(ms.group(1)) > 0:
+                    return None
+                else:
+                    return host + " seems down, not accessible"
+        if PgLOG.PGLOG['SYSERR']:
+            if PgLOG.PGLOG['SYSERR'].find("ping: unknown host") > -1 and host.find('.') > -1:
+                host += ".ucar.edu"
+                continue
+            return PgLOG.PGLOG['SYSERR']
+        else:
+            return "Cannot ping " + host
+
+#
+# compare two given host names, return 1 if the same and 0 otherwise
+#
+def same_hosts(host1, host2):
+
+    host1 = strip_host_name(host1)
+    host2 = strip_host_name(host2)
+
+    return (1 if PgUtil.pgcmp(host1, host2, 1) == 0 else 0)
+
+#
+# strip and identify the proper host name
+#
+def strip_host_name(host):
+
+    if not host: return LHOST
+
+    ms = re.match(r'^([^\.]+)\.', host)
+    if ms: host = ms.group(1)
+    if PgUtil.pgcmp(host, PgLOG.PGLOG['HOSTNAME'], 1) == 0:
+        return LHOST
+    else:
+        return host
+
+#
+# Check file status info on a given host name (including the local host); no background
+# process for checking
+#
+# file: file name to be checked
+# host: host name the file is on, default to LHOST
+# opt:  0 - get data size only (fname, data_size, isfile), fname is the file basename
+#       1 - get date/time modified (date_modified, time_modified)
+#       2 - get file owner's login name (logname)
+#       4 - get permission mode in 3 octal digits (mode)
+#       8 - get group name (group)
+#      16 - get week day, 0-Sunday, 1-Monday (week_day)
+#      32 - get checksum (checksum), works for local files only
+#
+# Return a dict of file info, or None if the file does not exist
+#
+def check_rda_file(file, host = LHOST, opt = 0, logact = 0):
+
+    shost = strip_host_name(host)
+
+    if PgUtil.pgcmp(shost, LHOST, 1) == 0:
+        return check_local_file(file, opt, logact)
+    elif PgUtil.pgcmp(shost, OHOST, 1) == 0:
+        return check_object_file(file, None, opt, logact)
+    elif PgUtil.pgcmp(shost, BHOST, 1) == 0:
+        return check_backup_file(file, 
QPOINTS['B'], opt, logact) + elif PgUtil.pgcmp(shost, DHOST, 1) == 0: + return check_backup_file(file, QPOINTS['D'], opt, logact) + else: + return check_remote_file(file, host, opt, logact) + +# +# wrapper to check_local_file() and check_globus_file() to check info for a file +# on local or remote Globus endpoints +# +def check_globus_file(file, endpoint = None, opt = 0, logact = 0): + + if not endpoint: endpoint = PgLOG.PGLOG['BACKUPEP'] + if endpoint == 'rda-glade': + if re.match(r'^/(data|decsdata)/', file): file = PgLOG.PGLOG['DSSDATA'] + file + return check_local_file(file, opt, logact) + else: + return check_backup_file(file, endpoint, opt, logact) + +# +# check and get local file status information +# +# file: local File name +# opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename +# 1 - get date/time modified (date_modified, time_modfied) +# 2 - get file owner's login name (logname) +# 4 - get permission mode in 3 octal digits (mode) +# 8 - get group name (group) +# 16 - get week day 0-Sunday, 1-Monday (week_day) +# 32 - get checksum (checksum) +# 64 - remove file too small +# 128 - check twice for missing file +# +# Return: a dict of file info, or None if not exists +# +def check_local_file(file, opt = 0, logact = 0): + + ret = None + if not file: return ret + loop = 0 + while loop < 2: + if op.exists(file): + try: + fstat = os.stat(file) + ret = local_file_stat(file, fstat, opt, logact) + break + except Exception as e: + errmsg = "{}: {}".format(file, str(e)) + (hstat, msg) = host_down_status(file, LHOST, 0, logact) + if hstat: errmsg += "\n" + msg + errlog(errmsg, 'L', loop, logact) + else: + if loop > 0 or opt&128 == 0: break + PgLOG.pglog(file + ": check it again in a moment", PgLOG.LOGWRN) + time.sleep(6) + loop += 1 + + if loop > 1: return PgLOG.FAILURE + ECNTS['L'] = 0 # reset error count + return ret + +# +# local function to get local file stat +# +def local_file_stat(file, fstat, opt, logact): + + if not fstat: + errlog(file + ": Error check file stat", 'L', 1, logact) + return None + + info = {} + info['isfile'] = (1 if stat.S_ISREG(fstat.st_mode) else 0) + info['data_size'] = fstat.st_size + info['fname'] = op.basename(file) + if not opt: return info + if opt&64 and info['isfile'] and info['data_size'] < PgLOG.PGLOG['MINSIZE']: + PgLOG.pglog("{}: Remove {} file".format(file, ("Small({}B)".format(info['data_size']) if info['data_size'] else "Empty")), logact&~PgLOG.EXITLG) + delete_local_file(file, logact) + return None + + if opt&17: + mdate, mtime = PgUtil.get_date_time(fstat.st_mtime) + if opt&1: + info['date_modified'] = mdate + info['time_modified'] = mtime + cdate, ctime = PgUtil.get_date_time(fstat.st_ctime) + info['date_created'] = cdate + info['time_created'] = ctime + if opt&16: info['week_day'] = PgUtil.get_weekday(mdate) + + if opt&2: + info['uid'] = fstat.st_uid + info['logname'] = pwd.getpwuid(info['uid']).pw_name + if opt&4: info['mode'] = stat.S_IMODE(fstat.st_mode) + if opt&8: + info['gid'] = fstat.st_gid + info['group'] = grp.getgrgid(info['gid']).gr_name + if opt&32: info['checksum'] = get_md5sum(file, 0, logact) + + return info + +# +# check and get file status information of a file on remote host +# +# file: remote File name +# opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename +# 1 - get date/time modified (date_modified, time_modfied) +# 2 - file owner's login name (logname), assumed 'rdadata' +# 4 - get permission mode in 3 octal digits (mode) +# 8 - get group name (group), assumed 
'dss'
+#      16 - get week day, 0-Sunday, 1-Monday (week_day)
+#
+# Return: a dict of file info, or None if the file does not exist
+#
+def check_remote_file(file, host, opt = 0, logact = 0):
+
+    if not file: return None
+    ms = re.match(r'^(.+)/$', file)
+    if ms: file = ms.group(1)   # remove ending '/' in case
+    cmd = "{} {}".format(PgLOG.get_sync_command(host), file)
+    loop = 0
+    while loop < 2:
+        buf = PgLOG.pgsystem(cmd, PgLOG.LOGWRN, CMDRET)
+        if buf or not PgLOG.PGLOG['SYSERR'] or PgLOG.PGLOG['SYSERR'].find(PgLOG.PGLOG['MISSFILE']) > -1: break
+        errmsg = PgLOG.PGLOG['SYSERR']
+        (hstat, msg) = host_down_status(file, host, 0, logact)
+        if hstat: errmsg += "\n" + msg
+        errlog(errmsg, 'R', loop, logact)
+        loop += 1
+
+    if loop > 1: return PgLOG.FAILURE
+    ECNTS['R'] = 0   # reset error count
+    if buf:
+        for line in re.split(r'\n', buf):
+            info = remote_file_stat(line, opt)
+            if info: return info
+
+    return None
+
+#
+# local function to get remote file stat
+#
+def remote_file_stat(line, opt):
+
+    info = {}
+    items = re.split(r'\s+', line)
+    if len(items) < 5 or items[4] == '.': return None
+    ms = re.match(r'^([d\-])([\w\-]{9})$', items[0])
+    info['isfile'] = (1 if ms and ms.group(1) == "-" else 0)
+    if opt&4: info['mode'] = get_file_mode(ms.group(2))
+    fsize = items[1]
+    if fsize.find(',') > -1: fsize = re.sub(r',', '', fsize)
+    info['data_size'] = int(fsize)
+    info['fname'] = op.basename(items[4])
+    if not opt: return info
+    if opt&17:
+        mdate = PgUtil.format_date(items[2], "YYYY-MM-DD", "YYYY/MM/DD")
+        mtime = items[3]
+        if PgLOG.PGLOG['GMTZ']: (mdate, mtime) = PgUtil.addhour(mdate, mtime, PgLOG.PGLOG['GMTZ'])
+        if opt&1:
+            info['date_modified'] = mdate
+            info['time_modified'] = mtime
+        if opt&16: info['week_day'] = PgUtil.get_weekday(mdate)
+
+    if opt&2: info['logname'] = "rdadata"
+    if opt&8: info['group'] = PgLOG.PGLOG['RDAGRP']
+
+    return info
+
+#
+# check and get object file status information
+#
+# file: object store File key name
+# opt:  0 - get data size only (fname, data_size, isfile), fname is the file basename
+#       1 - get date/time modified (date_modified, time_modified)
+#       2 - get file owner's login name (logname)
+#       4 - get metadata hash
+#       8 - get group name (group)
+#      16 - get week day, 0-Sunday, 1-Monday (week_day)
+#      32 - get checksum (checksum)
+#      64 - check once, no rechecking
+#
+# Return a dict of file info, or None if the file does not exist
+#
+def check_object_file(file, bucket = None, opt = 0, logact = 0):
+
+    if not bucket: bucket = PgLOG.PGLOG['OBJCTBKT']
+    ret = None
+    if not file: return ret
+    cmd = "{} lo {} -b {}".format(OBJCTCMD, file, bucket)
+    ucmd = "{} gm -k {} -b {}".format(OBJCTCMD, file, bucket) if opt&14 else None
+    loop = 0
+    while loop < 2:
+        buf = PgLOG.pgsystem(cmd, PgLOG.LOGWRN, CMDRET)
+        if buf:
+            if re.match(r'^\[\]', buf): break
+            if re.match(r'^\[\{', buf):
+                ary = json.loads(buf)
+                cnt = len(ary)
+                if cnt > 1: return PgLOG.pglog("{}-{}: {} records returned\n{}".format(bucket, file, cnt, buf), logact|PgLOG.ERRLOG)
+                hash = ary[0]
+                uhash = None
+                if ucmd:
+                    ubuf = PgLOG.pgsystem(ucmd, PgLOG.LOGWRN, CMDRET)
+                    if ubuf and re.match(r'^\{', ubuf): uhash = json.loads(ubuf)
+                ret = object_file_stat(hash, uhash, opt)
+                break
+        if opt&64: return PgLOG.FAILURE
+        errmsg = "Error Execute: {}\n{}".format(cmd, PgLOG.PGLOG['SYSERR'])
+        (hstat, msg) = host_down_status(bucket, OHOST, 0, logact)
+        if hstat: errmsg += "\n" + msg
+        errlog(errmsg, 'O', loop, logact)
+        loop += 1
+
+    if loop > 1: return PgLOG.FAILURE
+    ECNTS['O'] = 0   # reset error count
+    return ret
+
+#
+# check an object 
path status information +# +# path: object store path name +# +# Return count of object key names, 0 if not file exists; None if error checking +# +def check_object_path(path, bucket = None, logact = 0): + + if not bucket: bucket = PgLOG.PGLOG['OBJCTBKT'] + ret = None + if not path: return ret + cmd = "{} lo {} -ls -b {}".format(OBJCTCMD, path, bucket) + loop = 0 + while loop < 2: + buf = PgLOG.pgsystem(cmd, PgLOG.LOGWRN, CMDRET) + if buf: + ary = json.loads(buf) + return len(ary) + errmsg = "Error Execute: {}\n{}".format(cmd, PgLOG.PGLOG['SYSERR']) + (hstat, msg) = host_down_status(bucket, OHOST, 0, logact) + if hstat: errmsg += "\n" + msg + errlog(errmsg, 'O', loop, logact) + loop += 1 + + ECNTS['O'] = 0 # reset error count + return ret + +# +# object store function to get file stat +# +def object_file_stat(hash, uhash, opt): + + info = {'isfile' : 1, 'data_size' : int(hash['Size']), 'fname' : op.basename(hash['Key'])} + if not opt: return info + if opt&17: + ms = re.match(r'^(\d+-\d+-\d+)\s+(\d+:\d+:\d+)', hash['LastModified']) + if ms: + (mdate, mtime) = ms.groups() + if PgLOG.PGLOG['GMTZ']: (mdate, mtime) = PgUtil.addhour(mdate, mtime, PgLOG.PGLOG['GMTZ']) + if opt&1: + info['date_modified'] = mdate + info['time_modified'] = mtime + if opt&16: info['week_day'] = PgUtil.get_weekday(mdate) + if opt&32: + ms = re.match(r'"(.+)"', hash['ETag']) + if ms: info['checksum'] = ms.group(1) + if uhash: + if opt&2: info['logname'] = uhash['user'] + if opt&4: info['meta'] = uhash + if opt&8: info['group'] = uhash['group'] + + return info + +# +# check and get backup file status information +# +# file: backup File key name +# opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename +# 1 - get date/time modified (date_modified, time_modfied) +# 2 - get file owner's login name (logname) +# 4 - get metadata hash +# 8 - get group name (group) +# 16 - get week day 0-Sunday, 1-Monday (week_day) +# 64 - rechecking +# +# Return a dict of file info, or None if file not exists +# +def check_backup_file(file, endpoint = None, opt = 0, logact = 0): + + ret = None + if not file: return ret + if not endpoint: endpoint = PgLOG.PGLOG['BACKUPEP'] + bdir = op.dirname(file) + bfile = op.basename(file) + cmd = "{} -ls -ep {} -p {} --filter {}".format(BACKCMD, endpoint, bdir, bfile) + ccnt = loop = 0 + while loop < 2: + buf = PgLOG.pgsystem(cmd, logact, CMDRET) + syserr = PgLOG.PGLOG['SYSERR'] + if buf: + getstat = 0 + for line in re.split(r'\n', buf): + if re.match(r'^(User|-+)\s*\|', line): + getstat += 1 + elif getstat > 1: + ret = backup_file_stat(line, opt) + if ret: break + if ret: break + if loop or opt&64 == 0: return ret + time.sleep(PgSIG.PGSIG['ETIME']) + elif syserr: + if syserr.find("Directory '{}' not found on endpoint".format(bdir)) > -1: + if loop or opt&64 == 0: return ret + time.sleep(PgSIG.PGSIG['ETIME']) + elif ccnt < 2 and syserr.find("The connection to the server was broken") > -1: + time.sleep(PgSIG.PGSIG['ETIME']) + ccnt += 1 + continue + else: + if opt&64 == 0: return PgLOG.FAILURE + errmsg = "Error Execute: {}\n{}".format(cmd, syserr) + (hstat, msg) = host_down_status('', QHOSTS[endpoint], 0, logact) + if hstat: errmsg += "\n" + msg + errlog(errmsg, 'B', loop, logact) + loop += 1 + + if ret: ECNTS['B'] = 0 # reset error count + return ret + +# +# backup store function to get file stat +# +def backup_file_stat(line, opt): + + info = {} + items = re.split(r'[\s\|]+', line) + if len(items) < 8: return None + info['isfile'] = (1 if items[6] == 'file' else 0) + 
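+    # (items[] holds one whitespace/pipe-split row of 'dsglobus -ls' output; the column
+    # order assumed by the indexing in this function is:
+    #    user | group | permissions | size | date | time | type | filename)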
+    info['data_size'] = int(items[3])
+    info['fname'] = items[7]
+    if not opt: return info
+    if opt&17:
+        mdate = items[4]
+        mtime = items[5]
+        ms = re.match(r'^(\d+:\d+:\d+)', mtime)
+        if ms: mtime = ms.group(1)
+        if PgLOG.PGLOG['GMTZ']: (mdate, mtime) = PgUtil.addhour(mdate, mtime, PgLOG.PGLOG['GMTZ'])
+        if opt&1:
+            info['date_modified'] = mdate
+            info['time_modified'] = mtime
+        if opt&16: info['week_day'] = PgUtil.get_weekday(mdate)
+    if opt&2: info['logname'] = items[0]
+    if opt&4: info['mode'] = get_file_mode(items[2])
+    if opt&8: info['group'] = items[1]
+
+    return info
+
+#
+# check and get status information for a file inside a tar file
+#
+# file:  File name to be checked
+# tfile: the tar file name
+# opt:   0 - get data size only (fname, data_size, isfile), fname is the file basename
+#        1 - get date/time modified (date_modified, time_modified)
+#        2 - get file owner's login name (logname)
+#        4 - get permission mode in 3 octal digits (mode)
+#        8 - get group name (group)
+#       16 - get week day, 0-Sunday, 1-Monday (week_day)
+#
+# Return a dict of file info, or None if the file does not exist
+#
+def check_tar_file(file, tfile, opt = 0, logact = 0):
+
+    ret = None
+    if not (file and tfile): return ret
+
+    for loop in range(2):
+        buf = PgLOG.pgsystem("tar -tvf {} {}".format(tfile, file), PgLOG.LOGWRN, CMDRET)
+        if buf or not PgLOG.PGLOG['SYSERR'] or PgLOG.PGLOG['SYSERR'].find('Not found in archive') > -1: break
+
+        errmsg = PgLOG.PGLOG['SYSERR']
+        (hstat, msg) = host_down_status(tfile, LHOST, 0, logact)
+        if hstat: errmsg += "\n" + msg
+        errlog(errmsg, 'L', loop, logact)
+
+    if loop > 0: return PgLOG.FAILURE
+    if buf:
+        for line in re.split(r'\n', buf):
+            ret = tar_file_stat(line, opt)
+            if ret: break
+    ECNTS['L'] = 0   # reset error count
+
+    return ret
+
+#
+# local function to get file stat in a tar file
+#
+def tar_file_stat(line, opt):
+
+    items = re.split(r'\s+', line)
+    if len(items) < 6: return None
+    ms = re.match(r'^([d\-])([\w\-]{9})$', items[0])
+    if not ms: return None
+    info = {}
+    info['isfile'] = (1 if ms and ms.group(1) == "-" else 0)
+    info['data_size'] = int(items[2])
+    info['fname'] = op.basename(items[5])
+    if not opt: return info
+    if opt&4: info['mode'] = get_file_mode(ms.group(2))
+    if opt&17:
+        mdate = items[3]
+        mtime = items[4]
+        if PgLOG.PGLOG['GMTZ']: (mdate, mtime) = PgUtil.addhour(mdate, mtime, PgLOG.PGLOG['GMTZ'])
+        if opt&1:
+            info['date_modified'] = mdate
+            info['time_modified'] = mtime
+        if opt&16: info['week_day'] = PgUtil.get_weekday(mdate)
+
+    if opt&10:
+        ms = re.match(r'^(\w+)/(\w+)', items[1])
+        if ms:
+            if opt&2: info['logname'] = ms.group(1)
+            if opt&8: info['group'] = ms.group(2)
+
+    return info
+
+#
+# check and get status information for a file on an ftp server
+#
+# file: File name to be checked
+# name: login user name
+# pswd: login password
+# opt:  0 - get data size only (fname, data_size, isfile), fname is the file basename
+#       1 - get date/time modified (date_modified, time_modified)
+#       2 - get file owner's login name (logname)
+#       4 - get permission mode in 3 octal digits (mode)
+#       8 - get group name (group)
+#      16 - get week day, 0-Sunday, 1-Monday (week_day)
+#
+# Return a dict of file info, or None if the file does not exist
+#
+def check_ftp_file(file, opt = 0, name = None, pswd = None, logact = 0):
+
+    if not file: return None
+
+    ms = re.match(r'^(.+)/$', file)
+    if ms: file = ms.group(1)   # remove ending '/' in case
+    cmd = "ncftpls -l "
+    if name: cmd += "-u {} ".format(name)
+    if pswd: cmd += "-p {} ".format(pswd)
+    fname = op.basename(file)
+
+    for loop in range(2):
+        buf = PgLOG.pgsystem(cmd + file, PgLOG.LOGWRN, CMDRET)
+        if buf: break
+        if PgLOG.PGLOG['SYSERR']:
+            errlog(PgLOG.PGLOG['SYSERR'], 'O', loop, logact|PgLOG.LOGERR)
+        if loop == 0: file = op.dirname(file) + '/'
+
+    if not buf: return PgLOG.FAILURE
+    for line in re.split(r'\n', buf):
+        if not line or line.find(fname) < 0: continue
+        info = ftp_file_stat(line, opt)
+        if info: return info
+
+    return None
+
+#
+# local function to get the stat of a file on an ftp server
+#
+def ftp_file_stat(line, opt):
+
+    items = re.split(r'\s+', line)
+    if len(items) < 9: return None
+    ms = re.match(r'^([d\-])([\w\-]{9})$', items[0])
+    info = {}
+    info['isfile'] = (1 if ms and ms.group(1) == "-" else 0)
+    info['data_size'] = int(items[4])
+    info['fname'] = op.basename(items[8])
+    if not opt: return info
+    if opt&4: info['mode'] = get_file_mode(ms.group(2))
+    if opt&17:
+        dy = int(items[6])
+        mn = PgUtil.get_month(items[5])
+        if re.match(r'^\d+$', items[7]):
+            yr = int(items[7])
+            mtime = "00:00:00"
+        else:
+            mtime = items[7] + ":00"
+            cdate = PgUtil.curdate()
+            ms = re.match(r'^(\d+)-(\d\d)', cdate)
+            if ms:
+                yr = int(ms.group(1))
+                cm = int(ms.group(2))   # current month
+                if cm < mn: yr -= 1   # previous year
+
+        mdate = "{}-{:02}-{:02}".format(yr, mn, dy)
+        if opt&1:
+            info['date_modified'] = mdate
+            info['time_modified'] = mtime
+        if opt&16: info['week_day'] = PgUtil.get_weekday(mdate)
+
+    if opt&2: info['logname'] = items[2]
+    if opt&8: info['group'] = items[3]
+
+    return info
+
+#
+# get a dict of directories/files under a given dir on a given host name (including the local host)
+#
+# dir:  directory name to be listed
+# host: host name the directory is on, default to LHOST
+# opt:  0 - get data size only (fname, data_size, isfile), fname is the file basename
+#       1 - get date/time modified (date_modified, time_modified)
+#       2 - get file owner's login name (logname)
+#       4 - get permission mode in 3 octal digits (mode)
+#       8 - get group name (group)
+#      16 - get week day, 0-Sunday, 1-Monday (week_day)
+#      32 - get checksum (checksum), works for local files only
+#
+# Return: a dict with filenames as keys, or None for an empty directory
+#
+def rda_glob(dir, host, opt = 0, logact = 0):
+
+    shost = strip_host_name(host)
+    if PgUtil.pgcmp(shost, LHOST, 1) == 0:
+        return local_glob(dir, opt, logact)
+    elif PgUtil.pgcmp(shost, OHOST, 1) == 0:
+        return object_glob(dir, None, opt, logact)
+    elif PgUtil.pgcmp(shost, BHOST, 1) == 0:
+        return backup_glob(dir, None, opt, logact)
+    else:
+        return remote_glob(dir, host, opt, logact)
+
+#
+# get a dict of directories/files under a given dir on the local host
+#
+# dir: directory name to be listed
+# opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
+#      1 - get date/time modified (date_modified, time_modified)
+#      2 - get file owner's login name (logname)
+#      4 - get permission mode in 3 octal digits (mode)
+#      8 - get group name (group)
+#     16 - get week day, 0-Sunday, 1-Monday (week_day)
+#     32 - get checksum (checksum), works for local files only
+#    256 - get files only and ignore directories
+#
+# Return: a dict with filenames as keys, or None for an empty directory
+#
+def local_glob(dir, opt = 0, logact = 0):
+
+    flist = {}
+    if not re.search(r'[*?]', dir):
+        if op.exists(dir):
+            dir = PgLOG.join_paths(dir, "*")
+        else:
+            dir += "*"
+
+    for file in glob.glob(dir):
+        info = check_local_file(file, opt, logact)
+        if info and (info['isfile'] or not 256&opt): flist[file] = info
+
+    return flist
+
+#
+# check and get file status information for files on a remote host
+#
+# dir:  remote directory name
+# host: host name the directory is on, default to LHOST
+# opt:  0 - get data size only (fname, data_size, isfile), fname is the file basename
+#       1 - get date/time modified (date_modified, time_modified)
+#       2 - file owner's login name (logname), assumed 'rdadata'
+#       4 - get permission mode in 3 octal digits (mode)
+#       8 - get group name (group), assumed 'dss'
+#      16 - get week day, 0-Sunday, 1-Monday (week_day)
+#
+# Return: a dict with filenames as keys, or None for an empty directory
+#
+def remote_glob(dir, host, opt = 0, logact = 0):
+
+    flist = {}
+    if not re.search(r'/$', dir): dir += '/'
+    buf = PgLOG.pgsystem("{} {}".format(PgLOG.get_sync_command(host), dir), PgLOG.LOGWRN, CMDRET)
+    if not buf:
+        if PgLOG.PGLOG['SYSERR'] and PgLOG.PGLOG['SYSERR'].find(PgLOG.PGLOG['MISSFILE']) < 0:
+            errlog("{}-{}: Error list directory\n{}".format(host, dir, PgLOG.PGLOG['SYSERR']), 'R', 1, logact)
+        return flist
+
+    for line in re.split(r'\n', buf):
+        info = remote_file_stat(line, opt)
+        if info: flist[dir + info['fname']] = info
+
+    return flist
+
+#
+# check and get status information for multiple object store files
+#
+# dir: object directory name
+# opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
+#      1 - get date/time modified (date_modified, time_modified)
+#      2 - get file owner's login name (logname)
+#      8 - get group name (group)
+#     16 - get week day, 0-Sunday, 1-Monday (week_day)
+#
+# Return: a dict with filenames as keys, or None if nothing exists
+#
+def object_glob(dir, bucket = None, opt = 0, logact = 0):
+
+    flist = {}
+    if not bucket: bucket = PgLOG.PGLOG['OBJCTBKT']
+    ms = re.match(r'^(.+)/$', dir)
+    if ms: dir = ms.group(1)
+    cmd = "{} lo {} -b {}".format(OBJCTCMD, dir, bucket)
+    ary = err = None
+    buf = PgLOG.pgsystem(cmd, PgLOG.LOGWRN, CMDRET)
+    if buf:
+        if re.match(r'^\[\{', buf):
+            ary = json.loads(buf)
+        elif not re.match(r'^\[\]', buf):
+            err = "{}\n{}".format(PgLOG.PGLOG['SYSERR'], buf)
+    else:
+        err = PgLOG.PGLOG['SYSERR']
+    if not ary:
+        if err:
+            errlog("{}-{}-{}: Error list files\n{}".format(OHOST, bucket, dir, err), 'O', 1, logact)
+            return PgLOG.FAILURE
+        else:
+            return flist
+
+    for hash in ary:
+        uhash = None
+        if opt&10:
+            ucmd = "{} gm -l {} -b {}".format(OBJCTCMD, hash['Key'], bucket)
+            ubuf = PgLOG.pgsystem(ucmd, PgLOG.LOGWRN, CMDRET)
+            if ubuf and re.match(r'^\{.+', ubuf): uhash = json.loads(ubuf)
+        info = object_file_stat(hash, uhash, opt)
+        if info: flist[hash['Key']] = info
+
+    return flist
+
+#
+# check and get status information for multiple Quasar backup files
+#
+# dir: backup path
+# opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename
+#      1 - get date/time modified (date_modified, time_modified)
+#      2 - get file owner's login name (logname)
+#      8 - get group name (group)
+#     16 - get week day, 0-Sunday, 1-Monday (week_day)
+#     64 - rechecking
+#
+# Return: a dict with filenames as keys, or None if nothing exists
+#
+def backup_glob(dir, endpoint = None, opt = 0, logact = 0):
+
+    if not dir: return None
+    if not endpoint: endpoint = PgLOG.PGLOG['BACKUPEP']
+    cmd = "{} -ls -ep {} -p {}".format(BACKCMD, endpoint, dir)
+    flist = {}
+    for loop in range(2):
+        buf = PgLOG.pgsystem(cmd, logact, CMDRET)
+        syserr = PgLOG.PGLOG['SYSERR']
+        if buf:
+            getstat = 0
+            for line in re.split(r'\n', buf):
+                if re.match(r'^(User|-+)\s*\|', line):
+                    getstat += 1
+                elif getstat > 1:
+                    info = backup_file_stat(line, opt)
+                    if info: flist[info['fname']] = info
+            if flist: break
+            if loop or opt&64 == 0: return None
+            time.sleep(PgSIG.PGSIG['ETIME'])
+        elif syserr:
+ +# +# check and get multiple Quasar backup file status information +# +# dir: backup path +# opt: 0 - get data size only (fname, data_size, isfile), fname is the file basename +# 1 - get date/time modified (date_modified, time_modified) +# 2 - get file owner's login name (logname) +# 8 - get group name (group) +# 16 - get week day 0-Sunday, 1-Monday (week_day) +# 64 - rechecking +# +# Return: a dict with filenames as keys, None if not found, or PgLOG.FAILURE on error +# +def backup_glob(dir, endpoint = None, opt = 0, logact = 0): + + if not dir: return None + if not endpoint: endpoint = PgLOG.PGLOG['BACKUPEP'] + cmd = "{} -ls -ep {} -p {}".format(BACKCMD, endpoint, dir) + flist = {} + for loop in range(2): + buf = PgLOG.pgsystem(cmd, logact, CMDRET) + syserr = PgLOG.PGLOG['SYSERR'] + if buf: + getstat = 0 + for line in re.split(r'\n', buf): + if re.match(r'^(User|-+)\s*\|', line): + getstat += 1 + elif getstat > 1: + info = backup_file_stat(line, opt) + if info: flist[info['fname']] = info + if flist: break + if loop or opt&64 == 0: return None + time.sleep(PgSIG.PGSIG['ETIME']) + elif syserr: + if syserr.find("Directory '{}' not found on endpoint".format(dir)) > -1: + if loop or opt&64 == 0: return None + time.sleep(PgSIG.PGSIG['ETIME']) + else: + if opt&64 == 0: return PgLOG.FAILURE + errmsg = "Error Execute: {}\n{}".format(cmd, syserr) + (hstat, msg) = host_down_status('', QHOSTS[endpoint], 0, logact) + if hstat: errmsg += "\n" + msg + errlog(errmsg, 'B', loop, logact) + + if flist: + ECNTS['B'] = 0 # reset error count + return flist + else: + return PgLOG.FAILURE + +# +# local function to get file/directory mode for given permission string, for example, rw-rw-r-- +# +def get_file_mode(perm): + + mbits = [4, 2, 1] + mults = [64, 8, 1] + plen = len(perm) + if plen == 4: + perm = perm[1:] + plen = 3 + mode = 0 + for i in range(3): + for j in range(3): + pidx = 3*i+j + if pidx < plen and perm[pidx] != "-": mode += mults[i]*mbits[j] + + return mode + +# +# Evaluate md5 checksum +# +# file: file name, or a list of file names, for MD5 checksum +# count: number of files if file is a list +# +# Return: one, or a list of, 128-bit md5 'fingerprints'; None if failed +# +def get_md5sum(file, count = 0, logact = 0): + + cmd = MD5CMD + ' ' + + if count > 0: + checksum = [None]*count + for i in range(count): + chksm = PgLOG.pgsystem(cmd + file[i], logact, 20) + if chksm: + ms = re.search(r'(\w{32})', chksm) + if ms: checksum[i] = ms.group(1) + else: + chksm = PgLOG.pgsystem(cmd + file, logact, 20) + checksum = None + if chksm: + ms = re.search(r'(\w{32})', chksm) + if ms: checksum = ms.group(1) + + return checksum + +# +# Evaluate md5 checksums and compare them for two given files +# +# file1, file2: file names +# +# Return: 0 if same and 1 if not +# +def compare_md5sum(file1, file2, logact = 0): + + if op.isdir(file1) or op.isdir(file2): + files1 = get_directory_files(file1) + fcnt1 = len(files1) if files1 else 0 + files2 = get_directory_files(file2) + fcnt2 = len(files2) if files2 else 0 + if fcnt1 != fcnt2: return 1 + chksm1 = get_md5sum(files1, fcnt1, logact) + chksm1 = ''.join(chksm1) + chksm2 = get_md5sum(files2, fcnt2, logact) + chksm2 = ''.join(chksm2) + else: + chksm1 = get_md5sum(file1, 0, logact) + chksm2 = get_md5sum(file2, 0, logact) + + return (0 if (chksm1 and chksm2 and chksm1 == chksm2) else 1) + +# +# change local directory to todir, and return odir upon success +# +def change_local_directory(todir, logact = 0): + + if logact: + lact = logact&~(PgLOG.EXITLG|PgLOG.ERRLOG) + else: + logact = lact = PgLOG.LOGWRN + if not op.isdir(todir): + if op.isfile(todir): return errlog(todir + ": is a file, cannot change directory", 'L', 1, logact) + if not make_local_directory(todir, logact): return PgLOG.FAILURE + + odir = PgLOG.PGLOG['CURDIR'] + if todir == odir: + PgLOG.pglog(todir + ": in Directory", lact) + return odir + try: + os.chdir(todir) + except Exception as e: + return errlog(str(e), 'L', 1, logact) + else: + if not op.isabs(todir): todir = os.getcwd() + PgLOG.PGLOG['CURDIR'] = todir + PgLOG.pglog(todir + ": Change to Directory", lact) + + return odir + +# +# record the directory for the deleted file; +# pass in dir as None to turn directory recording on with depth val +# +def record_delete_directory(dir, val): + + global DIRLVLS + + if dir is None: + if isinstance(val, int): + DIRLVLS = val + elif re.match(r'^\d+$', val): + DIRLVLS = int(val) + elif dir and not re.match(r'^(\.|\./|/)$', dir) and dir not in DELDIRS: + DELDIRS[dir] = val
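+ +# +# For reference, the 32-character fingerprint that get_md5sum() parses from the MD5CMD +# output can also be computed in-process with hashlib; a minimal sketch, not used by +# this module: +# +# import hashlib +# def md5_of(fname, bsize = 1024*1024): +# h = hashlib.md5() +# with open(fname, 'rb') as f: +# for chunk in iter(lambda: f.read(bsize), b''): +# h.update(chunk) +# return h.hexdigest() # 32 hex characters, as matched by r'(\w{32})'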
+ +# +# remove the recorded delete directories if they are empty +# +def clean_delete_directory(logact = 0): + + global DIRLVLS, DELDIRS + + if not DIRLVLS: return + if logact: + lact = logact&~(PgLOG.EXITLG) + else: + logact = lact = PgLOG.LOGWRN + lvl = DIRLVLS + DIRLVLS = 0 # set to 0 to stop recording directory + while lvl > 0: + lvl -= 1 + dirs = {} + for dir in DELDIRS: + host = DELDIRS[dir] + dinfo = (dir if host == LHOST else "{}-{}".format(host, dir)) + dstat = rda_empty_directory(dir, DELDIRS[dir]) + if dstat == 0: + if delete_rda_file(dir, host, logact): + PgLOG.pglog(dinfo + ": Empty directory removed", lact) + elif dstat > 0: + if dstat == 1 and lvl > 0: PgLOG.pglog(dinfo + ": Directory not empty yet", lact) + continue + + if lvl: dirs[op.dirname(dir)] = host + + if not dirs: break + DELDIRS = dirs + + DELDIRS = {} # empty cache afterward + +# +# remove the given directory and all its subdirectories if they are empty +# +# return 1 if empty directory removed, 0 otherwise +# +def clean_empty_directory(dir, host, logact = 0): + + if not dir: return 0 + + dirs = rda_glob(dir, host) + cnt = 0 + if logact: + lact = logact&~PgLOG.EXITLG + else: + lact = logact = PgLOG.LOGWRN + + if dirs: + for name in dirs: + cnt += 1 + if dirs[name]['isfile']: continue + cnt -= clean_empty_directory(name, host, logact) + + dinfo = (dir if same_hosts(host, LHOST) else "{}-{}".format(host, dir)) + if cnt == 0: + if delete_rda_file(dir, host, logact): + PgLOG.pglog(dinfo + ": Empty directory removed", lact) + return 1 + else: + PgLOG.pglog(dinfo + ": Directory not empty yet", lact) + + return 0 + + +# +# check if given directory is empty +# +# Return: 0 if empty directory, 1 if not empty, -1 if invalid directory, and 2 if root directory +# +def rda_empty_directory(dir, host): + + shost = strip_host_name(host) + + if PgUtil.pgcmp(shost, LHOST, 1) == 0: + return local_empty_directory(dir) + else: + return remote_empty_directory(dir, host) + +# +# return 0 if empty local directory, 1 if not, -1 if invalid and 2 if root directory +# +def local_empty_directory(dir): + + if not op.isdir(dir): return -1 + if is_root_directory(dir, 'L'): return 2 + if not re.search(r'/$', dir): dir += '/' + dir += '*' + return (1 if glob.glob(dir) else 0) + +# +# return 0 if empty remote directory, 1 if not, -1 if it cannot be listed and 2 if root directory +# +def remote_empty_directory(dir, host): + + if is_root_directory(dir, 'R', host): return 2 + if not re.search(r'/$', dir): dir += '/' + buf = PgLOG.pgsystem("{} {}".format(PgLOG.get_sync_command(host), dir), PgLOG.LOGWRN, CMDRET) + if not buf: return -1 + + for line in re.split(r'\n', buf): + if remote_file_stat(line, 0): return 1 + + return 0 + +# +# get sizes of files on a given host +# +# files: file names to get sizes for +# host: host name the files are on, default to LHOST +# +# return: list of file sizes; a size is -1 if the file does not exist +# +def rda_file_sizes(files, host, logact = 0): + + sizes = [] + for file in files: sizes.append(rda_file_size(file, host, 2, logact)) + + return sizes + +# +# get sizes of local files +# +# files: file names to get sizes for +# +# return: list of file sizes; a size is -1 if the file does not exist +# +def local_file_sizes(files, logact = 0): + + sizes = [] + for file in files: sizes.append(local_file_size(file, 6, logact)) + + return sizes + +# +# check if a file on a given host is empty or too small to be considered valid +# +# file: file name to be checked +# host: host name the file is on, default to LHOST +# opt: 1 - remove empty file +# 2 - show message for empty file +# 4 - show message for non-existing file +# +# return: file size in bytes +# 0 - empty or small file, with size < PgLOG.PGLOG['MINSIZE'] +# -1 - file does not exist +# -2 - error checking file +# +def rda_file_size(file, host, opt = 0, 
logact = 0): + + info = check_rda_file(file, host, 0, logact) + if info: + if info['isfile'] and info['data_size'] < PgLOG.PGLOG['MINSIZE']: + if opt: + if opt&2: errlog("{}-{}: {} file".format(host, file, ("Too small({}B)".format(info['data_size']) if info['data_size'] > 0 else "Empty")), + 'O', 1, logact) + if opt&1: delete_rda_file(file, host, logact) + return 0 + else: + return info['data_size'] # if not regular file or not empty + + elif info != None: + return -2 # error access + else: + if opt&4: errlog("{}-{}: {}".format(host, file, PgLOG.PGLOG['MISSFILE']), 'O', 1, logact) + return -1 # file not exist + +# +# check if a local file is empty or too small to be considered valid +# +# file: file name to be checked +# opt: 1 - to remove empty file +# 2 - show message for empty file +# 4 - show message for non-existing file +# +# return: file size in unit of byte +# 0 - empty file or small file, with size < PgLOG.PGLOG['MINSIZE'] +# -1 - file not exists +# -2 - error check file +# +def local_file_size(file, opt = 0, logact = 0): + + if not op.isfile(file): + if opt&4: lmsg(file, PgLOG.PGLOG['MISSFILE'], logact) + return -1 # file not eixsts + + info = check_local_file(file, 0, logact) + if info: + if info['isfile'] and info['data_size'] < PgLOG.PGLOG['MINSIZE']: + if opt: + if opt&2: lmsg(file, ("Too small({}B)".format(info['data_size']) if info['data_size'] > 0 else "Empty file") , logact) + if opt&1: delete_local_file(file, logact) + return 0 + else: + return info['data_size'] # if not regular file or not empty + elif info != None: + return -2 # error check file + +# +# compress/uncompress a single local file +# +# ifile: file name to be compressed/uncompressed +# fmt: archive format +# act: 0 - uncompress +# 1 - compress +# 2 - get uncompress file name +# 3 - get compress file name +# return: array of new file name and archive format if changed otherwise original one +# +def compress_local_file(ifile, fmt = None, act = 0, logact = 0): + + ms = re.match(r'^(.+)\.({})'.format(CMPSTR), ifile) + if ms: + ofile = ms.group(1) + else: + ofile = ifile + + if fmt: + if act&1: + for ext in PGCMPS: + if re.search(r'(^|\.)({})(\.|$)'.format(ext), fmt, re.I): + ofile += '.' 
+ ext + break + else: + ms = re.search(r'(^|\.)({})$'.format(CMPSTR), fmt, re.I) + if ms: fmt = re.sub(r'{}{}$'.format(ms.group(1), ms.group(2)), '', fmt, 1) + + if act < 2 and ifile != ofile: convert_files(ofile, ifile, 0, logact) + + return (ofile, fmt) + +# +# get file archive format from a given file name; None if not found +# +def get_file_format(fname): + + ms = re.search(r'\.({})$'.format(TARSTR), fname, re.I) + if ms: return PGTARS[ms.group(1)][2] + + ms = re.search(r'\.({})$'.format(CMPSTR), fname, re.I) + if ms: return PGCMPS[ms.group(1)][2] + + return None + +# +# tar/untar multiple local files into/from a single tar/tar.gz/tgz/zip file +# +# tfile: tar file name to be tarred/untarred +# files: member file names in the tar file +# fmt: archive format (defaults to the tar file name extension; must be defined in PGTARS) +# act: 0 - untar +# 1 - tar +# return: PgLOG.SUCCESS upon success, PgLOG.FAILURE otherwise +# +def tar_local_file(tfile, files, fmt, act, logact = 0): + + if not fmt: + ms = re.search(r'\.({})$'.format(TARSTR), tfile, re.I) + if ms: fmt = ms.group(1) + logact |= PgLOG.ERRLOG + + if not fmt: return PgLOG.pglog(tfile + ": Missing archive format", logact) + if fmt not in PGTARS: return PgLOG.pglog("{}: unknown format '{}' provided".format(tfile, fmt), logact) + tarray = PGTARS[fmt] + + if not act: # untar member files + cmd = "{} {}".format(tarray[1], tfile) + if files: cmd += ' ' + ' '.join(files) + else: + if not files: return PgLOG.pglog(tfile + ": Missing member file to archive", logact) + cmd = "{} {} {}".format(tarray[0], tfile, ' '.join(files)) + + return PgLOG.pgsystem(cmd, logact, 7) + +# +# get local file archive format by checking extension of given local file name +# +# file: local file name +# +def local_archive_format(file): + + ms = re.search(r'\.({})$'.format(CMPSTR), file) + if ms: + fmt = ms.group(1) + if re.search(r'\.tar\.{}$'.format(fmt), file): + return "TAR." 
+ fmt.upper() + else: + return fmt.upper() + elif re.search(r'\.tar$', file): + return "TAR" + + return '' + +# +# local function to show message with full local file path +# +def lmsg(file, msg, logact = 0): + + if not op.isabs(file): file = PgLOG.join_paths(os.getcwd(), file) + + return errlog("{}: {}".format(file, msg), 'L', 1, logact) + +# +# check if given path is executable locally +# +# return PgLOG.SUCCESS if yes, PgLOG.FAILURE if not +# +def check_local_executable(path, actstr = '', logact = 0): + + if os.access(path, os.X_OK): return PgLOG.SUCCESS + if check_local_accessible(path, actstr, logact): + if actstr: actstr += '-' + errlog("{}{}: Accessible, but Unexecutable on '{}'".format(actstr, path, PgLOG.PGLOG['HOSTNAME']), 'L', 1, logact) + + return PgLOG.FAILURE + + +# +# check if given path is writable locally +# +# return PgLOG.SUCCESS if yes, PgLOG.FAILURE if not +# +def check_local_writable(path, actstr = '', logact = 0): + + if os.access(path, os.W_OK): return PgLOG.SUCCESS + if check_local_accessible(path, actstr, logact): + if actstr: actstr += '-' + errlog("{}{}: Accessible, but Unwritable on '{}'".format(actstr, path, PgLOG.PGLOG['HOSTNAME']), 'L', 1, logact) + + return PgLOG.FAILURE + +# +# check if given path is accessible locally +# +# return PgLOG.SUCCESS if yes, PgLOG.FAILURE if not +# +def check_local_accessible(path, actstr = '', logact = 0): + + if os.access(path, os.F_OK): return PgLOG.SUCCESS + if actstr: actstr += '-' + errlog("{}{}: Inaccessible on '{}'".format(actstr, path, PgLOG.PGLOG['HOSTNAME']), 'L', 1, logact) + return PgLOG.FAILURE + +# +# check if given webfile under PgLOG.PGLOG['DSSDATA'] is writable +# +# return PgLOG.SUCCESS if yes, PgLOG.FAILURE if not +# +def check_webfile_writable(action, wfile, logact = 0): + + ms = re.match(r'^({}/\w+)'.format(PgLOG.PGLOG['DSSDATA']), wfile) + if ms: + return check_local_writable(ms.group(1), "{} {}".format(action, wfile), logact) + else: + return PgLOG.SUCCESS # no need to check
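+ +# +# Usage sketch for the access checks above (illustrative only; the path and action +# string are hypothetical): each check returns PgLOG.SUCCESS or PgLOG.FAILURE instead +# of raising, so the calls chain naturally in conditions, and check_local_accessible() +# logs the reason when the path is missing: +# +# if check_local_writable('/data/web/d540000', 'Archive', PgLOG.LOGERR): +# pass # safe to write under the path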
+ +# +# convert one file to another via uncompress, move/copy, and/or compress +# +def convert_files(ofile, ifile, keep = 0, logact = 0): + + if ofile == ifile: return PgLOG.SUCCESS + oname = ofile + iname = ifile + + if keep: kfile = ifile + ".keep" + + oext = iext = None + for ext in PGCMPS: + if oext is None: + ms = re.match(r'^(.+)\.{}$'.format(ext), ofile) + if ms: + oname = ms.group(1) + oext = ext + if iext is None: + ms = re.match(r'^(.+)\.{}$'.format(ext), ifile) + if ms: + iname = ms.group(1) + iext = ext + + if iext and oext and oext == iext: + oext = iext = None + iname = ifile + oname = ofile + + if iext: # uncompress + if keep: + if iext == 'zip': + kfile = ifile + else: + local_copy_local(kfile, ifile, logact) + + if PgLOG.pgsystem("{} {}".format(PGCMPS[iext][1], ifile), logact, 5): + if iext == "zip": + path = op.dirname(iname) + if path and path != '.': move_local_file(iname, op.basename(iname), logact) + if not keep: delete_local_file(ifile, logact) + + if oname != iname: # move/copy + path = op.dirname(oname) + if path and not op.exists(path): make_local_directory(path, logact) + if keep and not op.exists(kfile): + local_copy_local(oname, iname, logact) + kfile = iname + else: + move_local_file(oname, iname, logact) + + if oext: # compress + if keep and not op.exists(kfile): + if oext == "zip": + kfile = oname + else: + local_copy_local(kfile, oname, logact) + + if oext == "zip": + path = op.dirname(oname) + if path: + if path != '.': path = change_local_directory(path, logact) + bname = op.basename(oname) + PgLOG.pgsystem("{} {}.zip {}".format(PGCMPS[oext][0], bname, bname), logact, 5) + if path != '.': change_local_directory(path, logact) + else: + PgLOG.pgsystem("{} {} {}".format(PGCMPS[oext][0], ofile, oname), logact, 5) + + if not keep and op.exists(ofile): delete_local_file(oname, logact) + else: + PgLOG.pgsystem("{} {}".format(PGCMPS[oext][0], oname), logact, 5) + + if keep and op.exists(kfile) and kfile != ifile: + if op.exists(ifile): + delete_local_file(kfile, logact) + else: + move_local_file(ifile, kfile, logact) + + if op.exists(ofile): + return PgLOG.SUCCESS + else: + return errlog("{}: ERROR convert from {}".format(ofile, ifile), 'L', 1, logact) + +# +# compare two files from two given dicts of file information +# return 0 if same, 1 if different, -1 if cannot compare +# +def compare_file_info(ainfo, binfo): + + if not (ainfo and binfo): return -1 # at least one is missing + + return (0 if (ainfo['data_size'] == binfo['data_size'] and + ainfo['date_modified'] == binfo['date_modified'] and + ainfo['time_modified'] == binfo['time_modified']) else 1) + +# +# get local_dirname +# +def get_local_dirname(file): + + dir = op.dirname(file) + if dir == '.': dir = os.getcwd() + + return dir + +# +# collect valid file names under a given directory, current directory if empty +# +def get_directory_files(dir = None, limit = 0, level = 0): + + files = [] + if dir: + if level == 0 and op.isfile(dir): + files.append(dir) + return files + dir += "/*" + else: + dir = "*" + + for file in glob.glob(dir): + if op.isdir(file): + if limit == 0 or (limit-level) > 0: + fs = get_directory_files(file, limit, level+1) + if fs: files.extend(fs) + else: + files.append(file) + + return files if files else None + +# +# read a local file into a string and return it +# +def read_local_file(file, logact = 0): + + try: + fd = open(file, 'r') + except Exception as e: + return errlog("{}: {}".format(file, str(e)), 'L', 1, logact) + else: + fstr = fd.read() + fd.close() + + return fstr + +# +# open a local file and return the file handle +# +def open_local_file(file, mode = 'r', logact = PgLOG.LOGERR): + + try: + fd = open(file, mode) + except Exception as e: + return errlog("{}: {}".format(file, str(e)), 'L', 1, logact) + + return fd + +# +# change absolute paths to relative paths +# +def get_relative_paths(files, cdir, logact = 0): + + cnt = len(files) + if cnt == 0: return files + if not cdir: cdir = os.getcwd() + + for i in range(cnt): + afile = files[i] + if op.isabs(afile): + files[i] = PgLOG.join_paths(afile, cdir, 1) + else: + PgLOG.pglog("{}: is not under the working directory '{}'".format(afile, cdir), logact) + + return files + +# +# check if the action to path is blocked +# +def check_block_path(path, act = '', logact = 0): + + blockpath = PgLOG.PGLOG['USRHOME'] + if not act: act = 'Copy' + + if re.match(r'^{}'.format(blockpath), path): + return PgLOG.pglog("{}: {} to {} is blocked".format(path, act, blockpath), logact) + else: + return 1 + +# +# join two filenames by using the common prefix/suffix and keeping the different main bodies; +# the bodies are separated by sep; replace fext with text if provided +# +def join_filenames(name1, name2, sep = '-', fext = None, text = None): + + if fext: + name1 = remove_file_extention(name1, fext) + name2 = remove_file_extention(name2, fext) + + if name1 == name2: + fname = name1 + else: + fname = suffix = '' + cnt1 = len(name1) + cnt2 = len(name2) + cnt = (cnt1 if cnt1 < cnt2 else cnt2) + + # get common prefix + for pcnt in range(cnt): + if name1[pcnt] 
!= name2[pcnt]: break + + # get common suffix + cnt -= pcnt + for scnt in range(0, cnt): + if name1[cnt1-scnt-1] != name2[cnt2-scnt-1]: break + + body1 = name1[pcnt:(cnt1-scnt)] + body2 = name2[pcnt:(cnt2-scnt)] + if scnt > 0: + suffix = name2[(cnt1-scnt):cnt1] + if name1[cnt1-scnt-1].isnumeric(): + ms = re.match(r'^([\d\.-]*\d)', suffix) + if ms: body1 += ms.group(1) # include trailing digit chrs to body1 + if pcnt > 0: + fname = name1[0:pcnt] + if name2[pcnt].isnumeric(): + ms = re.search(r'(\d[\d\.-]*)$', fname) + if ms: body2 = ms.group(1) + body2 # include leading digit chrs to body2 + + fname += body1 + sep + body2 + if suffix: fname += suffix + + if text: fname += "." + text + + return fname + +# remove given file extention if provided +# otherwise try to remove predfined compression extention in PGCMPS +def remove_file_extention(fname, fext): + + if not fname: return '' + + if fext: + fname = re.sub(r'\.{}$'.format(fext), '', fname, 1, re.I) + else: + for fext in PGCMPS: + mp = r'\.{}$'.format(fext) + if re.search(mp, fname): + fname = re.sub(mp, '', fname, 1, re.I) + break + + return fname + +# check if a previous down storage system is up now for given dflag +# +# return error message if failed checking, and None otherwise +# +def check_storage_down(dflag, dpath, dscheck, logact = 0): + + if dflag not in DHOSTS: + if logact: PgLOG.pglog(dflag + ": Unknown Down Flag for Storage Systems", logact) + return None + dhost = DHOSTS[dflag] + if not dpath and dflag in DPATHS: dpath = DPATHS[dflag] + for loop in range(2): + (stat, msg) = host_down_status(dpath, dhost, 1, logact) + if stat < 0: break # stop retry for planned down + + if not dscheck and PgLOG.PGLOG['DSCHECK']: dscheck = PgLOG.PGLOG['DSCHECK'] + if dscheck: + didx = dscheck['dflags'].find(dflag) + if msg: + if didx < 0: dscheck['dflags'] += dflag + else: + if didx > -1: dscheck['dflags'].replace(dflag, '', 1) + + return msg + +# +# check if previous down storage systems recorded in the dflags +# +# return an array of strings for storage systems that are still down, +# and empty array if all up +# +def check_storage_dflags(dflags, dscheck = None, logact = 0): + + if not dflags: return 0 + + isdict = isinstance(dflags, dict) + msgary = [] + for dflag in dflags: + msg = check_storage_down(dflag, dflags[dflag] if isdict else None, dscheck, logact) + if msg: msgary.append(msg) + + if not msgary: + if not dscheck and PgLOG.PGLOG['DSCHECK']: dscheck = PgLOG.PGLOG['DSCHECK'] + cidx = dscheck['cindex'] if dscheck else 0 + # clean dflags if the down storage systems are all up + if cidx: PgDBI.pgexec("UPDATE dscheck SET dflags = '' WHERE cindex = {}".format(cidx), logact) + + return msgary diff --git a/src/rda_python_common/PgGLBS.py b/src/rda_python_common/PgGLBS.py new file mode 100644 index 0000000..70a9fc0 --- /dev/null +++ b/src/rda_python_common/PgGLBS.py @@ -0,0 +1,198 @@ +# +############################################################################### +# +# Title : PgGLBS.py +# Author : Thomas Cram, tcram@ucar.edu +# Date : 12/10/2014 +# Purpose : python library module for Globus functions and utilities +# Changes : 10/10/2020, Zaihua Ji, zji@ucar.edu: +# converted from perl package to python module +# +# Work File : $DSSHOME/lib/python/PgGLBS.py +# Github : https://github.com/NCAR/rda-shared-libraries.git +# +############################################################################### +# +import os +import re +import PgLOG +import PgUtil +from MyGlobus import MyGlobus, MyEndpoints, GLOBUS_REQUEST_DOMAIN +import PgDBI + 
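+# py2/py3 compatibility: urlencode moved to urllib.parse in Python 3, hence the import +# fallback below; e.g. urlencode({'origin_id': 'abc', 'origin_path': '/'}) yields +# 'origin_id=abc&origin_path=%2F'.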
+try: + from urllib.parse import urlencode +except: + from urllib import urlencode + +BFILES = {} # cache backup file names and dates for each bid + +# +# Remove the Globus share rule ID for a dsrqst share +# +def remove_globus_rid(ridx, dsid): + + if not ridx: return PgLOG.pglog("[remove_globus_rid] Request index is not defined", PgLOG.LOGWRN) + if not dsid: return PgLOG.pglog("[remove_globus_rid] Dataset ID is not defined", PgLOG.LOGWRN) + + cmd = "dsglobus" + args = "'-rp -ri {}'".format(ridx) + action = "RP" + host = "PBS" + workdir = "/glade/u/home/tcram" + opts = "'-l walltime=15:00'" + spec = "tcram" + check_cmd = "dscheck ac -cm {} -av {} -ds {} -an {} -hn {} -wd {} -sn {} -qs {} -md".format(cmd, args, dsid, action, host, workdir, spec, opts) + + PgLOG.pgsystem(check_cmd) + +# +# Submit a Globus transfer of the request output on behalf of the user +# +def submit_globus_transfer(ridx): + + # call dsglobus to submit transfer + cmd = "dsglobus -st -ri {}".format(ridx) + return PgLOG.pgsystem(cmd, PgLOG.LOGWRN, 16) + +# +# check a RDA file is backed up or not for given file record; +# clear the cached bfile records if frec is None. +# return 0 if not yet, 1 if backed up, or -1 if backed up but modified +# +def file_backup_status(frec, chgdays = 1, logact = 0): + + if frec is None: + BFILES.clear() + return 0 + + bid = frec['bid'] + if not bid: return 0 + + fields = 'bfile, dsid, date_modified' + if chgdays > 0: fields += ', note' + if bid not in BFILES: BFILES[bid] = PgDBI.pgget('bfile', fields, 'bid = {}'.format(bid), logact) + brec = BFILES[bid] + if not brec: return 0 + + if 'sfile' in frec: + fname = frec['sfile'] + ftype = 'Saved' + else: + fname = frec['wfile'] + ftype = 'Web' + ret = 1 + fdate = frec['date_modified'] + bdate = brec['date_modified'] + if chgdays > 0 and PgUtil.diffdate(fdate, bdate) >= chgdays: + ret = -1 + if brec['note']: + mp = r'{}<:>{}<:>(\d+)<:>(\w+)<:>'.format(fname, frec['type']) + ms = re.search(mp, brec['note']) + if ms: + fsize = int(ms.group(1)) + cksum = ms.group(2) + if cksum and cksum == frec['checksum'] or not cksum and fsize == frec['data_size']: + ret = 1 + + if logact: + if ret == 1: + msg = "{}-{}: {} file backed up to /{}/{} by {}".format(frec['dsid'], fname, ftype, brec['dsid'], brec['bfile'], bdate) + else: + msg = "{}-{}: {} file changed on {}".format(frec['dsid'], fname, ftype, fdate) + PgLOG.pglog(msg, logact) + + return ret + +#========================================================================================= +def get_request_file_url(rfile, rpath=None, logact=0): + """ Returns the URL for a request file + Input arguments: + rfile = request file + dsid = dataset ID (dsnnn.n) + rpath = path to request file, relatvie to RDA data base path (e.g. 
'/dsrqst//' + """ + domain = GLOBUS_REQUEST_DOMAIN + + if not rpath: + try: + cond = "wfile='{}'".format(rfile) + wfrqst = PgDBI.pgget('wfrqst', 'rindex', cond, logact) + except: + msg = "[get_request_file_url] Problem getting rindex for request file {}".format(rfile) + PgLOG.pglog(msg) + if not wfrqst: + raise TypeError("Request file {} not found in table 'wfrqst'".format(rfile)) + rpath = get_request_path(wfrqst['rindex'], logact=0) + + if (rpath.find('/',0,1) != -1): + rpath = rpath.replace('/','',1) + + url = os.path.join(domain, rpath, rfile) + return url + +#========================================================================================= +def get_request_path(rindex, logact=0): + """ Returns relative path to request file + Example: '/dsrqst//' + """ + try: + fields = 'rqstid, location' + cond = 'rindex={}'.format(rindex) + rqst_info = PgDBI.pgget('dsrqst', fields, cond, logact) + except: + msg = "[get_request_path] Problem getting info for request index {}".format(rindex) + PgLOG.pglog(msg) + if not rqst_info: + raise TypeError("Request index {} not found in RDADB".format(rindex)) + + if rqst_info['location']: + base_path = MyGlobus['data_request_endpoint_base'] + loc = rqst_info['location'] + loc = loc.rstrip("/") + if (loc.find(base_path) != -1): + path_len = len(base_path) + path = "/{0}/".format(loc[path_len:]) + else: + path = "/" + else: + path = "/dsrqst/{0}/".format(rqst_info['rqstid']) + + return path + +#========================================================================================= +def get_guest_collection_url(dsid=None, locflag=None, rindex=None, logact=0): + """ Returns the URL for the guest collection endpoint in the Globus File Manager. + Either dataset ID (dsid) or request index (rindex) is required. If neither + dsid or rindex are provided, the default URL returned is the top level URL for + the 'NCAR RDA Dataset Archive' guest collection. + + Optional argument locflag = location flag of dataset ('G' = glade, 'O' = stratus, + 'B' = both glade and stratus, 'C' = CGD data under /glade/campaign/cgd/cesm) + """ + + if rindex: + origin_id = MyEndpoints['rda#data_request'] + origin_path = get_request_path(rindex, logact=logact) + elif dsid: + if not locflag: + cond = "dsid='{}'".format(dsid) + pgloc = PgDBI.pgget('dataset', 'locflag', cond, logact) + locflag = pgloc['locflag'] + if locflag == 'C': + origin_id = MyEndpoints['rda-cgd'] + origin_path = "/" + elif locflag == 'O' or locflag == 'B': + origin_id = MyEndpoints['rda-stratus'] + origin_path = "/{}/".format(dsid) + else: + origin_id = MyEndpoints['rda#datashare'] + origin_path = "/{}/".format(dsid) + else: + origin_id = MyEndpoints['rda#datashare'] + origin_path = "/" + + params = {'origin_id': origin_id, 'origin_path': origin_path} + url = '{0}?{1}'.format(MyGlobus['globus_share_url'], urlencode(params)) + + return url diff --git a/src/rda_python_common/PgLOG.py b/src/rda_python_common/PgLOG.py new file mode 100644 index 0000000..28efdb4 --- /dev/null +++ b/src/rda_python_common/PgLOG.py @@ -0,0 +1,1631 @@ +# +############################################################################### +# +# Title : PgLOG.py -- Module for logging messages. 
+# Author : Zaihua Ji, zji@ucar.edu +# Date : 03/02/2016 +# Purpose : Python library module to log message and also do other things +# according to the value of logact, like display the error +# message on screen and exit script +# +# Work File : $DSSHOME/lib/python/PgLOG.py +# Github : https://github.com/NCAR/rda-shared-libraries.git +# +############################################################################### + +import sys +import os +import re +import pwd +import grp +import shlex +from subprocess import Popen, PIPE +from os import path as op +import time +import socket +import shutil +import traceback + +# define some constants for logging actions +MSGLOG = (0x00001) # logging message +WARNLG = (0x00002) # show logging message as warning +EXITLG = (0x00004) # exit after logging +LOGWRN = (0x00003) # MSGLOG|WARNLG +LOGEXT = (0x00005) # MSGLOG|EXITLG +WRNEXT = (0x00006) # WARNLG|EXITLG +LGWNEX = (0x00007) # MSGLOG|WARNLG|EXITLG +EMLLOG = (0x00008) # append message to email buffer +LGWNEM = (0x0000B) # MSGLOG|WARNLG|EMLLOG +LWEMEX = (0x0000F) # MSGLOG|WARNLG|EMLLOG|EXITLG +ERRLOG = (0x00010) # error log only, output to STDERR +LOGERR = (0x00011) # MSGLOG|ERRLOG +LGEREX = (0x00015) # MSGLOG|ERRLOG|EXITLG +LGEREM = (0x00019) # MSGLOG|ERRLOG|EMLLOG +DOLOCK = (0x00020) # action to lock table record(s) +ENDLCK = (0x00040) # action to end locking table record(s) +AUTOID = (0x00080) # action to retrieve the last auto added id +DODFLT = (0x00100) # action to set empty values to default ones +SNDEML = (0x00200) # action to send email now +RETMSG = (0x00400) # action to return the message back +FRCLOG = (0x00800) # force logging message +SEPLIN = (0x01000) # add a separating line for email/STDOUT/STDERR +BRKLIN = (0x02000) # add a line break for email/STDOUT/STDERR +EMLTOP = (0x04000) # prepend message to email buffer +RCDMSG = (0x00814) # make sure to record logging message +MISLOG = (0x00811) # cannot access logfile +EMLSUM = (0x08000) # record as email summary +EMEROL = (0x10000) # record error as email only +EMLALL = (0x1D208) # all email acts +DOSUDO = (0x20000) # add 'sudo -u PGLOG['RDAUSER']' +NOTLOG = (0x40000) # do not log any thing +OVRIDE = (0x80000) # do override existing file or record +NOWAIT = (0x100000) # do not wait on globus task to finish +ADDTBL = (0x200000) # action to add a new table if it does not exist +SKPTRC = (0x400000) # action to skip tracing when log errors +UCNAME = (0x800000) # action to change query field names to upper case +UCLWEX = (0x800015) # UCNAME|MSGLOG|WARNLG|EXITLG + +SUCCESS = 1 # Successful function call +FINISH = 2 # go through a function, including time out +FAILURE = 0 # Unsuccessful function call + +PGLOG = { # more defined in untaint_suid() with environment variables + 'EMLADDR' : '', + 'CCDADDR' : '', + 'SEPLINE' : "===========================================================\n", + 'TWOGBS' : 2147483648, + 'ONEGBS' : 1073741824, + 'MINSIZE' : 100, # minimal file size in bytes to be valid + 'LOGMASK' : (0xFFFFFF), # log mask to turn off certain log action bits + 'BCKGRND' : 0, # background process flag -b + 'ERRCNT' : 0, # record number of errors for email + 'ERRMSG' : '', # record error message for email + 'SUMMSG' : '', # record summary message for email + 'EMLMSG' : '', # record detail message for email + 'PRGMSG' : '', # record progressing message for email, replaced each time + 'GMTZ' : 0, # 0 - use local time, 1 - use greenwich mean time + 'NOLEAP' : 0, # 1 - skip 29 of Feburary while add days to date + 'GMTDIFF' : 6, # gmt is 6 hours 
ahead of us + 'CURUID' : None, # the login name who executes the program + 'SETUID' : '', # the login name for suid if it is different to the CURUID + 'FILEMODE': 0o664, # default 8-base file mode + 'EXECMODE': 0o775, # default 8-base executable file mode or directory mode + 'ARCHHOST': "hpss", # change to hpss from mss + 'ARCHROOT': "/FS/DECS", # root path for segregated tape on hpss + 'BACKROOT': "/DRDATA/DECS", # backup path for desaster recovering tape on hpss + 'OLDAROOT': "/FS/DSS", # old root path on hpss + 'OLDBROOT': "/DRDATA/DSS", # old backup tape on hpss + 'RDAUSER' : "rdadata", # common rda user name + 'RDAEMAIL' : "zji", # specialist to receipt email intead of common rda user name + 'SUDORDA' : 0, # 1 to allow sudo to PGLOG['RDAUSER'] + 'HOSTNAME' : '', # current host name the process in running on + 'OBJCTSTR' : "object", + 'BACKUPNM' : "quasar", + 'DRDATANM' : "drdata", + 'GPFSNAME' : "glade", + 'SLMNAME' : "SLURM", + 'PBSNAME' : "PBS", + 'DSIDCHRS' : "d", + 'DOSHELL' : False, + 'NEWDSID' : True, + 'BCHHOSTS' : "SLURM:PBS", + 'HOSTTYPE' : 'dav', # default HOSTTYPE + 'EMLMAX' : 256, # up limit of email line count + 'PGBATCH' : '', # current batch service name, SLURM or PBS + 'PGBINDIR' : '', + 'SLMTIME' : 604800, # max runtime for SLURM bath job, (7x24x60x60 seconds) + 'PBSTIME' : 86400, # max runtime for SLURM bath job, (7x24x60x60 seconds) + 'MSSGRP' : None, # set if set to different HPSS group + 'RDAGRP' : "decs", + 'DSCHECK' : None, # carry some cached dscheck information + 'PGDBBUF' : None, # reference to a connected database object + 'HPSSLMT' : 10, # up limit of HPSS streams + 'NOQUIT' : 0, # do not quit if this flag is set for daemons + 'DBRETRY' : 2, # db retry count after error + 'TIMEOUT' : 15, # default timeout (in seconds) for tosystem() + 'CMDTIME' : 120, # default command time (in seconds) for pgsystem() to record end time + 'SYSERR' : None, # cache the error message generated inside pgsystem() + 'ERR2STD' : [], # if non-empty reference to array of strings, change stderr to stdout if match + 'STD2ERR' : [], # if non-empty reference to array of strings, change stdout to stderr if match + 'MISSFILE': "No such file or directory", + 'GITHUB' : "https://github.com" # github server +} + +HOSTTYPES = { + 'rda' : 'dsg_mach', + 'casper' : 'dav', + 'crhtc' : 'dav', + 'cron' : 'dav', + 'cheyenne' : 'ch', + 'chadmin' : 'ch' +} + +CPID = { + 'PID' : "", + 'CTM' : int(time.time()), + 'CMD' : "", + 'CPID' : "", +} + +BCHCMDS = {'PBS' : 'qsub', 'SLURM' : 'sbatch'} + +# global dists to cashe information +COMMANDS = {} +SLMHOSTS = [] +SLMSTATS = {} +PBSHOSTS = [] +PBSSTATS = {} + +# +# get time string in format YYMMDDHHNNSS for given ctime; or current time if ctime is 0 +# +def current_datetime(ctime = 0): + + if PGLOG['GMTZ']: + dt = time.gmtime(ctime) if ctime else time.gmtime() + else: + dt = time.localtime(ctime) if ctime else time.localtime() + + return "{:02}{:02}{:02}{:02}{:02}{:02}".format(dt[0], dt[1], dt[2], dt[3], dt[4], dt[5]) + +# +# get an environment variable and untaint it +# +def get_environment(name, default = None, logact = 0): + + env = os.getenv(name, default) + if env is None and logact: + pglog(name + ": Environment variable is not defined", logact) + + return env + +# +# cache the msg string to global email entries for later call of send_email() +# +def set_email(msg, logact = 0): + + if logact and msg: + if logact&EMLTOP: + if PGLOG['PRGMSG']: + msg = PGLOG['PRGMSG'] + "\n" + msg + PGLOG['PRGMSG'] = "" + if PGLOG['ERRCNT'] == 0: + if not 
re.search(r'\n$', msg): msg += "!\n" + else: + if PGLOG['ERRCNT'] == 1: + msg += " with 1 Error:\n" + else: + msg += " with {} Errors:\n".format(PGLOG['ERRCNT']) + msg += break_long_string(PGLOG['ERRMSG'], 512, None, PGLOG['EMLMAX']/2, None, 50, 25) + PGLOG['ERRCNT'] = 0 + PGLOG['ERRMSG'] = '' + + if PGLOG['SUMMSG']: + msg += PGLOG['SEPLINE'] + if PGLOG['SUMMSG']: msg += "Summary:\n" + msg += break_long_string(PGLOG['SUMMSG'], 512, None, PGLOG['EMLMAX']/2, None, 50, 25) + + if PGLOG['EMLMSG']: + msg += PGLOG['SEPLINE'] + if PGLOG['SUMMSG']: msg += "Detail Information:\n" + + PGLOG['EMLMSG'] = msg + break_long_string(PGLOG['EMLMSG'], 512, None, PGLOG['EMLMAX'], None, 50, 40) + PGLOG['SUMMSG'] = "" # in case not + else: + if logact&ERRLOG: # record error for email summary + PGLOG['ERRCNT'] += 1 + PGLOG['ERRMSG'] += "{}. {}".format(PGLOG['ERRCNT'], msg) + elif logact&EMLSUM: + if PGLOG['SUMMSG']: + if logact&BRKLIN: PGLOG['SUMMSG'] += "\n" + if logact&SEPLIN: PGLOG['SUMMSG'] += PGLOG['SEPLINE'] + PGLOG['SUMMSG'] += msg # append + + if logact&EMLLOG: + if PGLOG['EMLMSG']: + if logact&BRKLIN: PGLOG['EMLMSG'] += "\n" + if logact&SEPLIN: PGLOG['EMLMSG'] += PGLOG['SEPLINE'] + PGLOG['EMLMSG'] += msg # append + elif msg is None: + PGLOG['EMLMSG'] = "" + +# +# retrieve the cached email message +# +def get_email(): + return PGLOG['EMLMSG'] + +# +# send a customized email with all entries included +# +def send_customized_email(logmsg, emlmsg, logact = 0): + + entries = { + 'fr' : ["From", 1, None], + 'to' : ["To", 1, None], + 'cc' : ["Cc", 0, None], + 'sb' : ["Subject", 1, None] + } + + if logmsg: + logmsg += ': ' + else: + logmsg = '' + for ekey in entries: + entry = entries[ekey][0] + if re.search(r'(^|\n){}:\s*\n'.format(entry), emlmsg, re.I): + ms = None + else: + ms = re.search(r'(^|\n){}:\s*(.+)\n'.format(entry), emlmsg, re.I) + if ms: + entries[ekey][2] = ms.group(2) + elif logact and entries[ekey][1]: + return pglog("{}Missing Entry '{}' for sending email".format(logmsg, entry), logact|ERRLOG) + + logmsg += "Email " + entries['to'][2] + if entries['cc'][2]: logmsg += " Cc'd " + entries['cc'][2] + logmsg += " Subject: " + entries['sb'][2] + + if pgsystem(PGLOG['EMLSEND'], logact, 4, emlmsg): + log_email(emlmsg) + if logact: pglog(logmsg, logact&(~EXITLG)) + return SUCCESS + else: + errmsg = "Error sending email: " + logmsg + return pglog(logmsg, (logact|ERRLOG)&~EXITLG) + +# +# send an email, if empty msg; send email message saved in PGLOG['EMLMSG'] instead +# +def send_email(subject = None, receiver = None, msg = None, sender = None, logact = 0): + + if not msg: + if PGLOG['EMLMSG']: + msg = PGLOG['EMLMSG'] + PGLOG['EMLMSG'] = '' + else: + return '' + + docc = 0 + if not sender: + sender = PGLOG['CURUID'] + if sender != PGLOG['RDAUSER']: docc = 1 + if sender == PGLOG['RDAUSER']: sender = PGLOG['RDAEMAIL'] + if sender.find('@') == -1: sender += "@ucar.edu" + if not receiver: + receiver = PGLOG['EMLADDR'] if PGLOG['EMLADDR'] else PGLOG['CURUID'] + if receiver == PGLOG['RDAUSER']: receiver = PGLOG['RDAEMAIL'] + if receiver.find('@') == -1: receiver += "@ucar.edu" + + if docc and not re.match(PGLOG['RDAUSER'], sender): add_carbon_copy(sender, 1) + + emlmsg = "From: {}\nTo: {}\n".format(sender, receiver) + logmsg = "Email " + receiver + if PGLOG['CCDADDR']: + emlmsg += "Cc: {}\n".format(PGLOG['CCDADDR']) + logmsg += " Cc'd " + PGLOG['CCDADDR'] + if not subject: subject = "Message from {}-{}".format(PGLOG['HOSTNAME'], get_command()) + if not re.search(r'!$', subject): subject += '!' 
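+ # emlmsg already holds the From/To/Cc header lines; append the Subject header and the + # message body, then hand the whole buffer to PGLOG['EMLSEND'] via stdin below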
+ emlmsg += "Subject: {}\n{}\n".format(subject, msg) + if CPID['CPID']: logmsg += " in " + CPID['CPID'] + logmsg += ", Subject: {}\n".format(subject) + + if pgsystem(PGLOG['EMLSEND'], logact, 4, emlmsg): + log_email(emlmsg) + if logact: pglog(logmsg, logact&~EXITLG) + return logmsg + else: + errmsg = "Error sending email: " + logmsg + pglog(logmsg, (logact|ERRLOG)&~EXITLG) + return errmsg + +# +# log email sent +# +def log_email(emlmsg): + + if not CPID['PID']: CPID['PID'] = "{}-{}-{}".format(PGLOG['HOSTNAME'], get_command(), PGLOG['CURUID']) + cmdstr = "{} {} at {}\n".format(CPID['PID'], break_long_string(CPID['CMD'], 40, "...", 1), current_datetime()) + fn = "{}/{}".format(PGLOG['LOGPATH'], PGLOG['EMLFILE']) + f = open(fn, 'a') + f.write(cmdstr + emlmsg) + f.close() + +# +# Function: cmdlog(cmdline) +# cmdline - program name and all arguments +# ctime - time (in seconds) when the command starts +# +def cmdlog(cmdline = None, ctime = 0, logact = None): + + if logact is None: logact = MSGLOG|FRCLOG + if not ctime: ctime = int(time.time()) + + if not cmdline or re.match('(end|quit|exit|abort)', cmdline, re.I): + cmdline = cmdline.capitalize() if cmdline else "Ends" + cinfo = cmd_execute_time("{} {}".format(CPID['PID'], cmdline), (ctime - CPID['CTM'])) + ": " + if CPID['CPID']: cinfo += CPID['CPID'] + " <= " + cinfo += break_long_string(CPID['CMD'], 40, "...", 1) + if logact: pglog(cinfo, logact) + else: + cinfo = current_datetime(ctime) + if re.match(r'CPID \d+', cmdline): + CPID['PID'] = "{}({})-{}{}".format(PGLOG['HOSTNAME'], os.getpid(), PGLOG['CURUID'], cinfo) + if logact: pglog("{}: {}".format(CPID['PID'], cmdline), logact) + CPID['CPID'] = cmdline + elif CPID['PID'] and re.match(r'(starts|catches) ', cmdline): + if logact: pglog("{}: {} at {}".format(CPID['PID'], cmdline, cinfo), logact) + else: + CPID['PID'] = "{}({})-{}{}".format(PGLOG['HOSTNAME'], os.getpid(), PGLOG['CURUID'], cinfo) + if logact: pglog("{}: {}".format(CPID['PID'], cmdline), logact) + CPID['CMD'] = cmdline + + CPID['CTM'] = ctime + +# +# Function: pglog(msg, logact) return FAILURE or log message if not exit +# msg -- message to log +# locact -- logging actions: MSGLOG, WARNLG, ERRLOG, EXITLG, EMLLOG, & SNDEML +# +# log and display message/error and exit program according logact value +# +def pglog(msg, logact = MSGLOG): + + retmsg = None + logact &= PGLOG['LOGMASK'] # filtering the log actions + if logact&RCDMSG: logact |= MSGLOG + if PGLOG['NOQUIT']: logact &= ~EXITLG + if logact&EMEROL: + if logact&EMLLOG: logact &= ~EMLLOG + if not logact&ERRLOG: logact &= ~EMEROL + + if msg: msg = msg.lstrip() # remove leading whitespaces for logging message + + if logact&EXITLG: + ext = "Exit 1 in {}\n".format(os.getcwd()) + if not msg: + msg = ext + else: + msg = msg.rstrip() + msg += "; " + ext + else: + if msg and not re.search(r'(\n|\r)$', msg): msg += "\n" + if logact&RETMSG: retmsg = msg + + if logact&EMLALL: + if logact&SNDEML or not msg: + title = (msg if msg else "Message from {}-{}".format(PGLOG['HOSTNAME'], get_command())) + msg = send_email(title.rstrip()) + elif msg: + set_email(msg, logact) + + if not msg: return (retmsg if retmsg else FAILURE) + + if logact&EXITLG and (PGLOG['EMLMSG'] or PGLOG['SUMMSG'] or PGLOG['ERRMSG'] or PGLOG['PRGMSG']): + if not logact&EMLALL: set_email(msg, logact) + title = "ABORTS {}-{}".format(PGLOG['HOSTNAME'], get_command()) + set_email((("ABORTS " + CPID['PID']) if CPID['PID'] else title), EMLTOP) + msg += send_email(title) + + if logact&LOGERR: # make sure error is always logged + 
msg = break_long_string(msg) + if logact&(ERRLOG|EXITLG): + cmdstr = get_error_command(int(time.time()), logact) + msg = cmdstr + msg + + if not logact&NOTLOG: + if logact&ERRLOG: + if not PGLOG['ERRFILE']: PGLOG['ERRFILE'] = re.sub(r'.log$', '.err', PGLOG['LOGFILE']) + ERR = open("{}/{}".format(PGLOG['LOGPATH'], PGLOG['ERRFILE']), 'a') + ERR.write(msg) + if not logact&(EMLALL|SKPTRC): ERR.write(get_call_trace()) + ERR.close() + if logact&EXITLG: + LOG = open("{}/{}".format(PGLOG['LOGPATH'], PGLOG['LOGFILE']), "a") + LOG.write(cmdstr) + LOG.close() + else: + LOG = open("{}/{}".format(PGLOG['LOGPATH'], PGLOG['LOGFILE']), 'a') + LOG.write(msg) + LOG.close() + + if not PGLOG['BCKGRND'] and logact&(ERRLOG|WARNLG): + OUT = sys.stderr if logact&(ERRLOG|EXITLG) else sys.stdout + if logact&BRKLIN: OUT.write("\n") + if logact&SEPLIN: OUT.write(PGLOG['SEPLINE']) + OUT.write(msg) + + + if logact&EXITLG: + pgexit(1) + else: + return (retmsg if retmsg else FAILURE) + +# +# check and disconnet database before exit +# +def pgexit(stat = 0): + + if PGLOG['PGDBBUF']: PGLOG['PGDBBUF'].close() + sys.exit(stat) + +# +# get a command string for error log dump +# +def get_error_command(ctime, logact): + + if not CPID['PID']: CPID['PID'] = "{}-{}-{}".format(PGLOG['HOSTNAME'], get_command(), PGLOG['CURUID']) + cmdstr = "{} {}".format((("ABORTS" if logact&ERRLOG else "QUITS") if logact&EXITLG else "ERROR"), CPID['PID']) + cmdstr = cmd_execute_time(cmdstr, (ctime - CPID['CTM'])) + if CPID['CPID']: cmdstr += " {} <=".format(CPID['CPID']) + cmdstr += " {} at {}\n".format(break_long_string(CPID['CMD'], 40, "...", 1), current_datetime(ctime)) + + return cmdstr + +# +# get call trace track +# +def get_call_trace(cut = 1): + + t = traceback.extract_stack() + n = len(t) - cut + str = '' + sep = 'Trace: ' + for i in range(n): + tc = t[i] + str += "{}{}({}){}".format(sep, tc[0], tc[1], ("" if tc[2] == '' else "{%s()}" % tc[2])) + if i == 0: sep = '=>' + + return str + "\n" if str else "" + +# +# log message, msg, for degugging processes according to the debug level +# +def pgdbg(level, msg = None, do_trace = True): + + if not PGLOG['DBGLEVEL']: return # no further action + + if not isinstance(level, int): + ms = re.match(r'^(\d+)', level) + level = int(ms.group(1)) if ms else 0 + + levels = [0, 0] + if isinstance(PGLOG['DBGLEVEL'], int): + levels[1] = PGLOG['DBGLEVEL'] + else: + ms = re.match(r'^(\d+)$', PGLOG['DBGLEVEL']) + if ms: + levels[1] = int(ms.group(1)) + else: + ms = re.match(r'(\d*)-(\d*)', PGLOG['DBGLEVEL']) + if ms: + levels[0] = int(ms.group(1)) if ms.group(1) else 0 + levels[1] = int(ms.group(2)) if ms.group(2) else 9999 + + if level > levels[1] or level < levels[0]: return # debug level is out of range + + if 'DBGPATH' in PGLOG: + dfile = PGLOG['DBGPATH'] + '/' + PGLOG['DBGFILE'] + else: + dfile = PGLOG['DBGFILE'] + if not msg: + pglog("Append debug Info (levels {}-{}) to {}".format(levels[0], levels[1], dfile), WARNLG) + msg = "DEBUG for " + CPID['PID'] + " " + if CPID['CPID']: msg += CPID['CPID'] + " <= " + msg += break_long_string(CPID['CMD'], 40, "...", 1) + + # logging debug info + DBG = open(dfile, 'a') + DBG.write("{}:{}\n".format(level, msg)) + if do_trace: DBG.write(get_call_trace()) + DBG.close() + +# +# return trimed string (strip leading and trailling spaces); remove comments led by '#' if rmcmt > 0 +# +def pgtrim(line, rmcmt = 1): + + if line: + if rmcmt: + if re.match(r'^\s*#', line): # comment line + line = '' + elif rmcmt > 1: + ms = re.search(r'^(.+)\s\s+\#', line) + if ms: line = 
ms.group(1) # remove comment and its leading whitespaces + else: + ms = re.search(r'^(.+)\s+\#', line) + if ms: line = ms.group(1) # remove comment and its leading whitespace + + line = line.strip() # remove leading and trailing whitespaces + + return line + +# +# Function: show_usage(progname: Perl program name to get file "progname.usg") +# +# show program usage in file "PGLOG['PUSGDIR']/progname.usg" on screen with unix +# system function 'pg', exit program when done. +# +def show_usage(progname, opts = None): + + usgname = PGLOG['PUSGDIR'] + '/' + progname + '.usg' + if opts: + # show usage for individual option of dsarch + for opt in opts: + if opts[opt][0] == 0: + msg = "Mode" + elif opts[opt][0] == 1: + msg = "Single-Value Information" + elif opts[opt][0] == 2: + msg = "Multi-Value Information" + else: + msg = "Action" + + sys.stdout.write("\nDescription of {} Option -{}:\n".format(msg, opt)) + IN = open(usgname, 'r') + nilcnt = begin = 0 + for line in IN: + if begin == 0: + rx = " -{} or -".format(opt) + if re.match(rx, line): begin = 1 + elif re.match(r'^\s*$', line): + if nilcnt: break + nilcnt = 1 + else: + if re.match(r'\d[\.\s\d]', line): break # section title + if nilcnt and re.match(r' -\w\w or -', line): break + nilcnt = 0 + if begin: sys.stdout.write(line) + IN.close() + else: + os.system("more " + usgname) + + pgexit(0) + +# +# compare error message to patterns saved in PGLOG['ERR2STD'] +# return 1 if matched; 0 otherwise +# +def err2std(line): + + for err in PGLOG['ERR2STD']: + if line.find(err) > -1: return 1 + return 0 + +# +# compare message to patterns saved in PGLOG['STD2ERR'] +# return 1 if matched; 0 otherwise +# +def std2err(line): + + for out in PGLOG['STD2ERR']: + if line.find(out) > -1: return 1 + return 0 + +# +# Function: pgsystem(pgcmd, logact, cmdopt, instr) +# pgcmd - Linux system command, can be a string, "ls -l", or a list, ['ls', '-l'] +# logact - logging action option, defaults to PgLOG.LOGWRN +# cmdopt - command control option, default to 5 (1+4) +# 0 - no command control, +# 1 - log pgcmd (include the sub command calls), +# 2 - log standard output, +# 4 - log error output +# 7 - log all (pgcmd, and standard/error outputs), +# 8 - log command with time, +# 16 - return standard output message upon success +# 32 - log error as standard output +# 64 - force returning FAILURE if called process aborts +# 128 - tries 2 times for failed command before quits +# 256 - cache standard error message +# 512 - log instr & seconds with pgcmd if cmdopt&1 +# 1024 - turn on shell +# instr - input string passing to the command via stdin if not None +# seconds - number of seconds to wait for a timeout process if > 0 +# +def pgsystem(pgcmd, logact = LOGWRN, cmdopt = 5, instr = None, seconds = 0): + + ret = SUCCESS + if not pgcmd: return ret # empty command + + act = logact&~EXITLG + if act&ERRLOG: + act &= ~ERRLOG + act |= WARNLG + + if act&MSGLOG: act |= FRCLOG # make sure system calls always logged + cmdact = act if cmdopt&1 else 0 + doshell = True if cmdopt&1024 else PGLOG['DOSHELL'] + + if isinstance(pgcmd, str): + cmdstr = pgcmd + if not doshell and re.search(r'[*?<>|;]', pgcmd): doshell = True + execmd = pgcmd if doshell else shlex.split(pgcmd) + else: + cmdstr = shlex.join(pgcmd) + execmd = cmdstr if doshell else pgcmd + + if cmdact: + if cmdopt&8: + cmdlog("starts '{}'".format(cmdstr), None, cmdact) + else: + pglog("> " + cmdstr, cmdact) + if cmdopt&512 and (instr or seconds): + msg = '' + if seconds: msg = 'Timeout = {} Seconds'.format(seconds) + if instr: msg 
+= ' With STDIN:\n' + instr + if msg: pglog(msg, cmdact) + stdlog = act if cmdopt&2 else 0 + cmdflg = cmdact|stdlog + abort = -1 if cmdopt&64 else 0 + loops = 2 if cmdopt&128 else 1 + PGLOG['SYSERR'] = error = retbuf = outbuf = errbuf = '' + for loop in range(1, loops+1): + last = time.time() + try: + if instr: + FD = Popen(execmd, shell=doshell, stdout=PIPE, stderr=PIPE, stdin=PIPE) + if seconds: + outbuf, errbuf = FD.communicate(input=instr.encode(), timeout=seconds) + else: + outbuf, errbuf = FD.communicate(input=instr.encode()) + else: + FD = Popen(execmd, shell=doshell, stdout=PIPE, stderr=PIPE) + if seconds: + outbuf, errbuf = FD.communicate(timeout=seconds) + else: + outbuf, errbuf = FD.communicate() + except TimeoutError as e: + errbuf = str(e) + FD.kill() + ret = FAILURE + except Exception as e: + errbuf = str(e) + ret = FAILURE + else: + ret = FAILURE if FD.returncode else SUCCESS + if isinstance(outbuf, bytes): outbuf = str(outbuf, errors='replace') + if isinstance(errbuf, bytes): errbuf = str(errbuf, errors='replace') + + if errbuf and cmdopt&32: + outbuf += errbuf + if cmdopt&256: PGLOG['SYSERR'] = errbuf + errbuf = '' + + if outbuf: + lines = outbuf.split('\n') + for line in lines: + line = strip_output_line(line.strip()) + if not line: continue + if PGLOG['STD2ERR'] and std2err(line): + if cmdopt&260: error += line + "\n" + if abort == -1 and re.match('ABORTS ', line): abort = 1 + else: + if re.match(r'^>+ ', line): + line = '>' + line + if cmdflg: pglog(line, cmdflg) + elif stdlog: + pglog(line, stdlog) + if cmdopt&16: retbuf += line + "\n" + + if errbuf: + lines = errbuf.split('\n') + for line in lines: + line = strip_output_line(line.strip()) + if not line: continue + if PGLOG['ERR2STD'] and err2std(line): + if stdlog: pglog(line, stdlog) + if cmdopt&16: retbuf += line + "\n" + else: + if cmdopt&260: error += line + "\n" + if abort == -1 and re.match('ABORTS ', line): abort = 1 + + if ret == SUCCESS and abort == 1: ret = FAILURE + end = time.time() + last = end - last + + if error: + if ret == FAILURE: + error = "Error Execute: {}\n{}".format(cmdstr, error) + else: + error = "Error From: {}\n{}".format(cmdstr, error) + + if loop > 1: error = "Retry " + if cmdopt&256: PGLOG['SYSERR'] += error + if cmdopt&4: + errlog = (act|ERRLOG) + if ret == FAILURE and loop >= loops: errlog |= logact + pglog(error, errlog) + + if last > PGLOG['CMDTIME'] and not re.search(r'(^|/|\s)(dsarch|dsupdt|dsrqst|rdacp|rdasub)\s', cmdstr): + cmdstr = "> {} Ends By {}".format(break_long_string(cmdstr, 100, "...", 1), current_datetime()) + cmd_execute_time(cmdstr, last, cmdact) + + if ret == SUCCESS or loop >= loops: break + time.sleep(6) + + if ret == FAILURE and retbuf and cmdopt&272 == 272: + if PGLOG['SYSERR']: PGLOG['SYSERR'] += '\n' + PGLOG['SYSERR'] += retbuf + retbuf = '' + + return (retbuf if cmdopt&16 else ret) + +# +# strip carrage return '\r', but keep ending newline '\n' +# +def strip_output_line(line): + + ms = re.search(r'\r([^\r]+)\r*$', line) + if ms: return ms.group(1) + + ms = re.search(r'\s\.+\s+(\d+)%\s+', line) + if ms and int(ms.group(1)) != 100: return None + + return line + +# +# show command running time string formated by seconds_to_string_time() +# +def cmd_execute_time(cmdstr, last, logact = None): + + msg = cmdstr + + if last >= PGLOG['CMDTIME']: # show running for at least one minute + msg += " ({})".format(seconds_to_string_time(last)) + + if logact: + return pglog(msg, logact) + else: + return msg + +# +# convert given seconds to string time with units of 
S-Second,M-Minute,H-Hour,D-Day +# +def seconds_to_string_time(seconds, showzero = 0): + + msg = '' + s = m = h = 0 + + if seconds > 0: + s = seconds%60 # seconds (0-59) + minutes = int(seconds/60) # total minutes + m = minutes%60 # minutes (0-59) + if minutes >= 60: + hours = int(minutes/60) # total hours + h = hours%24 # hours (0-23) + if hours >= 24: + msg += "{}D".format(int(hours/24)) # days + if h: msg += "{}H".format(h) + if m: msg += "{}M".format(m) + if s: + msg += "%dS"%(s) if isinstance(s, int) else "{:.3f}S".format(s) + elif showzero: + msg = "0S" + + return msg + +# +# wrap function to call pgsystem() with a timeout control +# return FAILURE if error eval or time out +# +def tosystem(cmd, timeout = 0, logact = LOGWRN, cmdopt = 5, instr = None): + + if not timeout: timeout = PGLOG['TIMEOUT'] # set default timeout if missed + return pgsystem(cmd, logact, cmdopt, instr, timeout) + +# +# insert breaks, default to '\n', for every length, default to 1024, +# for long string; return specified number lines if mline given +# +def break_long_string(lstr, limit = 1024, bsign = "\n", mline = 200, bchars = ' &;', minlmt = 20, eline = 0): + + length = len(lstr) if lstr else 0 + if length <= limit: return lstr + + if bsign is None: bsign = "\n" + if bchars is None: bchars = ' &;' + addbreak = offset = 0 + retstr = "" + elines = [] + if eline > mline: eline = mline + mcnt = mline - eline + ecnt = 0 + + while offset < length: + bpos = lstr[offset:].find(bsign) + blen = bpos if bpos > -1 else (length - offset) + if blen == 0: + offset += 1 + substr = "" if addbreak else bsign + addbreak = 0 + elif blen <= limit: + blen += 1 + substr = lstr[offset:(offset+blen)] + offset += blen + addbreak = 0 + else: + substr = lstr[offset:(offset+limit)] + bpos = limit - 1 + while bpos > minlmt: + char = substr[bpos] + if bchars.find(char) >= 0: break + bpos -= 1 + if bpos > minlmt: + bpos += 1 + substr = substr[:bpos] + offset += bpos + else: + offset += limit + addbreak = 1 + substr += bsign + + if mcnt: + retstr += substr + mcnt -= 1 + if mcnt == 0 and eline == 0: break + elif eline > 0: + elines.append(substr) + ecnt += 1 + else: + break + + if ecnt > 0: + if ecnt > eline: + retstr += "..." 
+ bsign + mcnt = ecnt - eline + else: + mcnt = 0 + + while mcnt < ecnt: + retstr += elines[mcnt] + mcnt += 1 + + return retstr + +# +# join two paths by remove overlapping directories +# diff = 0: join given pathes +# 1: remove path1 from path2 +# +def join_paths(path1, path2, diff = 0): + + if not path2: return path1 + if not path1 or not diff and re.match('/', path2): return path2 + + if diff: + ms = re.match(r'{}/(.*)'.format(path1), path2) + if ms: return ms.group(1) + + adir1 = path1.split('/') + adir2 = path2.split('/') + while adir2 and not adir2[0]: adir2.pop(0) + while adir1 and adir2 and adir2[0] == "..": + adir2.pop(0) + adir1.pop() + while adir2 and adir2[0] == ".": adir2.pop(0) + + if adir1 and adir2: + len1 = len(adir1) + len2 = len(adir2) + idx1 = len1-1 + idx2 = mcnt = 0 + while idx2 < len1 and idx2 < len2: + if adir1[idx1] == adir2[idx2]: + mcnt = 1 + break + idx2 += 1 + + if mcnt > 0: + while mcnt <= idx2: + if adir1[idx1-mcnt] != adir2[idx2-mcnt]: break + mcnt += 1 + + if mcnt > idx2: # remove mcnt matching directories + while mcnt > 0: + adir2.pop(0) + mcnt -= 1 + + if diff: + return '/'.join(adir2) + else: + return '/'.join(adir1 + adir2) + +# +# validate if a command for a given BATCH host is accessable and executable +# +# Return SUCCESS if valid; FAILURE if not +# +def valid_batch_host(host, logact = 0): + + HOST = host.upper() + return SUCCESS if HOST in BCHCMDS and valid_command(BCHCMDS[HOST], logact) else FAILURE + +# +# validate if a given command is accessable and executable +# +# Return the full command path if valid; '' if not +# +def valid_command(cmd, logact = 0): + + ms = re.match(r'^(\S+)( .*)$', cmd) + if ms: + option = ms.group(2) + cmd = ms.group(1) + else: + option = '' + if cmd not in COMMANDS: + buf = shutil.which(cmd) + if buf is None: + if logact: pglog(cmd + ": executable command not found", logact) + buf = '' + elif option: + buf += option + COMMANDS[cmd] = buf + + return COMMANDS[cmd] + +# +# add carbon copies to PGLOG['CCDADDR'] +# +def add_carbon_copy(cc = None, isstr = None, exclude = 0, specialist = None): + + + if not cc: + if cc is None and isstr is None: PGLOG['CCDADDR'] = '' + else: + emails = re.split(r'[,\s]+', cc) if isstr else cc + for email in emails: + if not email or email.find('/') >= 0 or email == 'N': continue + if email == "S": + if not specialist: continue + email = specialist + + if email.find('@') == -1: email += "@ucar.edu" + if exclude and exclude.find(email) > -1: continue + if PGLOG['CCDADDR']: + if PGLOG['CCDADDR'].find(email) > -1: continue # email Cc'd already + PGLOG['CCDADDR'] += ", " + PGLOG['CCDADDR'] += email + +# +# get the current host name; or batch sever name if getbatch is 1 +# +def get_host(getbatch = 0): + + if getbatch and PGLOG['CURBID'] != 0: + host = PGLOG['PGBATCH'] + elif PGLOG['HOSTNAME']: + return PGLOG['HOSTNAME'] + else: + host = socket.gethostname() + + return get_short_host(host) + +# +# strip domain names and retrun the server name itself +# +def get_short_host(host): + + if not host: return '' + ms = re.match(r'^([^\.]+)\.', host) + if ms: host = ms.group(1) + if PGLOG['HOSTNAME'] and (host == 'localhost' or host == PGLOG['HOSTNAME']): return PGLOG['HOSTNAME'] + HOST = host.upper() + if HOST in BCHCMDS: return HOST + + return host + +# +# get a live SLURM host name +# +def get_slurm_host(): + + global SLMHOSTS + + if not SLMSTATS and PGLOG['SLMHOSTS']: + SLMHOSTS = PGLOG['SLMHOSTS'].split(':') + for host in SLMHOSTS: + SLMSTATS[host] = 1 + + for host in SLMHOSTS: + if host in SLMSTATS and 
+
+#
+# validate if a command for a given BATCH host is accessible and executable
+#
+# Return SUCCESS if valid; FAILURE if not
+#
+def valid_batch_host(host, logact = 0):
+
+    HOST = host.upper()
+    return SUCCESS if HOST in BCHCMDS and valid_command(BCHCMDS[HOST], logact) else FAILURE
+
+#
+# validate if a given command is accessible and executable
+#
+# Return the full command path if valid; '' if not
+#
+def valid_command(cmd, logact = 0):
+
+    ms = re.match(r'^(\S+)( .*)$', cmd)
+    if ms:
+        option = ms.group(2)
+        cmd = ms.group(1)
+    else:
+        option = ''
+    if cmd not in COMMANDS:
+        buf = shutil.which(cmd)
+        if buf is None:
+            if logact: pglog(cmd + ": executable command not found", logact)
+            buf = ''
+        elif option:
+            buf += option
+        COMMANDS[cmd] = buf
+
+    return COMMANDS[cmd]
+
+#
+# add carbon copies to PGLOG['CCDADDR']
+#
+def add_carbon_copy(cc = None, isstr = None, exclude = 0, specialist = None):
+
+    if not cc:
+        if cc is None and isstr is None: PGLOG['CCDADDR'] = ''
+    else:
+        emails = re.split(r'[,\s]+', cc) if isstr else cc
+        for email in emails:
+            if not email or email.find('/') >= 0 or email == 'N': continue
+            if email == "S":
+                if not specialist: continue
+                email = specialist
+
+            if email.find('@') == -1: email += "@ucar.edu"
+            if exclude and exclude.find(email) > -1: continue
+            if PGLOG['CCDADDR']:
+                if PGLOG['CCDADDR'].find(email) > -1: continue   # email Cc'd already
+                PGLOG['CCDADDR'] += ", "
+            PGLOG['CCDADDR'] += email
+
+#
+# get the current host name; or the batch server name if getbatch is 1
+#
+def get_host(getbatch = 0):
+
+    if getbatch and PGLOG['CURBID'] != 0:
+        host = PGLOG['PGBATCH']
+    elif PGLOG['HOSTNAME']:
+        return PGLOG['HOSTNAME']
+    else:
+        host = socket.gethostname()
+
+    return get_short_host(host)
+
+#
+# strip domain names and return the server name itself
+#
+def get_short_host(host):
+
+    if not host: return ''
+    ms = re.match(r'^([^\.]+)\.', host)
+    if ms: host = ms.group(1)
+    if PGLOG['HOSTNAME'] and (host == 'localhost' or host == PGLOG['HOSTNAME']): return PGLOG['HOSTNAME']
+    HOST = host.upper()
+    if HOST in BCHCMDS: return HOST
+
+    return host
+
+#
+# get a live SLURM host name
+#
+def get_slurm_host():
+
+    global SLMHOSTS
+
+    if not SLMSTATS and PGLOG['SLMHOSTS']:
+        SLMHOSTS = PGLOG['SLMHOSTS'].split(':')
+        for host in SLMHOSTS:
+            SLMSTATS[host] = 1
+
+    for host in SLMHOSTS:
+        if host in SLMSTATS and SLMSTATS[host]: return host
+
+    return None
+
+#
+# get a live PBS host name
+#
+def get_pbs_host():
+
+    global PBSHOSTS
+
+    if not PBSSTATS and PGLOG['PBSHOSTS']:
+        PBSHOSTS = PGLOG['PBSHOSTS'].split(':')
+        for host in PBSHOSTS:
+            PBSSTATS[host] = 1
+
+    for host in PBSHOSTS:
+        if host in PBSSTATS and PBSSTATS[host]: return host
+
+    return None
+
+#
+# set host status, 0 dead & 1 live, for one or all available slurm hosts
+#
+def set_slurm_host(host = None, stat = 0):
+
+    global SLMHOSTS
+
+    if host:
+        SLMSTATS[host] = stat
+    else:
+        if not SLMHOSTS and PGLOG['SLMHOSTS']:
+            SLMHOSTS = PGLOG['SLMHOSTS'].split(':')
+        for host in SLMHOSTS:
+            SLMSTATS[host] = stat
+
+#
+# set host status, 0 dead & 1 live, for one or all available pbs hosts
+#
+def set_pbs_host(host = None, stat = 0):
+
+    global PBSHOSTS
+
+    if host:
+        PBSSTATS[host] = stat
+    else:
+        if not PBSHOSTS and PGLOG['PBSHOSTS']:
+            PBSHOSTS = PGLOG['PBSHOSTS'].split(':')
+        for host in PBSHOSTS:
+            PBSSTATS[host] = stat
+
+#
+# reset the batch host name in case it was not set properly
+#
+def reset_batch_host(bhost, logact = LOGWRN):
+
+    BCHHOST = bhost.upper()
+
+    if BCHHOST != PGLOG['PGBATCH']:
+        if PGLOG['CURBID'] > 0:
+            pglog("{}-{}: Batch ID is set, cannot change Batch host to {}".format(PGLOG['PGBATCH'], PGLOG['CURBID'], BCHHOST), logact)
+        else:
+            ms = re.search(r'(^|:){}(:|$)'.format(BCHHOST), PGLOG['BCHHOSTS'])
+            if ms:
+                PGLOG['PGBATCH'] = BCHHOST
+                if PGLOG['CURBID'] == 0: PGLOG['CURBID'] = -1
+            elif PGLOG['PGBATCH']:
+                PGLOG['PGBATCH'] = ''
+                PGLOG['CURBID'] = 0
+
+#
+# return the base command name of the current process
+#
+def get_command(cmdstr = None):
+
+    if not cmdstr: cmdstr = sys.argv[0]
+    cmdstr = op.basename(cmdstr)
+    ms = re.match(r'^(.+)\.(py|pl)$', cmdstr)
+    if ms:
+        return ms.group(1)
+    else:
+        return cmdstr
+
+#
+# wrap a given command cmd with either sudo or the setuid wrapper pgstart_<username>
+# to run as user asuser
+#
+def get_local_command(cmd, asuser = None):
+
+    cuser = PGLOG['SETUID'] if PGLOG['SETUID'] else PGLOG['CURUID']
+    if not asuser or cuser == asuser: return cmd
+
+    if cuser == PGLOG['RDAUSER']:
+        wrapper = "pgstart_" + asuser
+        if valid_command(wrapper): return "{} {}".format(wrapper, cmd)
+    elif PGLOG['SUDORDA'] and asuser == PGLOG['RDAUSER']:
+        return "sudo -u {} {}".format(PGLOG['RDAUSER'], cmd)   # sudo as user rdadata
+
+    return cmd
+
+#
+# wrap a given command cmd with either sudo or the setuid wrapper pgstart_<username>
+# to run as user asuser on a given remote host
+#
+def get_remote_command(cmd, host, asuser = None):
+
+#    if host and not re.match(PGLOG['HOSTNAME'], host): cmd = "ssh {} {}".format(host, cmd)
+    return get_local_command(cmd, asuser)
+
+#
+# wrap a given hpss command cmd with sudo placed either before or after hcmd
+# (defaults to hsi) to run as user asuser
+#
+def get_hpss_command(cmd, asuser = None, hcmd = None):
+
+    cuser = PGLOG['SETUID'] if PGLOG['SETUID'] else PGLOG['CURUID']
+    if not hcmd: hcmd = 'hsi'
+
+    if asuser and cuser != asuser:
+        if cuser == PGLOG['RDAUSER']:
+            return "{} sudo -u {} {}".format(hcmd, asuser, cmd)   # setuid wrapper as user asuser
+        elif PGLOG['SUDORDA'] and asuser == PGLOG['RDAUSER']:
+            return "sudo -u {} {} {}".format(PGLOG['RDAUSER'], hcmd, cmd)   # sudo as user rdadata
+
+    if cuser != PGLOG['RDAUSER']:
+        if re.match(r'^ls ', cmd) and hcmd == 'hsi':
+            return "hpss" + cmd   # use 'hpssls' instead of 'hsi ls'
+        elif re.match(r'^htar -tvf', hcmd):
+            hcmd = hcmd.replace('htar -tvf', 'htarmember', 1)   # use 'htarmember' instead of 'htar -tvf'
+        elif re.match(r'^hsi ls', hcmd):
+            hcmd = hcmd.replace('hsi ls', 'hpssls', 1)   # use 'hpssls' instead of 'hsi ls'
+
+    return "{} {}".format(hcmd, cmd)
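+
+# For example (illustrative; 'rdadata' is assumed as PGLOG['RDAUSER'] and 'joe' is a
+# hypothetical specialist): with PGLOG['SUDORDA'] set, get_local_command('ls /data', 'rdadata')
+# called by 'joe' returns 'sudo -u rdadata ls /data', while get_local_command('ls /data', 'joe')
+# called by 'rdadata' returns 'pgstart_joe ls /data' if that setuid wrapper command exists.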
+
+#
+# wrap a given sync command for given host name with/without sudo
+#
+def get_sync_command(host, asuser = None):
+
+    host = get_short_host(host)
+
+    if (not (PGLOG['SETUID'] and PGLOG['SETUID'] == PGLOG['RDAUSER']) and
+        (not asuser or asuser == PGLOG['RDAUSER'])):
+        return "sync" + host
+
+    return host + "-sync"
+
+#
+# set PGLOG['SETUID'] as needed
+#
+def set_suid(cuid = 0):
+
+    if not cuid: cuid = PGLOG['EUID']
+    if cuid != PGLOG['EUID'] or cuid != PGLOG['RUID']:
+        os.setreuid(cuid, cuid)
+        PGLOG['SETUID'] = pwd.getpwuid(cuid).pw_name
+        if not (PGLOG['SETUID'] == PGLOG['RDAUSER'] or cuid == PGLOG['RUID']):
+            set_specialist_environments(PGLOG['SETUID'])
+            PGLOG['CURUID'] = PGLOG['SETUID']   # set CURUID to a specific specialist
+
+#
+# set common PGLOG values
+#
+def set_common_pglog():
+
+    PGLOG['CURDIR'] = os.getcwd()
+
+    # set current user id
+    PGLOG['RUID'] = os.getuid()
+    PGLOG['EUID'] = os.geteuid()
+    PGLOG['CURUID'] = pwd.getpwuid(PGLOG['RUID']).pw_name
+    try:
+        PGLOG['RDAUID'] = pwd.getpwnam(PGLOG['RDAUSER']).pw_uid
+        PGLOG['RDAGID'] = grp.getgrnam(PGLOG['RDAGRP']).gr_gid
+    except:
+        PGLOG['RDAUID'] = 0
+        PGLOG['RDAGID'] = 0
+    if PGLOG['CURUID'] == PGLOG['RDAUSER']: PGLOG['SETUID'] = PGLOG['RDAUSER']
+
+    PGLOG['HOSTNAME'] = get_host()
+    for htype in HOSTTYPES:
+        ms = re.match(r'^{}(-|\d|$)'.format(htype), PGLOG['HOSTNAME'])
+        if ms:
+            PGLOG['HOSTTYPE'] = HOSTTYPES[htype]
+            break
+    PGLOG['DEFDSID'] = 'd000000' if PGLOG['NEWDSID'] else 'ds000.0'
+    PGLOG['NOTAROOT'] = '|'.join([PGLOG['OLDAROOT'], PGLOG['OLDBROOT'], PGLOG['BACKROOT']])
+    PGLOG['NOTBROOT'] = '|'.join([PGLOG['OLDAROOT'], PGLOG['OLDBROOT'], PGLOG['ARCHROOT']])
+    PGLOG['ALLROOTS'] = '|'.join([PGLOG['OLDAROOT'], PGLOG['OLDBROOT'], PGLOG['ARCHROOT'], PGLOG['BACKROOT']])
+    SETPGLOG("USRHOME", "PGUSRHOME")
+    SETPGLOG("DSSHOME", "PGDSSHOME")
+    SETPGLOG("ADDPATH", "PGADDPATH")
+    SETPGLOG("ADDLIB", "PGADDLIB")
+    SETPGLOG("OTHPATH", "PGOTHPATH")
+    SETPGLOG("PSQLHOME", "PGPSQLHOME")
+    SETPGLOG("DSGHOSTS", "PGDSGHOSTS")
+    SETPGLOG("DSIDCHRS", "PGDSIDCHRS")
+
+    if not os.getenv('HOME'): os.environ['HOME'] = "{}/{}".format(PGLOG['USRHOME'], PGLOG['CURUID'])
+    SETPGLOG("HOMEBIN", os.environ.get('HOME') + "/bin")
+
+    if 'SLURM_JOBID' in os.environ:
+        PGLOG['CURBID'] = int(os.getenv('SLURM_JOBID'))
+        PGLOG['PGBATCH'] = PGLOG['SLMNAME']
+    elif 'PBS_JOBID' in os.environ:
+        sbid = os.getenv('PBS_JOBID')
+        ms = re.match(r'^(\d+)', sbid)
+        PGLOG['CURBID'] = int(ms.group(1)) if ms else -1
+        PGLOG['PGBATCH'] = PGLOG['PBSNAME']
+    else:
+        PGLOG['CURBID'] = 0
+        PGLOG['PGBATCH'] = ''
+
+    pgpath = PGLOG['HOMEBIN']
+    PGLOG['LOCHOME'] = "/ncar/rda/setuid"
+    if not op.isdir(PGLOG['LOCHOME']): PGLOG['LOCHOME'] = "/usr/local/decs"
+    pgpath += ":{}/bin".format(PGLOG['LOCHOME'])
+    locpath = "{}/bin/{}".format(PGLOG['DSSHOME'], PGLOG['HOSTTYPE'])
+    if op.isdir(locpath): pgpath += ":" + locpath
+    pgpath = add_local_path("{}/bin".format(PGLOG['DSSHOME']), pgpath, 1)
+    if PGLOG['PSQLHOME']:
+        locpath = PGLOG['PSQLHOME'] + "/bin"
+        if op.isdir(locpath): pgpath += ":" + locpath
+    pgpath = add_local_path(os.getenv('PATH'), pgpath, 1)
+    if PGLOG['HOSTTYPE'] == 'dav': pgpath = add_local_path('/glade/u/apps/opt/qstat-cache/bin:/opt/pbs/bin', pgpath, 1)
+    if 'OTHPATH' in PGLOG and PGLOG['OTHPATH']:
+        pgpath = add_local_path(PGLOG['OTHPATH'], pgpath, 1)
+    if PGLOG['ADDPATH']: pgpath = add_local_path(PGLOG['ADDPATH'], pgpath, 1)
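+    # append the standard system bin directories last so the RDA-specific
+    # directories above take precedence in PATH lookups (descriptive note)
+    pgpath = 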
add_local_path("/bin:/usr/bin:/usr/local/bin:/usr/sbin", pgpath, 1) + + os.environ['PATH'] = pgpath + os.environ['SHELL'] = '/bin/sh' + # set PGLOG values with environments and defaults + SETPGLOG("DSSDBHM", PGLOG['DSSHOME']+"/dssdb") # dssdb home dir + SETPGLOG("LOGPATH", PGLOG['DSSDBHM']+"/log") # path to log file + SETPGLOG("LOGFILE", "pgdss.log") # log file name + SETPGLOG("EMLFILE", "pgemail.log") # email log file name + SETPGLOG("ERRFILE", '') # error file name + SETPGLOG("EMLSEND", "/usr/lib/sendmail -t") # send email command + SETPGLOG("DBGLEVEL", '') # debug level + SETPGLOG("DBGPATH", PGLOG['DSSDBHM']+"/log") # path to debug log file + SETPGLOG("OBJCTBKT", "rda-data") # default Bucket on Object Store + SETPGLOG("BACKUPEP", "rda-quasar") # default Globus Endpoint on Quasar + SETPGLOG("DRDATAEP", "rda-quasar-drdata") # DRDATA Globus Endpoint on Quasar + SETPGLOG("DBGFILE", "pgdss.dbg") # debug file name + SETPGLOG("CNFPATH", PGLOG['DSSHOME']+"/config") # path to configuration files + SETPGLOG("PUSGDIR", PGLOG['DSSDBHM']+"/prog_usage") # path to program usage files + SETPGLOG("DSSURL", "https://rda.ucar.edu") # current dss web URL + SETPGLOG("RQSTURL", "/datasets/request") # request URL path + SETPGLOG("WEBSERVERS", "PGWEBSERVERS") # webserver names for Web server + PGLOG['WEBHOSTS'] = PGLOG['WEBSERVERS'].split(':') if PGLOG['WEBSERVERS'] else [] + SETPGLOG("DBMODULE", '') + SETPGLOG("LOCDATA", "/data") + + # set dss web homedir + SETPGLOG("DSSWEB", PGLOG['LOCDATA']+"/web") + SETPGLOG("DSWHOME", PGLOG['DSSWEB']+"/datasets") # datast web root path + PGLOG['HOMEROOTS'] = "{}|{}".format(PGLOG['DSSHOME'], PGLOG['DSWHOME']) + SETPGLOG("DSSDATA", "PGDSSDATA") # dss data root path + SETPGLOG("DSDHOME", PGLOG['DSSDATA']+"/data") # dataset data root path + SETPGLOG("DECSHOME", PGLOG['DSSDATA']+"/decsdata") # dataset decsdata root path + SETPGLOG("DSHHOME", PGLOG['DECSHOME']+"/helpfiles") # dataset help root path + SETPGLOG("UPDTWKP", PGLOG['DSSDATA']+"/work") # dsupdt work root path + SETPGLOG("TRANSFER", PGLOG['DSSDATA']+"/transfer") # dss transfer partition + SETPGLOG("RQSTHOME", PGLOG['TRANSFER']+"/dsrqst") # dsrqst home + SETPGLOG("DSAHOME", "PGDSAHOME") # dataset data alternate root path + SETPGLOG("RQSTALTH", "PGRQSTALTH") # alternate dsrqst path + SETPGLOG("GPFSHOST", "PGGPFSHOST") # empty if writable to glade + SETPGLOG("PSQLHOST", "PGPSQLHOST") # host name for postgresql server + SETPGLOG("SLMHOSTS", "PGSLMHOSTS") # host names for SLURM server + SETPGLOG("PBSHOSTS", "PGPBSHOSTS") # host names for PBS server + SETPGLOG("CHKHOSTS", "PGCHKHOSTS") # host names for dscheck daemon + SETPGLOG("PVIEWHOST", "PGPVIEWHOST") # host name for view only postgresql server + SETPGLOG("FTPUPLD", PGLOG['TRANSFER']+"/rossby") # ftp upload path + PGLOG['GPFSROOTS'] = "{}|{}|{}".format(PGLOG['DSDHOME'], PGLOG['UPDTWKP'], PGLOG['RQSTHOME']) + + if 'ECCODES_DEFINITION_PATH' not in os.environ: + os.environ['ECCODES_DEFINITION_PATH'] = "/usr/local/share/eccodes/definitions" + os.environ['history'] = '0' + + # set tmp dir + SETPGLOG("TMPPATH", "PGTMPPATH") + if not PGLOG['TMPPATH']: PGLOG['TMPPATH'] = "/data/ptmp" + + SETPGLOG("TMPDIR", '') + if not PGLOG['TMPDIR']: + PGLOG['TMPDIR'] = "/glade/campaign/collections/rda/scratch/" + PGLOG['CURUID'] + os.environ['TMPDIR'] = PGLOG['TMPDIR'] + + # empty diretory for HOST-sync + PGLOG['TMPSYNC'] = PGLOG['DSSDBHM'] + "/tmp/.syncdir" + if 'DSSHOME' in PGLOG and PGLOG['DSSHOME'] and not op.exists(PGLOG['TMPSYNC']): + pgsystem("mkdir " + PGLOG['TMPSYNC'], 0, LGWNEX, 4) 
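+        # presumably made group-writable next so all RDA specialists can stage
+        # HOST-sync files in this shared directory (descriptive note)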
+ pgsystem("chmod 775 " + PGLOG['TMPSYNC'], LOGWRN, 4) + + os.umask(2) + +# +# append or prepend locpath to pgpath +# +def add_local_path(locpath, pgpath, append = 0): + + if not locpath: + return pgpath + elif not pgpath: + return locpath + + paths = locpath.split(':') + + for path in paths: + if re.match(r'^\./*$', path): continue + path = path.rstrip('\\') + ms = re.search(r'(^|:){}(:|$)'.format(path), pgpath) + if ms: continue + if append: + pgpath += ":" + path + else: + pgpath = path + ":" + pgpath + + return pgpath + +# +# set PGLOG value; return a string or an array reference if sep is not emty +# +def SETPGLOG(name, value = ''): + + oval = PGLOG[name] if name in PGLOG else '' + nval = get_environment(name, ('' if re.match('PG', value) else value)) + PGLOG[name] = nval if nval else oval + +# +# set specialist home and return the default shell +# +def set_specialist_home(specialist): + + if specialist == PGLOG['CURUID']: return # no need reset + if 'MAIL' in os.environ and re.search(PGLOG['CURUID'], os.environ['MAIL']): + os.environ['MAIL'] = re.sub(PGLOG['CURUID'], specialist, os.environ['MAIL']) + + home = "{}/{}".format(PGLOG['USRHOME'], specialist) + shell = "tcsh" + buf = pgsystem("grep ^{}: /etc/passwd".format(specialist), LOGWRN, 20) + if buf: + lines = buf.split('\n') + for line in lines: + ms = re.search(r':(/.+):(/.+)', line) + if ms: + home = ms.group(1) + shell = op.basename(ms.group(2)) + break + + if home != os.environ['HOME'] and op.exists(home): + os.environ['HOME'] = home + + return shell + +# +# set environments for a specified specialist +# +def set_specialist_environments(specialist): + + shell = set_specialist_home(specialist) + resource = os.environ['HOME'] + "/.tcshrc" + checkif = 0 # 0 outside of if; 1 start if, 2 check envs, -1 checked already + missthen = 0 + try: + rf = open(resource, 'r') + except: + return # skip if cannot open + + nline = rf.readline() + while nline: + line = pgtrim(nline) + nline = rf.readline() + if not line: continue + if checkif == 0: + ms = re.match(r'^if(\s|\()', line) + if ms: checkif = 1 # start if + elif missthen: + missthen = 0 + if re.match(r'^then$', line): continue # then on next line + checkif = 0 # end of inline if + elif re.match(r'^endif', line): + checkif = 0 # end of if + continue + elif checkif == -1: # skip the line + continue + elif checkif == 2 and re.match(r'^else', line): + checkif = -1 # done check envs in if + continue + + if checkif == 1: + if line == 'else': + checkif = 2 + continue + elif re.search(r'if\W', line): + if(re.search(r'host.*!', line, re.I) and not re.search(PGLOG['HOSTNAME'], line) or + re.search(r'host.*=', line, re.I) and re.search(PGLOG['HOSTNAME'], line)): + checkif = 2 + if re.search(r'\sthen$', line): + continue + else: + missthen = 1 + if checkif == 1: continue + else: + continue + + ms = re.match(r'^setenv\s+(.*)', line) + if ms: one_specialist_environment(ms.group(1)) + + rf.close() + + SETPGLOG("HOMEBIN", PGLOG['PGBINDIR']) + os.environ['PATH'] = add_local_path(PGLOG['HOMEBIN'], os.environ['PATH'], 0) + +# +# set one environment for specialist +# +def one_specialist_environment(line): + + ms = re.match(r'^(\w+)[=\s]+(.+)$', line) + if not ms: return + (var, val) = ms.groups() + if re.match(r'^(PATH|SHELL|IFS|CDPATH|)$', var): return + if val.find('$') > -1: val = replace_environments(val) + ms = re.match(r'^(\"|\')(.*)(\"|\')$', val) + if ms: val = ms.group(2) # remove quotes + os.environ[var] = val + +# +# get and repalce environment variables in ginve string; defaults to the values 
in PGLOG +# +def replace_environments(envstr, default = '', logact = 0): + + ishash = isinstance(default, dict) + ms = re.search(r'(^|.)\$({*)(\w+)(}*)', envstr) + if ms: + lead = ms.group(1) + name = ms.group(3) + rep = ms.group(2) + name + ms.group(4) + env = get_environment(name, (PGLOG[name] if name in PGLOG else (default[name] if ishash else default)), logact) + pre = (lead if (env or lead != ":") else '') + envstr = re.sub(r'{}\${}'.format(lead, rep), (pre+env), envstr) + + return envstr + +# +# validate if the current host is a valid host to process +# +def check_process_host(hosts, chost = None, mflag = None, pinfo = None, logact = None): + + ret = 1 + error = '' + if not mflag: mflag = 'G' + if not chost: chost = get_host(1) + + if mflag == 'M': # exact match + if not hosts or hosts != chost: + ret = 0 + if pinfo: error = "not matched exactly" + elif mflag == 'I': # inclusive match + if not hosts or hosts.find('!') == 0 or hosts.find(chost) < 0: + ret = 0 + if pinfo: error = "not matched inclusively" + elif hosts: + if hosts.find(chost) >= 0: + if hosts.find('!') == 0: + ret = 0 + if pinfo: error = "matched exclusively" + elif hosts.find('!') != 0: + ret = 0 + if pinfo: error = "not matched" + + if error: + if logact is None: logact = LOGERR + pglog("{}: CANNOT be processed on {} for hosthame {}".format(pinfo, chost, error), logact) + + return ret + +# +# convert special characters +# +def convert_chars(name, default = None): + + if not name or re.match(r'^[a-zA-Z0-9]+$', name): return name # no need convert + + z = ord('z') + newchrs = ochrs = '' + if default == None: default = name + for i in range(len(name)): + ch = name[i] + if re.match(r'^[a-zA-Z0-9]$', ch): + newchrs += ch + elif ord(ch) > z and ochrs != None: + if not ochrs: + ochrs = None + with open(PGLOG['DSSHOME'] + "/lib/ExtChrs.txt", "r") as CHR: + ochrs = CHR.readline() + nchrs = CHR.readline() + if ochrs is None: continue + idx = ochrs.find(ch) + if idx >= 0: newchrs += nchrs[idx] + + if newchrs: + return newchrs + else: + return default + +# +# Retrieve host and process id +# +def current_process_info(realpid = 0): + + if realpid or PGLOG['CURBID'] < 1: + return [PGLOG['HOSTNAME'], os.getpid()] + else: + return [PGLOG['PGBATCH'], PGLOG['CURBID']] + +# +# convert given @ARGV to string. 
quote the entries with spaces
+#
+def argv_to_string(argv = None, quote = 1, action = None):
+
+    argstr = ''
+    if argv is None: argv = sys.argv[1:]
+    for arg in argv:
+        if argstr: argstr += ' '
+        ms = re.search(r'([<>\|\s])', arg)
+        if ms:
+            if action:
+                pglog("{}: Cannot {} for special character '{}' in argument value".format(arg, action, ms.group(1)), LGEREX)
+            if quote:
+                if re.search(r"\'", arg):
+                    arg = "\"{}\"".format(arg)
+                else:
+                    arg = "'{}'".format(arg)
+        argstr += arg
+
+    return argstr
+
+#
+# convert an integer to a non-10 based string (decimal digits only, so base <= 10);
+# for example, int2base(13, 2) returns '1101'
+#
+def int2base(x, base):
+
+    if x == 0: return '0'
+    negative = 0
+    if x < 0:
+        negative = 1
+        x = -x
+
+    dgts = []
+    while x:
+        dgts.append(str(int(x%base)))
+        x = int(x/base)
+    if negative: dgts.append('-')
+    dgts.reverse()
+
+    return ''.join(dgts)
+
+#
+# convert a non-10 based string (decimal digits only, so base <= 10) to an integer;
+# for example, base2int(1101, 2) returns 13
+#
+def base2int(x, base):
+
+    if not isinstance(x, int): x = int(x)
+    if x == 0: return 0
+
+    negative = 0
+    if x < 0:
+        negative = 1
+        x = -x
+
+    num = 0
+    fact = 1
+    while x:
+        num += (x%10)*fact
+        fact *= base
+        x = int(x/10)
+    if negative: num = -num
+
+    return num
+
+#
+# convert integer to ordinal string
+#
+def int2order(num):
+
+    ordstr = ['th', 'st', 'nd', 'rd']
+    snum = str(num)
+    num %= 100
+    if num > 19: num %= 10
+    if num > 3: num = 0
+
+    return snum + ordstr[num]
+
+#
+# Always call this function to initialize global variables for all applications
+#
+set_common_pglog()
diff --git a/src/rda_python_common/PgLock.py b/src/rda_python_common/PgLock.py
new file mode 100644
index 0000000..1347d64
--- /dev/null
+++ b/src/rda_python_common/PgLock.py
@@ -0,0 +1,639 @@
+#
+###############################################################################
+#
+# Title : PgLock.py
+# Author : Zaihua Ji, zji@ucar.edu
+# Date : 08/18/2020
+# Purpose : python library module for functions to lock RDADB records
+#
+# Work File : $DSSHOME/lib/python/PgLock.py
+# Github : https://github.com/NCAR/rda-shared-libraries.git
+#
+###############################################################################
+#
+import re
+import time
+import PgLOG
+import PgSIG
+import PgUtil
+import PgFile
+import PgDBI
+
+DOLOCKS = {-2 : 'Force Unlock', -1 : 'Unlock', 0 : 'Unlock', 1 : 'Relock', 2 : 'Force Relock'}
+
+def end_db_transaction(idx):
+
+    if idx > 0:
+        PgDBI.endtran()
+    else:
+        PgDBI.aborttran()
+    return idx
+
+#
+# check and return running process status: 1-running/uncheckable, 0-stopped
+#
+def check_process_running_status(host, pid, dolock, lmsg, logact):
+
+    if not PgFile.local_host_action(host, DOLOCKS[dolock], lmsg, logact): return 1
+    stat = PgSIG.check_host_pid(host, pid)
+    if stat > 0:
+        if logact: PgLOG.pglog("{}: Cannot {}".format(lmsg, DOLOCKS[dolock]), logact)
+        return 1
+    if stat < 0 and dolock > -2 and dolock < 2:
+        if logact: PgLOG.pglog("{}: Failed checking lock info to {}".format(lmsg, DOLOCKS[dolock]), logact)
+        return 1
+    return 0
+
+#
+# lock/unlock dscheck record
+#
+# lock if dolock > 0, unlock if <= 0, skip for locked on different host if 0 or 1
+# force unlock if < -1 or force lock if 2
+#
+def lock_dscheck(cidx, dolock, logact = 0):
+
+    if not cidx: return 0
+    if logact:
+        logerr = logact|PgLOG.ERRLOG
+        logout = logact&(~PgLOG.EXITLG)
+    else:
+        logerr = PgLOG.LOGERR
+        logout = PgLOG.LOGWRN if dolock > 1 or dolock < 0 else 0
+    table = "dscheck"
+    cnd = "cindex = {}".format(cidx)
+    fields = "command, pid, lockhost, lockcmd"
+    pgrec = PgDBI.pgget(table, fields, cnd, logerr)
+    if not pgrec: return 0   # dscheck is gone or db error
+
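+    # pid/lockhost/lockcmd identify the current lock owner; lckpid below is
+    # negated when this very process already holds the lock, so no relock is
+    # needed (descriptive note)
+    pid = 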
pgrec['pid'] + host = pgrec['lockhost'] + lockcmd = pgrec['lockcmd'] + (chost, cpid) = PgLOG.current_process_info() + clockcmd = PgLOG.get_command() + + if pid == 0 and dolock <= 0: return cidx # no need unlock + lckpid = -pid if pid > 0 and pid == cpid and not PgUtil.pgcmp(host, chost, 1) else pid + if dolock > 0 and lckpid < 0: return cidx # no need lock again + + cinfo = "{}-{}-Chk{}({})".format(PgLOG.PGLOG['HOSTNAME'], PgLOG.current_datetime(), cidx, pgrec['command']) + if lckpid > 0 and (clockcmd == "dscheck" or lockcmd != "dscheck"): + lmsg = "{} Locked by {}/{}/{}".format(cinfo, pid, host, lockcmd) + if check_process_running_status(host, pid, dolock, lmsg, logout): return -cidx + + record = {} + if dolock > 0: + if pid != cpid: record['pid'] = cpid + if host != chost: record['lockhost'] = chost + if lockcmd != clockcmd: record['lockcmd'] = clockcmd + else: + if pid: record['pid'] = 0 + if not record: return cidx + + lkrec = PgDBI.pgget(table, fields, cnd, logerr|PgLOG.DOLOCK) + if not lkrec: return end_db_transaction(0) # dscheck is gone or db error + + if (not lkrec['pid'] or + lkrec['pid'] == pid and PgUtil.pgcmp(lkrec['lockhost'], host, 1) == 0 or + lkrec['pid'] == cpid and PgUtil.pgcmp(lkrec['lockhost'], chost, 1) == 0): + if not PgDBI.pgupdt(table, record, cnd, logerr): + if logout: PgLOG.pglog(cinfo + ": Error update lock", logout) + cidx = -cidx + else: + if logout: PgLOG.pglog("{}: Relocked {}/{}".format(cinfo, lkrec['pid'], lkrec['lockhost']), logout) + cidx = -cidx + + return end_db_transaction(cidx) + +# +# lock dscheck record for given cidx, pid and host +# +def lock_host_dscheck(cidx, pid, host, logact = 0): + + if not (cidx and pid): return 0 + if logact: + logerr = logact|PgLOG.ERRLOG + logout = logact&(~PgLOG.EXITLG) + else: + logerr = PgLOG.LOGERR + logout = 0 + table = "dscheck" + cnd = "cindex = {}".format(cidx) + fields = "command, pid, lockhost, lockcmd" + pgrec = PgDBI.pgget(table, fields, cnd, logerr) + if not pgrec: return 0 # dscheck is gone or db error + (chost, cpid) = PgLOG.current_process_info() + + cinfo = "{}-{}-Chk{}({})".format(PgLOG.PGLOG['HOSTNAME'], PgLOG.current_datetime(), cidx, pgrec['command']) + if pgrec['pid']: + if pid == pgrec['pid'] and PgUtil.pgcmp(pgrec['lockhost'], host, 1) == 0: + return -cidx # locked by the real process already + elif cpid != pgrec['pid'] or PgUtil.pgcmp(pgrec['lockhost'], chost, 1): + if logout: + lmsg = "{} Locked by {}/{}/{}".format(cinfo, pid, host, pgrec['lockcmd']) + PgLOG.pglog(lmsg +": Cannot Lock", logout) + return -cidx # locked by other process + + record = {} + record['pid'] = pid + record['lockhost'] = host + record['lockcmd'] = PgLOG.get_command(pgrec['command']) + + lkrec = PgDBI.pgget(table, fields, cnd, logerr|PgLOG.DOLOCK) + if not lkrec: return end_db_transaction(0) + + if (not lkrec['pid'] or + lkrec['pid'] == pid and PgUtil.pgcmp(lkrec['lockhost'], host, 1) == 0 or + lkrec['pid'] == cpid and PgUtil.pgcmp(lkrec['lockhost'], chost, 1) == 0): + if not PgDBI.pgupdt(table, record, cnd, logerr): + if logout: PgLOG.pglog(cinfo + ": Error update lock", logout) + cidx = -cidx + else: + if logout: PgLOG.pglog("{}: Relocked {}/{}".format(cinfo, lkrec['pid'], lkrec['lockhost']), logout) + cidx = -cidx + + return end_db_transaction(cidx) + +# +# lock/unlock data request record +# +# lock if dolock > 0, unlock if <= 0, skip for locked on different host if 0 or 1 +# force unlock if < -1 or 2 +# +def lock_request(ridx, dolock, logact = 0): + + if not ridx: return 0 + if logact: + logerr = logact|PgLOG.ERRLOG + 
logout = logact&(~PgLOG.EXITLG) + else: + logerr = PgLOG.LOGERR + logout = PgLOG.LOGWRN if dolock > 1 or dolock < 0 else 0 + table = "dsrqst" + cnd = "rindex = {}".format(ridx) + fields = "pid, lockhost" + pgrec = PgDBI.pgget(table, fields, cnd, logerr) + if not pgrec: return 0 # request is gone or db error + + pid = pgrec['pid'] + host = pgrec['lockhost'] + (chost, cpid) = PgLOG.current_process_info() + + if pid == 0 and dolock <= 0: return ridx # no need unlock + lckpid = -pid if pid > 0 and pid == cpid and not PgUtil.pgcmp(host, chost, 1) else pid + if dolock > 0 and lckpid < 0: return ridx # no need lock again + + rinfo = "{}-{}-Rqst{}".format(PgLOG.PGLOG['HOSTNAME'], PgLOG.current_datetime(), ridx) + if lckpid > 0: + lmsg = "{} Locked by {}/{}".format(rinfo, pid, host) + if check_process_running_status(host, pid, dolock, lmsg, logout): return -ridx + + record = {} + if dolock > 0: + if pid != cpid: record['pid'] = cpid + if host != chost: record['lockhost'] = chost + if record: record['locktime'] = int(time.time()) + else: + if pid: record['pid'] = 0 + if host: record['lockhost'] = "" + if not record: return ridx + + lkrec = PgDBI.pgget(table, fields, cnd, logerr|PgLOG.DOLOCK) + if not lkrec: return end_db_transaction(0) # request is gone or db error + + if (not lkrec['pid'] or + lkrec['pid'] == pid and PgUtil.pgcmp(lkrec['lockhost'], host, 1) == 0 or + lkrec['pid'] == cpid and PgUtil.pgcmp(lkrec['lockhost'], chost, 1) == 0): + if not PgDBI.pgupdt(table, record, cnd, logerr): + if logout: PgLOG.pglog(rinfo + ": Error update lock", logout) + ridx = -ridx + else: + if logout: PgLOG.pglog("{}: Relocked {}/{}".format(rinfo, lkrec['pid'], lkrec['lockhost']), logout) + ridx = -ridx + + return end_db_transaction(ridx) + +# +# lock dsrqst record for given cidx, pid and host +# +def lock_host_request(ridx, pid, host, logact = 0): + + if not (ridx and pid): return 0 + if logact: + logerr = logact|PgLOG.ERRLOG + logout = logact&(~PgLOG.EXITLG) + else: + logerr = PgLOG.LOGERR + logout = 0 + table = "dsrqst" + cnd = "rindex = {}".format(ridx) + fields = "pid, lockhost" + pgrec = PgDBI.pgget(table, fields, cnd, logerr) + if not pgrec: return 0 # dscheck is gone or db error + + rinfo = "{}-{}-Rqst{}".format(PgLOG.PGLOG['HOSTNAME'], PgLOG.current_datetime(), ridx) + if pgrec['pid']: + if pid == pgrec['pid'] and PgUtil.pgcmp(pgrec['lockhost'], host, 1) == 0: return ridx + if logout: + lmsg = "{} Locked by {}/{}".format(rinfo, pid, host) + PgLOG.pglog(lmsg +": Cannot Lock", logout) + return -ridx + record = {} + record['pid'] = pid + record['lockhost'] = host + record['locktime'] = int(time.time()) + + pgrec = PgDBI.pgget(table, fields, cnd, logerr|PgLOG.DOLOCK) + if not pgrec: return end_db_transaction(0) + + if not pgrec['pid'] or pid == pgrec['pid'] and PgUtil.pgcmp(pgrec['lockhost'], host, 1) == 0: + if not PgDBI.pgupdt(table, record, cnd, logerr): + if logout: PgLOG.pglog(rinfo + ": Error update lock", logout) + ridx = -ridx + else: + if logout: PgLOG.pglog("{}: Relocked {}/{}".format(rinfo, pgrec['pid'], pgrec['lockhost']), logout) + ridx = -ridx + + return end_db_transaction(ridx) + +# +# lock/unlock dataset update record +# +# lock if dolock > 0, unlock if <= 0, skip for locked on different host if 0 or 1 +# force unlock if < -1 or 2 +# +def lock_update(lidx, linfo, dolock, logact = 0): + + if not lidx: return 0 + if logact: + logerr = logact|PgLOG.ERRLOG + logout = logact&(~PgLOG.EXITLG) + else: + logerr = PgLOG.LOGERR + logout = PgLOG.LOGWRN if dolock > 1 or dolock < 0 else 0 + table = 
"dlupdt" + cnd = "lindex = {}".format(lidx) + fields = "pid, hostname" + pgrec = PgDBI.pgget(table, fields, cnd, logerr) + if not pgrec: return 0 # update record is deleted + + pid = pgrec['pid'] + host = pgrec['hostname'] + (chost, cpid) = PgLOG.current_process_info() + + if pid == 0 and dolock <= 0: return lidx # no need unlock + lckpid = -pid if pid > 0 and pid == cpid and not PgUtil.pgcmp(host, chost, 1) else pid + if dolock > 0 and lckpid < 0: return lidx # no need lock again + + if not linfo: linfo = "{}-{}-Updt{}".format(PgLOG.PGLOG['HOSTNAME'], PgLOG.current_datetime(), lidx) + if lckpid > 0: + lmsg = "{} Locked by {}/{}".format(linfo, pid, host) + if check_process_running_status(host, pid, dolock, lmsg, logout): return -lidx + + record = {} + if dolock > 0: + if pid != cpid: record['pid'] = cpid + if host != chost: record['hostname'] = chost + if record: record['locktime'] = int(time.time()) + else: + if pid: record['pid'] = 0 + if host: record['hostname'] = '' + if not record: return lidx + + lkrec = PgDBI.pgget(table, fields, cnd, logerr|PgLOG.DOLOCK) + if not lkrec: return end_db_transaction(0) # update record is deleted + + if not lkrec['pid'] or lkrec['pid'] == pid and PgUtil.pgcmp(lkrec['hostname'], host, 1) == 0: + if not PgDBI.pgupdt(table, record, cnd, logerr): + if logout: PgLOG.pglog(linfo + ": Error update lock", logout) + lidx = -lidx + else: + if logout: PgLOG.pglog("{}: Relocked {}/{}".format(linfo, lkrec['pid'], lkrec['hostname']), logout) + lidx = -lidx + + return end_db_transaction(lidx) + +# +# lock/unlock dataset update control record +# +# lock if dolock > 0, unlock if <= 0, skip for locked on different host if 0 or 1, +# unlock dead process if < -1 or 2, force unlock if -2 +# +def lock_update_control(cidx, dolock, logact = 0): + + if not cidx: return 0 + if logact: + logerr = logact|PgLOG.ERRLOG + logout = logact&(~PgLOG.EXITLG) + else: + logerr = PgLOG.LOGERR + logout = PgLOG.LOGWRN if dolock > 1 or dolock < 0 else 0 + table = "dcupdt" + cnd = "cindex = {}".format(cidx) + fields = "pid, lockhost" + pgrec = PgDBI.pgget(table, fields, cnd, logerr) + if not pgrec: return 0 # update control record is deleted + + pid = pgrec['pid'] + host = pgrec['lockhost'] + (chost, cpid) = PgLOG.current_process_info() + + if pid == 0 and dolock <= 0: return cidx # no need unlock + lckpid = -pid if pid > 0 and pid == cpid and not PgUtil.pgcmp(host, chost, 1) else pid + if dolock > 0 and lckpid < 0: return cidx # no need lock again + + cinfo = "{}-{}-UC{}".format(PgLOG.PGLOG['HOSTNAME'], PgLOG.current_datetime(), cidx) + if lckpid > 0: + lmsg = "{} Locked by {}/{}".format(cinfo, pid, host) + if check_process_running_status(host, pid, dolock, lmsg, logout): return -cidx + + record = {} + if dolock > 0: + if pid != cpid: record['pid'] = cpid + if host != chost: record['lockhost'] = chost + if record: record['chktime'] = int(time.time()) + else: + if pid: record['pid'] = 0 + if host: record['lockhost'] = '' + if not record: return cidx + + lkrec = PgDBI.pgget(table, fields, cnd, logerr|PgLOG.DOLOCK) + if not lkrec: return end_db_transaction(0) # update control record is deleted + + if (not lkrec['pid'] or + lkrec['pid'] == pid and PgUtil.pgcmp(lkrec['lockhost'], host, 1) == 0 or + lkrec['pid'] == cpid and PgUtil.pgcmp(lkrec['lockhost'], chost, 1) == 0): + if not PgDBI.pgupdt(table, record, cnd, logerr): + if logout: PgLOG.pglog(cinfo + ": Error update lock", logout) + cidx = -cidx + else: + if logout: PgLOG.pglog("{}: Relocked {}/{}".format(cinfo, lkrec['pid'], lkrec['lockhost']), 
logout) + cidx = -cidx + + return end_db_transaction(cidx) + +# +# lock dscheck record for given cidx, pid and host +# +def lock_host_update_control(cidx, pid, host, logact = 0): + + if not (cidx and pid): return 0 + if logact: + logerr = logact|PgLOG.ERRLOG + logout = logact&(~PgLOG.EXITLG) + else: + logerr = PgLOG.LOGERR + logout = 0 + table = "dcupdt" + cnd = "cindex = {}".format(cidx) + fields = "pid, lockhost" + pgrec = PgDBI.pgget(table, fields, cnd, logerr) + if not pgrec: return 0 # dscheck is gone or db error + + cinfo = "{}-{}-UC{}".format(PgLOG.PGLOG['HOSTNAME'], PgLOG.current_datetime(), cidx) + if pgrec['pid']: + if pid == pgrec['pid'] and PgUtil.pgcmp(pgrec['lockhost'], host, 1) == 0: return cidx + if logout: + lmsg = "{} Locked by {}/{}".format(cinfo, pid, host) + PgLOG.pglog(lmsg +": Cannot Lock", logout) + return -cidx + + record = {} + record['pid'] = pid + record['lockhost'] = host + record['chktime'] = int(time.time()) + + pgrec = PgDBI.pgget(table, fields, cnd, logerr|PgLOG.DOLOCK) + if not pgrec: return end_db_transaction(0) + + if not pgrec['pid'] or pid == pgrec['pid'] and PgUtil.pgcmp(pgrec['lockhost'], host, 1) == 0: + if not PgDBI.pgupdt(table, record, cnd, logerr): + if logout: PgLOG.pglog(cinfo + ": Error update lock", logout) + cidx = -cidx + else: + if logout: PgLOG.pglog("{}: Relocked {}/{}".format(cinfo, pgrec['pid'], pgrec['lockhost']), logout) + cidx = -cidx + + return end_db_transaction(cidx) + +# +# return lock information of a locked process +# +def lock_process_info(pid, lockhost, runhost = None, pcnt = 0): + + retstr = " {}<{}".format(lockhost, pid) + if pcnt: retstr += "/{}".format(pcnt) + retstr += ">" + if runhost and runhost != lockhost: retstr += '/' + runhost + return retstr + +# +# lock/unlock data request partition record +# +# lock if dolock > 0, unlock if <= 0, skip for locked on different host if 0 or 1 +# force unlock if < -1 or 2 +# +def lock_partition(pidx, dolock, logact = 0): + + if not pidx: return 0 + if logact: + logerr = logact|PgLOG.ERRLOG + logout = logact&(~PgLOG.EXITLG) + else: + logerr = PgLOG.LOGERR + logout = PgLOG.LOGWRN if dolock > 1 or dolock < 0 else 0 + table = "ptrqst" + cnd = "pindex = {}".format(pidx) + fields = "pid, lockhost" + pgrec = PgDBI.pgget(table, "rindex, ptorder, " + fields, cnd, logerr) + if not pgrec: return 0 # request is gone or db error + + ridx = pgrec['rindex'] + pid = pgrec['pid'] + host = pgrec['lockhost'] + (chost, cpid) = PgLOG.current_process_info() + + if pid == 0 and dolock <= 0: return pidx # no need unlock + lckpid = -pid if pid > 0 and pid == cpid and not PgUtil.pgcmp(host, chost, 1) else pid + if dolock > 0 and lckpid < 0: return pidx # no need lock again + + pinfo = "{}-{}-RPT{}(Rqst{}/PTO{})".format(PgLOG.PGLOG['HOSTNAME'], PgLOG.current_datetime(), pidx, ridx, pgrec['ptorder']) + if lckpid > 0: + lmsg = "{} Locked by {}/{}".format(pinfo, pid, host) + if check_process_running_status(host, pid, dolock, lmsg, logout): return -pidx + + record = {} + if dolock > 0: + if pid != cpid: record['pid'] = cpid + if host != chost: record['lockhost'] = chost + if record: record['locktime'] = int(time.time()) + else: + if pid: record['pid'] = 0 + if host: record['lockhost'] = "" + if not record: return pidx + + lkrec = PgDBI.pgget(table, fields, cnd, logerr|PgLOG.DOLOCK) + if not lkrec: return end_db_transaction(0) # request partition is gone or db error + + if (not lkrec['pid'] or + lkrec['pid'] == pid and PgUtil.pgcmp(lkrec['lockhost'], host, 1) == 0 or + lkrec['pid'] == cpid and 
PgUtil.pgcmp(lkrec['lockhost'], chost, 1) == 0):
+        lmsg = update_partition_lock(ridx, record, logout)
+        if lmsg:
+            if logout: PgLOG.pglog("{}: {}".format(pinfo, lmsg), logout)
+            pidx = -pidx
+        elif not PgDBI.pgupdt(table, record, cnd, logerr):
+            if logout: PgLOG.pglog(pinfo + ": Error update lock", logout)
+            pidx = -pidx
+    else:
+        if logout: PgLOG.pglog("{}: Relocked {}/{}".format(pinfo, lkrec['pid'], lkrec['lockhost']), logout)
+        pidx = -pidx
+
+    return end_db_transaction(pidx)
+
+#
+# lock dsrqst partition record for given pidx, pid and host
+#
+def lock_host_partition(pidx, pid, host, logact = 0):
+
+    if not (pidx and pid): return 0
+    if logact:
+        logerr = logact|PgLOG.ERRLOG
+        logout = logact&(~PgLOG.EXITLG)
+    else:
+        logerr = PgLOG.LOGERR
+        logout = 0
+    table = "ptrqst"
+    cnd = "pindex = {}".format(pidx)
+    fields = "pid, lockhost"
+    pgrec = PgDBI.pgget(table, "rindex, ptorder, " + fields, cnd, logerr)
+    if not pgrec: return 0   # request partition is gone or db error
+
+    ridx = pgrec['rindex']
+    pinfo = "{}-{}-RPT{}(Rqst{}/PTO{})".format(PgLOG.PGLOG['HOSTNAME'], PgLOG.current_datetime(), pidx, ridx, pgrec['ptorder'])
+    if pgrec['pid']:
+        if pid == pgrec['pid'] and PgUtil.pgcmp(pgrec['lockhost'], host, 1) == 0: return pidx
+        if logout:
+            lmsg = "{} Locked by {}/{}".format(pinfo, pid, host)
+            PgLOG.pglog(lmsg + ": Cannot Lock", logout)
+        return -pidx
+
+    record = {}
+    record['pid'] = pid
+    record['lockhost'] = host
+    record['locktime'] = int(time.time())
+
+    pgrec = PgDBI.pgget(table, fields, cnd, logerr|PgLOG.DOLOCK)
+    if not pgrec: return end_db_transaction(0)
+
+    if not pgrec['pid'] or pid == pgrec['pid'] and PgUtil.pgcmp(pgrec['lockhost'], host, 1) == 0:
+        lmsg = update_partition_lock(ridx, record, logout)
+        if lmsg:
+            if logout: PgLOG.pglog("{}: {}".format(pinfo, lmsg), logout)
+            pidx = -pidx
+        elif not PgDBI.pgupdt(table, record, cnd, logerr):
+            if logout: PgLOG.pglog(pinfo + ": Error update lock", logout)
+            pidx = -pidx
+    else:
+        if logout: PgLOG.pglog("{}: Relocked {}/{}".format(pinfo, pgrec['pid'], pgrec['lockhost']), logout)
+        pidx = -pidx
+
+    return end_db_transaction(pidx)
+
+#
+# update dsrqst lock info for given partition lock status
+# Return None if all is fine; error message otherwise
+#
+def update_partition_lock(ridx, ptrec, logact = 0):
+
+    if not ridx: return 0
+    if logact:
+        logerr = logact|PgLOG.ERRLOG
+        logout = logact&(~PgLOG.EXITLG)
+    else:
+        logerr = PgLOG.LOGERR
+        logout = PgLOG.LOGWRN
+    table = "dsrqst"
+    lockhost = "partition"
+    cnd = "rindex = {}".format(ridx)
+    pgrec = PgDBI.pgget(table, "pid, lockhost", cnd, logact|PgLOG.DOLOCK)
+    if not pgrec: return "Error get Rqst{} record".format(ridx)   # should not happen
+
+    if pgrec['pid'] > 0 and pgrec['lockhost'] != lockhost:
+        return "Rqst{} locked by non-lockhost process ({}/{})".format(ridx, pgrec['pid'], pgrec['lockhost'])
+
+    record = {}
+    if ptrec['pid'] > 0:
+        record['pid'] = pgrec['pid'] + 1
+        record['lockhost'] = lockhost
+        record['locktime'] = ptrec['locktime']
+    else:
+        if pgrec['pid'] > 1:
+            pcnt = PgDBI.pgget('ptrqst', '', cnd + " AND pid > 0")
+            if pgrec['pid'] > pcnt: pgrec['pid'] = pcnt
+            record['pid'] = pgrec['pid'] - 1
+            record['lockhost'] = lockhost
+        else:
+            record['pid'] = 0
+            record['lockhost'] = ''
+    if not PgDBI.pgupdt(table, record, cnd, logact):
+        return "Error update Rqst{} lock".format(ridx)
+
+    return None
+
+#
+# lock/unlock dataset record for Quasar Backup
+#
+# lock if dolock > 0, unlock if <= 0, skip for locked on different host if 0 or 1,
+# unlock dead process if < -1 or 2, force unlock if -2
+#
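+# Typical usage (an illustrative sketch, not from the library itself; 'd999000'
+# is a hypothetical dsid):
+#   if PgLock.lock_dataset('d999000', 1) > 0:
+#       ...                                  # do the backup work while holding the lock
+#       PgLock.lock_dataset('d999000', 0)    # release the lock when done
+def 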
lock_dataset(dsid, dolock, logact = 0): + + if not dsid: return 0 + if logact: + logerr = logact|PgLOG.ERRLOG + logout = logact&(~PgLOG.EXITLG) + else: + logerr = PgLOG.LOGERR + logout = PgLOG.LOGWRN if dolock > 1 or dolock < 0 else 0 + table = "dataset" + cnd = "dsid = '{}'".format(dsid) + fields = "pid, lockhost" + pgrec = PgDBI.pgget(table, fields, cnd, logerr) + if not pgrec: return 0 # dataset not exists + + pid = pgrec['pid'] + host = pgrec['lockhost'] + (chost, cpid) = PgLOG.current_process_info() + + if pid == 0 and dolock <= 0: return 1 # no need unlock + lckpid = -pid if pid > 0 and pid == cpid and not PgUtil.pgcmp(host, chost, 1) else pid + if dolock > 0 and lckpid < 0: return 1 # no need lock again + + dinfo = "{}-{}-{}".format(PgLOG.PGLOG['HOSTNAME'], PgLOG.current_datetime(), dsid) + if lckpid > 0: + lmsg = "{} Locked by {}/{}".format(dinfo, pid, host) + if check_process_running_status(host, pid, dolock, lmsg, logout): return -1 + + record = {} + if dolock > 0: + if pid != cpid: record['pid'] = cpid + if host != chost: record['lockhost'] = chost + else: + if pid: record['pid'] = 0 + if not record: return 1 + + lkrec = PgDBI.pgget(table, fields, cnd, logerr|PgLOG.DOLOCK) + if not lkrec: return end_db_transaction(0) # dscheck is gone or db error + + lstat = 1 + if (not lkrec['pid'] or + lkrec['pid'] == pid and PgUtil.pgcmp(lkrec['lockhost'], host, 1) == 0 or + lkrec['pid'] == cpid and PgUtil.pgcmp(lkrec['lockhost'], chost, 1) == 0): + if not PgDBI.pgupdt(table, record, cnd, logerr): + if logout: PgLOG.pglog(dinfo + ": Error update lock", logout) + lstat = -1 + else: + if logout: PgLOG.pglog("{}: Relocked {}/{}".format(dinfo, lkrec['pid'], lkrec['lockhost']), logout) + lstat = -1 + + return end_db_transaction(lstat) diff --git a/src/rda_python_common/PgOPT.py b/src/rda_python_common/PgOPT.py new file mode 100644 index 0000000..55e25eb --- /dev/null +++ b/src/rda_python_common/PgOPT.py @@ -0,0 +1,1719 @@ +# +############################################################################### +# +# Title : PgOPT.py +# +# Author : Zaihua Ji, zji@ucar.edu +# Date : 08/26/2020 +# Purpose : python library module for holding global varaibles +# functions for processing options and other global functions +# +# Work File : $DSSHOME/lib/python/PgOPT.py +# Github : https://github.com/NCAR/rda-shared-libraries.git +# +############################################################################### +# +import os +import sys +import re +import time +from os import path as op +import PgLOG +import PgUtil +import PgFile +import PgDBI + +OUTPUT = None +CMDOPTS = {} +INOPTS = {} + +# global variables are used by all applications and this package. +# they need be initialized in application specified packages +ALIAS = {} +TBLHASH = {} + +############################################################################### +# valid options the first hash value: 0 means mode option, 1 means single-value +# option, 2 means multiple-value option, and >=4 means action option the second +# hash values are long option names, either hash keys (considered as short +# option names) or the associated long names can be used. All options, except for +# multi-line value ones, can be specified on command line, while single-value and +# multi-value options, except option -IM for input files, can also given in input +# files long value option names are used in output files all letters of option +# names are case insensitive. 
+# +# The third hash value define bit flags, +# For Action Options: +# -1 - VSN card actions +# >0 - setions +# +# For Mode Options: +# 1 - mode for archiving actions +# 2 - mode for set actions +# +# For Single-Value Info Options: +# 1(0x001) - auto set value +# 2(0x002) - manually set value +# 16(0x010) - convert to integer from commandline and input files, set to 0 if empty +# 32(0x020) - time field +# 128(0x080) - '' allowed for single letter value +# 256(0x100) - date field +# +# For Multi-Value Info Options: +# 1(0x001) - one for multiple +# 2(0x002) - auto-set, +# 4(0x004) - expanded from one +# 8(0x008) - validated +# 16(0x010) - convert to integer from commandline and input files, set to 0 if empty +# 32(0x020) - time field +# 64(0x040) - text field allowing multiple lines +# 128(0x080) - '' allowed for single letter value +# 256(0x100) - date field +# +# The fourth hash values defined retrictions for single letter values +############################################################################### +OPTS = {} + +# global initial optional values +PGOPT = { + 'ACTS' : 0, # carry current action bits + 'UACTS' : 0, # carry dsarch skip check UD action bits + 'CACT' : '', # current short action name + 'IFCNT' : 0, # 1 to read a single Input File at a time + 'ANAME' : '', # cache the application name if set + 'TABLE' : '', # table name the action is on + 'UID' : 0, # user.uid + 'MSET' : 'SA', # Action for multiple sets + 'WIDTH' : 128, # max column width + 'TXTBIT' : 64, # text field bit (0x1000) allow multiple lines + 'PEMAX' : 12, # max count of reuqest partition errors for auto reprocesses + 'REMAX' : 2, # max count of reuqest errors for auto reprocesses + 'RSMAX' : 2000, # max count of gatherxml with options -R -S + 'RCNTL' : None, # placehold for a request control record + 'dcm' : "dcm", + 'sdp' : "sdp", + 'rcm' : "rcm", + 'scm' : "scm", + 'wpg' : "", + 'gatherxml' : "gatherxml", + 'cosconvert' : "cosconvert", + 'emllog' : PgLOG.LGWNEM, + 'emlerr' : PgLOG.LOGERR|PgLOG.EMEROL, + 'emerol' : PgLOG.LOGWRN|PgLOG.EMEROL, + 'emlsum' : PgLOG.LOGWRN|PgLOG.EMLSUM, + 'emlsep' : PgLOG.LGWNEM|PgLOG.SEPLIN, + 'wrnlog' : PgLOG.LOGWRN, + 'errlog' : PgLOG.LOGERR, + 'extlog' : PgLOG.LGEREX, + 'PTYPE' : "CPRV", + 'WDTYP' : "ADNU", + 'HFTYP' : "DS", + 'SDTYP' : "PORWUV", + 'GXTYP' : "DP" +} + +# global default parameters +params = { + 'ES' : "<=>", + 'AO' : "", + 'DV' : "<:>" +} + +WTYPE = { + 'A' : "ARCO", + 'D' : "DATA", + 'N' : "NCAR", + 'U' : "UNKNOWN", +} + +HTYPE = { + 'D' : "DOCUMENT", + 'S' : "SOFTWARE", + 'U' : "UNKNOWN" +} + +HPATH = { + 'D' : "docs", + 'S' : "software", + 'U' : "help" +} + +MTYPE = { + 'P' : "PRIMARY", + 'A' : "ARCHIVING", + 'V' : "VERSION", + 'W' : "WORKING", + 'R' : "ORIGINAL", + 'B' : "BACKUP", + 'O' : "OFFSITE", + 'C' : "CHRONOPOLIS", + 'U' : "UNKNOWN" +} + +STYPE = { + 'O' : "OFFLINE", + 'P' : "PRIMARY", + 'R' : "ORIGINAL", + 'V' : "VERSION", + 'W' : "WORKING", + 'U' : "UNKNOWN" +} + +BTYPE = { + 'B' : "BACKUPONLY", + 'D' : "BACKDRDATA", +} + +# +# process and parsing input information +# aname - application name such as 'dsarch', 'dsupdt', and 'dsrqst' +# +def parsing_input(aname): + + PgLOG.PGLOG['LOGFILE'] = aname + ".log" + PGOPT['ANAME'] = aname + PgDBI.dssdb_dbname() + argv = sys.argv[1:] + if not argv: PgLOG.show_usage(aname) + + PgLOG.cmdlog("{} {}".format(aname, ' '.join(argv))) + + # process command line options to fill option values + option = infile = None + needhelp = 0 + helpopts = {} + for param in argv: + if re.match(r'^(-{0,2}help|-H)$', param, 
re.I): + if option: helpopts[option] = OPTS[option] + needhelp = 1 + continue + + ms = re.match(r'^-([a-zA-Z]\w*)$', param) + if ms: # option parameter + param = ms.group(1) + if option and not needhelp and option not in params: + val = get_default_info(option) + if val is not None: + set_option_value(option, val) + else: + parameter_error("-" + option, "missval") + option = get_option_key(param) + if needhelp: + helpopts[option] = OPTS[option] + break + + # set mode/action options + if OPTS[option][0]&3 == 0: set_option_value(option) + + elif option: + ms =re.match(r"^\'(.*)\'$", param) + if ms: param = ms.group(1) + set_option_value(option, param) + + elif PgUtil.find_dataset_id(param): + set_option_value('DS', param) + + else: + option = get_option_key(param, 3, 1) + if option: + set_option_value(option) + if needhelp: + helpopts[option] = OPTS[option] + break + elif op.exists(param): # assume input file + infile = param + else: + parameter_error(param) + + if needhelp: PgLOG.show_usage(aname, helpopts) + + if option and option not in params: + val = get_default_info(option) + if val is not None: + set_option_value(option, val) + else: + parameter_error("-" + option, "missval") + + # check if only an input filename is given on command line following aname + if infile: + if 'IF' in params: + parameter_error(infile) + else: + params['IF'] = [infile] + + # process given one or multiple input files to fill option values + if 'IF' in params: + PGOPT['IFCNT'] = 1 if PGOPT['CACT'] == 'AQ' else 0 + if OPTS['DS'][0] == 1: + param = validate_infile_names(params['DS']) if 'DS' in params else 0 + else: + param = 1 + get_input_info(params['IF']) + if not param and 'DS' in params: validate_infile_names(params['DS']) + + if not PGOPT['ACTS']: parameter_error(aname, "missact") # no action enter + + if 'DB' in params: + dcnt = len(params['DB']) + for i in range(dcnt): + if i == 0: + PgLOG.PGLOG['DBGLEVEL'] = params['DB'][0] + elif i == 1: + PgLOG.PGLOG['DBGPATH'] = params['DB'][1] + elif i == 2: + PgLOG.PGLOG['DBGFILE'] = params['DB'][2] + PgLOG.pgdbg(PgLOG.PGLOG['DBGLEVEL']) + + if 'GZ' in params: PgLOG.PGLOG['GMTZ'] = PgUtil.diffgmthour() + if 'BG' in params: PgLOG.PGLOG['BCKGRND'] = 1 + +# +# check and get default value for info option, return None if not available +# +def get_default_info(opt): + + olist = OPTS[opt] + if olist[0]&3 and len(olist) > 3: + odval = olist[3] + if not odval or isinstance(odval, int): + return odval + else: + return odval[0] # return the first char of a default string + + return None + +# +# set output file name handler now +# +def open_output(outfile = None): + + global OUTPUT + + if outfile: # result output file + try: + OUTPUT = open(outfile, 'w') + except Exception as e: + PgLOG.pglog("{}: Error open file to write - {}".format(outfile, str(e)), PGOPT['extlog']) + else: # result to STDOUT + OUTPUT = sys.stdout + +# +# return 1 if valid infile names; sys.exit(1) otherwise +# +def validate_infile_names(dsid): + + i = 0 + for infile in params['IF']: + if not validate_one_infile(infile, dsid): return PgLOG.FAILURE + i += 1 + if PGOPT['IFCNT'] and i >= PGOPT['IFCNT']: break + + return i + +# +# validate an input filename against dsid +# +def validate_one_infile(infile, dsid): + + ndsid = PgUtil.find_dataset_id(infile) + if ndsid == None: + return PgLOG.pglog("{}: No dsid identified in Input file name {}!".format(dsid, infile), PGOPT['extlog']) + + fdsid = PgUtil.format_dataset_id(ndsid) + if fdsid != dsid: + return PgLOG.pglog("{}: Different dsid {} found in Input file name 
{}!".format(dsid, fdsid, infile), PGOPT['extlog']) + + return PgLOG.SUCCESS + +# +# gather input information from input files +# +def get_input_info(infiles, table = None): + + i = 0 + for file in infiles: + i += process_infile(file, table) + if not PGOPT['IFCNT'] and PGOPT['CACT'] == 'AQ': PGOPT['IFCNT'] = 1 + if PGOPT['IFCNT']: break + + return i + +# +# validate and get info from a single input file +# +def read_one_infile(infile): + + dsid = params['DS'] + del params['DS'] + if OPTS['DS'][2]&2: OPTS['DS'][2] &= ~2 + if 'DS' in CMDOPTS: del CMDOPTS['DS'] + clean_input_values() + process_infile(infile) + if 'DS' in params: dsid = params['DS'] + if dsid: validate_one_infile(infile, dsid) + + return dsid + +# +# gather input option values from one input file +# +# return 0 if nothing retireved if table is not null +# +def process_infile(infile, table = None): + + if not op.exists(infile): PgLOG.pglog(infile + ": Input file not exists", PGOPT['extlog']) + if table: + PgLOG.pglog("Gather '{}' information from input file '{}'..." .format(table, infile), PGOPT['wrnlog']) + else: + PgLOG.pglog("Gather information from input file '{}'...".format(infile), PGOPT['wrnlog']) + + try: + fd = open(infile, 'r') + except Exception as e: + PgLOG.pglog("{}: Error Open input file - {}!".format(infile, str(e)), PGOPT['extlog']) + else: + lines = fd.readlines() + fd.close() + + opt = None + columns = [] + chktbl = 1 if table else -1 + mpes = r'^(\w+)\s*{}\s*(.*)$'.format(params['ES']) + mpao = r'^(\w+)\s*{}'.format(params['AO']) + # column count, column index, value count, value index, line index, option-set count, end divider flag + colcnt = colidx = valcnt = validx = linidx = setcnt = enddiv = 0 + for line in lines: + linidx += 1 + if linidx%50000 == 0: + PgLOG.pglog("{}: {} lines read".format(infile, linidx), PGOPT['wrnlog']) + if 'NT' not in params: line = PgLOG.pgtrim(line, 2) + if not line: + if opt: set_option_value(opt, '', 1, linidx, line, infile) + continue # skip empty lines + if chktbl > 0: + if re.match(r'^\[{}\]$'.format(table), line, re.I): # found entry for table + chktbl = 0 + clean_input_values() # clean previously saved input values + continue + else: + ms = re.match(r'^\[(\w+)\]$', line) + if ms: + if chktbl == 0: break # stop at next sub-title + if not PGOPT['MSET']: + input_error(linidx, line, infile, ms.group(1) + ": Cannt process sub-title") + elif PGOPT['CACT'] != PGOPT['MSET']: + input_error(linidx, line, infile, "Use Action -{} to Set multiple sub-titles".format(PGOPT['MSET'])) + break # stop getting info if no table given or a different table + + if colcnt == 0: # check single value and action lines first + ms = re.match(mpes, line) + if ms: # one value assignment + key = ms.group(1).strip() + val = ms.group(2) + if val and 'NT' not in params: val = val.strip() + opt = get_option_key(key, 1, 0, linidx, line, infile, table) + set_option_value(opt, val, 0, linidx, line, infile) + if not OPTS[opt][2]&PGOPT['TXTBIT']: opt = None + setcnt += 1 + continue + + ms = re.match(mpao, line) + if ms: # set mode or action option + key = get_option_key(ms.group(1).strip(), 4, 0, linidx, line, infile, table) + set_option_value(key, '', 0, linidx, line, infile) + setcnt += 1 + continue + + # check mutiple value assignment for one or more multi-value options + values = line.split(params['DV']) + valcnt = len(values) + if colcnt == 0: + while colcnt < valcnt: + key = values[colcnt].strip() + if not key: break + opt = get_option_key(key, 2, 1, linidx, line, infile, table) + if not opt: break + 
columns.append(opt) + if opt in params: del params[opt] + colcnt += 1 + if colcnt < valcnt: + if colcnt == (valcnt-1): + enddiv = 1 + else: + input_error(linidx, line, infile, "Multi-value Option Name missed for column {}".format(colcnt+1)) + opt = None + continue + + elif valcnt == 1: + if re.match(mpes, line): + input_error(linidx, line, infile, "Cannot set single value option after Multi-value Options") + elif re.match(mpao, line): + input_error(linidx, line, infile, "Cannot set acttion/mode option after Multi-value Options") + + if opt: # add to multipe line value + val = values.pop(0) + valcnt -= 1 + if val and 'NT' not in params: val = val.strip() + set_option_value(opt, val, 1, linidx, line, infile) + setcnt += 1 + if valcnt == 0: continue # continue to check multiple line value + colidx += 1 + opt = None + + reduced = 0 + valcnt += colidx + if valcnt > colcnt: + if enddiv: + val = values.pop() + if not val.strip(): + valcnt -= 1 + reduced = 1 + if valcnt > colcnt: + input_error(linidx, line, infile, "Too many values({}) provided for {} columns".format(valcnt+colidx, colcnt)) + + if values: + for val in values: + opt = columns[colidx] + colidx += 1 + if val and 'NT' not in params: val = val.strip() + set_option_value(opt, val, 0, linidx, line, infile) + setcnt += 1 + colidx += (reduced-enddiv) + + if colidx == colcnt: + colidx = 0 # done with gathering values of a multi-value line + opt = None + elif opt and not OPTS[opt][2]&PGOPT['TXTBIT']: + colidx += 1 + opt = None + + if setcnt > 0: + if colidx: + if colidx < colcnt: + input_error(linidx, '', infile, "{} of {} values missed".format(colcnt-colidx, colcnt)) + elif enddiv: + input_error(linidx, '', infile, "Miss end divider '{}'".format(params['DV'])) + return 1 # read something + else: + if table: PgLOG.pglog("No option information found for '{}'".format(table), PgLOG.WARNLG) + return 0 # read nothing + +# +# clean params for input option values when set mutiple tables +# +def clean_input_values(): + + global INOPTS + # clean previously saved input values if any + for opt in INOPTS: + del params[opt] + INOPTS = {} + +# +# build a hash record for add or update of a table record +# +def build_record(flds, pgrec, tname, idx = 0): + + record = {} + if not flds: return record + + hash = TBLHASH[tname] + + for key in flds: + if key not in hash: continue + opt = hash[key][0] + field = hash[key][3] if len(hash[key]) == 4 else hash[key][1] + ms = re.search(r'\.(.+)$', field) + if ms: field = ms.group(1) + if opt in params: + if OPTS[opt][0] == 1: + val = params[opt] + else: + if OPTS[opt][2]&2 and pgrec and field in pgrec and pgrec[field]: continue + val = params[opt][idx] + sval = pgrec[field] if pgrec and field in pgrec else None + if sval is None: + if val == '': val = None + elif isinstance(sval, int): + if isinstance(val, str): val = (int(val) if val else None) # change '' to None for int + if PgUtil.pgcmp(sval, val, 1): record[field] = val # record new or changed value + + return record + +# +# set global variable PGOPT['UID'] with value of user.uid, fatal if unsuccessful +# +def set_uid(aname): + + set_email_logact() + + if 'LN' not in params: + params['LN'] = PgLOG.PGLOG['CURUID'] + elif params['LN'] != PgLOG.PGLOG['CURUID']: + params['MD'] = 1 # make sure this set if running as another user + if 'NE' not in params: PgLOG.PGLOG['EMLADDR'] = params['LN'] + if 'DM' in params and re.match(r'^(start|begin)$', params['DM'], re.I): + msg = "'{}' must start Daemon '{} -{}' as '{}'".format(PgLOG.PGLOG['CURUID'], aname, PGOPT['CACT'], 
params['LN']) + else: + msg = "'{}' runs '{} -{}' as '{}'!".format(PgLOG.PGLOG['CURUID'], aname, PGOPT['CACT'], params['LN']) + PgLOG.pglog(msg, PGOPT['wrnlog']) + PgLOG.set_specialist_environments(params['LN']) + + if 'LN' not in params: PgLOG.pglog("Could not get user login name", PGOPT['extlog']) + + validate_dataset() + if OPTS[PGOPT['CACT']][2] > 0: validate_dsowner(aname) + + pgrec = PgDBI.pgget("dssdb.user", "uid", "logname = '{}' AND until_date IS NULL".format(params['LN']), PGOPT['extlog']) + if not pgrec: PgLOG.pglog("Could not get user.uid for " + params['LN'], PGOPT['extlog']) + PGOPT['UID'] = pgrec['uid'] + + open_output(params['OF'] if 'OF' in params else None) + +# +# set global variable PGOPT['UID'] as 0 for a sudo user +# +def set_sudo_uid(aname, uid): + + set_email_logact() + + if PgLOG.PGLOG['CURUID'] != uid: + if 'DM' in params and re.match(r'^(start|begin)$', params['DM'], re.I): + msg = "'{}': must start Daemon '{} -{} as '{}'".format(PgLOG.PGLOG['CURUID'], aname, params['CACT'], uid) + else: + msg = "'{}': must run '{} -{}' as '{}'".format(PgLOG.PGLOG['CURUID'], aname, params['CACT'], uid) + PgLOG.pglog(msg, PGOPT['extlog']) + + PGOPT['UID'] = 0 + params['LN'] = PgLOG.PGLOG['CURUID'] + +# +# set global variable PGOPT['UID'] as 0 for root user +# +def set_root_uid(aname): + + set_email_logact() + + if PgLOG.PGLOG['CURUID'] != "root": + if 'DM' in params and re.match(r'^(start|begin)$', params['DM'], re.I): + msg = "'{}': you must start Daemon '{} -{} as 'root'".format(PgLOG.PGLOG['CURUID'], aname, params['CACT']) + else: + msg = "'{}': you must run '{} -{}' as 'root'".format(PgLOG.PGLOG['CURUID'], aname, params['CACT']) + PgLOG.pglog(msg, PGOPT['extlog']) + + PGOPT['UID'] = 0 + params['LN'] = PgLOG.PGLOG['CURUID'] + +# +# set email logging bits +# +def set_email_logact(): + + if 'NE' in params: + PgLOG.PGLOG['LOGMASK'] &= ~PgLOG.EMLALL # remove all email bits + elif 'SE' in params: + PgLOG.PGLOG['LOGMASK'] &= ~PgLOG.EMLLOG # no normal email + +# +# validate dataset owner +# +# return: 0 or fatal if not valid, 1 if valid, -1 if can not be validated +# +def validate_dsowner(aname, dsid = None, logname = None, pgds = 0, logact = 0): + + if not logname: logname = (params['LN'] if 'LN' in params else PgLOG.PGLOG['CURUID']) + if logname == PgLOG.PGLOG['RDAUSER']: return 1 + + dsids = {} + if dsid: + dsids[dsid] = 1 + elif 'DS' in params: + if OPTS['DS'][0] == 2: + for dsid in params['DS']: + dsids[dsid] = 1 + else: + dsids[params['DS']] = 1 + else: + return -1 + + if not pgds and 'MD' in params: pgds = 1 + if not logact: logact = PGOPT['extlog'] + + for dsid in dsids: + if not PgDBI.pgget("dsowner", "", "dsid = '{}' AND specialist = '{}'".format(dsid, logname), PGOPT['extlog']): + if not PgDBI.pgget("dssgrp", "", "logname = '{}'".format(logname), PGOPT['extlog']): + return PgLOG.pglog("'{}' is not DSS Specialist!".format(logname), logact) + elif not pgds: + return PgLOG.pglog("'{}' not listed as Specialist of '{}'\nRun '{}' with Option -MD!".format(logname, dsid, aname), logact) + + return 1 + +# +# validate dataset +# +def validate_dataset(): + + cnt = 1 + if 'DS' in params: + if OPTS['DS'][0] == 2: + for dsid in params['DS']: + cnt = PgDBI.pgget("dataset", "", "dsid = '{}'".format(dsid), PGOPT['extlog']) + if cnt == 0: break + else: + dsid = params['DS'] + cnt = PgDBI.pgget("dataset", "", "dsid = '{}'".format(dsid), PGOPT['extlog']) + + if not cnt: PgLOG.pglog(dsid + " not exists in RDADB!", PGOPT['extlog']) + +# +# validate given group indices or group names +# +def 
+
+#
+# validate given group indices or group names
+#
+def validate_groups(parent = 0):
+
+   if parent:
+      gi = 'PI'
+      gn = 'PN'
+   else:
+      gi = 'GI'
+      gn = 'GN'
+   if (OPTS[gi][2]&8): return # already validated
+
+   dcnd = "dsid = '{}'".format(params['DS'])
+   if gi in params:
+      grpcnt = len(params[gi])
+      i = 0
+      while i < grpcnt:
+         gidx = params[gi][i]
+         if not isinstance(gidx, int) and re.match(r'^(!|<|>|<>)$', gidx): break
+         i += 1
+      if i >= grpcnt: # normal group index given
+         for i in range(grpcnt):
+            gidx = params[gi][i]
+            gidx = int(gidx) if gidx else 0
+            params[gi][i] = gidx
+            if gidx == 0 or (i > 0 and gidx == params[gi][i-1]): continue
+            if not PgDBI.pgget("dsgroup", '', "{} AND gindex = {}".format(dcnd, gidx), PGOPT['extlog']):
+               if i > 0 and parent and params['GI']:
+                  j = 0
+                  while j < i:
+                     if gidx == params['GI'][j]: break
+                     j += 1
+                  if j < i: continue
+               PgLOG.pglog("Group Index {} not in RDADB for {}".format(gidx, params['DS']), PGOPT['extlog'])
+      else: # found non-equal condition sign
+         pgrec = PgDBI.pgmget("dsgroup", "DISTINCT gindex", dcnd + PgDBI.get_field_condition("gindex", params[gi]), PGOPT['extlog'])
+         grpcnt = (len(pgrec['gindex']) if pgrec else 0)
+         if grpcnt == 0:
+            PgLOG.pglog("No Group matches given Group Index condition for " + params['DS'], PGOPT['extlog'])
+
+         params[gi] = pgrec['gindex']
+   elif gn in params:
+      params[gi] = group_id_to_index(params[gn])
+
+   OPTS[gi][2] |= 8 # set validated flag
+
+#
+# get group index array from given group IDs
+#
+def group_id_to_index(grpids):
+
+   count = len(grpids) if grpids else 0
+   if count == 0: return None
+
+   indices = []
+   dcnd = "dsid = '{}'".format(params['DS'])
+   i = 0
+   while i < count:
+      gid = grpids[i]
+      if gid and (re.match(r'^(!|<|>|<>)$', gid) or gid.find('%') > -1): break
+      i += 1
+   if i >= count: # normal group id given
+      for i in range(count):
+         gid = grpids[i]
+         if not gid:
+            indices.append(0)
+         elif i and gid == grpids[i-1]:
+            indices.append(indices[i-1])
+         else:
+            pgrec = PgDBI.pgget("dsgroup", "gindex", "{} AND grpid = '{}'".format(dcnd, gid), PGOPT['extlog'])
+            if not pgrec: PgLOG.pglog("Group ID {} not in RDADB for {}".format(gid, params['DS']), PGOPT['extlog'])
+            indices.append(pgrec['gindex'])
+      return indices
+   else: # found wildcard and/or non-equal condition sign
+      pgrec = PgDBI.pgmget("dsgroup", "DISTINCT gindex", dcnd + PgDBI.get_field_condition("grpid", grpids, 1), PGOPT['extlog'])
+      count = (len(pgrec['gindex']) if pgrec else 0)
+      if count == 0: PgLOG.pglog("No Group matches given Group ID condition for " + params['DS'], PGOPT['extlog'])
+      return pgrec['gindex']
+
+#
+# get group ID array from given group indices
+#
+def group_index_to_id(indices):
+
+   count = len(indices) if indices else 0
+   if count == 0: return None
+
+   grpids = []
+   dcnd = "dsid = '{}'".format(params['DS'])
+   i = 0
+   while i < count:
+      gidx = indices[i]
+      if not isinstance(gidx, int) and re.match(r'^(!|<|>|<>)$', gidx): break
+      i += 1
+   if i >= count: # normal group index given
+      for i in range(count):
+         gidx = indices[i]
+         if not gidx:
+            grpids.append('') # default value
+         elif i and gidx == indices[i-1]:
+            grpids.append(grpids[i-1])
+         else:
+            pgrec = PgDBI.pgget("dsgroup", "grpid", "{} AND gindex = {}".format(dcnd, gidx), PGOPT['extlog'])
+            if not pgrec: PgLOG.pglog("Group Index {} not in RDADB for {}".format(gidx, params['DS']), PGOPT['extlog'])
+            grpids.append(pgrec['grpid'])
+      return grpids
+   else: # found non-equal condition sign
+      pgrec = PgDBI.pgmget("dsgroup", "DISTINCT grpid", dcnd + PgDBI.get_field_condition("gindex", indices), PGOPT['extlog'])
+      count = (len(pgrec['grpid']) if pgrec else 0)
+      if count == 0: PgLOG.pglog("No Group matches given Group Index condition for " + params['DS'], PGOPT['extlog'])
+      return pgrec['grpid']
+
+#
+# validate order fields and
+# get a string of order fields that are not in given fields
+#
+def append_order_fields(oflds, flds, tname, excludes = None):
+
+   orders = ''
+   hash = TBLHASH[tname]
+   for ofld in oflds:
+      ufld = ofld.upper()
+      if ufld not in hash or excludes and excludes.find(ufld) > -1: continue
+      if flds and flds.find(ufld) > -1: continue
+      orders += ofld
+
+   return orders
+
+#
+# validate multiple values for given fields
+#
+def validate_multiple_values(tname, count, flds = None):
+
+   opts = []
+   hash = TBLHASH[tname]
+   if flds:
+      for fld in flds:
+         if fld in hash: opts.append(hash[fld][0])
+   else:
+      for fld in hash:
+         opts.append(hash[fld][0])
+
+   validate_multiple_options(count, opts, (1 if tname == 'htarfile' else 0))
+
+#
+# validate multiple values for given options
+#
+def validate_multiple_options(count, opts, remove = 0):
+
+   for opt in opts:
+      if opt not in params or OPTS[opt][0] != 2: continue # no value given or not a multiple value option
+      cnt = len(params[opt])
+      if cnt == 1 and count > 1 and OPTS[opt][2]&1:
+         val0 = params[opt][0]
+         params[opt] = [val0]*count
+         OPTS[opt][2] |= 4 # expanded
+         cnt = count
+      if cnt != count:
+         if count == 1 and cnt > 1 and OPTS[opt][2]&PGOPT['TXTBIT']:
+            params[opt][0] = ' '.join(params[opt])
+         elif remove and cnt == 1 and count > 1:
+            del params[opt]
+         elif cnt < count:
+            PgLOG.pglog("Multi-value Option {}({}): {} given and {} needed".format(opt, OPTS[opt][1], cnt, count), PGOPT['extlog'])
+
+#
+# get field keys for a RDADB table; include all fields if include is empty
+#
+def get_field_keys(tname, include = None, exclude = None):
+
+   fields = ''
+   hash = TBLHASH[tname]
+
+   for fld in hash:
+      if include and include.find(fld) < 0: continue
+      if exclude and exclude.find(fld) > -1: continue
+      opt = hash[fld][0]
+      if opt in params: fields += fld
+
+   return fields if fields else None
+
+#
+# get a string for fields of a RDADB table
+#
+def get_string_fields(flds, tname, include = None, exclude = None):
+
+   fields = []
+   hash = TBLHASH[tname]
+
+   for fld in flds:
+      ufld = fld.upper() # in case
+      if include and include.find(ufld) < 0: continue
+      if exclude and exclude.find(ufld) > -1: continue
+      if ufld not in hash:
+         PgLOG.pglog("Invalid field '{}' to get from '{}'".format(fld, tname), PGOPT['extlog'])
+      elif hash[ufld][0] not in OPTS:
+         PgLOG.pglog("Option '{}' is not defined for field '{} - {}'".format(hash[ufld][0], ufld, hash[ufld][1]), PGOPT['extlog'])
+      if len(hash[ufld]) == 4:
+         fname = "{} {}".format(hash[ufld][3], hash[ufld][1])
+      else:
+         fname = hash[ufld][1]
+      fields.append(fname)
+
+   return ', '.join(fields)
+
+#
+# get max count for given options
+#
+def get_max_count(opts):
+
+   count = 0
+   for opt in opts:
+      if opt not in params: continue
+      cnt = len(params[opt])
+      if cnt > count: count = cnt
+
+   return count
+
+#
+# get a string of fields of a RDADB table for sorting
+#
+def get_order_string(flds, tname, exclude = None):
+
+   orders = []
+   hash = TBLHASH[tname]
+
+   for fld in flds:
+      if fld.islower():
+         desc = " DESC"
+         fld = fld.upper()
+      else:
+         desc = ""
+      if exclude and exclude.find(fld) > -1: continue
+      orders.append(hash[fld][1] + desc)
+
+   return (" ORDER BY " + ', '.join(orders)) if orders else ''
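+
+# A minimal usage sketch for get_order_string(); 'tname' and the keys 'A'/'b'
+# are hypothetical, and the real column names come from TBLHASH[tname]. A
+# lowercase key requests descending order on its column:
+#
+#   get_order_string('Ab', 'tname')
+#   # -> " ORDER BY <column of A>, <column of B> DESC"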
+
+#
+# get a string for column titles of a given table
+#
+def get_string_titles(flds, hash, lens):
+
+   titles = []
+   colcnt = len(flds)
+   for i in range(colcnt):
+      fld = flds[i]
+      if fld not in hash: continue
+      opt = hash[fld][0]
+      if opt not in OPTS: PgLOG.pglog("ERROR: Undefined option " + opt, PGOPT['extlog'])
+      title = OPTS[opt][1]
+      if lens:
+         if len(title) > lens[i]: title = opt
+         title = "{:{}}".format(title, lens[i])
+      titles.append(title)
+
+   return params['DV'].join(titles) + params['DV']
+
+#
+# display error message and exit
+#
+def parameter_error(p, opt = None, lidx = 0, line = 0, infile = None):
+
+   if not opt:
+      errmsg = "value passed in without leading info option"
+   elif opt == "continue":
+      errmsg = "error processing continuation line in input file"
+   elif opt == 'specified':
+      errmsg = "option -{}/-{} is specified already".format(p, OPTS[p][1])
+   elif opt == "mixed":
+      errmsg = "single-value option mixed with multi-value option"
+   elif opt == "missact":
+      errmsg = "No Action Option is specified"
+   elif opt == "missval":
+      errmsg = "No value provided following Info Option"
+   elif opt == 'duplicate':
+      errmsg = "multiple actions not allowed"
+   elif opt == "delayed":
+      errmsg = "delayed Mode option not supported"
+   elif OPTS[opt][0] == 0:
+      errmsg = "value follows Mode Option -{}/-{}".format(opt, OPTS[opt][1])
+   elif OPTS[opt][0] == 1:
+      errmsg = "multiple values follow single-value Option -{}/-{}".format(opt, OPTS[opt][1])
+   elif OPTS[opt][0] >= 4:
+      errmsg = "value follows Action Option -{}/-{}".format(opt, OPTS[opt][1])
+   else:
+      errmsg = None
+
+   if errmsg:
+      if lidx:
+         input_error(lidx, line, infile, "{} - {}".format(p, errmsg))
+      else:
+         PgLOG.pglog("ERROR: {} - {}".format(p, errmsg), PGOPT['extlog'])
+
+#
+# wrapper function of PgLOG.pglog() for errors in input files
+#
+def input_error(lidx, line, infile, errmsg):
+
+   PgLOG.pglog("ERROR at {}({}): {}\n {}".format(infile, lidx, line, errmsg), PGOPT['extlog'])
+
+#
+# wrapper function of PgLOG.pglog() for action errors
+#
+def action_error(errmsg, cact = None):
+
+   msg = "ERROR"
+   if PGOPT['ANAME']: msg += " " + PGOPT['ANAME']
+   if not cact: cact = PGOPT['CACT']
+   if cact: msg += " for Action {} ({})".format(cact, OPTS[cact][1])
+
+   if 'DS' in params:
+      if OPTS['DS'][0] == 1:
+         msg += " of " + params['DS']
+      elif OPTS['DS'][0] == 2 and len(params['DS']) == 1:
+         msg += " of " + params['DS'][0]
+
+   msg += ": " + errmsg
+   if PgLOG.PGLOG['DSCHECK']: PgDBI.record_dscheck_error(msg)
+   PgLOG.pglog(msg, PGOPT['extlog'])
+
+#
+# get the valid option for a given parameter by checking if the given option
+# name matches either a valid option key (short name) or its long name
+# flag: 1 - value key only, 2 - multi-value key only, 3 - action key only,
+#       4 - mode&action key only
+#
+def get_option_key(p, flag = 0, skip = 0, lidx = 0, line = None, infile = None, table = None):
+
+   if p is None: p = ''
+   opt = get_short_option(p)
+   errmsg = None
+   if opt:
+      if flag == 1:
+         if OPTS[opt][0]&3 == 0: errmsg = "NOT a Value Option"
+      elif flag == 2:
+         if OPTS[opt][0]&2 == 0: errmsg = "NOT a Multi-Value Option"
+      elif flag == 3:
+         if OPTS[opt][0] < 4:
+            if lidx:
+               errmsg = "NOT an Action Option"
+            else:
+               errmsg = "Missing leading '-' for non-action option"
+      elif flag == 4:
+         if OPTS[opt][0]&3:
+            errmsg = "NOT a Mode/Action Option"
+      if errmsg: errmsg = "{}({}) - {}".format(opt, OPTS[opt][1], errmsg)
+   elif not skip:
+      if p:
+         errmsg = "-{} - Unknown Option".format(p)
+      else:
+         errmsg = "'' - Empty Option Name"
+
+   if errmsg:
+      if lidx:
+         input_error(lidx, line, infile, errmsg)
+      else:
+         PgLOG.pglog("ERROR: " + errmsg, PGOPT['extlog'])
+   elif opt and (table or PGOPT['IFCNT'] and OPTS[opt][0] == 2):
+      INOPTS[opt] = 1
+
+   return opt
+
+#
+# set values to given options; ignore options set in input files if the options
+# are already set on the command line
+#
+def set_option_value(opt, val = None, cnl = 0, lidx = 0, line = None, infile = None):
+
+   if opt in CMDOPTS and lidx: # in input file, but given on command line already
+      if opt not in params: params[opt] = CMDOPTS[opt]
+      return
+
+   if val is None: val = ''
+   if OPTS[opt][0]&3:
+      if OPTS[opt][2]&16:
+         if not val:
+            val = 0
+         elif re.match(r'^\d+$', val):
+            val = int(val)
+      elif val and (opt == 'DS' or opt == 'OD'):
+         val = PgUtil.format_dataset_id(val)
+
+   errmsg = None
+   if not cnl and OPTS[opt][0]&3:
+      if opt in params:
+         if OPTS[opt][0] == 2:
+            if OPTS[opt][2]&2: del params[opt] # clean auto-set values
+         elif params[opt] != val and not OPTS[opt][2]&1:
+            errmsg = "'{}', multiple values not allowed for Single-Value Option".format(val)
+      if not errmsg and (not PGOPT['CACT'] or OPTS[PGOPT['CACT']][2]):
+         dstr = OPTS[opt][3] if len(OPTS[opt]) > 3 else None
+         if dstr:
+            vlen = len(val)
+            ms = re.match(r'^!(\w*)', dstr)
+            if ms:
+               dstr = ms.group(1)
+               if vlen == 1 and dstr.find(val) > -1: errmsg = "{}: character must not be one of '{}'".format(val, dstr)
+            elif vlen > 1 or (vlen == 0 and not OPTS[opt][2]&128) or (vlen == 1 and dstr.find(val) < 0):
+               errmsg = "{}: single-letter value must be one of '{}'".format(val, dstr)
+
+   if not errmsg:
+      if OPTS[opt][0] == 2: # multiple value option
+         if opt not in params:
+            params[opt] = [val] # set the first value
+            if opt == 'QF' and PGOPT['ACTS'] == OPTS['DL'][0]: OPTS['FS'][3] = 'ANT'
+         else:
+            if cnl:
+               rowidx = len(params[opt]) - 1
+               if params[opt][rowidx]:
+                  if not re.match(r'^(DE|DI|DM|DW)$', opt):
+                     errmsg = "Multi-line value not allowed"
+                  else:
+                     params[opt][rowidx] += "\n" + val # multiple-line value
+               else:
+                  params[opt][rowidx] = val
+            else:
+               params[opt].append(val) # add next value
+      elif OPTS[opt][0] == 1: # single value option
+         if cnl and opt in params:
+            if val: errmsg = "Multi-line value not allowed"
+         elif OPTS[opt][2]&2 and PgUtil.pgcmp(params[opt], val):
+            errmsg = "{}: Single-Value Info Option has value '{}' already".format(val, params[opt])
+         else:
+            params[opt] = val
+            OPTS[opt][2] |= 2
+      elif val:
+         if OPTS[opt][0] == 0 and re.match(r'^(Y|N)$', val, re.I):
+            params[opt] = 1 if (val == 'Y' or val == 'y') else 0
+         else:
+            parameter_error(val, opt, lidx, line, infile) # no value for flag or action options
+      elif opt not in params:
+         params[opt] = 1 # set flag or action option
+         if OPTS[opt][0] > 2:
+            if PGOPT['ACTS']: parameter_error(opt, "duplicate", lidx, line, infile) # no duplicated action options
+            PGOPT['ACTS'] = OPTS[opt][0] # add action bit
+            PGOPT['CACT'] = opt # add action name
+            if opt == "SB": PGOPT['MSET'] = opt
+
+   if errmsg:
+      if lidx:
+         input_error(lidx, line, infile, "{}({}) - {}".format(opt, OPTS[opt][1], errmsg))
+      else:
+         PgLOG.pglog("ERROR: {}({}) - {}".format(opt, OPTS[opt][1], errmsg), PGOPT['extlog'])
+
+   if not lidx: CMDOPTS[opt] = params[opt] # record options set on command lines
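+
+# A minimal usage sketch for set_option_value(), assuming a single-value option
+# 'DS' (the line text, line index and file name are hypothetical):
+#
+#   set_option_value('DS', 'd540000')                              # command line
+#   set_option_value('DS', 'd540001', 0, 9, 'DS<=>d540001', 'opt.in')  # input file:
+#   # kept as 'd540000'; values recorded in CMDOPTS take precedence over input files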
+
+#
+# get width for a single row if in column format
+#
+def get_row_width(pgrec):
+
+   slen = len(params['DV'])
+   width = 0
+   for key in pgrec:
+      wd = 0
+      for val in pgrec[key]:
+         if not val: continue
+         if not isinstance(val, str): val = str(val)
+         if key == 'note':
+            vlen = val.find('\n') + 1
+         else:
+            vlen = 0
+         if vlen < 1: vlen = len(val)
+         if vlen > wd: wd = vlen # get max width of each column
+
+      # accumulate all column widths plus length of delimiter to get row width
+      if width: width += slen
+      width += wd
+
+   return width
+
+#
+# get a short option name by searching dicts OPTS and ALIAS
+#
+def get_short_option(p):
+
+   plen = len(p)
+   if plen == 2:
+      p = p.upper()
+      if p in OPTS: return p
+
+   for opt in OPTS: # get main option first
+      if not PgUtil.pgcmp(OPTS[opt][1], p, 1): return opt
+
+   for opt in ALIAS: # then check alias options
+      for key in ALIAS[opt]:
+         if not PgUtil.pgcmp(key, p, 1): return opt
+
+   return None
+
+#
+# print result in column format, with multiple values each row
+#
+def print_column_format(pgrec, flds, hash, lens, retbuf = 0):
+
+   rowcnt = -1
+   colcnt = len(flds)
+   buf = ''
+   fields = []
+   flens = []
+   for i in range(colcnt):
+      fld = flds[i]
+      if fld in hash:
+         fld = hash[fld][1]
+         ms = re.search(r'\.(.+)$', fld)
+         if ms: fld = ms.group(1)
+      if fld in pgrec:
+         fields.append(fld)
+         flens.append((lens[i] if lens else 0))
+         if rowcnt < 0: rowcnt = len(pgrec[fld])
+      else:
+         PgLOG.pglog(fld + ": Unknown field name", PGOPT['extlog'])
+
+   colcnt = len(fields)
+   for i in range(rowcnt):
+      offset = 0
+      values = []
+      for j in range(colcnt):
+         fld = fields[j]
+         idx = -1
+         val = pgrec[fld][i]
+         slen = flens[j]
+         if val is None:
+            val = ''
+         elif isinstance(val, str):
+            idx = val.find("\n")
+            if idx > 0:
+               val = "\n" + val
+               idx = 0
+         else:
+            val = str(val)
+         if slen:
+            if idx < 0:
+               val = "{:{}}".format(val, slen)
+            else:
+               val += "\n{:{}}".format(' ', offset)
+            offset += slen
+         values.append(val)
+      line = params['DV'].join(values) + params['DV'] + "\n"
+      if retbuf:
+         buf += line
+      else:
+         OUTPUT.write(line)
+
+   return buf if retbuf else rowcnt
+
+#
+# print result in row format, with a single value on each row
+#
+def print_row_format(pgrec, flds, hash):
+
+   for fld in flds:
+      if fld not in hash: continue
+      line = "{}{}".format(OPTS[hash[fld][0]][1], params['ES'])
+      field = hash[fld][1]
+      ms = re.search(r'\.(.+)$', field)
+      if ms: field = ms.group(1)
+      if field in pgrec:
+         value = pgrec[field]
+         if value is not None: line += str(value)
+      OUTPUT.write(line + "\n")
+
+#
+# compress/uncompress given files and change the formats accordingly
+#
+def compress_files(files, formats, count):
+
+   if 'UZ' in params:
+      strcmp = 'Uncompress'
+      actcmp = 0
+   else:
+      strcmp = 'Compress'
+      actcmp = 1
+   fmtcnt = len(formats)
+   if not fmtcnt: return files # just in case
+   s = 's' if count > 1 else ''
+   PgLOG.pglog("{}ing {} File{} for {} ...".format(strcmp, count, s, params['DS']), PGOPT['wrnlog'])
+   cmpcnt = 0
+   for i in range(count):
+      fmt = formats[i] if (i < fmtcnt and formats[i]) else formats[0]
+      (ofile, fmt) = PgFile.compress_local_file(files[i], fmt, actcmp, PGOPT['extlog'])
+      if ofile != files[i]:
+         files[i] = ofile
+         cmpcnt += 1
+
+   PgLOG.pglog("{}/{} Files {}ed for {}".format(cmpcnt, count, strcmp, params['DS']), PGOPT['emllog'])
+
+   if 'ZD' in params: del params['ZD']
+   if 'UZ' in params: del params['UZ']
+
+   return files
+
+#
+# get hash condition
+# tname - table name to identify a table hash
+# noand - 1 to not add a leading 'AND'
+#
+def get_hash_condition(tname, include = None, exclude = None, noand = 0):
+
+   condition = ''
+   hash = TBLHASH[tname]
+
+   for key in hash:
+      if include and include.find(key) < 0: continue
+      if exclude and exclude.find(key) > -1: continue
+      opt = hash[key][0]
+      if opt not in params: continue # no option value
+      flg = hash[key][2]
+      if flg < 0: # condition is ignored for this option
+         PgLOG.pglog("Condition given per Option -{} (-{}) is ignored".format(opt, OPTS[opt][1]), PGOPT['errlog'])
+         continue
+
+      fld = hash[key][1]
+      condition += PgDBI.get_field_condition(fld, params[opt], flg, noand)
+      noand = 0
+
+   return condition
+
+#
+# set default params value for given opt; empty the value if 'all' is given
+#
+def set_default_value(opt, dval = None):
+
+   flag = OPTS[opt][0]
+   if flag&3 == 0: return # skip if not a single/multiple value option
+
+   oval = 0
+   if opt in params:
+      if flag == 1:
+         oval = params[opt]
+      else:
+         count = len(params[opt])
+         if count == 1:
+            oval = params[opt][0]
+         elif count > 1:
+            return # multiple values given already
+
+   if oval:
+      if re.match(r'^all$', oval, re.I):
+         del params[opt] # remove option value for all
+      return # value given already
+
+   if dval:
+      # set default value
+      if flag == 1:
+         params[opt] = dval
+      else:
+         params[opt] = [dval]
+
+#
+# add/strip COS block for given file name and cosflg if cosfile given/not-given
+# return the file size after the conversion
+#
+def cos_convert(locfile, cosflg, cosfile = None):
+
+   if cosfile:
+      cmd = "cosconvert -{} {} {}".format(cosflg, cosfile, locfile)
+   else:
+      cmd = "cosconvert -{} {}".format(cosflg.lower(), locfile)
+      cosfile = locfile
+
+   PgLOG.pgsystem(cmd)
+   info = PgFile.check_local_file(cosfile)
+   if not info:
+      return PgLOG.pglog("Error - " + cmd, PGOPT['errlog']) # should not happen
+   else:
+      return info['data_size']
+
+#
+# evaluate count of values for given options
+#
+def get_option_count(opts):
+
+   count = 0
+   for opt in opts:
+      if opt in params:
+         cnt = len(params[opt])
+         if cnt > count: count = cnt
+   if count > 0: validate_multiple_options(count, opts)
+
+   return count
+
+#
+# gather subgroup indices recursively for given condition
+# dcnd: dataset condition string
+# pidx: parent group index
+# gtype: group type if not empty (P - public groups only)
+#
+# Return: list of group indices
+#
+def get_all_subgroups(dcnd, pidx, gtype = None):
+
+   gidxs = [pidx]
+   gflds = "gindex"
+   if gtype: gflds += ", grptype"
+   grecs = PgDBI.pgmget("dsgroup", gflds, "{} and pindex = {}".format(dcnd, pidx), PgLOG.LGWNEX)
+   if not grecs: return gidxs
+
+   gcnt = len(grecs['gindex'])
+   for i in range(gcnt):
+      gidx = grecs['gindex'][i]
+      if abs(gidx) <= abs(pidx) or gtype and grecs['grptype'][i] != gtype: continue
+      subs = get_all_subgroups(dcnd, gidx, gtype)
+      gidxs.extend(subs)
+
+   return gidxs
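+
+# A minimal usage sketch for get_all_subgroups() (the dsid is hypothetical;
+# assumes dsgroup records exist for it):
+#
+#   dcnd = "dsid = 'd540000'"
+#   gidxs = get_all_subgroups(dcnd, 0)       # top index plus all nested subgroups
+#   pubs = get_all_subgroups(dcnd, 0, 'P')   # restricted to public ('P') groups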
+
+#
+# gather public subgroup indices recursively for given condition. A group index is
+# gathered only if there are data files right under it. The pidx is included too
+# if its file count is larger than zero.
+# dcnd: dataset condition string
+# pidx: parent group index
+# cfld: count field (dwebcnt, nwebcnt, savedcnt)
+# pfcnt: file count for parent group index pidx (0 to skip)
+#
+# Return: list of group indices
+#
+def get_data_subgroups(dcnd, pidx, cfld, pfcnt = 0):
+
+   if not pfcnt: # get file count for the parent group
+      pfcnt = group_file_count(dcnd, pidx, cfld)
+      if not pfcnt: return None
+
+   gflds = "gindex, " + cfld
+   gcnd = "{} AND pindex = {} AND {} > 0".format(dcnd, pidx, cfld)
+   grecs = PgDBI.pgmget("dsgroup", gflds, gcnd, PgLOG.LGWNEX)
+   if not grecs: return ([pidx] if pfcnt > 0 else None)
+
+   gcnt = len(grecs['gindex'])
+   gidxs = []
+   for i in range(gcnt):
+      gidx = grecs['gindex'][i]
+      fcnt = grecs[cfld][i]
+      if fcnt == 0 or abs(gidx) <= abs(pidx): continue
+      subs = get_data_subgroups(dcnd, gidx, cfld, fcnt)
+      if subs: gidxs.extend(subs)
+      pfcnt -= fcnt
+   if pfcnt > 0: gidxs.insert(0, pidx)
+
+   return (gidxs if gidxs else None)
+
+#
+# get group file count for given count field name
+#
+def group_file_count(cnd, gidx, cfld):
+
+   if gidx:
+      table = "dsgroup"
+      cnd += " AND gindex = {}".format(gidx)
+   else:
+      table = "dataset"
+   pgrec = PgDBI.pgget(table, cfld, cnd)
+
+   return (pgrec[cfld] if pgrec else 0)
+
+#
+# set file format for actions -AM/-AW from given local files
+#
+def set_file_format(count):
+
+   if 'LF' in params:
+      files = params['LF']
+   else:
+      return
+
+   fmtcnt = 0
+   fmts = [None] * count
+   for i in range(count):
+      fmt = PgFile.get_file_format(files[i])
+      if fmt:
+         fmtcnt += 1
+         fmts[i] = fmt
+
+   if fmtcnt:
+      params['AF'] = fmts
+      OPTS['AF'][2] |= 2
+
+#
+# get frequency information
+#
+def get_control_frequency(frequency):
+
+   val = nf = 0
+   unit = None
+   ms = re.match(r'^(\d+)([YMWDHNS])$', frequency, re.I)
+   if ms:
+      val = int(ms.group(1))
+      unit = ms.group(2).upper()
+   else:
+      ms = re.match(r'^(\d+)M/(\d+)', frequency, re.I)
+      if ms:
+         val = int(ms.group(1))
+         nf = int(ms.group(2))
+         unit = 'M'
+         if nf < 2 or nf > 10 or (30%nf): val = 0
+
+   if not val:
+      if nf:
+         unit = "fraction of month frequency '{}' MUST be (2,3,5,6,10)".format(frequency)
+      elif unit:
+         unit = "frequency '{}' MUST be larger than 0".format(frequency)
+      elif re.search(r'/(\d+)$', frequency):
+         unit = "fractional frequency '{}' for month ONLY".format(frequency)
+      else:
+         unit = "invalid frequency '{}', unit must be (Y,M,W,D,H)".format(frequency)
+      return (None, unit) # unit holds the error message here
+
+   freq = [0]*7 # initialize the frequency list
+   uidx = {'Y' : 0, 'D' : 2, 'H' : 3, 'N' : 4, 'S' : 5}
+   if unit == 'M':
+      freq[1] = val
+      if nf: freq[6] = nf # number of fractions in a month
+   elif unit == 'W':
+      freq[2] = 7 * val
+   elif unit in uidx:
+      freq[uidx[unit]] = val
+
+   return (freq, unit)
+
+#
+# check if valid data time for given pindex
+#
+def valid_data_time(pgrec, cstr = None, logact = 0):
+
+   if pgrec['pindex'] and pgrec['datatime']:
+      (freq, unit) = get_control_frequency(pgrec['frequency'])
+      if not freq:
+         if cstr: PgLOG.pglog("{}: {}".format(cstr, unit), logact)
+         return PgLOG.FAILURE
+
+      dtime = PgUtil.adddatetime(pgrec['datatime'], freq[0], freq[1], freq[2], freq[3], freq[4], freq[5], freq[6])
+      if PgDBI.pgget("dcupdt", "", "cindex = {} AND datatime < '{}'".format(pgrec['pindex'], dtime), PGOPT['extlog']):
+         if cstr: PgLOG.pglog("{}: MUST be processed After Control Index {}".format(cstr, pgrec['pindex']), logact)
+         return PgLOG.FAILURE
+
+   return PgLOG.SUCCESS
+
+#
+# publish filelists for given datasets
+#
+def publish_dataset_filelist(dsids):
+
+   for dsid in dsids:
+      PgLOG.pgsystem("publish_filelist " + dsid, PGOPT['wrnlog'], 7)
+
+#
+# get the current active version index for given dsid
+#
+def get_version_index(dsid, logact = 0):
+
+   pgrec = PgDBI.pgget("dsvrsn", "vindex", "dsid = '{}' AND status = 'A'".format(dsid), logact)
+
+   return (pgrec['vindex'] if pgrec else 0)
+
+#
+# append given format (data or archive) sfmt to format string sformat
+#
+def append_format_string(sformat, sfmt, chkend = 0):
+
+   mp = r'(^|\.){}$' if chkend else r'(^|\.){}(\.|$)'
+   if sfmt:
+      if not sformat:
+         sformat = sfmt
+      else:
+         for fmt in re.split(r'\.', sfmt):
+            if not re.search(mp.format(fmt), sformat, re.I): sformat += '.' + fmt
+
+   return sformat
+
+#
+# get request type string or shared-info flag
+#
+def request_type(rtype, idx = 0):
+
+   RTYPE = {
+      'C' : ["Customized Data", 0],
+      'D' : ["CDP Link", 0],
+      'M' : ["Delayed Mode Data", 1],
+      'N' : ["NCARDAP(THREDDS) Data Server", 0],
+      'Q' : ["Database Query", 0],
+      'R' : ["Realtime Data", 0],
+      'S' : ["Subset Data", 0],
+      'T' : ["Subset/Format-Conversion Data", 0],
+      'F' : ["Format Conversion Data", 1], # web
+      'A' : ["Archive Format Conversion", 1], # web
+      'P' : ["Plot Chart", 0],
+      'U' : ["Data", 0]
+   }
+
+   if rtype not in RTYPE: rtype = 'U'
+
+   return RTYPE[rtype][idx]
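+
+# A minimal usage sketch for request_type(), per the RTYPE table above:
+#
+#   request_type('S')      # -> "Subset Data"
+#   request_type('F', 1)   # -> 1, the shared-info flag
+#   request_type('Z')      # unknown types fall back to 'U' -> "Data"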
+
+#
+# email notice for user
+#
+def send_request_email_notice(pgrqst, errmsg, fcount, rstat, readyfile = None, pgpart = None):
+
+   pgcntl = PGOPT['RCNTL']
+   rhome = params['WH'] if 'WH' in params and params['WH'] else PgLOG.PGLOG['RQSTHOME']
+   if errmsg:
+      if pgpart:
+         if cache_partition_email_error(pgpart['rindex'], errmsg): return rstat
+         enote = "email_part_error"
+      else:
+         enote = "email_error"
+   elif fcount == 0:
+      if pgcntl and pgcntl['empty_out'] == 'Y':
+         enote = "email_empty"
+      else:
+         errmsg = "NO output data generated"
+         if pgpart:
+            if cache_partition_email_error(pgpart['rindex'], errmsg): return rstat
+            enote = "email_part_error"
+         else:
+            enote = "email_error"
+   elif 'EN' in params and params['EN'][0]:
+      enote = params['EN'][0]
+   elif pgrqst['enotice']:
+      enote = pgrqst['enotice']
+   elif pgcntl and pgcntl['enotice']:
+      enote = pgcntl['enotice']
+   elif pgrqst['globus_transfer'] == 'Y' and pgrqst['task_id']:
+      enote = "email_notice_globus"
+   else:
+      enote = "email_" + ("command" if pgrqst['location'] else "notice")
+
+   if enote[0] not in '/.': enote = "{}/notices/{}".format(rhome, enote)
+
+   finfo = PgFile.check_local_file(enote, 128)
+   if not finfo:
+      if finfo is None:
+         ferror = "file does not exist"
+      else:
+         ferror = "Error checking file"
+   else:
+      ef = open(enote, 'r') # open email notice file
+      ferror = None
+
+   if ferror:
+      if errmsg:
+         PgLOG.pglog("{}: {}\nCannot email error to {}@ucar.edu: {}".format(enote, ferror, PgLOG.PGLOG['CURUID'], errmsg),
+                     (PGOPT['errlog'] if rstat else PGOPT['extlog']))
+         return "E"
+      else:
+         errmsg = PgLOG.pglog("{}: {}\nCannot email notice to {}".format(enote, ferror, pgrqst['email']), PGOPT['errlog']|PgLOG.RETMSG)
+         enote = rhome + "/notices/email_error"
+         ef = open(enote, 'r')
+         rstat = 'E'
+
+   ebuf = ''
+   ebuf += ef.read()
+   ef.close()
+
+   einfo = {}
+   einfo['HOSTNAME'] = PgLOG.PGLOG['HOSTNAME']
+   einfo['DSID'] = pgrqst['dsid']
+   einfo['DSSURL'] = PgLOG.PGLOG['DSSURL']
+   if pgrqst['location']:
+      einfo['WHOME'] = pgrqst['location']
+   else:
+      einfo['WHOME'] = PgLOG.PGLOG['RQSTURL']
+   einfo['SENDER'] = pgrqst['specialist'] + "@ucar.edu"
+   einfo['RECEIVER'] = pgrqst['email']
+   einfo['RTYPE'] = request_type(pgrqst['rqsttype'])
+   PgLOG.add_carbon_copy() # clean carbon copy email in case not empty
+   exclude = (einfo['SENDER'] if errmsg else einfo['RECEIVER'])
+   if not errmsg and pgcntl and pgcntl['ccemail']:
+      PgLOG.add_carbon_copy(pgcntl['ccemail'], 1, exclude, pgrqst['specialist'])
+   if PgLOG.PGLOG['CURUID'] != pgrqst['specialist'] and PgLOG.PGLOG['CURUID'] != PgLOG.PGLOG['RDAUSER']:
+      PgLOG.add_carbon_copy(PgLOG.PGLOG['CURUID'], 1, exclude)
+   if 'CC' in params: PgLOG.add_carbon_copy(params['CC'], 0, exclude)
+   einfo['CCD'] = PgLOG.PGLOG['CCDADDR']
+   einfo['RINDEX'] = str(pgrqst['rindex'])
+   einfo['RQSTID'] = pgrqst['rqstid']
+   pgrec = PgDBI.pgget("dataset", "title", "dsid = '{}'".format(pgrqst['dsid']), PGOPT['extlog'])
+   einfo['DSTITLE'] = pgrec['title'] if pgrec and pgrec['title'] else ''
+   einfo['SUBJECT'] = ''
+   if errmsg:
+      einfo['ERRMSG'] = PgLOG.get_error_command(int(time.time()), PGOPT['errlog']) + errmsg
+      einfo['SUBJECT'] = "Error "
+      if pgpart:
+         einfo['PARTITION'] = " partition"
+         einfo['PTIDX'] = "(PTIDX{})".format(pgpart['pindex'])
+         einfo['SUBJECT'] += "Process Partitions of "
+      else:
+         einfo['PARTITION'] = einfo['PTIDX'] = ''
+         einfo['SUBJECT'] += "Build "
+      einfo['SUBJECT'] += "{} Rqst{} from {}".format(einfo['RTYPE'], pgrqst['rindex'], pgrqst['dsid'])
+   else:
+      if fcount == 0:
+         einfo['SUBJECT'] += "NO Output:"
+      else:
+         einfo['SUBJECT'] += "Completed:"
+      einfo['DAYS'] = str(PGOPT['VP'])
+      pgrec = PgDBI.pgget("dssgrp", "lstname, fstname, phoneno",
+                          "logname = '{}'".format(PgLOG.PGLOG['CURUID']), PGOPT['extlog'])
+      if pgrec:
+         einfo['SPECIALIST'] = "{} {}".format(pgrec['fstname'], pgrec['lstname'])
+         einfo['PHONENO'] = pgrec['phoneno']
+      einfo['SUBJECT'] += " {} {} request {} - {}!".format(pgrqst['dsid'], einfo['RTYPE'], pgrqst['rindex'], pgrqst['email'])
+
+   if pgrqst['note']:
+      einfo['RNOTE'] = "\nRequest Detail:\n{}\n".format(pgrqst['note'])
+   elif fcount > 0 and pgrqst['rinfo']:
+      einfo['RNOTE'] = "\nRequest Detail:\n{}\n".format(pgrqst['rinfo'])
+   else:
+      einfo['RNOTE'] = ""
+
+   if pgrqst['globus_transfer'] == 'Y' and pgrqst['task_id']:
+      einfo['GLOBUS_TASK_URL'] = "https://app.globus.org/activity/" + pgrqst['task_id']
+
+   for ekey in einfo:
+      ebuf = re.sub(r'<{}>'.format(ekey), einfo[ekey], ebuf)
+
+   if PgLOG.PGLOG['DSCHECK'] and not pgpart:
+      tbl = "dscheck"
+      cnd = "cindex = {}".format(PgLOG.PGLOG['DSCHECK']['cindex'])
+   else:
+      tbl = "dsrqst"
+      cnd = "rindex = {}".format(pgrqst['rindex'])
+
+   if not PgDBI.cache_customized_email(tbl, "einfo", cnd, ebuf, 0): return 'E'
+   if errmsg:
+      PgLOG.pglog("Error Email {} cached to {}.einfo for {}:\n{}".format(einfo['SENDER'], tbl, cnd, errmsg),
+                  PGOPT['errlog'])
+   else:
+      PgLOG.pglog("{}Email {} cached to {}.einfo for {}\nSubject: {}".format(("Customized " if pgrqst['enotice'] else ""), einfo['RECEIVER'], tbl, cnd, einfo['SUBJECT']),
+                  PGOPT['wrnlog']|PgLOG.FRCLOG)
+   if readyfile:
+      rf = open(readyfile, 'w')
+      rf.write(ebuf)
+      rf.close()
+      PgFile.set_local_mode(readyfile, 1, PgLOG.PGLOG['FILEMODE'])
+
+   return rstat
+
+#
+# cache partition process error to existing email buffer
+#
+def cache_partition_email_error(ridx, errmsg):
+
+   pkey = ""
+   pgrec = PgDBI.pgget("dsrqst", 'einfo', "rindex = {}".format(ridx), PGOPT['extlog'])
+   if not (pgrec and pgrec['einfo'] and pgrec['einfo'].find(pkey) > -1): return 0
+
+   errmsg = PgLOG.get_error_command(int(time.time()), PGOPT['errlog']) + ("{}\n{}".format(errmsg, pkey))
+   pgrec['einfo'] = re.sub(pkey, errmsg, pgrec['einfo'])
+
+   return PgDBI.pgupdt("dsrqst", pgrec, "rindex = {}".format(ridx), PGOPT['extlog'])
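+
+# A sketch of the notice-template substitution performed in
+# send_request_email_notice() above (the buffer is hypothetical; the real
+# templates live under <rhome>/notices):
+#
+#   ebuf = "Request <RINDEX> of <DSID> is ready."
+#   einfo = {'RINDEX' : '1234', 'DSID' : 'd540000'}
+#   for ekey in einfo:
+#      ebuf = re.sub(r'<{}>'.format(ekey), einfo[ekey], ebuf)
+#   # ebuf -> "Request 1234 of d540000 is ready."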
diff --git a/src/rda_python_common/PgPGS.py b/src/rda_python_common/PgPGS.py
new file mode 100644
index 0000000..4f92dec
--- /dev/null
+++ b/src/rda_python_common/PgPGS.py
@@ -0,0 +1,152 @@
+#
+###############################################################################
+#
+#     Title : PgPGS.py -- PostgreSQL Interface for CDP DataBase Per psql
+#    Author : Zaihua Ji, zji@ucar.edu
+#      Date : 08/31/2020
+#   Purpose : python library module to handle sql scripts to retrieve info
+#             from cdp database per psql
+#
+# Work File : $DSSHOME/lib/python/PgPGS.py
+#    Github : https://github.com/NCAR/rda-shared-libraries.git
+#
+###############################################################################
+#
+import PgLOG
+import os
+import re
+
+PGPGS = {}
+PGPGS["PGSSERV"] = PgLOG.get_environment("PGSSERV", '-h vetsdbprod -p 5432 -U acadmin access_control')
+PGPGS["SQLPATH"] = PgLOG.get_environment("SQLPATH", PgLOG.PGLOG['DSSHOME'] + "/dssdb/sql")
+
+#
+# local function: create a sql file
+#
+def pgs_sql_file(tablenames, fields, condition = None):
+
+   sqlfile = "{}/pgs{}.sql".format(PGPGS['SQLPATH'], os.getpid())
+
+   sqlstr = "SELECT {}\nFROM {}".format(fields, tablenames)
+   if condition:
+      if re.match(r'^\s*(ORDER|GROUP|HAVING)\s', condition, re.I):
+         sqlstr += "\n{}".format(condition)
+      else:
+         sqlstr += "\nWHERE {}".format(condition)
+   sqlstr += ";\n"
+   try:
+      SQL = open(sqlfile, 'w')
+      SQL.write(sqlstr)
+      SQL.close()
+   except Exception as e:
+      PgLOG.pglog("Error Opening '{}': {}".format(sqlfile, str(e)), PgLOG.LGWNEX)
+
+   if PgLOG.PGLOG['DBGLEVEL']: PgLOG.pgdbg(1000, sqlstr)
+
+   return sqlfile
+
+#
+# tablenames: comma-delimited string of table names
+#     fields: field names to query from the CDP database
+#  condition: query conditions for the WHERE clause
+#     Return: one record from tablenames, a dict with keys as field names
+#             and values as field values upon success, FAILURE otherwise
+#
+def pgsget(tablenames, fields, condition = None, logact = 0):
+
+   sqlfile = pgs_sql_file(tablenames, fields, condition)
+   sqlout = PgLOG.pgsystem("psql {} < {}".format(PGPGS['PGSSERV'], sqlfile), logact, 273+1024) # 1+16+256
+
+   colcnt = 0
+   record = {}
+   if sqlout:
+      for line in re.split(r'\n', sqlout):
+         vals = re.split(r'\s*\|\s+', line)
+         if colcnt: # gather data
+            record = dict(zip(flds, vals))
+            break
+         else: # gather field names
+            flds = vals
+            colcnt = len(flds)
+   elif PgLOG.PGLOG['SYSERR']: # an error happened
+      PgLOG.pglog(PgLOG.PGLOG['SYSERR'], logact|PgLOG.ERRLOG)
+
+   if PgLOG.PGLOG['DBGLEVEL']:
+      if record:
+         PgLOG.pgdbg(1000, "pgsget: 1 record retrieved from {}:\n{}".format(tablenames, str(record)))
+      else:
+         PgLOG.pgdbg(1000, "pgsget: 0 record retrieved from " + tablenames)
+
+   os.remove(sqlfile)
+
+   return record
+
+#
+# tablenames: comma-delimited string of table names
+#     fields: field names to query from the CDP database
+#  condition: query conditions for the WHERE clause
+#     Return: multiple records from tablenames, a dict with field names as keys
+#             and lists of retrieved values; all lists are the same size.
+#             FAILURE if not successful
+#
+def pgsmget(tablenames, fields, condition = None, logact = 0):
+
+   sqlfile = pgs_sql_file(tablenames, fields, condition)
+   sqlout = PgLOG.pgsystem("psql {} < {}".format(PGPGS['PGSSERV'], sqlfile), logact, 273+1024) # 1+16+256
+
+   rowcnt = colcnt = 0
+   records = {}
+   vals = []
+   if sqlout:
+      for line in re.split(r'\n', sqlout):
+         row = re.split(r'\s*\|\s+', line)
+         if colcnt: # gather data
+            vals.append(row)
+            rowcnt += 1
+         else: # gather field names
+            flds = row
+            colcnt = len(flds)
+      if rowcnt > 0:
+         records = dict(zip(flds, list(zip(*vals))))
+   elif PgLOG.PGLOG['SYSERR']: # an error happened
+      PgLOG.pglog(PgLOG.PGLOG['SYSERR'], logact|PgLOG.ERRLOG)
+
+   if PgLOG.PGLOG['DBGLEVEL']:
+      PgLOG.pgdbg(1000, "pgsmget: {} record(s) retrieved from {}".format(rowcnt, tablenames))
+
+   os.remove(sqlfile) # remove sqlfile when done
+
+   return records
+
+#
+#    email: cdp user email address
+#   userid: cdp user ID
+# username: cdp user name
+#   Return: one record from the CDP PostgreSQL database; PgLOG.FAILURE otherwise
+#
+def get_cdp_user(email, userid = 0, username = None, logact = 0):
+
+   if userid:
+      condition = "id = {}".format(userid)
+   elif email:
+      condition = "email = '{}'".format(email)
+   elif username:
+      condition = "username = '{}'".format(username)
+   else:
+      return PgLOG.FAILURE
+
+   fields = ("id as cdpid, firstname as fstname, middlename as midinit, " +
+             "lastname as lstname, email, username as cdpname, " +
+             "organization as org_name, organization_type as org_type, country")
+   return pgsget('users', fields, condition, logact)
+
+#
+#   name: field name
+#  value: field value
+# Return: value converted from upper case to lower case
+#
+def convert_pgs_case(name, value):
+
+   if name == "username" or name == "email":
+      return value.lower()
+   else:
+      return value # no change
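+
+# A minimal usage sketch for get_cdp_user() (the email address is hypothetical;
+# assumes psql access to the CDP database per PGPGS['PGSSERV']):
+#
+#   user = get_cdp_user('jdoe@example.edu')
+#   if user: print(user['cdpid'], user['lstname'])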
diff --git a/src/rda_python_common/PgSIG.py b/src/rda_python_common/PgSIG.py
new file mode 100644
index 0000000..f4dc5de
--- /dev/null
+++ b/src/rda_python_common/PgSIG.py
@@ -0,0 +1,1157 @@
+#
+###############################################################################
+#
+#     Title : PgSIG.py
+#
+#    Author : Zaihua Ji, zji@ucar.edu
+#      Date : 08/05/2020
+#   Purpose : python library module to start and control daemon processes
+#
+# Work File : $DSSHOME/lib/python/PgSIG.py
+#    Github : https://github.com/NCAR/rda-shared-libraries.git
+#
+###############################################################################
+#
+import os
+import re
+import sys
+import errno
+import signal
+import time
+from contextlib import contextmanager
+import PgLOG
+import PgDBI
+
+VUSERS = [] # users allowed to start this daemon
+CPIDS = {} # allow up to 'mproc' processes at one time for daemon
+CBIDS = {} # allow up to 'bproc' background processes at one time for each child
+SDUMP = {
+   'DEF' : '/dev/null',
+   'ERR' : '',
+   'OUT' : ''
+}
+
+PGSIG = {
+   'QUIT' : 0, # 1 if QUIT signal received, quit server if no child
+   'MPROC' : 1, # default number of multiple processes
+   'BPROC' : 1, # default number of multiple background processes
+   'ETIME' : 20, # default error waiting time (in seconds)
+   'WTIME' : 120, # default waiting time (in seconds)
+   'DTIME' : 600, # the daemon record refresh time (in seconds)
+   'RTIME' : 2400, # the web rda config unlocking and unconfigured system down waiting time (in seconds)
+   'CTIME' : 4800, # the lock cleaning & configured system down waiting time (in seconds)
+   'PPID' : -1, # 1 - server, (> 1) - child, 0 - non-daemon mode
+   'PID' : 0, # current process ID
+   'DNAME' : '', # daemon name
+   'DSTR' : '', # string for daemon with user login name
+   'MTIME' : 0, # maximum daemon running time in seconds, 0 for unlimited
+   'STIME' : 0, # time the daemon is started
+   'STRTM' : '', # string format of 'STIME'
+}
+
+#
+# add users allowed to start this daemon
+#
+def add_vusers(user = None, mores = None):
+
+   global VUSERS
+   if not user:
+      VUSERS = [] # clean all vusers
+   else:
+      VUSERS.append(user)
+
+   if mores: VUSERS.extend(mores)
+
+#
+# validate a user for starting this daemon
+#
+def check_vuser(user, aname = None):
+
+   if user and VUSERS:
+      valid = 0
+      for vuser in VUSERS:
+         if user == vuser:
+            valid = 1
+            break
+
+      if valid == 0:
+         vuser = ', '.join(VUSERS)
+         PgLOG.pglog("{}: must be '{}' to run '{}' in Daemon mode".format(user, vuser, aname), PgLOG.LGEREX)
+
+#
+# turn this process into a daemon
+#
+# aname - application name, or daemon name
+# uname - user login name who started the application
+# mproc - upper limit of multiple child processes
+# wtime - waiting time (in seconds) for next process for the daemon
+# logon - turn on the logging if true
+# bproc - multiple background processes if > 1
+# mtime - maximum running time for the daemon if provided
+#
+def start_daemon(aname, uname, mproc = 1, wtime = 120, logon = 0, bproc = 1, mtime = 0):
+
+   dstr = "Daemon '{}'{} on {}".format(aname, (" By {}".format(uname) if uname else ''), PgLOG.PGLOG['HOSTNAME'])
+
+   pid = check_daemon(aname, uname)
+   if pid:
+      PgLOG.pglog("***************** WARNING ***************************\n" +
+                  "** {} is running as PID={}\n".format(dstr, pid) +
+                  "** You need to stop it before starting a new one!\n" +
+                  "*****************************************************", PgLOG.WARNLG)
+      PgLOG.pglog("{} is already running as PID={}".format(dstr, pid), PgLOG.FRCLOG|PgLOG.MSGLOG)
+      sys.exit(0)
+
+   if mproc > 1: PGSIG['MPROC'] = mproc
+   if bproc > 1: PGSIG['BPROC'] = bproc
+   PGSIG['WTIME'] = get_wait_time(wtime, 120, "Polling Wait Time")
+   PGSIG['MTIME'] = get_wait_time(mtime, 0, "Maximum Running Time")
+
+   pid = process_fork(dstr)
+   cpid = pid if pid > 0 else os.getpid()
+   msg = "PID={},PL={},WI={}".format(cpid, PGSIG['MPROC'], PGSIG['WTIME'])
+   if PGSIG['MTIME']: msg += ",MT={}".format(PGSIG['MTIME'])
+   logmsg = "{}({}) started".format(dstr, msg)
+   if logon: logmsg += " With Logging On"
+   if pid > 0:
+      PgLOG.pglog(logmsg, PgLOG.WARNLG)
+      sys.exit(0)
+
+   os.setsid()
+   os.umask(0)
+
+   # set up to catch signals in daemon only
+   signal.signal(signal.SIGCHLD, clean_dead_child)
+   signal.signal(signal.SIGQUIT, signal_catch)
+   signal.signal(signal.SIGUSR1, signal_catch)
+   signal.signal(signal.SIGUSR2, signal_catch)
+   PGSIG['DSTR'] = dstr
+   PGSIG['DNAME'] = aname
+   PGSIG['STIME'] = int(time.time())
+   PGSIG['STRTM'] = PgLOG.current_datetime(PGSIG['STIME'])
+   PGSIG['PPID'] = 1
+   PGSIG['PID'] = cpid
+
+   sys.stdin = open(SDUMP['DEF'])
+   PgLOG.cmdlog("{} By {}".format(logmsg, PGSIG['STRTM']))
+
+   if logon:
+      PgLOG.PGLOG['LOGMASK'] &= ~(PgLOG.WARNLG|PgLOG.EMLLOG) # turn off warn/email in daemon
+      set_dump()
+   else:
+      PgLOG.PGLOG['LOGMASK'] &= ~(PgLOG.LGWNEM) # turn off log/warn/email in daemon
+      set_dump(SDUMP['DEF'])
+
+   PgLOG.PGLOG['BCKGRND'] = 1 # make sure the background flag is always on
+   PgDBI.pgdisconnect(1) # disconnect database in daemon
+
+#
+# set dump output file
+#
+def set_dump(default = None):
+
+   errdump = PgLOG.get_environment("ERRDUMP", default)
+   outdump = PgLOG.get_environment("OUTDUMP", default)
+
+   if not errdump:
+      if not PgLOG.PGLOG['ERRFILE']:
+         PgLOG.PGLOG['ERRFILE'] = re.sub(r'\.log$', '.err', PgLOG.PGLOG['LOGFILE'], 1)
+      errdump = "{}/{}".format(PgLOG.PGLOG['LOGPATH'], PgLOG.PGLOG['ERRFILE'])
+
+   if errdump != SDUMP['ERR']:
+      sys.stderr = open(errdump, 'a')
+      SDUMP['ERR'] = errdump
+
+   if not outdump: outdump = "{}/{}".format(PgLOG.PGLOG['LOGPATH'], PgLOG.PGLOG['LOGFILE'])
+   if outdump != SDUMP['OUT']:
+      sys.stdout = open(outdump, 'a')
+      SDUMP['OUT'] = outdump
+
+#
+# stop daemon and log the ending info
+#
+def stop_daemon(msg):
+
+   msg = " with " + msg if msg else ''
+   PgLOG.PGLOG['LOGMASK'] |= PgLOG.MSGLOG # turn on logging before daemon stops
+   PgLOG.pglog("{} Started at {}, Stopped gracefully{} by {}".format(PGSIG['DSTR'], PGSIG['STRTM'], msg, PgLOG.current_datetime()), PgLOG.LOGWRN)
+
+#
+# check if a daemon is running already
+#
+# aname - application name for the daemon
+# uname - user login name who started the daemon
+#
+# return the process id if yes and 0 if not
+#
+def check_daemon(aname, uname = None):
+
+   if uname:
+      check_vuser(uname, aname)
+      pcmd = "ps -u {} -f | grep {} | grep ' 1 '".format(uname, aname)
+      mp = r"^\s*{}\s+(\d+)\s+1\s+".format(uname)
+   else:
+      pcmd = "ps -C {} -f | grep ' 1 '".format(aname)
+      mp = r"^\s*\w+\s+(\d+)\s+1\s+"
+
+   buf = PgLOG.pgsystem(pcmd, PgLOG.LOGWRN, 20+1024)
+   if buf:
+      cpid = os.getpid()
+      lines = buf.split('\n')
+      for line in lines:
+         ms = re.match(mp, line)
+         pid = int(ms.group(1)) if ms else 0
+         if pid > 0 and pid != cpid: return pid
+
+   return 0
+
+#
+# check if an application is running already, other than the current process
+#
+# aname - application name
+# uname - user login name who started the application
+# sargv - argument string
+#
+# return the process id if yes and 0 if not
+#
+def check_application(aname, uname = None, sargv = None):
+
+   if uname:
+      check_vuser(uname, aname)
+      pcmd = "ps -u {} -f | grep {} | grep -v ' grep '".format(uname, aname)
+      mp = r"^\s*{}\s+(\d+)\s+(\d+)\s+.*{}\S*\s+(.*)$".format(uname, aname)
+   else:
+      pcmd = "ps -C {} -f".format(aname)
+      mp = r"^\s*\w+\s+(\d+)\s+(\d+)\s+.*{}\S*\s+(.*)$".format(aname)
+
+   buf = PgLOG.pgsystem(pcmd, PgLOG.LOGWRN, 20+1024)
+   if not buf: return 0
+
+   cpids = [os.getpid(), os.getppid()]
+   pids = []
+   ppids = []
+   astrs = []
+   lines = buf.split('\n')
+   for line in lines:
+      ms = re.match(mp, line)
+      if not ms: continue
+      pid = int(ms.group(1))
+      ppid = int(ms.group(2))
+      if pid in cpids:
+         if ppid not in cpids: cpids.append(ppid)
+         continue
+      pids.append(pid)
+      ppids.append(ppid)
+      if sargv: astrs.append(ms.group(3))
+
+   pcnt = len(pids)
+   if not pcnt: return 0
+
+   i = 0
+   while i < pcnt:
+      pid = pids[i]
+      if pid and pid in cpids:
+         pids[i] = 0
+         ppid = ppids[i]
+         if ppid not in cpids: cpids.append(ppid)
+         i = 0
+      else:
+         i += 1
+
+   for i in range(pcnt):
+      pid = pids[i]
+      if pid and (not sargv or sargv.find(astrs[i]) > -1): return pid
+
+   return 0
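+
+# A minimal usage sketch for check_daemon() (the daemon and user names are
+# hypothetical):
+#
+#   pid = check_daemon('dsrqst', 'rdadata')
+#   if pid: PgLOG.pglog("dsrqst is running as PID={}".format(pid), PgLOG.LOGWRN)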
+
+#
+# validate that the current process is a single one; quit if not
+#
+def validate_single_process(aname, uname = None, sargv = None, logact = PgLOG.LOGWRN):
+
+   pid = check_application(aname, uname, sargv)
+   if pid:
+      msg = aname
+      if sargv: msg += ' ' + sargv
+      msg += ": already running as PID={} on {}".format(pid, PgLOG.PGLOG['HOSTNAME'])
+      if uname: msg += ' By ' + uname
+      PgLOG.pglog(msg + ', Quit Now', logact)
+      sys.exit(0)
+
+#
+# check how many processes are running for an application already
+#
+# aname - application name
+# uname - user login name who started the application
+# sargv - argument string
+#
+# return the number of processes (excluding the child ones)
+#
+def check_multiple_application(aname, uname = None, sargv = None):
+
+   if uname:
+      check_vuser(uname, aname)
+      pcmd = "ps -u {} -f | grep {} | grep -v ' grep '".format(uname, aname)
+      mp = r"^\s*{}\s+(\d+)\s+(\d+)\s+.*{}\S*\s+(.*)$".format(uname, aname)
+   else:
+      pcmd = "ps -C {} -f".format(aname)
+      mp = r"^\s*\w+\s+(\d+)\s+(\d+)\s+.*{}\S*\s+(.*)$".format(aname)
+
+   buf = PgLOG.pgsystem(pcmd, PgLOG.LOGWRN, 20+1024)
+   if not buf: return 0
+
+   dpids = [os.getpid(), os.getppid()]
+   pids = []
+   ppids = []
+   astrs = []
+   lines = buf.split('\n')
+   for line in lines:
+      ms = re.match(mp, line)
+      if not ms: continue
+      pid = int(ms.group(1))
+      ppid = int(ms.group(2))
+      if pid in dpids:
+         if ppid > 1 and ppid not in dpids: dpids.append(ppid)
+         continue
+      elif ppid in pids:
+         if pid not in dpids: dpids.append(pid)
+         continue
+      pids.append(pid)
+      ppids.append(ppid)
+      if sargv: astrs.append(ms.group(3))
+
+   pcnt = len(pids)
+   if not pcnt: return 0
+
+   i = 0
+   while i < pcnt:
+      pid = pids[i]
+      ppid = ppids[i]
+      if pid:
+         if pid in dpids:
+            if ppid > 1 and ppid not in dpids: dpids.append(ppid)
+            pids[i] = i = 0
+            continue
+         elif ppid in pids:
+            if pid not in dpids: dpids.append(pid)
+            pids[i] = i = 0
+            continue
+      i += 1
+
+   ccnt = 0
+   for i in range(pcnt):
+      if pids[i] and (not sargv or sargv.find(astrs[i]) > -1): ccnt += 1
+
+   return ccnt
+
+#
+# validate that the running processes are under the limit for the given app; quit if not
+#
+def validate_multiple_process(aname, plimit, uname = None, sargv = None, logact = PgLOG.LOGWRN):
+
+   pcnt = check_multiple_application(aname, uname, sargv)
+   if pcnt >= plimit:
+      msg = aname
+      if sargv: msg += ' ' + sargv
+      msg += ": already running in {} processes on {}".format(pcnt, PgLOG.PGLOG['HOSTNAME'])
+      if uname: msg += ' By ' + uname
+      PgLOG.pglog(msg + ', Quit Now', logact)
+      sys.exit(0)
+
+#
+# fork process
+#
+# return the defined result from the call of fork
+#
+def process_fork(dstr):
+
+   for i in range(10): # try 10 times
+      try:
+         pid = os.fork()
+         return pid
+      except OSError as e:
+         if e.errno == errno.EAGAIN:
+            time.sleep(5)
+         else:
+            PgLOG.pglog("{}: {}".format(dstr, str(e)), PgLOG.LGEREX)
+            break
+
+   PgLOG.pglog("{}: too many tries (10) for os.fork()".format(dstr), PgLOG.LGEREX)
+
+#
+# process the predefined signals
+#
+def signal_catch(signum, frame):
+
+   if PGSIG['PPID'] == 1:
+      tmp = 'Server'
+   elif PGSIG['PPID'] > 1:
+      tmp = 'Child'
+   else:
+      tmp = 'Process'
+
+   if signum == signal.SIGQUIT:
+      sname = "<{} - signal.SIGQUIT - Quit>".format(signum)
+   elif signum == signal.SIGUSR1:
+      linfo = 'Logging On'
+      if PgLOG.PGLOG['LOGMASK']&PgLOG.MSGLOG: linfo += ' & Debugging On'
+      sname = "<{} - signal.SIGUSR1 - {}>".format(signum, linfo)
+   elif signum == signal.SIGUSR2:
+      if PgLOG.PGLOG['DBGLEVEL']:
+         linfo = 'Logging Off & Debugging Off'
+      else:
+         linfo = 'Logging Off'
+      sname = "<{} - signal.SIGUSR2 - {}>".format(signum, linfo)
+   else:
+      sname = "<{} - Signal Not Supported Yet>".format(signum)
+
+   dumpon = 1 if SDUMP['OUT'] and SDUMP['OUT'] != SDUMP['DEF'] else 0
+   if not dumpon: set_dump()
+   PgLOG.pglog("catches {} in {} {}".format(sname, tmp, PGSIG['DSTR']), PgLOG.LOGWRN|PgLOG.FRCLOG)
+
+   if signum == signal.SIGUSR1:
+      if PgLOG.PGLOG['LOGMASK']&PgLOG.MSGLOG:
+         PgLOG.PGLOG['DBGLEVEL'] = 1000 # a second logon signal turns on debugging
+      else:
+         PgLOG.PGLOG['LOGMASK'] |= PgLOG.MSGLOG # turn on logging
+   elif signum == signal.SIGUSR2:
+      PgLOG.PGLOG['LOGMASK'] &= ~(PgLOG.MSGLOG) # turn off logging
+      PgLOG.PGLOG['DBGLEVEL'] = 0 # turn off debugging
+      set_dump(SDUMP['DEF'])
+   else:
+      if not dumpon: set_dump(SDUMP['DEF'])
+      if signum == signal.SIGQUIT: PGSIG['QUIT'] = 1
+
+   if PGSIG['PPID'] <= 1 and len(CPIDS) > 0: # pass signal to child processes
+      for pid in CPIDS: kill_process(pid, signum)
+
+#
+# wrapper function for os.kill(), logging a caught error based on logact
+# return PgLOG.SUCCESS on success; PgLOG.FAILURE if not
+#
+def kill_process(pid, signum, logact = 0):
+
+   try:
+      os.kill(pid, signum)
+   except Exception as e:
+      ret = PgLOG.FAILURE
+      if logact:
+         if type(signum) is int:
+            sigstr = str(signum)
+         else:
+            sigstr = "{}-{}".format(signum.name, int(signum))
+         PgLOG.pglog("Error passing signal {} to pid {}: {}".format(sigstr, pid, str(e)), logact)
+   else:
+      ret = PgLOG.SUCCESS
+
+   return ret
+
+#
+# wait for child processes to finish
+#
+def clean_dead_child(signum, frame):
+
+   live = 0
+
+   while True:
+      try:
+         dpid, status = os.waitpid(-1, os.WNOHANG)
+      except ChildProcessError as e:
+         break # no child process any more
+      except Exception as e:
+         PgLOG.pglog("Error checking child process: {}".format(str(e)), PgLOG.ERRLOG)
+         break
+      else:
+         if dpid == 0:
+            if live > 0: break # wait twice if a process is still alive
+            live += 1
+         elif PGSIG['PPID'] < 2:
+            if dpid in CPIDS: del CPIDS[dpid]
+
+#
+# send signal to daemon and exit
+#
+def signal_daemon(sname, aname, uname):
+
+   dstr = "Daemon '{}'{} on {}".format(aname, ((" By " + uname) if uname else ""), PgLOG.PGLOG['HOSTNAME'])
+   pid = check_daemon(aname, uname)
+
+   if pid > 0:
+      dstr += " (PID = {})".format(pid)
+      if re.match(r'^(quit|stop)$', sname, re.I):
+         signum = signal.SIGQUIT
+         msg = "QUIT"
+      elif re.match(r'^(logon|on)$', sname, re.I):
+         signum = signal.SIGUSR1
+         msg = "Logging ON"
+      elif re.match(r'^(logoff|off)$', sname, re.I):
+         signum = signal.SIGUSR2
+         msg = "Logging OFF"
+         PgLOG.PGLOG['DBGLEVEL'] = 0
+      else:
+         PgLOG.pglog("{}: invalid Signal for {}".format(sname, dstr), PgLOG.LGEREX)
+
+      if kill_process(pid, signum, PgLOG.LOGERR) == PgLOG.SUCCESS:
+         PgLOG.pglog("{}: signal sent to {}".format(msg, dstr), PgLOG.LOGWRN|PgLOG.FRCLOG)
+   else:
+      PgLOG.pglog(dstr + ": not running currently", PgLOG.LOGWRN|PgLOG.FRCLOG)
+
+   sys.exit(0)
+
+#
+# start a timed child to run the command in case it hangs
+#
+def timeout_command(cmd, logact = PgLOG.LOGWRN, cmdopt = 4):
+
+   if logact&PgLOG.EXITLG: logact &= ~PgLOG.EXITLG
+
+   PgLOG.pglog("> " + cmd, logact)
+   if start_timeout_child(cmd, logact):
+      PgLOG.pgsystem(cmd, logact, cmdopt)
+      sys.exit(0)
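+
+# A minimal usage sketch for timeout_command() (the command is hypothetical):
+# the child runs the command while the parent polls check_process() roughly
+# every 2 seconds, up to PGLOG['TIMEOUT'] polls, then kills the child tree:
+#
+#   timeout_command("ls -l /data/somewhere", PgLOG.LOGWRN)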
+
+#
+# start a timeout child process
+#
+# return: 1 - in child, 0 - in parent
+#
+def start_timeout_child(msg, logact = PgLOG.LOGWRN):
+
+   pid = process_fork(msg)
+
+   if pid == 0: # in child
+      signal.signal(signal.SIGQUIT, signal_catch) # catch quit signal only
+      PGSIG['PPID'] = PGSIG['PID']
+      PGSIG['PID'] = pid = os.getpid()
+      PgLOG.cmdlog("Timeout child to " + msg, time.time(), 0)
+      PgDBI.pgdisconnect(0) # disconnect database in child
+      return 1
+
+   # in parent
+   for i in range(PgLOG.PGLOG['TIMEOUT']):
+      if not check_process(pid): break
+      time.sleep(2)
+
+   if check_process(pid):
+      msg += ": timeout({} secs) in CPID {}".format(2*PgLOG.PGLOG['TIMEOUT'], pid)
+      pids = kill_children(pid, 0)
+      time.sleep(6)
+      if kill_process(pid, signal.SIGKILL, PgLOG.LOGERR): pids.insert(0, pid)
+
+      if pids: msg += "\nProcess({}) Killed".format(','.join(map(str, pids)))
+      PgLOG.pglog(msg, logact)
+
+   return 0
+
+#
+# kill children recursively, starting from the deepest, and return the pids that got killed
+#
+def kill_children(pid, logact = PgLOG.LOGWRN):
+
+   buf = PgLOG.pgsystem("ps --ppid {} -o pid".format(pid), logact, 20)
+   pids = []
+   if buf:
+      lines = buf.split('\n')
+      for line in lines:
+         ms = re.match(r'^\s*(\d+)', line)
+         if not ms: continue
+         cid = int(ms.group(1))
+         if not check_process(cid): continue
+         cids = kill_children(cid, logact)
+         if cids: pids = cids + pids
+         if kill_process(cid, signal.SIGKILL, logact) == PgLOG.SUCCESS: pids.insert(0, cid)
+
+   if logact and len(pids): PgLOG.pglog("Process({}) Killed".format(','.join(map(str, pids))), logact)
+
+   return pids
+
+#
+# start a child process
+# pname - unique process name
+#
+def start_child(pname, logact = PgLOG.LOGWRN, dowait = 0):
+
+   global CBIDS
+   if PGSIG['MPROC'] < 2: return 1 # no need for a child process
+
+   if logact&PgLOG.EXITLG: logact &= ~PgLOG.EXITLG
+   if logact&PgLOG.MSGLOG: logact |= PgLOG.FRCLOG
+
+   if PGSIG['QUIT']:
+      return PgLOG.pglog("{} is in QUIT mode, cannot start CPID for {}".format(PGSIG['DSTR'], pname), logact)
+   elif len(CPIDS) >= PGSIG['MPROC']:
+      i = 0
+      while True:
+         pcnt = check_child(None, 0, logact)
+         if pcnt < PGSIG['MPROC']: break
+         if dowait:
+            show_wait_message(i, "{}-{}: wait any of {} child processes".format(PGSIG['DSTR'], pname, pcnt), logact, dowait)
+            i += 1
+         else:
+            return PgLOG.pglog("{}-{}: {} child processes already running at {}".format(PGSIG['DSTR'], pname, pcnt, PgLOG.current_datetime()), logact)
+
+   if check_child(pname): return -1 # process is running already
+
+   pid = process_fork(PGSIG['DSTR'])
+   if pid:
+      CPIDS[pid] = pname # record the child process id
+      PgLOG.pglog("{}: starts CPID {} for {}".format(PGSIG['DSTR'], pid, pname))
+   else:
+      signal.signal(signal.SIGQUIT, signal.SIG_DFL) # turn off catching QUIT signal in child
+      PgLOG.PGLOG['LOGMASK'] &= ~PgLOG.WARNLG # turn off warn in child
+      PGSIG['PPID'] = PGSIG['PID']
+      PGSIG['PID'] = pid = os.getpid()
+      PGSIG['MPROC'] = 1 # 1 in child process
+      CBIDS = {} # empty background process info in case not empty
+      PGSIG['DSTR'] += ": CPID {} for {}".format(pid, pname)
+      PgLOG.cmdlog("CPID {} for {}".format(pid, pname))
+      PgDBI.pgdisconnect(0) # disconnect database in child
+
+   return 1 # child started successfully
+
+#
+# get child process id for a given pname
+#
+def pname2cpid(pname):
+
+   for cpid in CPIDS:
+      if CPIDS[cpid] == pname: return cpid
+
+   return 0
+
+#
+# check one or all child processes if they are still running
+# pname - unique process name if given
+# pid - check this specified process id if given
+# dowait - 0 no wait, 1 wait all done, -1 wait only when all children are running
+# return the number of running processes if dowait == 0 or 1
+# return the number of non-running processes if dowait == -1
+#
+def check_child(pname, pid = 0, logact = PgLOG.LOGWRN, dowait = 0):
+
+   if PGSIG['MPROC'] < 2: return 0 # no child process
+
+   if logact&PgLOG.EXITLG: logact &= ~PgLOG.EXITLG
+   ccnt = i = 0
+   if dowait < 0: ccnt = 1 if (pid or pname) else PGSIG['MPROC']
+   while True:
+      pcnt = 0
+      if not pid and pname: pid = pname2cpid(pname)
+      if pid:
+         if check_process(pid): # process is not done yet
+            if pname:
+               PgLOG.pglog("{}({}): Child still running".format(pname, pid), logact)
+            else:
+               PgLOG.pglog("{}: Child still running".format(pid), logact)
+            pcnt = 1
+         elif pid in CPIDS:
+            del CPIDS[pid] # clean the saved info for the process
+      elif not pname:
+         cpids = list(CPIDS)
+         for cpid in cpids:
+            if check_process(cpid): # process is not done yet
+               pcnt += 1
+            elif cpid in CPIDS:
+               del CPIDS[cpid]
+
+      if pcnt == 0 or dowait == 0 or pcnt < ccnt: break
+      show_wait_message(i, "{}: wait {}/{} child processes".format(PGSIG['DSTR'], pcnt, PGSIG['MPROC']), logact, dowait)
+      i += 1
+
+   return (ccnt - pcnt) if ccnt else pcnt
+
+#
+# start this process in non-daemon mode
+#
+# aname - application name, or daemon name
+# cact - short action name
+# uname - user login name who started the application
+# mproc - upper limit of multiple child processes
+# wtime - waiting time (in seconds) for next process
+#
+def start_none_daemon(aname, cact = None, uname = None, mproc = 1, wtime = 120, logon = 1, bproc = 1):
+
+   dstr = aname
+   if cact: dstr += " for Action " + cact
+   if uname:
+      dstr += " By " + uname
+      check_vuser(uname, aname)
+
+   signal.signal(signal.SIGQUIT, signal_catch) # catch quit signal only
+   signal.signal(signal.SIGCHLD, clean_dead_child)
+   PGSIG['DSTR'] = dstr
+   PGSIG['DNAME'] = aname
+   PGSIG['PPID'] = 0
+   PGSIG['PID'] = os.getpid()
+   PGSIG['MPROC'] = mproc
+   PGSIG['BPROC'] = bproc
+   PgLOG.PGLOG['CMDTIME'] = PGSIG['WTIME'] = get_wait_time(wtime, 120, "Polling Wait Time")
+   if PGSIG['MPROC'] > 1:
+      PgLOG.cmdlog("starts non-daemon {}(ML={},WI={})".format(aname, PGSIG['MPROC'], PGSIG['WTIME']))
+   if not logon: PgLOG.PGLOG['LOGMASK'] &= ~PgLOG.MSGLOG # turn off message logging
+
+#
+# check if one process id, other than the current one, is still running
+# pid - specified process id
+#
+def check_process(pid):
+
+   buf = PgLOG.pgsystem("ps -p {} -o pid".format(pid), PgLOG.LGWNEX, 20)
+   if buf:
+      mp = r'^\s*{}$'.format(pid)
+      lines = buf.split('\n')
+      for line in lines:
+         if re.match(mp, line): return 1
+
+   return 0
+
+#
+# check a process id on a given host
+#
+def check_host_pid(host, pid, pmsg = None, logact = PgLOG.LOGWRN):
+
+   cmd = 'rdaps'
+   if host: cmd += " -h " + host
+   cmd += " -p {}".format(pid)
+   buf = PgLOG.pgsystem(cmd, logact, 276) # 4+16+256
+   if not buf: return (-1 if PgLOG.PGLOG['SYSERR'] else 0)
+   if pmsg: PgLOG.pglog(pmsg, logact&(~PgLOG.EXITLG))
+   return 1
+
+#
+# check if one process id on a given host name is still running, with default timeout
+# pid - specified process id
+# ppid - specified parent process id
+# uname - user login name who started the daemon
+# host - host name the pid is supposed to be running on
+# aname - application name
+# pmsg - process message if given
+#
+# return 1 if the process is still alive, 0 if it died already, -1 on checking error
+#
+def check_host_process(host, pid, ppid = 0, uname = None, aname = None, pmsg = None, logact = PgLOG.LOGWRN):
+
+   cmd = "rdaps"
+   if host: cmd += " -h " + host
+   if pid: cmd += " -p {}".format(pid)
+   if ppid: cmd += " -P {}".format(ppid)
+   if uname: cmd += " -u " + uname
+   if aname: cmd += " -a " + aname
+   buf = PgLOG.pgsystem(cmd, logact, 276) # 4+16+256
+   if not buf: return (-1 if PgLOG.PGLOG['SYSERR'] else 0)
+   if pmsg: PgLOG.pglog(pmsg, logact&(~PgLOG.EXITLG))
+   return 1
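+
+# A minimal usage sketch for check_host_process() ('casper' and the pid are
+# hypothetical; 'rdaps' is the process-checking utility invoked above):
+#
+#   ret = check_host_process('casper', 12345)
+#   # 1 - still alive, 0 - died already, -1 - error checking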
not buf: return stat + + chkt = 1 + lines = buf.split('\n') + for line in lines: + if chkt: + if re.match(r'^\s*JOBID\s', line, re.I): + ckeys = re.split(r'\s+', PgLOG.pgtrim(line)) + kcnt = len(ckeys) + chkt = 0 + else: + if re.match(r'^-----', line): continue + vals = re.split(r'\s+', PgLOG.pgtrim(line)) + vcnt = len(vals) + if vcnt >= kcnt: + for i in range(kcnt): + ckeys[i] = ckeys[i].upper() + stat[ckeys[i]] = vals[i] + + if vcnt > kcnt: + for i in range(kcnt, vcnt): + stat[ckeys[kcnt-1]] += ' ' + str(vals[i]) + break + + return stat + +# +# get a single pbs status record via qstat +# +def get_pbs_info(qopts, multiple = 0, logact = 0, chkcnt = 1): + + stat = {} + loop = 0 + buf = None + while loop < chkcnt: + buf = PgLOG.pgsystem("qstat -n -w {}".format(qopts), logact, 16) + if buf: break + loop += 1 + time.sleep(6) + + if not buf: return stat + + chkt = chkd = 1 + lines = buf.split('\n') + for line in lines: + if chkt: + if re.match(r'^Job ID', line): + line = re.sub(r'^Job ID', 'JobID', line, 1) + ckeys = re.split(r'\s+', PgLOG.pgtrim(line)) + ckeys[1] = 'UserName' + ckeys[3] = 'JobName' + ckeys[7] = 'Reqd' + ckeys[7] + ckeys[8] = 'Reqd' + ckeys[7] + ckeys[9] = 'State' + ckeys[10] = 'Elap' + ckeys[7] + ckeys.append('Node') + kcnt = len(ckeys) + if multiple: + for i in range(kcnt): + stat[ckeys[i]] = [] + chkt = 0 + elif chkd: + if re.match(r'^-----', line): chkd = 0 + else: + vals = re.split(r'\s+', PgLOG.pgtrim(line)) + if len(vals) == kcnt: + ms = re.match(r'^(\d+)', vals[0]) + if ms: vals[0] = ms.group(1) + if multiple: + for i in range(kcnt): + stat[ckeys[i]].append(vals[i]) + else: + for i in range(kcnt): + stat[ckeys[i]] = vals[i] + break + + return stat + +# +# get multiple slurn status record +# +def get_slurm_multiple(bcmd, logact = PgLOG.LOGWRN): + + buf = PgLOG.pgsystem(bcmd, logact, 16) + if not buf: return 0 + + stat = {} + j = 0 + chkt = chkd = 1 + lines = buf.split('\n') + for line in lines: + if chkt: + if re.match(r'^\s*JOBID\s', line, re.I): + ckeys = re.split(r'\s+', PgLOG.pgtrim(line)) + kcnt = len(ckeys) + for i in range(kcnt): + ckeys[i] = ckeys[i].upper() + stat[ckeys[i]] = [] + chkt = 0 + elif chkd: + if re.match(r'^-----', line): chkd = 0 + else: + vals = re.split(r'\s+', PgLOG.pgtrim(line)) + vcnt = len(vals) + if vcnt >= kcnt: + for i in range(kcnt): + stat[ckeys[i]].append(vals[i]) + + if vcnt > kcnt: + for i in range(kcnt, vcnt): + stat[ckeys[kcnt-1]][j] += ' ' + str(vals[i]) + j += 1 + + return stat if j else 0 + +# +# check status of a slurm batch id +# bid - specified batch id +# +# return hash of batch status, 0 if cannot check any more +# +def check_slurm_status(bid, logact = PgLOG.LOGWRN): + + return get_slurm_info("sacct -o jobid,user,totalcpu,elapsed,ncpus,state,jobname,nodelist -j {}".format(bid), logact) + +# +# check status of a pbs batch id +# bid - specified batch id +# +# return hash of batch status, 0 if cannot check any more +# +def check_pbs_status(bid, logact = PgLOG.LOGWRN): + + stat = {} + buf = PgLOG.pgsystem("qhist -w -j {}".format(bid), logact, 20) + if not buf: return stat + + chkt = 1 + lines = buf.split('\n') + for line in lines: + if chkt: + if re.match(r'^Job', line): + line = re.sub(r'^Job ID', 'JobID', line, 1) + line = re.sub(r'Finish Time', 'FinishTime', line, 1) + line = re.sub(r'Req Mem', 'ReqMem', line, 1) + line = re.sub(r'Used Mem\(GB\)', 'UsedMem(GB)', line, 1) + line = re.sub(r'Avg CPU \(%\)', 'AvgCPU(%)', line, 1) + line = re.sub(r'Elapsed \(h\)', 'WallTime(h)', line, 1) + line = re.sub(r'Job Name', 'JobName', 
line, 1) + ckeys = re.split(r'\s+', PgLOG.pgtrim(line)) + ckeys[1] = 'UserName' + kcnt = len(ckeys) + chkt = 0 + else: + vals = re.split(r'\s+', PgLOG.pgtrim(line)) + for i in range(kcnt): + stat[ckeys[i]] = vals[i] + break + + return stat + +# +# check if a slurm batch id is live +# bid - specified batch id +# +# return 1 if process is steal live, 0 died already or error checking +# +def check_slurm_process(bid, pmsg = None, logact = PgLOG.LOGWRN): + + stat = get_slurm_info("squeue -l -j {}".format(bid), logact) + + if stat: + ms = re.match(r'^(RUNNING|PENDING|SUSPENDE|COMPLETI|CONFIGUR|REQUEUE_)$', stat['STATE']) + if ms: + if pmsg: PgLOG.pglog("{}, STATE={}".format(pmsg, ms.group(1)), logact&~PgLOG.EXITLG) + return 1 + else: + return 0 + + return -1 + +# +# check if a pbs batch id is live +# bid - specified batch id +# +# return 1 if process is steal live, 0 died already or error checking +# +def check_pbs_process(bid, pmsg = None, logact = PgLOG.LOGWRN): + + stat = get_pbs_info(bid, 0, logact) + + ret = -1 + if stat: + ms = re.match(r'^(B|R|Q|S|H|W|X)$', stat['State']) + if ms: + if pmsg: pmsg += ", STATE='{}' and returns 1".format(ms.group(1)) + ret = 1 + else: + if pmsg: pmsg += ", STATE='{}' and returns 0".format(stat['State']) + ret = 0 + elif pmsg: + pmsg += ", Process Not Exists and returns -1" + + if pmsg: PgLOG.pglog(pmsg, logact&~PgLOG.EXITLG) + + return ret + +# +# get wait time +# +def get_wait_time(wtime, default, tmsg): + + if not wtime: wtime = default # use default time + + if type(wtime) is int: return wtime + if re.match(r'^(\d*)$', wtime): return int(wtime) + + ms = re.match(r'^(\d*)([DHMS])$', wtime, re.I) + if ms: + ret = int(ms.group(1)) + unit = ms.group(2) + else: + PgLOG.pglog("{}: '{}' NOT in (D,H,M,S)".format(wtime, tmsg), PgLOG.LGEREX) + + if unit != 'S': + ret *= 60 # seconds in a minute + if unit != 'M': + ret *= 60 # minutes in an hour + if unit != 'H': + ret *= 24 # hours in a day + + return ret # in seconds + +# +# start a background process and record its id; check PgLOG.pgsystem() in PgLOG.pm for +# valid cmdopt values +# +def start_background(cmd, logact = PgLOG.LOGWRN, cmdopt = 5, dowait = 0): + + if PGSIG['BPROC'] < 2: return PgLOG.pgsystem(cmd, logact, cmdopt) # no background + + act = logact&(~PgLOG.EXITLG) + if act&PgLOG.MSGLOG: act |= PgLOG.FRCLOG # make sure background calls always logged + + if len(CBIDS) >= PGSIG['BPROC']: + i = 0 + while True: + bcnt = check_background(None, 0, act) + if bcnt < PGSIG['BPROC']: break + if dowait: + show_wait_message(i, "{}-{}: wait any {} background calls".format(PGSIG['DSTR'], cmd, bcnt), act, dowait) + i += 1 + else: + return PgLOG.pglog("{}-{}: {} background calls already at {}".format(PGSIG['DSTR'], cmd, bcnt, PgLOG.current_datetime()), act) + + cmdlog = (act if cmdopt&1 else PgLOG.WARNLG) + if cmdopt&8: + PgLOG.cmdlog("starts '{}'".format(cmd), None, cmdlog) + else: + PgLOG.pglog("{}({})-{} >{} &".format(PgLOG.PGLOG['HOSTNAME'], os.getpid(), PgLOG.current_datetime(), cmd), cmdlog) + bckcmd = cmd + if cmdopt&2: + bckcmd += " >> {}/{}".format(PgLOG.PGLOG['LOGPATH'], PgLOG.PGLOG['LOGFILE']) + + if cmdopt&4: + if not PgLOG.PGLOG['ERRFILE']: + PgLOG.PGLOG['ERRFILE'] = re.sub(r'\.log$', '.err', PgLOG.PGLOG['LOGFILE'], 1) + bckcmd += " 2>> {}/{}".format(PgLOG.PGLOG['LOGPATH'], PgLOG.PGLOG['ERRFILE']) + + bckcmd += " &" + os.system(bckcmd) + return record_background(cmd, logact) + +# +# get background process id for given bcmd +# +def bcmd2cbid(bcmd): + + for cbid in CBIDS: + if CBIDS[cbid] == bcmd: return cbid 
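+    # linear reverse lookup: CBIDS maps process id -> command string, so the
+    # id for a given command is found by scanning the saved entries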
+
+    return 0
+
+#
+# check one or all background processes if they are still running
+# bid - check this specified background process id if given
+# return the number of processes still running
+#
+def check_background(bcmd, bid = 0, logact = PgLOG.LOGWRN, dowait = 0):
+
+    if PGSIG['BPROC'] < 2: return 0   # no background process
+
+    if logact&PgLOG.EXITLG: logact &= ~PgLOG.EXITLG
+    if not bid and bcmd: bid = bcmd2cbid(bcmd)
+    i = 0
+    while True:
+        bcnt = 0
+        if bid:
+            if check_process(bid):   # process is not done yet
+                if bcmd:
+                    PgLOG.pglog("{}({}): Background process still running".format(bcmd, bid), logact)
+                else:
+                    PgLOG.pglog("{}: Background process still running".format(bid), logact)
+                bcnt = 1
+            elif bid in CBIDS:
+                del CBIDS[bid]   # clean the saved info for the process
+        elif not bcmd:
+            for bid in list(CBIDS):   # copy keys; CBIDS may shrink in the loop
+                if check_process(bid):   # process is not done yet
+                    bcnt += 1
+                else:
+                    del CBIDS[bid]
+
+        if not (bcnt and dowait): break
+        show_wait_message(i, "{}: wait {}/{} background processes".format(PGSIG['DSTR'], bcnt, PGSIG['BPROC']), logact, dowait)
+        i += 1
+
+    return bcnt
+
+#
+# check and record process id for background command; return 1 if successful;
+# 0 otherwise; -1 if done already
+#
+def record_background(bcmd, logact = PgLOG.LOGWRN):
+
+    ms = re.match(r'^(\S+)', bcmd)
+    if ms:
+        aname = ms.group(1)
+    else:
+        aname = bcmd
+
+    mp = r"^\s*(\S+)\s+(\d+)\s+1\s+.*{}(.*)$".format(aname)
+    pc = "ps -u {},{} -f | grep ' 1 ' | grep {}".format(PgLOG.PGLOG['CURUID'], PgLOG.PGLOG['RDAUSER'], aname)
+    for i in range(2):
+        buf = PgLOG.pgsystem(pc, logact, 20+1024)
+        if buf:
+            lines = buf.split('\n')
+            for line in lines:
+                ms = re.match(mp, line)
+                if not ms: continue
+                (uid, sbid, acmd) = ms.groups()
+                bid = int(sbid)
+                if bid in CBIDS: return -1
+                if uid == PgLOG.PGLOG['RDAUSER']:
+                    acmd = re.sub(r'^\.(pl|py)\s+', '', acmd, 1)
+                    if re.match(r'^{}{}'.format(aname, acmd), bcmd): continue
+                CBIDS[bid] = bcmd
+                return 1
+        time.sleep(2)
+
+    return 0
+
+#
+# sleep for the given period for the daemon; stop if the maximum running time is reached
+#
+def sleep_daemon(wtime = 0, mtime = None):
+
+    if not wtime: wtime = PGSIG['WTIME']
+    if mtime is None: mtime = PGSIG['MTIME']
+
+    if mtime > 0:
+        rtime = int(time.time()) - PGSIG['STIME']
+        if rtime >= mtime:
+            PGSIG['QUIT'] = 1
+            wtime = 0
+
+    if wtime: time.sleep(wtime)
+    return wtime
+
+#
+# show wait message every 30 loops and then sleep for PGSIG['WTIME']
+#
+def show_wait_message(loop, msg, logact = PgLOG.LOGWRN, dowait = 0):
+
+    if loop > 0 and (loop%30) == 0:
+        PgLOG.pglog("{} at {}".format(msg, PgLOG.current_datetime()), logact)
+
+    if dowait: time.sleep(PGSIG['WTIME'])
+
+#
+# register a timeout handler to raise a timeout error
+#
+@contextmanager
+def pgtimeout(seconds = 0, logact = 0):
+
+    if not seconds: seconds = PgLOG.PGLOG['TIMEOUT']
+    signal.signal(signal.SIGALRM, raise_pgtimeout)
+    signal.alarm(seconds)
+    try:
+        yield
+    except TimeoutError:
+        pass
+    finally:
+        signal.alarm(0)   # cancel any pending alarm
+        signal.signal(signal.SIGALRM, signal.SIG_IGN)
+
+def raise_pgtimeout(signum, frame):
+    raise TimeoutError
+
+def timeout_func():
+    # Add a timeout block.
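+    # pgtimeout() arms SIGALRM for the given number of seconds; the 10-second
+    # sleep below overruns the 1-second window, so raise_pgtimeout() fires and
+    # the final print is never reached (the TimeoutError is swallowed inside
+    # pgtimeout(), letting the caller continue past the block)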
+ with pgtimeout(1): + print('entering block') + import time + time.sleep(10) + print('This should never get printed because the line before timed out') diff --git a/src/rda_python_common/PgSplit.py b/src/rda_python_common/PgSplit.py new file mode 100644 index 0000000..6a517d2 --- /dev/null +++ b/src/rda_python_common/PgSplit.py @@ -0,0 +1,298 @@ +# +############################################################################### +# +# Title : PgSplit.py -- PostgreSQL DataBase Interface foe table wfile +# Author : Zaihua Ji, zji@ucar.edu +# Date : 09/010/2024 +# Purpose : Python library module to handle query and manipulate table wfile +# +# Work File : $DSSHOME/lib/python/PgSplit.py +# Github : https://github.com/NCAR/rda-shared-libraries.git +# +############################################################################### + +import os +import re +from os import path as op +import PgLOG +import PgDBI +import PgUtil + +# +# compare wfile records between tables wfile and wfile_dNNNNNN, +# and return the records need to be added, modified and deleted +# +def compare_wfile(wfrecs, dsrecs): + + flds = dsrecs.keys() + flen = len(flds) + arecs = dict(zip(flds, [[]]*flen)) + mrecs = {} + drecs = [] + wfcnt = len(wfrecs['wid']) + dscnt = len(dsrecs['wid']) + pi = pj = -1 + i = j = 0 + while i < wfcnt and j < dscnt: + if i > pi: + wfrec = PgUtil.onerecord(wfrecs, i) + wwid = wfrec['wid'] + pi = i + if j > pj: + dsrec = PgUtil.onerecord(dsrecs, j) + dwid = dsrec['wid'] + pj = j + if wwid == dwid: + mrec = compare_one_record(flds, wfrec, dsrec) + if mrec: mrecs[wwid] = mrec + i += 1 + j += 1 + elif wwid > dwid: + drecs.append(dwid) + j += 1 + else: + for fld in flds: + arecs[fld].append(wfrec[fld]) + i += 1 + if i < wfcnt: + for fld in flds: + arecs[fld].extend(wfrecs[fld][i:wfcnt]) + elif j < dscnt: + drecs.extend(dsrecs['wid'][j:dscnt]) + + if len(arecs['wid']) == 0: arecs = {} + + return (arecs, mrecs, drecs) + +# +# Compare column values and return the new one; empty if the same +# +def compare_one_record(flds, wfrec, dsrec): + + mrec = {} + for fld in flds: + if wfrec[fld] != dsrec[fld]: mrec[fld] = wfrec[fld] + + return mrec + +# +# convert wfile records to wfile_dsid records +# +def wfile2wdsid(wfrecs, wids = None): + + dsrecs = {} + if wfrecs: + for fld in wfrecs: + if fld == 'dsid': continue + dsrecs[fld] = wfrecs[fld] + if wids: dsrecs['wid'] = wids + return dsrecs + +# +# trim wfile records +# +def trim_wfile_fields(wfrecs): + + records = {} + if 'wfile' in wfrecs: records['wfile'] = wfrecs['wfile'] + if 'dsid' in wfrecs: records['dsid'] = wfrecs['dsid'] + + return records + +# +# check the condition string, and add dsid if needed +# +def get_dsid_condition(dsid, condition): + + if condition: + if re.search('(^|.| )(wid|dsid)\s*=', condition): + return condition + else: + dscnd = "wfile.dsid = '{}' ".format(dsid) + if not re.match(r'^\s*(ORDER|GROUP|HAVING|OFFSET|LIMIT)\s', condition, re.I): dscnd += 'AND ' + return dscnd + condition # no where clause, append directly + else: + return "wfile.dsid = '{}'".format(dsid) + +# +# insert one record into wfile and/or wfile_dsid +# +def pgadd_wfile(dsid, wfrec, logact = PgLOG.LOGERR, getid = None): + + + record = {'wfile' : wfrec['wfile'], + 'dsid' : (wfrec['dsid'] if 'dsid' in wfrec else dsid)} + wret = PgDBI.pgadd('wfile', record, logact, 'wid') + if wret: + record = wfile2wdsid(wfrec, wret) + PgDBI.pgadd('wfile_' + dsid, record, logact|PgLOG.ADDTBL) + + if logact&PgLOG.AUTOID or getid: + return wret + else: + return 1 if wret else 0 + +# +# 
insert multiple records into wfile and/or wfile_dsid
+#
+def pgmadd_wfile(dsid, wfrecs, logact = PgLOG.LOGERR, getid = None):
+
+    records = {'wfile' : wfrecs['wfile'],
+               'dsid' : (wfrecs['dsid'] if 'dsid' in wfrecs else [dsid]*len(wfrecs['wfile']))}
+    wret = PgDBI.pgmadd('wfile', records, logact, 'wid')
+    wcnt = wret if isinstance(wret, int) else len(wret)
+    if wcnt:
+        records = wfile2wdsid(wfrecs, wret)
+        PgDBI.pgmadd('wfile_' + dsid, records, logact|PgLOG.ADDTBL)
+
+    if logact&PgLOG.AUTOID or getid:
+        return wret
+    else:
+        return wcnt
+
+#
+# update one or multiple rows in wfile and/or wfile_dsid
+# exclude dsid in condition
+#
+def pgupdt_wfile(dsid, wfrec, condition, logact = PgLOG.LOGERR):
+
+    record = trim_wfile_fields(wfrec)
+    if record:
+        wret = PgDBI.pgupdt('wfile', record, get_dsid_condition(dsid, condition), logact)
+    else:
+        wret = 1
+    if wret:
+        record = wfile2wdsid(wfrec)
+        if record: wret = PgDBI.pgupdt("wfile_" + dsid, record, condition, logact|PgLOG.ADDTBL)
+
+    return wret
+
+#
+# update one row in wfile and/or wfile_dsid with dsid change
+# exclude dsid in condition
+#
+def pgupdt_wfile_dsid(dsid, odsid, wfrec, wid, logact = PgLOG.LOGERR):
+
+    record = trim_wfile_fields(wfrec)
+    cnd = 'wid = {}'.format(wid)
+    if record:
+        wret = PgDBI.pgupdt('wfile', record, cnd, logact)
+    else:
+        wret = 1
+    if wret:
+        record = wfile2wdsid(wfrec)
+        tname = 'wfile_' + dsid
+        doupdt = True
+        if odsid and odsid != dsid:
+            oname = 'wfile_' + odsid
+            pgrec = PgDBI.pgget(oname, '*', cnd, logact|PgLOG.ADDTBL)
+            if pgrec:
+                for fld in record:
+                    pgrec[fld] = record[fld]
+                wret = PgDBI.pgadd(tname, pgrec, logact|PgLOG.ADDTBL)
+                if wret: PgDBI.pgdel(oname, cnd, logact)
+                doupdt = False
+        if doupdt and record:
+            wret = PgDBI.pgupdt(tname, record, cnd, logact|PgLOG.ADDTBL)
+
+    return wret
+
+#
+# delete one or multiple rows in wfile and/or wfile_dsid, and add the record(s) into wfile_delete
+# exclude dsid in condition
+#
+def pgdel_wfile(dsid, condition, logact = PgLOG.LOGERR):
+
+    pgrecs = pgmget_wfile(dsid, '*', condition, logact|PgLOG.ADDTBL)
+    wret = PgDBI.pgdel('wfile', get_dsid_condition(dsid, condition), logact)
+    if wret: PgDBI.pgdel("wfile_" + dsid, condition, logact)
+    if wret and pgrecs: PgDBI.pgmadd('wfile_delete', pgrecs, logact)
+
+    return wret
+
+#
+# delete one or multiple rows in sfile, and add the record(s) into sfile_delete
+#
+def pgdel_sfile(condition, logact = PgLOG.LOGERR):
+
+    pgrecs = PgDBI.pgmget('sfile', '*', condition, logact)
+    sret = PgDBI.pgdel('sfile', condition, logact)
+    if sret and pgrecs: PgDBI.pgmadd('sfile_delete', pgrecs, logact)
+
+    return sret
+
+#
+# update one or multiple rows in wfile and/or wfile_dsid for multiple dsid
+# exclude dsid in condition
+#
+def pgupdt_wfile_dsids(dsid, dsids, brec, bcnd, logact = PgLOG.LOGERR):
+
+    record = trim_wfile_fields(brec)
+    if record:
+        wret = PgDBI.pgupdt("wfile", record, bcnd, logact)
+    else:
+        wret = 1
+    if wret:
+        record = wfile2wdsid(brec)
+        if record:
+            wret = 0
+            dids = [dsid]
+            if dsids: dids.extend(dsids.split(','))
+            for did in dids:
+                wret += PgDBI.pgupdt("wfile_" + did, record, bcnd, logact|PgLOG.ADDTBL)
+
+    return wret
+
+#
+# get one record from wfile or wfile_dsid
+# exclude dsid in fields and condition
+#
+def pgget_wfile(dsid, fields, condition, logact = PgLOG.LOGERR):
+
+    tname = "wfile_" + dsid
+    flds = fields.replace('wfile.', tname + '.')
+    cnd = condition.replace('wfile.', tname + '.')
+    record = PgDBI.pgget(tname, flds, cnd, logact|PgLOG.ADDTBL)
+    if record and flds == '*': record['dsid'] = dsid
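+    # the per-dataset table wfile_<dsid> does not store a dsid column, so for
+    # a 'SELECT *' query the dsid value is backfilled from the given argument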
+ return record + +# +# get one record from wfile or wfile_dsid joing other tables +# exclude dsid in fields and condition +# +def pgget_wfile_join(dsid, tjoin, fields, condition, logact = PgLOG.LOGERR): + + tname = "wfile_" + dsid + flds = fields.replace('wfile.', tname + '.') + jname = tname + ' ' + tjoin.replace('wfile.', tname + '.') + cnd = condition.replace('wfile.', tname + '.') + record = PgDBI.pgget(jname, flds, cnd, logact|PgLOG.ADDTBL) + if record and flds == '*': record['dsid'] = dsid + return record + +# +# get multiple records from wfile or wfile_dsid +# exclude dsid in fields and condition +# +def pgmget_wfile(dsid, fields, condition, logact = PgLOG.LOGERR): + + tname = "wfile_" + dsid + flds = fields.replace('wfile.', tname + '.') + cnd = condition.replace('wfile.', tname + '.') + records = PgDBI.pgmget(tname, flds, cnd, logact|PgLOG.ADDTBL) + if records and flds == '*': records['dsid'] = [dsid]*len(records['wid']) + return records + +# +# get multiple records from wfile or wfile_dsid joining other tables +# exclude dsid in fields and condition +# +def pgmget_wfile_join(dsid, tjoin, fields, condition, logact = PgLOG.LOGERR): + + tname = "wfile_" + dsid + flds = fields.replace('wfile.', tname + '.') + jname = tname + ' ' + tjoin.replace('wfile.', tname + '.') + cnd = condition.replace('wfile.', tname + '.') + records = PgDBI.pgmget(jname, flds, cnd, logact|PgLOG.ADDTBL) + if records and flds == '*': records['dsid'] = [dsid]*len(records['wid']) + return records diff --git a/src/rda_python_common/PgUtil.py b/src/rda_python_common/PgUtil.py new file mode 100644 index 0000000..20aec9b --- /dev/null +++ b/src/rda_python_common/PgUtil.py @@ -0,0 +1,1853 @@ +# +############################################################################### +# +# Title : PgUtil.py -- module for misc utilities. 
+# Author : Zaihua Ji, zji@ucar.edu +# Date : 07/27/2020 +# Purpose : python library module for global misc utilities +# +# Work File : $DSSHOME/lib/python/PgUtil.py +# Github : https://github.com/NCAR/rda-shared-libraries.git +# +############################################################################### +# +import os +import re +import time +import datetime +import calendar +import glob +from os import path as op +import PgLOG + +DATEFMTS = { + 'C' : '(CC|C)', # century + 'Y' : '(YYYY|YY00|YYY|YY|YEAR|YR|Y)', # YYY means decade + 'Q' : '(QQ|Q)', # quarter + 'M' : '(Month|Mon|MM|M)', # numeric or string month + 'W' : '(Week|Www|W)', # string or numeric weedday + 'D' : '(DDD|DD|D)', # days in year or month + 'H' : '(HHH|HH|H)', # hours in month or day + 'N' : '(NNNN|NN|N)', # minutes in day or hour + 'S' : '(SSSS|SS|S)' # seconds in hour or minute +} + +MONTHS = [ + "january", "february", "march", "april", "may", "june", + "july", "august", "september", "october", "november", "december" +] +MNS = ["jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"] +WDAYS = ["sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday"] + +WDS = ["sun", "mon", "tue", "wed", "thu", "fri", "sat"] +MDAYS = [365, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] + +# +# dt: optional given date in format of "YYYY-MM-DD" +# return weekday: 0 - Sunday, 1 - Monday, ..., 6 - Saturday +# +def get_weekday(date = None): + + if date is None: + ct = time.gmtime() if PgLOG.PGLOG['GMTZ'] else time.localtime() + else: + ct = time.strptime(str(date), "%Y-%m-%d") + + return (ct[6]+1)%7 + +# +# mn: given month string like "Jan" or "January", or numeric number 1 to 12 +# Return: numeric Month if not fmt (default); three-charater or full month names for given fmt +# +def get_month(mn, fmt = None): + + if not isinstance(mn, int): + if re.match(r'^\d+$', mn): + mn = int(mn) + else: + for m in range(12): + if re.match(mn, MONTHS[m], re.I): + mn = m + 1 + break + + if fmt and mn > 0 and mn < 13: + slen = len(fmt) + if slen == 2: + smn = "{:02}".format(mn) + elif re.match(r'^mon', fmt, re.I): + smn = MNS[mn-1] if slen == 3 else MONTHS[mn-1] + if re.match(r'^Mon', fmt): + smn = smn.capitalize() + elif re.match(r'^MON', fmt): + smn = smn.upper() + else: + smn = str(mn) + return smn + else: + return mn + +# +# wday: given weekday string like "Sun" or "Sunday", or numeric number 0 to 6 +# Return: numeric Weekday if !fmt (default); three-charater or full week name for given fmt +# +def get_wday(wday, fmt = None): + + if not isinstance(wday, int): + if re.match(r'^\d+$', wday): + wday = int(wday) + else: + for w in range(7): + if re.match(wday, WDAYS[w], re.I): + wday = w + break + + if fmt and wday >= 0 and wday <= 6: + slen = len(fmt) + if slen == 4: + swday = WDAYS[w] + if re.match(r'^We', fmt): + swday = swday.capitalize() + elif re.match(r'^WE', fmt): + swday = swday.upper() + elif slen == 3: + swday = WDS[wday] + if re.match(r'^Ww', fmt): + swday = swday.capitalize() + elif re.match(r'^WW', fmt): + swday = swday.upper() + else: + swday = str(wday) + return swday + else: + return wday + +# +# file: given file name +# Return: type if given file name is a valid online file; '' otherwise +# +def valid_online_file(file, type = None, exists = None): + + if exists is None or exists: + if not op.exists(file): return '' # file does not exist + + bname = op.basename(file) + if re.match(r'^,.*', bname): return '' # hidden file + + if re.search(r'index\.(htm|html|shtml)$', bname, re.I): return '' 
# index file + + if type and type != 'D': return type + + if re.search(r'\.(doc|php|html|shtml)(\.|$)', bname, re.I): return '' # file with special extention + + return 'D' + +# +# Return: current time string in format of HH:MM:SS +# +def curtime(getdate = False): + + ct = time.gmtime() if PgLOG.PGLOG['GMTZ'] else time.localtime() + + fmt = "%Y-%m-%d %H:%M:%S" if getdate else "%H:%M:%S" + + return time.strftime(fmt, ct) + +# +# wrapper function of curtime(True) to get datetime in form of YYYY-MM-DD HH:NN:SS +# +def curdatetime(): + return curtime(True) + +# +# fmt: optional date format, defaults to YYYY-MM-DD +# Return: current (date, hour) +# +def curdatehour(fmt = None): + + ct = time.gmtime() if PgLOG.PGLOG['GMTZ'] else time.localtime() + + dt = fmtdate(ct[0], ct[1], ct[2], fmt) if fmt else time.strftime("%Y-%m-%d", ct) + + return [dt, ct[3]] + +# +# tm: optional time in seconds since the Epoch +# Return: current date and time strings +# +def get_date_time(tm = None): + + act = ct = None + if tm == None: + ct = time.gmtime() if PgLOG.PGLOG['GMTZ'] else time.localtime() + elif isinstance(tm, str): + act = tm.split(' ') + elif isinstance(tm, (int, float)): + ct = time.localtime(tm) + elif isinstance(tm, datetime.datetime): + act = str(tm).split(' ') + elif isinstance(tm, datetime.date): + act = [str(tm), '00:00:00'] + elif isinstance(tm, datetime.time): + act = [None, str(tm)] + + if ct == None: + return act if act else None + else: + return [time.strftime("%Y-%m-%d", ct), time.strftime("%H:%M:%S", ct)] + +# +# tm: optional time in seconds since the Epoch +# Return: current datetime strings +# +def get_datetime(tm = None): + + if tm == None: + ct = time.gmtime() if PgLOG.PGLOG['GMTZ'] else time.localtime() + return time.strftime("%Y-%m-%d %H:%M:%S", ct) + elif isinstance(tm, str): + return tm + elif isinstance(tm, (int, float)): + ct = time.localtime(tm) + return time.strftime("%Y-%m-%d %H:%M:%S", ct) + elif isinstance(tm, datetime.datetime): + return str(tm) + elif isinstance(tm, datetime.date): + return (str(tm) + ' 00:00:00') + + return tm + + +# +# file: file name, get curent timestamp if missed +# Return: timestsmp string in format of 'YYYYMMDDHHMMSS +# +def timestamp(file = None): + + if file is None: + ct = time.gmtime() if PgLOG.PGLOG['GMTZ'] else time.localtime() + else: + mt = os.stat(file).st_mtime # file last modified time + ct = time.gmtime(mt) if PgLOG.PGLOG['GMTZ'] else time.localtime(mt) + + return time.strftime("%Y%m%d%H%M%S", ct) + +# +# dt: datetime string +# check date/time and set to default one if empty date +# +def check_datetime(date, default): + + if not date: return default + if not isinstance(date, str): date = str(date) + if re.match(r'^0000', date): return default + + return date + +# +# fmt: date format, default to "YYYY-MM-DD" +# Return: new formated current date string +# +def curdate(fmt = None): + + ct = time.gmtime() if PgLOG.PGLOG['GMTZ'] else time.localtime() + + return fmtdate(ct[0], ct[1], ct[2], fmt) if fmt else time.strftime("%Y-%m-%d", ct) + +# +# check given string to identify temporal pattern and their units +# defined in (keys DATEFMTS) +# +def temporal_pattern_units(string, seps): + + mkeys = ['D', 'Q', 'M', 'C', 'Y', 'H', 'N', 'S'] + units = {} + match = seps[0] + "([^" + seps[1] + "]+)" + seps[1] + patterns = re.findall(match, string) + + for pattern in patterns: + # skip generic pattern and current time + if re.match(r'^(P\d*|C.+C)$', pattern, re.I): continue + + for mkey in mkeys: + ms = re.findall(DATEFMTS[mkey], pattern, re.I) + if ms: + 
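+                # record the base unit for each matched temporal pattern:
+                # quarters advance 3 months at a time, centuries 100 years,
+                # and all other patterns advance in single units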
if mkey == 'Q': + units[mkey] = 3 + elif mkey == 'C': + units[mkey] = 100 + else: + units[mkey] = 1 + + for m in ms: + pattern = pattern.replace(m, '', 1) + + return units + +# +# format output for given date and hour +# +def format_datehour(date, hour, tofmt = None, fromfmt = None): + + if date: + datehour = format_date(str(date), tofmt, fromfmt) + elif tofmt: + datehour = tofmt + else: + datehour = '' + + if hour != None: + if tofmt: + fmts = re.findall(DATEFMTS['H'], datehour, re.I) + for fmt in fmts: + if len(fmt) > 1: + shr = "{:02}".format(int(hour)) + else: + shr = str(hour) + datehour = re.sub(fmt, shr, datehour, 1) + else: + datehour += " {:02}".format(int(hour)) + + return datehour + +# +# split a date, time or datetime into an array according to +# the sep value; str to int for digital values +# +def split_datetime(sdt, sep = '\D'): + + if not isinstance(sdt, str): sdt = str(sdt) + adt = re.split(sep, sdt) + acnt = len(adt) + for i in range(acnt): + if re.match('^\d+$', adt[i]): adt[i] = int(adt[i]) + return adt + +# +# date: given date in format of fromfmt +# tofmt: date formats; ex. "Month D, YYYY" +# fromfmt: date formats, default to YYYY-MM-DD +# Return: new formated date string according to tofmt +# +def format_date(cdate, tofmt = None, fromfmt = None): + + if not cdate: return cdate + if not isinstance(cdate, str): cdate = str(cdate) + dates = [None, None, None] + sep = '|' + mns = sep.join(MNS) + months = sep.join(MONTHS) + mkeys = ['D', 'M', 'Q', 'Y', 'C', 'H'] + PATTERNS = ['(\d\d\d\d)', '(\d+)', '(\d\d)', + '(\d\d\d)', '(' + mns + ')', '(' + months + ')'] + + if not fromfmt: + if not tofmt: + if re.match(r'^\d\d\d\d-\d\d-\d\d$', cdate): return cdate # no need formatting + ms = re.match(r'^\d+(\W)\d+(\W)\d+', cdate) + if ms: + fromfmt = "Y" + ms.group(1) + "M" + ms.group(2) + "D" + else: + PgLOG.pglog(cdate + ": Invalid date, should be in format YYYY-MM-DD", PgLOG.LGEREX) + + pattern = fromfmt + fmts = {} + formats = {} + for mkey in mkeys: + ms = re.search(DATEFMTS[mkey], pattern, re.I) + if ms: + fmts[mkey] = ms.group(1) + pattern = re.sub(fmts[mkey], '', pattern) + + cnt = 0 + for mkey in fmts: + fmt = fmts[mkey] + i = len(fmt) + if mkey == 'D': + if i == 4: i = 1 + elif mkey == 'M': + if i == 3: i = 4 + elif mkey == 'Y': + if i == 4: i = 0 + formats[fromfmt.find(fmt)] = fmt + fromfmt = fromfmt.replace(fmt, PATTERNS[i]) + cnt += 1 + + ms = re.findall(fromfmt, cdate) + mcnt = len(ms[0]) if ms else 0 + i = 0 + for k in sorted(formats): + if i >= mcnt: break + fmt = formats[k] + val = ms[0][i] + if re.match(r'^Y', fmt, re.I): + dates[0] = int(val) + if len(fmt) == 3: dates[0] *= 10 + elif re.match(r'^C', fmt, re.I): + dates[0] = 100 * int(val) # year at end of century + elif re.match(r'^M', fmt, re.I): + if re.match(r'^Mon', fmt, re.I): + dates[1] = get_month(val) + else: + dates[1] = int(val) + elif re.match(r'^Q', fmt, re.I): + dates[1] = 3 * int(val) # month at end of quarter + elif re.match(r'^H', fmt, re.I): # hour + dates.append(int(val)) + else: # day + dates[2] = int(val) + i += 1 + + if len(dates) > 3: + cdate = fmtdatehour(dates[0], dates[1], dates[2], dates[3], tofmt) + else: + cdate = fmtdate(dates[0], dates[1], dates[2], tofmt) + + return cdate + +# +# yr: year value +# mn: month value, 1-12 +# dy: day of the month +# hr: hour of the day +# nn: minute of the hour +# ss: second of the minute +# tofmt: date format, ex. 
"Month D, YYYY", default to "YYYY-MM-DD HH:NN:SS" +# Return: new formated datehour string +# +def fmtdatetime(yr, mn, dy, hr = None, nn = None, ss = None, tofmt = None): + + if not tofmt: tofmt = "YYYY-MM-DD HH:NN:SS" + + tms = [ss, nn, hr, dy] + fks = ['S', 'N', 'H'] + ups = [60, 60, 24] + + # adjust second/minute/hour values out of range + for i in range(3): + if tms[i] != None and tms[i+1] != None: + if tms[i] < 0: + while tms[i] < 0: + tms[i] += ups[i] + tms[i+1] -= 1 + elif tms[i] >= ups[i]: + while tms[i] >= ups[i]: + tms[i] -= ups[i] + tms[i+1] += 1 + + sdt = fmtdate(yr, mn, dy, tofmt) + + # format second/minute/hour values + for i in range(3): + if tms[i] != None: + ms = re.search(DATEFMTS[fks[i]], sdt, re.I) + if ms: + fmt = ms.group(1) + if len(fmt) == 2: + str = "{:02}".format(tms[i]) + else: + str = str(tms[i]) + sdt = re.sub(fmt, str, sdt, 1) + + return sdt + +# +# yr: year value +# mn: month value, 1-12 +# dy: day of the month +# hr: hour of the day +# tofmt: date format, ex. "Month D, YYYY", default to "YYYY-MM-DD:HH" +# Return: new formated datehour string +# +def fmtdatehour(yr, mn, dy, hr, tofmt = None): + + if not tofmt: tofmt = "YYYY-MM-DD:HH" + + if hr != None and dy != None: # adjust hour value out of range + if hr < 0: + while hr < 0: + hr += 24 + dy -= 1 + elif hr > 23: + while ht > 23: + hr -= 24 + dy += 1 + + datehour = fmtdate(yr, mn, dy, tofmt) + + if hr != None: + ms = re.search(DATEFMTS['H'], datehour, re.I) + if ms: + fmt = ms.group(1) + if len(fmt) == 2: + shr = "{:02}".format(hr) + else: + shr = str(hr) + datehour = re.sub(fmt, shr, datehour, 1) + + return datehour + +# +# yr: year value +# mn: month value, 1-12 +# dy: day of the month +# tofmt: date format, ex. "Month D, YYYY", default to "YYYY-MM-DD" +# Return: new formated date string +# +def fmtdate(yr, mn, dy, tofmt = None): + + (y, m, d) = adjust_ymd(yr, mn, dy) + if not tofmt or tofmt == 'YYYY-MM-DD': return "{}-{:02}-{:02}".format(y, m, d) + + if dy != None: + md = re.search(DATEFMTS['D'], tofmt, re.I) + if md: + fmt = md.group(1) # day + slen = len(fmt) + if slen > 2: # days of the year + for i in range(1, m): d += MDAYS[i] + sdy = "{:03}".format(d) + elif slen == 2: + sdy = "{:02}".format(d) + else: + sdy = str(d) + tofmt = re.sub(fmt, sdy, tofmt, 1) + + if mn != None: + md = re.search(DATEFMTS['M'], tofmt, re.I) + if md: + fmt = md.group(1) # month + slen = len(fmt) + if slen == 2: + smn = "{:02}".format(m) + elif re.match(r'^mon', fmt, re.I): + smn = MNS[m-1] if slen == 3 else MONTHS[m-1] + if re.match(r'^Mo', fmt): + smn = smn.capitalize() + elif re.match(r'^MO', fmt): + smn = smn.upper() + else: + smn = str(m) + tofmt = re.sub(fmt, smn, tofmt, 1) + else: + md = re.search(DATEFMTS['Q'], tofmt, re.I) + if md: + fmt = md.group(1) # quarter + m = int((m+2)/3) + smn = "{:02}".format(m) if len(fmt) == 2 else str(m) + tofmt = re.sub(fmt, smn, tofmt, 1) + + if yr != None: + md = re.search(DATEFMTS['Y'], tofmt, re.I) + if md: + fmt = md.group(1) # year + slen = len(fmt) + if slen == 2: + syr = "{:02}".format(y%100) + elif slen == 3: # decade + if y > 999: y = int(y/10) + syr = "{:03}".format(y) + else: + if re.search(r'^YY00', fmt, re.I): y = 100*int(y/100) # hundred years + syr = "{:04}".format(y) + tofmt = re.sub(fmt, syr, tofmt, 1) + else: + md = re.search(DATEFMTS['C'], tofmt, re.I) + if md: + fmt = md.group(1) # century + slen = len(fmt) + if y > 999: + y = 1 + int(y/100) + elif y > 99: + y = 1 + int(yr/10) + syr = "{:02}".format(y) + tofmt = re.sub(fmt, syr, tofmt, 1) + + return tofmt + +# +# 
format given date and time into standard timestamp +# +def join_datetime(sdate, stime): + + if not sdate: return None + if not stime: stime = "00:00:00" + if not isinstance(sdate, str): sdate = str(sdate) + if not isinstance(stime, str): stime = str(stime) + if re.match(r'^\d:', stime): stime = '0' + stime + + return "{} {}".format(sdate, stime) + +fmttime = join_datetime + +# +# split a date or datetime into an array of [date, time] +# +def date_and_time(sdt): + + if not sdt: return [None, None] + if not isinstance(sdt, str): sdt = str(sdt) + adt = re.split(' ', sdt) + acnt = len(adt) + if acnt == 1: adt.append('00:00:00') + return adt + +# +# convert given date/time to unix epoch time; -1 if cannot +# +def unixtime(stime): + + pt = [0]*9 + if not isinstance(stime, str): stime = str(stime) + ms = re.match(r'^(\d+)-(\d+)-(\d+)', stime) + if ms: + for i in range(3): + pt[i] = int(ms.group(i+1)) + ms = re.search(r'^(\d+):(\d+):(\d+)$', stime) + if ms: + for i in range(3): + pt[i+3] = int(ms.group(i+1)) + + pt[8] = -1 + return time.mktime(time.struct_time(pt)) + +# +# sdate: start date in form of 'YYYY' or 'YYYY-MM' or 'YYYY-MM-DD' +# edate: end date in form of 'YYYY' or 'YYYY-MM' or 'YYYY-MM-DD' +# Return: list of start and end dates in format of YYYY-MM-DD +# +def daterange(sdate, edate): + + if sdate: + if not isinstance(sdate, str): sdate = str(sdate) + if not re.search(r'\d+-\d+-\d+', sdate): + ms = re.match(r'^(\W*)(\d+)-(\d+)(\W*)$', sdate) + if ms: + sdate = "{}{}-{}-01{}".format(ms.group(1), ms.group(2), ms.group(3), ms.group(4)) + else: + ms = re.match(r'^(\W*)(\d+)(\W*)$', sdate) + if ms: + sdate = "{}{}-01-01{}".format(ms.group(1), ms.group(2), ms.group(3)) + + if edate: + if not isinstance(edate, str): edate = str(edate) + if not re.search(r'\d+-\d+-\d+', edate): + ms = re.match(r'^(\W*)(\d+)-(\d+)(\W*)$', edate) + if ms: + edate = "{}{}-{}-01{}".format(ms.group(1), ms.group(2), ms.group(3), ms.group(4)) + edate = adddate(edate, 0, 1, -1) + else: + ms = re.match(r'^(\W*)(\d+)(\W*)$', edate) + if ms: + edate = "{}{}-12-31{}".format(ms.group(1), ms.group(2), ms.group(3)) + + return [sdate, edate] + +# +# date to datetime range +# +def dtrange(dates): + + date = dates[0] + if date: + if not isinstance(date, str): date = str(date) + dates[0] = date + ' 00:00:00' + date = dates[1] + if date: + if not isinstance(date, str): date = str(date) + dates[1] = date + ' 23:59:59' + + return dates + +# +# sdate: starting date in format of 'YYYY-MM-DD' +# edate: ending date +# fmt: period format, ex. 
"YYYYMon-YYYMon", default to "YYYYMM-YYYYMM" +# Return: a string of formated period +# +def format_period(sdate, edate, fmt = None): + + period = '' + + if not fmt: + sfmt = efmt = "YYYYMM" + sep = '-' + else: + ms = re.match(r'^(.*)(\s*-\s*)(.*)$', fmt) + if ms: + (sfmt, sep, efmt) = ms.groups() + else: + sfmt = fmt + efmt = None + sep = '' + + if sdate: + if not isinstance(sdate, str): sdate = str(sdate) + ms = re.search(r'(\d+)-(\d+)-(\d+)', sdate) + if ms: + (yr, mn, dy) = ms.groups() + period = fmtdate(int(yr), int(mn), int(dy), sfmt) + + if sep: period += sep + + if efmt: + if re.search(r'current', efmt, re.I): + period += efmt + elif edate: + if not isinstance(edate, str): edate = str(edate) + ms = re.search(r'(\d+)-(\d+)-(\d+)', edate) + if ms: + (yr, mn, dy) = ms.groups() + period += fmtdate(int(yr), int(mn), int(dy), efmt) + + return period + +# +# dsid: given dataset id in form of dsNNN(.|)N, NNNN.N or [a-z]NNNNNN +# newid: True to format a new dsid; defaults to False for now +# returns a new or old dsid according to the newid option +# +def format_dataset_id(dsid, newid = PgLOG.PGLOG['NEWDSID'], logact = PgLOG.LGEREX): + + dsid = str(dsid) + ms = re.match(r'^([a-z])(\d\d\d)(\d\d\d)$', dsid) + if ms: + ids = list(ms.groups()) + if ids[0] not in PgLOG.PGLOG['DSIDCHRS']: + if logact: PgLOG.pglog("{}: dsid leading character must be '{}'".format(dsid, PgLOG.PGLOG['DSIDCHRS']), logact) + return dsid + if newid: return dsid + if ids[2][:2] != '00': + if logact: PgLOG.pglog(dsid + ": Cannot convert new dsid to old format", logact) + return dsid + return 'ds{}.{}'.format(ids[1], ids[2][2]) + + ms = re.match(r'^ds(\d\d\d)(\.|)(\d)$', dsid, re.I) + if not ms: ms = re.match(r'^(\d\d\d)(\.)(\d)$', dsid) + if ms: + if newid: + return "d{}00{}".format(ms.group(1), ms.group(3)) + else: + return 'ds{}.{}'.format(ms.group(1), ms.group(3)) + + if logact: PgLOG.pglog(dsid + ": invalid dataset id", logact) + return dsid + +# +# dsid: given dataset id in form of dsNNN(.|)N, NNNN.N or [a-z]NNNNNN +# newid: True to format a new dsid; defaults to False for now +# returns a new or old metadata dsid according to the newid option +# +def metadata_dataset_id(dsid, newid = PgLOG.PGLOG['NEWDSID'], logact = PgLOG.LGEREX): + + ms = re.match(r'^([a-z])(\d\d\d)(\d\d\d)$', dsid) + if ms: + ids = list(ms.groups()) + if ids[0] not in PgLOG.PGLOG['DSIDCHRS']: + if logact: PgLOG.pglog("{}: dsid leading character must be '{}'".format(dsid, PgLOG.PGLOG['DSIDCHRS']), logact) + return dsid + if newid: return dsid + if ids[2][:2] != '00': + if logact: PgLOG.pglog(dsid + ": Cannot convert new dsid to old format", logact) + return dsid + return '{}.{}'.format(ids[1], ids[2][2]) + + ms = re.match(r'^ds(\d\d\d)(\.|)(\d)$', dsid) + if not ms: ms = re.match(r'^(\d\d\d)(\.)(\d)$', dsid) + if ms: + if newid: + return "d{}00{}".format(ms.group(1), ms.group(3)) + else: + return '{}.{}'.format(ms.group(1), ms.group(3)) + + if logact: PgLOG.pglog(dsid + ": invalid dataset id", logact) + return dsid + + +# +# idstr: string holding a dsid in form of dsNNN(.|)N, NNNN.N or [a-z]NNNNNN +# and find it according to the flag value O (Old), N (New) or B (Both) formats +# returns dsid if found in given id string; None otherwise +# +def find_dataset_id(idstr, flag = 'B', logact = 0): + + if flag in 'NB': + ms = re.search(r'(^|\W)(([a-z])\d{6})($|\D)', idstr) + if ms and ms.group(3) in PgLOG.PGLOG['DSIDCHRS']: return ms.group(2) + if flag in 'OB': + ms = re.search(r'(^|\W)(ds\d\d\d(\.|)\d)($|\D)', idstr) + if not ms: ms = 
re.search(r'(^|\W)(\d\d\d\.\d)($|\D)', idstr) + if ms: return ms.group(2) + + if logact: PgLOG.pglog("{} : No valid dsid found for flag {}".format(idstr, flag), logact) + return None + +# +# find and convert all found dsids according to old/new dsids +# for newid = False/True +# +def convert_dataset_ids(idstr, newid = PgLOG.PGLOG['NEWDSID'], logact = 0): + + flag = 'O' if newid else 'N' + cnt = 0 + if idstr: + while True: + dsid = find_dataset_id(idstr, flag = flag) + if not dsid: break + ndsid = format_dataset_id(dsid, newid = newid, logact = logact) + if ndsid != dsid: idstr = idstr.replace(dsid, ndsid) + cnt += 1 + + return (idstr, cnt) + +# +# records: dict of mutiple records, +# idx: index of the records to return +# Return: a dict to the idx record out of records +# +def onerecord(records, idx): + + record = {} + + for fld in records: + record[fld] = records[fld][idx] + + return record + +# +# records: dict of mutiple records, +# record: record to add +# idx: index of the record to add +# Return: add a record to a dict of lists +# +def addrecord(records, record, idx): + + if records is None: records = {} # initialize dist of lists structure + if not records: + for key in record: + records[key] = [] + + for key in record: + slen = len(records[key]) + if idx < slen: + records[key][idx] = record[key] + else: + while idx > slen: + records[key].append(None) + slen += 1 + records[key].append(record[key]) + + return records + +# +# convert a hash with multiple rows from pgmget() to an array of hashes +# +def hash2array(hrecs, hkeys = None): + + if not hkeys: hkeys = list(hrecs) + acnt = len(hrecs[hkeys[0]]) if hrecs and hkeys[0] in hrecs else 0 + arecs = [None]*acnt + for i in range(acnt): + arec = {} + for hkey in hkeys: arec[hkey] = hrecs[hkey][i] + arecs[i] = arec + + return arecs + +# +# convert an array of hashes to a hash with multiple rows for pgmget() +# +def array2hash(arecs, hkeys = None): + + hrecs = {} + acnt = len(arecs) if arecs else 0 + if acnt > 0: + if not hkeys: hkeys = list(arecs[0]) + for hkey in hkeys: + hrecs[hkey] = [None]*acnt + for i in range(acnt): hrecs[hkey][i] = arecs[i][hkey] + + return hrecs + +# +# records: dict of mutiple records, +# opt: 0 - column count, +# 1 - row count, +# 2 - both +# Return: a single number or list of two dependend on given opt +# +def hashcount(records, opt = 0): + + ret = [0, 0] + + if records: + clen = len(records) + if opt == 0 or opt == 2: + ret[0] = clen + if opt == 1 or opt == 2: + ret[1] = len(next(iter(records.values()))) + + return ret if opt == 2 else ret[opt] + +# +# adict: dict a +# bdict: dict b +# default: default values if missed +# unique: unique join if set +# Return: the joined dict records with default value for missing ones +# For unique join, a record in bdict must not be contained in adict already +# +def joinhash(adict, bdict, default = None, unique = None): + + if not bdict: return adict + if not adict: return bdict + + akeys = list(adict.keys()) + bkeys = list(bdict.keys()) + acnt = len(adict[akeys[0]]) + bcnt = len(bdict[bkeys[0]]) + ckeys = [] # common keys for unique joins + + # check and assign default value for missing keys in adict + for bkey in bkeys: + if bkey in akeys: + if unique and bkey not in ckeys: ckeys.append(bkey) + else: + adict[bkey] = [default]*acnt + + # check and assign default value for missing keys in bdict + for akey in akeys: + if akey in bkeys: + if unique and akey not in ckeys: ckeys.append(akey) + else: + bdict[akey] = [default]*bcnt + + if unique: # append bdict + kcnt = len(ckeys) 
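+        # unique append: for each bdict row, compare against every adict row
+        # on all common keys; append the row only when no full match is found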
+        for i in range(bcnt):
+            j = 0
+            while(j < acnt):
+                k = 0
+                for ckey in ckeys:
+                    if pgcmp(adict[ckey][j], bdict[ckey][i]): break
+                    k += 1
+                if k >= kcnt: break
+                j += 1
+
+            if j >= acnt:
+                for key in adict:
+                    adict[key].append(bdict[key][i])
+    else:
+        for key in adict:
+            adict[key].extend(bdict[key])
+
+    return adict
+
+#
+# lst1: list 1
+# lst2: list 2
+# unique: unique join if set
+# Return: the joined list
+#
+def joinarray(lst1, lst2, unique = None):
+
+    if not lst2: return lst1
+    if not lst1: return lst2
+
+    cnt1 = len(lst1)
+    cnt2 = len(lst2)
+
+    if unique:
+        for i in range(cnt2):
+            for j in range(cnt1):
+                if pgcmp(lst1[j], lst2[i]) == 0: break
+            else:
+                lst1.append(lst2[i])
+    else:
+        lst1.extend(lst2)
+
+    return lst1
+
+#
+# Function: crosshash(ahash, bhash)
+# Return: a reference to the cross-joined hash records
+#
+def crosshash(ahash, bhash):
+
+    if not bhash: return ahash
+    if not ahash: return bhash
+
+    akeys = list(ahash.keys())
+    bkeys = list(bhash.keys())
+    acnt = len(ahash[akeys[0]])
+    bcnt = len(bhash[bkeys[0]])
+    rets = {}
+    for key in akeys: rets[key] = []
+    for key in bkeys: rets[key] = []
+    for i in range(acnt):
+        for j in range(bcnt):
+            for key in akeys: rets[key].append(ahash[key][i])
+            for key in bkeys: rets[key].append(bhash[key][j])
+
+    return rets
+
+#
+# strip database and table names for a field name
+#
+def strip_field(field):
+
+    ms = re.search(r'\.([^\.]+)$', field)
+    if ms: field = ms.group(1)
+
+    return field
+
+#
+# pgrecs: dict obtained from pgmget()
+# flds: list of single letter fields to be sorted on
+# hash: table dict for pre-defined fields
+# patterns: optional list of temporal patterns for order fields
+# Return: a sorted dict list
+#
+def sorthash(pgrecs, flds, hash, patterns = None):
+
+    fcnt = len(flds)   # count of fields to be sorted on
+
+    # set sorting order, descending (-1) or ascending (1)
+    # get the full field names to be sorted on
+    desc = [1]*fcnt
+    fields = []
+    nums = [1]*fcnt   # initialize each column as numerical
+    for i in range(fcnt):
+        if flds[i].islower(): desc[i] = -1
+        fld = strip_field(hash[flds[i].upper()][1])
+        fields.append(fld)
+
+    count = len(pgrecs[fields[0]])   # row count of pgrecs
+
+    if count < 2: return pgrecs   # no need of sorting
+    pcnt = len(patterns) if patterns else 0
+
+    # prepare the dict list for sorting
+    srecs = []
+    for i in range(count):
+        pgrec = onerecord(pgrecs, i)
+        rec = []
+        for j in range(fcnt):
+            if j < pcnt and patterns[j]:
+                # get the temporal part of each value matching the pattern
+                val = format_date(pgrec[fields[j]], "YYYYMMDDHH", patterns[j])
+            else:
+                # sort on the whole value if no pattern given
+                val = pgrec[fields[j]]
+
+            if nums[j]: nums[j] = pgnum(val)
+            rec.append(val)
+        rec.append(i)   # add column to cache the row index
+        srecs.append(rec)
+
+    srecs = quicksort(srecs, 0, count-1, desc, fcnt, nums)
+
+    # sort pgrecs according to the cached row index column in ordered srecs
+    rets = {}
+    for fld in pgrecs:
+        rets[fld] = []
+
+    for i in range(count):
+        pgrec = onerecord(pgrecs, srecs[i][fcnt])
+        for fld in pgrecs:
+            rets[fld].append(pgrec[fld])
+
+    return rets
+
+#
+# Return: the number of days between date1 and date2
+#
+def diffdate(date1, date2):
+
+    ut1 = ut2 = 0
+    if date1: ut1 = unixtime(date1)
+    if date2: ut2 = unixtime(date2)
+    return round((ut1 - ut2)/86400)   # 24*60*60
+
+#
+# Return: the number of seconds between time1 and time2
+#
+def difftime(time1, time2):
+
+    ut1 = ut2 = 0
+    if time1: ut1 = unixtime(time1)
+    if time2: ut2 = unixtime(time2)
+    return round(ut1 - ut2)
+
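+#
+# editor's sketch (hypothetical helper, not part of the original module):
+# a quick sanity check of the difference helpers above; both calls parse
+# plain 'YYYY-MM-DD' strings through unixtime()
+#
+def _demo_diff_helpers():
+
+    # one calendar day apart: 1 day, or 86400 seconds
+    assert diffdate('1970-01-02', '1970-01-01') == 1
+    assert difftime('1970-01-02', '1970-01-01') == 86400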
+diffdatetime = difftime + +# +# Return: the number of days between date and '1970-01-01 00:00:00' +# +def get_days(cdate): + + return diffdate(str(cdate), '1970-01-01') + +# +# Function: get_month_days(date) +# +# Return: the number of days in given month +# +def get_month_days(cdate): + + ms = re.match(r'^(\d+)-(\d+)', str(cdate)) + if ms: + yr = int(ms.group(1)) + mn = int(ms.group(2)) + return calendar.monthrange(yr, mn)[1] + else: + return 0 + +# +# Function: validate_date(date) +# +# Return: a date in format of YYYY-MM-DD thar all year/month/day are validated +# +def validate_date(cdate): + + ms = re.match(r'^(\d+)-(\d+)-(\d+)', str(cdate)) + if ms: + (yr, mn, dy) = (int(m) for m in ms.groups()) + if yr < 1000: + yr += 2000 + elif yr > 9999: + yr %= 10000 + if mn < 1: + mn = 1 + elif mn > 12: + mn = 12 + md = calendar.monthrange(yr, mn)[1] + if dy < 1: + dy = 1 + elif dy > md: + dy = md + cdate = '{}-{:02d}-{:02d}'.format(yr, mn, dy) + + return cdate + +# +# Function: get_date(days) +# +# Return: the date in format of "YYYY-MM-DD" for given number of days +# from '1970-01-01 00:00:00' +# +def get_date(days): + + return adddate('1970-01-01', 0, 0, int(days)) + +# +# compare date/hour and return the different hours +# +def diffdatehour(date1, hour1, date2, hour2): + + if hour1 is None: hour1 = 23 + if hour2 is None: hour2 = 23 + return (hour1 - hour2) + 24*diffdate(date1, date2) + +# +# hour difference between GMT and local time +# +def diffgmthour(): + + tg = time.gmtime() + tl = time.localtime() + dg = fmtdate(tg[0], tg[1], tg[2]) + dl = fmtdate(tl[0], tl[1], tl[2]) + hg = tg[3] + hl = tl[3] + + return diffdatehour(dg, hg, dl, hl) + +# +# compare date and time (if given) and return 1, 0 and -1 +# +def cmptime(date1, time1, date2, time2): + + stime1 = join_datetime(date1, time1) + stime2 = join_datetime(date2, time2) + + return pgcmp(stime1, stime2) + +# +# date: the original date in format of 'YYYY-MM-DD', +# mf: the number of month fractions to add +# nf: number of fractions of a month +# Return: new date +# +def addmonth(cdate, mf, nf = 1): + + if not mf: return cdate + if not nf or nf < 2: return adddate(cdate, 0, mf, 0) + + ms = re.match(r'^(\d+)-(\d+)-(\d+)$', cdate) + if ms: + (syr, smn, sdy) = ms.groups() + yr = int(syr) + mn = int(smn) + ody = int(sdy) + dy = 0 # set to end of previous month + ndy = int(30/nf) # number of days in each fraction + while ody > ndy: + dy += ndy + ody -= ndy + + dy += mf * ndy + if mf > 0: + while dy >= 30: + dy -= 30 + mn += 1 + else: + while dy < 0: + dy += 30 + mn -= 1 + + dy += ody + cdate = fmtdate(yr, mn, dy) + + return cdate + +# add yr years & mn months to yearmonth ym in format YYYYMM +def addyearmonth(ym, yr, mn): + + if yr == None: yr = 0 + if mn == None: mn = 0 + + ms =re.match(r'^(\d\d\d\d)(\d\d)$', ym) + if ms: + (syr, smn) = ms.groups() + yr = int(syr) + mn = int(smn) + if mn < 0: + while mn < 0: + yr -= 1 + mn += 12 + else: + while mn > 12: + yr += 1 + mn -= 12 + + ym = "{:04}{:02}".format(yr, mn) + + return ym + +# +# a wrapper to adddate() +# +def addNoLeapDate(cdate, yr, mn, dy): return adddate(cdate, yr, mn, dy) + +# +# set number of days in Beburary for Leap year according PgLOG.PGLOG['NOLEAP'] +# +def set_leap_mdays(year): + + if not PgLOG.PGLOG['NOLEAP'] and calendar.isleap(year): + MDAYS[0] = 366 + MDAYS[2] = 29 + ret = 1 + else: + MDAYS[0] = 365 + MDAYS[2] = 28 + ret = 0 + return ret + +# +# wrap on calendar.isleap() +# +def is_leapyear(year): return calendar.isleap(year) + +# +# reutn 1 if is end of month +# +def 
is_end_month(yr, mn, dy): + + set_leap_mdays(yr) + return 1 if dy == MDAYS[mn] else 0 + +# +# adust the year, month and day values that are out of ranges +# +def adjust_ymd(yr, mn, dy): + + if yr is None: yr = 1970 + if mn is None: mn = 1 + if dy is None: dy = 1 + + while True: + if mn > 12: + yr += 1 + mn -= 12 + continue + elif mn < 1: + yr -= 1 + mn += 12 + continue + + set_leap_mdays(yr) + + if dy < 1: + if(dy < -MDAYS[0]): + yr -= 1 + dy += MDAYS[0] + else: + mn -= 1 + if mn < 1: + yr -= 1 + mn += 12 + dy += MDAYS[mn] + continue + elif dy > MDAYS[mn]: + if(dy > MDAYS[0]): + dy -= MDAYS[0] + yr += 1 + else: + dy -= MDAYS[mn] + mn += 1 + continue + + break + + return [yr, mn, dy] + +# +# date: the original date in format of 'YYYY-MM-DD', +# yr: the number of years to add/subtract from the odate for positive/negative value, +# mn: the number of months to add/subtract from the odate for positive/negative value, +# dy: the number of days to add/subtract from the odate for positive/negative value) +# +# Return: new date +# +def adddate(cdate, yr, mn = 0, dy = 0, tofmt = None): + + if not cdate: return cdate + if not isinstance(cdate, str): cdate = str(cdate) + if yr is None: + yr = 0 + elif isinstance(yr, str): + yr = int(yr) + if mn is None: + mn = 0 + elif isinstance(mn, str): + mn = int(mn) + if dy is None: + dy = 0 + elif isinstance(dy, str): + dy = int(dy) + + ms = re.search(r'(\d+)-(\d+)-(\d+)', cdate) + if not ms: return cdate # non-standard date format + (nyr, nmn, ndy) = (int(m) for m in ms.groups()) + mend = 0 + if mn and ndy > 27: mend = is_end_month(nyr, nmn, ndy) + if yr: nyr += yr + if mn: + (nyr, nmn, tdy) = adjust_ymd(nyr, nmn+mn+1, 0) + if mend: ndy = tdy + if dy: ndy += dy + + return fmtdate(nyr, nmn, ndy, tofmt) + +# +# add given hours to the initial date and time +# +def addhour(sdate, stime, nhour): + + if nhour and isinstance(nhour, str): nhour = int(nhour) + if sdate and not isinstance(sdate, str): sdate = str(sdate) + if stime and not isinstance(stime, str): stime = str(stime) + if not nhour: return [sdate, stime] + + hr = dy = 0 + ms = re.match(r'^(\d+)', stime) + if ms: + shr = ms.group(1) + hr = int(shr) + nhour + if hr < 0: + while hr < 0: + dy -= 1 + hr += 24 + else: + while hr > 23: + dy += 1 + hr -= 24 + + shour = "{:02}".format(hr) + if shr != shour: stime = re.sub(shr, shour, stime, 1) + if dy: sdate = adddate(sdate, 0, 0, dy) + + return [sdate, stime] + +# +# add given years, months, days and hours to the initial date and hour +# +def adddatehour(sdate, nhour, yr, mn, dy, hr = 0): + + if sdate and not isinstance(sdate, str): sdate = str(sdate) + if hr: + if nhour != None: + if isinstance(nhour, str): nhour = int(nhour) + hr += nhour + if hr < 0: + while hr < 0: + dy -= 1 + hr += 24 + else: + while hr > 23: + dy += 1 + hr -= 24 + if nhour != None: nhour = hr + + if yr or mn or dy: sdate = adddate(sdate, yr, mn, dy) + + return [sdate, nhour] + +# +# add given yyyy, mm, dd, hh, nn, ss to sdatetime +# if nf, add fraction of month only +# +def adddatetime(sdatetime, yy, mm, dd, hh, nn, ss, nf = 0): + + if sdatetime and not isinstance(sdatetime, str): sdatetime = str(sdatetime) + (sdate, stime) = re.split(' ', sdatetime) + + if hh or nn or ss: (sdate, stime) = addtime(sdate, stime, hh, nn, ss) + if nf: + sdate = addmonth(sdate, mm, nf) + mm = 0 + if yy or mm or dd: sdate = adddate(sdate, yy, mm, dd) + + return "{} {}".format(sdate, stime) + +# +# add given yyyy, mm, dd, hh, nn, ss to sdatetime +# if nf, add fraction of month only +# +def adddatetime(sdatetime, 
yy, mm, dd, hh, nn, ss, nf = 0):
+
+    if sdatetime and not isinstance(sdatetime, str): sdatetime = str(sdatetime)
+    (sdate, stime) = re.split(' ', sdatetime)
+
+    if hh or nn or ss: (sdate, stime) = addtime(sdate, stime, hh, nn, ss)
+    if nf:
+        sdate = addmonth(sdate, mm, nf)
+        mm = 0
+    if yy or mm or dd: sdate = adddate(sdate, yy, mm, dd)
+
+    return "{} {}".format(sdate, stime)
+
+#
+# add given hours, minutes and seconds to the initial date and time
+#
+def addtime(sdate, stime, h, m, s):
+
+    if sdate and not isinstance(sdate, str): sdate = str(sdate)
+    if stime and not isinstance(stime, str): stime = str(stime)
+    ups = (60, 60, 24)
+    tms = [0, 0, 0, 0]   # (sec, min, hour, day)
+
+    if s: tms[0] += s
+    if m: tms[1] += m
+    if h: tms[2] += h
+    if stime:
+        ms = re.match(r'^(\d+):(\d+):(\d+)$', stime)
+        if ms:
+            tms[2] += int(ms.group(1))
+            tms[1] += int(ms.group(2))
+            tms[0] += int(ms.group(3))
+
+    for i in range(3):
+        if tms[i] < 0:
+            while tms[i] < 0:
+                tms[i] += ups[i]
+                tms[i+1] -= 1
+        elif tms[i] >= ups[i]:
+            while tms[i] >= ups[i]:
+                tms[i] -= ups[i]
+                tms[i+1] += 1
+
+    stime = "{:02}:{:02}:{:02}".format(tms[2], tms[1], tms[0])
+    if tms[3]: sdate = adddate(sdate, 0, 0, tms[3])
+
+    return [sdate, stime]
+
+#
+# add time interval array to datetime
+# opt = -1 - minus, 0 - begin time, 1 - add (default)
+#
+def addintervals(sdatetime, intv, opt = 1):
+
+    if not isinstance(sdatetime, str): sdatetime = str(sdatetime)
+    if not intv: return sdatetime
+    tv = [0]*7
+    i = 0
+    for v in intv:
+        tv[i] = v
+        i += 1
+
+    # assume the given datetime is end of the current interval;
+    # add one second to set it to beginning of the next one
+    if opt == 0: sdatetime = adddatetime(sdatetime, 0, 0, 0, 0, 0, 1)
+
+    if opt < 1:   # negative intervals for minus
+        for i in range(6):
+            if tv[i]: tv[i] = -tv[i]
+
+    return adddatetime(sdatetime, tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6])
+
+#
+# adjust end date to the specified day for frequency of year/month/week;
+# end of period if days == 0
+# nf - number of fractions of a month, for unit of 'M' only
+#
+def enddate(sdate, days, unit, nf = 0):
+
+    if sdate and not isinstance(sdate, str): sdate = str(sdate)
+    if days and isinstance(days, str): days = int(days)
+    if not (unit and unit in 'YMW'): return sdate
+
+    if unit == 'Y':
+        ms = re.match(r'^(\d+)', sdate)
+        if ms:
+            yr = int(ms.group(1))
+            if days:
+                mn = 1
+                dy = days
+            else:
+                mn = 12
+                dy = 31
+            sdate = fmtdate(yr, mn, dy)
+    elif unit == 'M':
+        ms = re.match(r'^(\d+)-(\d+)-(\d+)', sdate)
+        if ms:
+            (yr, mn, dy) = (int(m) for m in ms.groups())
+        else:
+            ms = re.match(r'^(\d+)-(\d+)', sdate)
+            if ms:
+                (yr, mn) = (int(m) for m in ms.groups())
+                dy = 1
+            else:
+                return sdate
+
+        if not nf or nf == 1:
+            nd = days if days else calendar.monthrange(yr, mn)[1]
+            if nd != dy: sdate = fmtdate(yr, mn, nd)
+        else:
+            val = int(30/nf)
+            if dy >= 28:
+                mf = nf
+            else:
+                mf = int(dy/val)
+                if (mf*val) < dy: mf += 1
+            if days:
+                dy = (mf-1)*val + days
+            elif mf < nf:
+                dy = mf*val
+            else:
+                mn += 1
+                dy = 0
+            sdate = fmtdate(yr, mn, dy)
+    elif unit == 'W':
+        val = get_weekday(sdate)
+        if days != val: sdate = adddate(sdate, 0, 0, days-val)
+
+    return sdate
+
+#
+# adjust end time to the specified h/n/s for frequency of hour/minute/second
+#
+def endtime(stime, unit):
+
+    if stime and not isinstance(stime, str): stime = str(stime)
+    if not (unit and unit in 'HNS'): return stime
+
+    if stime:
+        tm = split_datetime(stime)
+    else:
+        tm = [0, 0, 0]
+
+    if unit == 'H':
+        tm[1] = tm[2] = 59
+    elif unit == 'N':
+        tm[2] = 59
+    # unit 'S' needs no adjustment
+
+    return "{:02}:{:02}:{:02}".format(tm[0], tm[1], tm[2])
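+#
+# editor's sketch (hypothetical helper, not part of the original module):
+# shows the end-of-period adjustments above for 'YYYY-MM-DD' dates and
+# 'HH:MM:SS' times
+#
+def _demo_end_of_period():
+
+    # frequency 'M' with days == 0 snaps to the last day of the month
+    assert enddate('2024-04-10', 0, 'M') == '2024-04-30'
+    # frequency 'H' pushes minutes and seconds to the end of the hour
+    assert endtime('13:05:07', 'H') == '13:59:59'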
+#
+# adjust the end date and/or time for frequency unit of year/month/week/day/hour/minute/second
+#
+def enddatetime(sdatetime, unit, days = 0, nf = 0):
+
+    if sdatetime and not isinstance(sdatetime, str): sdatetime = str(sdatetime)
+    if not (unit and unit in 'YMWDHNS'): return sdatetime
+    (sdate, stime) = re.split(' ', sdatetime)
+
+    if unit in 'HNS':
+        stime = endtime(stime, unit)
+    else:
+        sdate = enddate(sdate, days, unit, nf)
+
+    return "{} {}".format(sdate, stime)
+
+#
+# get the column width dynamically from the longest value
+#
+def get_column_length(colname, values):
+
+    clen = len(colname) if colname else 2   # initial width is the length of the column title
+
+    for val in values:
+        if val is None: continue
+        sval = str(val)
+        if sval and '\n' not in sval:
+            slen = len(sval)
+            if slen > clen: clen = slen
+
+    return clen
+
+#
+# Function: hour2time()
+# Return: time string in format of 'date HH:MM:SS'
+#
+def hour2time(sdate, nhour, endtime = 0):
+
+    if sdate and not isinstance(sdate, str): sdate = str(sdate)
+    stime = "{:02}:".format(nhour)
+    if endtime:
+        stime += "59:59"
+    else:
+        stime += "00:00"
+
+    if sdate:
+        return "{} {}".format(sdate, stime)
+    else:
+        return stime
+
+#
+# Function: time2hour()
+# Return: list of date and hour
+#
+def time2hour(stime):
+
+    sdate = nhour = None
+    times = stime.split(' ')
+
+    if len(times) == 2:
+        sdate = times[0]
+        stime = times[1]
+
+    ms = re.match(r'^(\d+)', stime)
+    if ms: nhour = int(ms.group(1))
+
+    return [sdate, nhour]
+
+#
+# get all the column widths
+#
+def all_column_widths(pgrecs, flds, tdict):
+
+    colcnt = len(flds)
+    lens = [0]*colcnt
+    for i in range(colcnt):
+        fld = flds[i]
+        if fld not in tdict: continue
+        field = strip_field(tdict[fld][1])
+        lens[i] = get_column_length(None, pgrecs[field])
+
+    return lens
+
+#
+# check a given value; return 1 if numeric, 0 otherwise
+#
+def pgnum(val):
+
+    if not isinstance(val, str): val = str(val)
+    ms = re.match(r'^-?(\d+|\d+\.\d*|\d*\.\d+)([eE]-?\d+)?$', val)
+    return 1 if ms else 0
+
+#
+# Function: pgcmp(val1, val2)
+# Return: 0 if both are empty or the two values are identical; -1 if val1 < val2; 1 otherwise
+#
+def pgcmp(val1, val2, ignorecase = 0, num = 0):
+
+    if val1 is None:
+        return 0 if val2 is None else -1
+    elif val2 is None:
+        return 1
+
+    typ1 = type(val1)
+    typ2 = type(val2)
+    if typ1 != typ2:
+        if num:   # coerce both sides to numbers
+            if typ1 is str:
+                typ1 = float   # float so decimal strings like '1.5' convert safely
+                val1 = float(val1)
+            if typ2 is str:
+                typ2 = float
+                val2 = float(val2)
+        else:     # otherwise coerce both sides to strings
+            if typ1 is not str:
+                typ1 = str
+                val1 = str(val1)
+            if typ2 is not str:
+                typ2 = str
+                val2 = str(val2)
+
+    if typ1 is str:
+        if num:
+            if pgnum(val1) and pgnum(val2):
+                val1 = float(val1)
+                val2 = float(val2)
+        elif ignorecase:
+            val1 = val1.lower()
+            val2 = val2.lower()
+
+    if val1 > val2:
+        return 1
+    elif val1 < val2:
+        return -1
+    else:
+        return 0
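A brief illustration of the numeric-aware comparison above (usage sketch; PgUtil module name assumed):

    from PgUtil import pgnum, pgcmp   # module name assumed

    print(pgnum('-1.5e-3'))           # 1: scientific notation matches
    print(pgnum('1.2.3'))             # 0: not a number

    print(pgcmp('10', '9'))           # -1: plain string comparison
    print(pgcmp('10', '9', num = 1))  #  1: numeric comparison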
+#
+# infiles: initial file list
+# Return: final file list with all the subdirectories expanded
+#
+def recursive_files(infiles):
+
+    ofiles = []
+
+    for file in infiles:
+        if op.isdir(file):
+            ofiles.extend(recursive_files(glob.glob(file + "/*")))
+        else:
+            ofiles.append(file)
+
+    return ofiles
+
+#
+# lidx: lower index limit (inclusive)
+# hidx: upper index limit (exclusive)
+# key: string value to be searched
+# slist: a sorted list in which the key is searched
+# Return: index if found; -1 otherwise
+#
+def asearch(lidx, hidx, key, slist):
+
+    ret = -1
+    if (hidx - lidx) < 11:   # use linear search for fewer than 11 items
+        for midx in range(lidx, hidx):
+            if key == slist[midx]:
+                ret = midx
+                break
+    else:
+        midx = (lidx + hidx)//2   # integer midpoint
+        if key == slist[midx]:
+            ret = midx
+        elif key < slist[midx]:
+            ret = asearch(lidx, midx, key, slist)
+        else:
+            ret = asearch(midx + 1, hidx, key, slist)
+
+    return ret
+
+#
+# lidx: lower index limit (inclusive)
+# hidx: upper index limit (exclusive)
+# key: string value to be matched
+# slist: a sorted list of patterns in which the key is searched
+# Return: index if found; -1 otherwise
+#
+def psearch(lidx, hidx, key, slist):
+
+    ret = -1
+    if (hidx - lidx) < 11:   # use linear search for fewer than 11 items
+        for midx in range(lidx, hidx):
+            if re.search(slist[midx], key):
+                ret = midx
+                break
+    else:
+        midx = (lidx + hidx)//2
+        if re.search(slist[midx], key):
+            ret = midx
+        elif key < slist[midx]:
+            ret = psearch(lidx, midx, key, slist)
+        else:
+            ret = psearch(midx + 1, hidx, key, slist)
+
+    return ret
+
+#
+# quicksort for record lists
+#
+def quicksort(srecs, lo, hi, desc, cnt, nums = None):
+
+    i = lo
+    j = hi
+    mrec = srecs[(lo+hi)//2]
+
+    while True:
+        while cmp_records(srecs[i], mrec, desc, cnt, nums) < 0: i += 1
+        while cmp_records(srecs[j], mrec, desc, cnt, nums) > 0: j -= 1
+        if i <= j:
+            if i < j: srecs[i], srecs[j] = srecs[j], srecs[i]
+            i += 1
+            j -= 1
+        if i > j: break
+
+    # recursion on both partitions
+    if lo < j: srecs = quicksort(srecs, lo, j, desc, cnt, nums)
+    if i < hi: srecs = quicksort(srecs, i, hi, desc, cnt, nums)
+
+    return srecs
+
+def cmp_records(arec, brec, desc, cnt, nums):
+
+    for i in range(cnt):
+        num = nums[i] if nums else 0
+        ret = pgcmp(arec[i], brec[i], 0, num)
+        if ret != 0: return ret*desc[i]
+
+    return 0   # identical records
+
+#
+# format a numeric value into a human-readable size string (decimal units)
+#
+def format_float_value(val, precision = 2):
+
+    units = ('B', 'KB', 'MB', 'GB', 'TB', 'PB')
+
+    if val is None:
+        return ''
+    elif not isinstance(val, int):
+        val = int(val)
+
+    idx = 0
+    while val >= 1000 and idx < 5:
+        val /= 1000
+        idx += 1
+
+    return "{:.{}f}{}".format(val, precision, units[idx])
+
+#
+# check whether a file is an ASCII text file;
+# return 1 if yes, 0 if not, or -1 if the file does not exist
+#
+def is_text_file(fname):
+
+    ret = -1
+    if op.isfile(fname):
+        buf = PgLOG.pgsystem("file -b " + fname, PgLOG.LOGWRN, 20)
+        ret = 1 if buf and re.search(r'(^|\s)(text|script|data)', buf) else 0
+
+    return ret
diff --git a/src/rda_python_common/__init__.py b/src/rda_python_common/__init__.py
new file mode 100644
index 0000000..e69de29
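A closing usage sketch for the search and formatting helpers added above (PgUtil module name assumed, as in the earlier sketches):

    from PgUtil import asearch, format_float_value   # module name assumed

    names = sorted(['alpha', 'beta', 'delta', 'gamma'])
    print(asearch(0, len(names), 'delta', names))    # 2 (found by the linear branch)

    print(format_float_value(123456789))             # '123.46MB' (1000-based units)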