process-scram-results-es

#!/usr/bin/env python
from argparse import ArgumentParser
from commands import getstatusoutput
from operator import itemgetter
from string import maketrans
import json
import os
import re

# Given an IB, it queries the information from elasticsearch about the scram information 
# It then creates the corresponding json files with the results that it finds. 

GET_ALL_INFO_EXCEPTIONS_QUERY_FILE="es-queries/get-ib-scram-results.json"
JSON_OUT_FILE_TEMPLATE="data/scram/{arch}/{date}/{release_queue}_INCOMPLETE.json"

#
# makes a query to get information from elasticsearch
#
def query_es( query ):
  """
  Send a search request to elasticsearch through curl and return the decoded reply.

  The target host and index are read from the module-level ES_HOSTNAME and
  ES_INDEX, which the script entry point sets from the command line.

  query: request body (a JSON string) to POST to the _search endpoint.

  Returns the response body parsed with json.loads.
  Raises ValueError if the response body is not valid JSON.
  Terminates the process if curl reports a non-zero status.
  """
  url = "http://{hostname}/{index}/_search".format( hostname=ES_HOSTNAME, index=ES_INDEX )
  # Both pieces are shell-quoted so that a quote or any other shell
  # metacharacter in the query or host cannot break or alter the command.
  cmd = "curl -s -XPOST {url} -d {query}".format(
         url=shell_quote( url ),
         query=shell_quote( query ) )
  err, out = getstatusoutput( cmd )
  if err:
    print( "Error while querying elasticsearch" )
    print( out )
    # NOTE(review): exits with status 0 even though the query failed -
    # presumably so the calling job is not marked as failed; confirm.
    exit(0)

  return json.loads( out )

#
# quotes a string so that a POSIX shell passes it to the command verbatim
#
def shell_quote( text ):
  """Return text wrapped in single quotes, safe to embed in a shell command line."""
  # A single quote cannot appear inside a single-quoted shell string, so each
  # one becomes: close the quote, emit an escaped quote, reopen the quote.
  return "'" + text.replace( "'", "'\\''" ) + "'"

#
# Queries elasticsearch for the scram result documents of the given IB and
# returns the 'fields' of every hit; stops the script if nothing is found
#
def get_all_info_scram( ib ):
  """
  Fetch the scram result documents stored in elasticsearch for one IB.

  The query is built from the template file GET_ALL_INFO_EXCEPTIONS_QUERY_FILE
  by replacing every occurrence of the placeholder IB_NAME with ib.

  ib: name of the IB to look up.

  Returns a list with the 'fields' entry of every hit in the response.
  Terminates the process with status 0 when the response reports zero hits.
  """
  # 'with' closes the template file even if reading it fails
  with open( GET_ALL_INFO_EXCEPTIONS_QUERY_FILE, 'r' ) as query_file:
    query = query_file.read().replace( 'IB_NAME', ib )

  print( "Querying scram results for for %s" % ib )
  response = query_es( query )
  if response[ 'hits' ][ 'total' ] == 0:
    # Single-argument print behaves the same as a statement and as a function;
    # the doubled spaces reproduce the output of the former print statement.
    print( 'No results found for  %s  I cannot continue' % ib )
    exit( 0 )

  return [ hit[ 'fields' ] for hit in response[ 'hits' ][ 'hits' ] ]
 

#
# organizes the results of the query in a structure that simplifies the generation
# of the json file
# The structure is as follows:
# results = {
#           "<arch1>":{ "<packageA>": { "hostname" : "<hostname>",
#                                       "start" : "<start date>",
#                                       "stop" : "<stop time>"
#                                     },
#                       "<packageB>": { "hostname" : "<hostname>",
#                                       "start" : "<start date>",
#                                       "stop" : "<stop time>"
#                                     }
#                     },
#           "<arch2>":{ "<packageA>": { "hostname" : "<hostname>",
#                                       "start" : "<start date>",
#                                       "stop" : "<stop time>"
#                                     },
#                      "<packageB>": { "hostname" : "<hostname>",
#                                       "start" : "<start date>",
#                                       "stop" : "<stop time>"
#                                     },
#                     } 
# }
#
def organize_results( all_info_scram ):
  """
  Group the scram documents by architecture and then by package.

  all_info_scram: iterable of dicts whose values are lists; only the first
  element of each list is used. 'package', 'hostname' and 'scram_arch' are
  mandatory, while 'stop', 'start' and 'diff' fall back to '' when they are
  missing or empty.

  Returns { arch: { package: { 'hostname', 'start', 'stop', 'diff' } } }.
  When several documents share the same arch and package, the last one wins.
  """
  def first_or_blank( doc, key ):
    # optional field: first value if there is one, otherwise an empty string
    return doc[ key ][0] if doc.get( key ) else ''

  by_arch = {}

  for doc in all_info_scram:
    # read every field before touching by_arch, so a malformed document
    # raises without leaving a half-filled entry behind
    pkg = doc[ 'package' ][0]
    host = doc[ 'hostname' ][0]
    stop_time = first_or_blank( doc, 'stop' )
    start_time = first_or_blank( doc, 'start' )
    scram_arch = doc[ 'scram_arch' ][0]
    time_diff = first_or_blank( doc, 'diff' )

    entry = by_arch.setdefault( scram_arch, {} ).setdefault( pkg, {} )
    entry[ 'hostname' ] = host
    entry[ 'start' ] = start_time
    entry[ 'stop' ] = stop_time
    entry[ 'diff' ] = time_diff

  return by_arch

#
# splits an IB name of the form <release_queue>_<date> into its two parts
#
def split_ib_name( ib ):
  """
  Return ( release_queue, date ) for an IB name, or None if it does not match.

  The date is the trailing 2<digits>-MM-DD-<digits> part and must be preceded
  by an underscore. Anchoring on that underscore matters: with an unanchored
  greedy prefix the leading digits of the year are swallowed by the prefix,
  e.g. 'X_2020-05-01-2300' would give the date '20-05-01-2300'.
  """
  match = re.match( "^(.*)_(2[0-9]*-[01][0-9]-[0-3][0-9]-[0-9]*)$", ib )
  if match is None:
    return None
  return match.group(1), match.group(2)

#---------------------------------------------------------------
# Start
#--------------------------------------------------------------
if __name__ == "__main__":
  parser = ArgumentParser()
  parser.add_argument("input")
  parser.add_argument("--es_hostname", type=str, help="elasticsearch hostname", required=True)
  parser.add_argument("--es_scram_index", type=str, help="elasticsearch scram index", required=True)
  args = parser.parse_args()

  # module-level settings read by query_es
  ES_HOSTNAME = args.es_hostname
  ES_INDEX = args.es_scram_index

  # argparse already rejects a missing positional; this catches an empty string
  if ( not args.input ):
    print( "Please specify an IB to query" )
    exit(1)

  ib = args.input

  all_info_scram = get_all_info_scram( ib )

  results = organize_results( all_info_scram )

  # create the json file
  ib_parts = split_ib_name( ib )
  if ib_parts is None:
    print( 'IB name "%s" does not look like <release_queue>_<date>' % ib )
    exit(1)
  ib_rq, ib_date = ib_parts

  for arch in results:
    out_file_path = JSON_OUT_FILE_TEMPLATE.format( arch=arch, date=ib_date, release_queue=ib_rq )
    dir_file_path = os.path.dirname( out_file_path )
    if not os.path.exists( dir_file_path ):
      os.makedirs( dir_file_path )

    # doubled space reproduces the output of the former print statement
    print( 'Generating exceptions file  %s' % out_file_path )
    # 'with' closes the file even if json.dump raises
    with open( out_file_path, 'w' ) as out_file:
      json.dump( results[ arch ], out_file, sort_keys=True, indent=4, separators=(',', ': ') )