|
# Binary size multipliers, in bytes.  They are floats, so arithmetic that
# involves them is carried out in floating point.
KB = 1024.0
MB = KB * 1024.0
GB = MB * 1024.0
| 4 | + |
| 5 | +import collections, os, sys, math, json, subprocess |
| 6 | + |
# When true, run() echoes every command line before executing it.
TRACE_RUN = True

# Directory containing this file, with symbolic links resolved.
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))

# Short alias for collections.defaultdict.
ddict = collections.defaultdict
| 10 | + |
def run(cmd):
    """Run `cmd` through the shell and return what it wrote to stdout.

    When the module-level TRACE_RUN flag is true, the command line is
    echoed to stdout (prefixed with 'EXEC ') before it is executed.
    subprocess.check_output raises subprocess.CalledProcessError when the
    command exits with a non-zero status.

    NOTE(review): `cmd` is passed to the shell verbatim (shell=True), so
    callers must not splice untrusted text into it.
    """
    tracing = TRACE_RUN
    if tracing:
        # A single parenthesised argument prints identically whether
        # `print` is a statement or a function.
        print('EXEC ' + cmd)
    output = subprocess.check_output(cmd, shell=True)
    return output
| 15 | + |
def run_js(cmd):
    """Run `cmd` via run() and return its stdout decoded as JSON."""
    raw_output = run(cmd)
    return json.loads(raw_output)
| 18 | + |
def panic():
    """Abort unconditionally by raising AssertionError.

    The exception is raised explicitly instead of through an `assert`
    statement: assert statements are removed when Python runs with -O,
    which would turn this function into a silent no-op.
    """
    raise AssertionError('panic')
| 21 | + |
def human_bytes(x):
    """Render a byte count as a short string such as '512B' or '1.5KB'.

    The largest of GB, MB and KB (powers of 1024) that does not exceed `x`
    is used; anything smaller, including zero and negative values, is
    reported in bytes.  The scaled value is printed without decimals when
    its first decimal digit (after truncation) is zero, and with exactly
    one decimal otherwise.
    """
    def render(scaled, unit):
        # Decide on the truncated tenths digit; the one-decimal format
        # itself rounds, so e.g. 1.95 is rendered as '2.0'.
        tenths = int(scaled * 10)
        if tenths % 10 != 0:
            return '%0.1f%s' % (scaled, unit)
        return '%d%s' % (scaled, unit)

    for power, unit in ((3, 'GB'), (2, 'MB'), (1, 'KB')):
        size = 1024.0 ** power
        if size <= x:
            return render(float(x) / size, unit)

    # No larger unit fits: fall back to plain bytes.
    return render(float(x) / 1, 'B')
| 36 | + |
def only(lst):
    """Return the single element of the iterable `lst`.

    The iterable is materialised into a list first, so iterators and
    generators are accepted.  An AssertionError is raised when it does
    not hold exactly one element.
    """
    items = list(lst)
    count = len(items)
    assert count == 1
    return items[0]
| 41 | + |
def memoize(function):
    """Decorator that caches the results of `function` per argument set.

    The cache key is the tuple of positional arguments plus the keyword
    arguments sorted by name, so every argument value must be hashable
    (an unhashable one raises TypeError on lookup).  The same value passed
    once positionally and once by keyword produces two separate entries.
    The cache lives as long as the returned wrapper and is never evicted.

    The wrapper carries over the wrapped function's name and docstring.
    """
    import functools  # local import: the block stays self-contained

    memo = {}

    # Only copy attributes the callable really has, so callables without
    # __name__ (e.g. functools.partial objects) remain usable.
    copied = [attr for attr in functools.WRAPPER_ASSIGNMENTS
              if hasattr(function, attr)]

    @functools.wraps(function, assigned=copied)
    def wrapper(*args, **kvargs):
        key = (args, tuple(sorted(kvargs.items())))
        if key not in memo:
            memo[key] = function(*args, **kvargs)
        return memo[key]

    return wrapper
| 53 | + |
def argsdict(**kvargs):
    """Return the keyword arguments of the call as a plain dict."""
    collected = dict(kvargs)
    return collected
| 56 | + |
def rdjs(path):
    """Read the file at `path` with readall() and parse it as JSON."""
    text = readall(path)
    return json.loads(text)
| 59 | + |
def wrjs(path, data, atomic=False):
    """Serialise `data` as JSON (indent=2) and write it to `path`.

    With atomic=True the JSON is first written to `path + '.tmp'` and
    that file is then renamed over `path`.
    """
    if not atomic:
        writeall(path, json.dumps(data, indent=2))
        return

    staging = path + '.tmp'
    wrjs(staging, data, atomic=False)
    os.rename(staging, path)
| 66 | + |
def readall(path):
    """Return the entire contents of the file at `path`.

    The file is opened in the default (text, read) mode.  The `with`
    block guarantees the handle is closed even when reading raises.
    """
    with open(path) as f:
        return f.read()
| 72 | + |
def writeall(path, data):
    """Write `data` to the file at `path`, replacing any previous contents.

    The file is opened in 'w' mode.  The `with` block guarantees the
    handle is closed (and buffered data flushed) even when writing raises.
    """
    with open(path, 'w') as f:
        f.write(data)
| 77 | + |
def path_iter(path, skip_empty=True):
    """Yield the lines of the file at `path`, line endings included.

    When `skip_empty` is true, lines consisting only of whitespace are
    dropped.  The file is held open by a `with` block, so it is closed
    when the generator is exhausted, closed early, or interrupted by an
    exception.
    """
    with open(path) as f:
        for line in f:
            if skip_empty and not line.strip():
                continue
            yield line
| 85 | + |
# example: key1=val1, key2=val2, ...
def parse_comma_eq(data, typ=float):
    """Parse comma-separated `key=value` pairs into a dict.

    Each pair is stripped of surrounding whitespace and split at its
    first '=', so a value may itself contain '=' characters.  Empty
    segments (empty input, a trailing comma) are skipped.  Every value is
    converted with `typ` unless `typ` is None, in which case it stays a
    string.

    Raises ValueError when a non-empty segment contains no '=' at all;
    `typ` may raise as well when it rejects a value.
    """
    parsed = {}
    for pair in data.split(','):
        pair = pair.strip()
        if not pair:
            continue
        # maxsplit=1: only the first '=' separates key from value.
        key, value = pair.split('=', 1)
        if typ is not None:
            value = typ(value)
        parsed[key] = value
    return parsed
| 96 | + |
def parse_tab_colon_tree(data, typ=float):
    """Parse tab-indented "key: value" text into nested dicts.

    Example input, where [TAB] stands for one leading tab character:

        node1
        [TAB]leaf1: val1
        [TAB]node2
        [TAB][TAB]leaf2: val2

    produces {'node1': {'leaf1': val1, 'node2': {'leaf2': val2}}}.

    A line without ':' opens a new subtree; a line with ':' is a leaf
    whose value is the text between the first and second ':' (anything
    after a second ':' is ignored).  The number of leading tabs selects
    the nesting level.  Leaf values are converted with `typ` unless `typ`
    is None.  Blank lines are skipped.

    Raises AssertionError when a line is indented deeper than any subtree
    opened so far.
    """
    def tab_count(line):
        return len(line) - len(line.lstrip('\t'))

    tree = {}
    # levels[d] is the dict that receives entries indented by d tabs.
    levels = [tree]

    for line in data.split('\n'):
        if not line.strip():
            continue
        # A real list (not map()) so it can be indexed on Python 3 too,
        # and so non-`str` text types can be stripped as well.
        parts = [piece.strip() for piece in line.split(':')]
        key = parts[0]
        is_subtree = len(parts) == 1
        if is_subtree:
            val = {}
        else:
            val = parts[1]
            if typ is not None:
                val = typ(val)

        depth = tab_count(line)
        assert depth < len(levels)
        levels[depth][key] = val

        if is_subtree:
            # Children of this node live one level deeper.
            if len(levels) <= depth + 1:
                levels.append(None)
            levels[depth + 1] = val
    return tree
| 128 | + |
def key_replace(orig=None, replace=None, recursive=False):
    """Rename keys of the dict `orig` in place.

    `replace` maps old key -> new key.  Every old key present in `orig`
    is renamed exactly once: all affected values are removed first and
    then stored under their new names, so swaps ({'a': 'b', 'b': 'a'})
    and chains ({'a': 'b', 'b': 'c'}) do not depend on iteration order
    and never lose a value.

    With `recursive` true the same renaming is applied to every value
    that is itself a dict (including dict subclasses), at any depth.

    Returns None; `orig` is modified in place.
    """
    if orig is None:
        orig = {}
    if replace is None:
        replace = {}

    # Detach every value that is to be renamed before re-inserting any.
    moved = [(new_key, orig.pop(old_key))
             for old_key, new_key in replace.items()
             if old_key in orig]
    for new_key, value in moved:
        orig[new_key] = value

    if recursive:
        for value in list(orig.values()):
            if isinstance(value, dict):
                key_replace(value, replace, recursive)
| 138 | + |
class Sample:
    """A growable collection of numeric observations with summary helpers.

    The values are kept in `self.vals` in insertion order.
    """

    def __init__(self, vals=None):
        """Wrap `vals` (used as-is, not copied); default is a new empty list."""
        if vals is None:
            vals = []
        self.vals = vals

    def add(self, val):
        """Append one observation."""
        self.vals.append(val)

    def perc_under(self, threshold):
        """Return the percentage (0-100) of values strictly below `threshold`.

        Raises ZeroDivisionError when the sample is empty.
        """
        count = sum(1 for v in self.vals if v < threshold)
        return count * 100.0 / len(self.vals)

    def sub_sample(self, lower=0, upper=None):
        """Return a new Sample with the values v where lower <= v <= upper.

        `upper=None` means "no upper bound".
        """
        if upper is None:
            # Comparing numbers against None would drop every value
            # (Python 2) or raise TypeError (Python 3).
            kept = [v for v in self.vals if lower <= v]
        else:
            kept = [v for v in self.vals if lower <= v <= upper]
        return Sample(kept)

    def dump(self):
        """Print the values in ascending order, one 'index: value' per line."""
        for i, v in enumerate(sorted(self.vals)):
            print('%d: %f' % (i, v))

    def median(self):
        """Return the median; for an even count, the mean of the middle pair.

        Raises IndexError when the sample is empty.
        """
        vals = sorted(self.vals)
        mid = len(vals) // 2  # floor division: must be a valid list index
        if len(vals) % 2 == 0:
            return (vals[mid] + vals[mid - 1]) / 2.0
        return vals[mid]

    def avg(self):
        """Return the arithmetic mean as a float.

        Raises ZeroDivisionError when the sample is empty.
        """
        # float() keeps integer samples from being floor-divided.
        return sum(self.vals) / float(len(self.vals))

    def sum(self):
        """Return the sum of all values."""
        return sum(self.vals)

    def max(self):
        """Return the largest value (ValueError when the sample is empty)."""
        return max(self.vals)

    def __str__(self):
        """Return the values in insertion order, joined by ', '."""
        return ', '.join(map(str, self.vals))
| 179 | + |
def keylist_parse(keylist):
    """Normalise a key path to a new list of keys.

    A `str` is split on ':' ('a:b:c' -> ['a', 'b', 'c']); any other
    iterable is copied into a fresh list.
    """
    if type(keylist) is not str:
        return list(keylist)
    return keylist.split(':')
| 184 | + |
def tree_get(tree, keylist, default=None):
    """Look up the value stored under the key path `keylist` in `tree`.

    `keylist` is normalised with keylist_parse(), so it may be a
    'a:b:c' string or an iterable of keys.  Returns `default` as soon as
    one key along the path is missing; an empty path returns `tree`
    itself.
    """
    node = tree
    for key in keylist_parse(keylist):
        if key not in node:
            return default
        node = node[key]
    return node
| 194 | + |
def tree_put(tree, keylist, val):
    """Store `val` in `tree` under the key path `keylist`.

    `keylist` is normalised with keylist_parse().  Missing intermediate
    levels are created as empty dicts.  An empty path raises IndexError.
    """
    path = keylist_parse(keylist)
    node = tree
    # Walk (and create where needed) every level except the last key.
    while len(path) > 1:
        step = path.pop(0)
        if step not in node:
            node[step] = {}
        node = node[step]
    last = path.pop(0)
    node[last] = val
| 204 | + |
def leaf_iter_callback(subtree, keylist=None, fn=None):
    """Call `fn(path)` for every leaf of the nested dict `subtree`.

    `path` is a new list: the keys in `keylist` (the prefix leading to
    `subtree`; empty by default) followed by the keys leading down to
    the leaf.  Any value that is a dict (or dict subclass) is descended
    into; every other value counts as a leaf.

    `fn` must be supplied whenever the tree has at least one leaf;
    leaving it as None then raises TypeError.
    """
    prefix = [] if keylist is None else list(keylist)
    for key, value in subtree.items():
        path = prefix + [key]
        if isinstance(value, dict):
            leaf_iter_callback(value, path, fn)
        else:
            fn(path)
| 211 | + |
def leaf_iter(subtree, keylist=None):
    """Return the key paths of all leaves in the nested dict `subtree`.

    Each path is a list of keys, prefixed with the keys in `keylist`
    (empty by default).  The traversal is delegated to
    leaf_iter_callback().
    """
    prefix = [] if keylist is None else list(keylist)
    paths = []
    # Forward the prefix so the `keylist` argument actually takes effect.
    leaf_iter_callback(subtree, prefix, fn=paths.append)
    return paths
| 218 | + |
def trees_sample(trees):
    """Merge several nested dicts into one tree of Sample objects.

    For every leaf path found in any of `trees`, the result holds a
    Sample at the same path containing that leaf's value from each tree
    in which the path occurs, in iteration order of `trees`.
    """
    merged = {}
    for tree in trees:
        for path in leaf_iter(tree):
            # Reuse the Sample already stored at this path, if any.
            bucket = tree_get(merged, path, Sample())
            observed = tree_get(tree, path)
            bucket.add(observed)
            tree_put(merged, path, bucket)
    return merged
| 227 | + |
0 commit comments