-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathgasol_asm.py
executable file
·1090 lines (832 loc) · 49.7 KB
/
gasol_asm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/python3
import json
import os
import shutil
import sys
from typing import Tuple, Optional, List, Dict, Set
from copy import deepcopy
from timeit import default_timer as dtimer
from argparse import ArgumentParser, Namespace
import pandas as pd
sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/gasol_ml")
import global_params.constants as constants
import global_params.paths as paths
import sfs_generator.ir_block as ir_block
from sfs_generator.gasol_optimization import get_sfs_dict
from sfs_generator.parser_asm import (parse_asm, parse_json_asm,
generate_block_from_plain_instructions,
parse_blocks_from_plain_instructions)
from sfs_generator.utils import process_blocks_split
from verification.sfs_verify import verify_block_from_list_of_sfs, are_equals
from solution_generation.optimize_from_sub_blocks import rebuild_optimized_asm_block
from sfs_generator.asm_block import AsmBlock, AsmBytecode
from sfs_generator.asm_contract import AsmContract
from smt_encoding.block_optimizer import BlockOptimizer, OptimizeOutcome
from solution_generation.ids2asm import asm_from_ids
from verification.forves_verification import compare_forves
from statistics.statistics_from_asm_block import csv_from_asm_block
from global_params.options import OptimizationParams
from greedy.block_generation import greedy_from_json, greedy_standalone
from smt_encoding.json_with_dependencies import generate_dot_graph_from_sms
def init():
global previous_gas
previous_gas = 0
global new_gas
new_gas = 0
global previous_size
previous_size = 0
global new_size
new_size = 0
global new_n_instrs
new_n_instrs = 0
global prev_n_instrs
prev_n_instrs = 0
def select_model_and_config(model: str, criteria: str, i: int) -> Tuple[str, int]:
configurations = {"bound_size": ("bound_size.pyt", 4), "bound_gas": ("bound_gas.pyt", 4),
"opt_size": ("opt_size.pyt", 0),
"opt_gas": ("opt_gas.pyt", 0)}
selected_config = configurations.get(f"{model}_{criteria}", [])
return f"models/{selected_config[0]}", selected_config[1]
def create_ml_models(params: OptimizationParams) -> None:
if params.bound_select or params.opt_select:
import torch
torch.set_num_threads(1)
torch.set_num_interop_threads(1)
# For length, we use the same model as gas. Hence,
# we cannot use the parameter directly
criteria = "size" if params.criteria == "size" else "gas"
if params.bound_select:
import gasol_ml.bound_predictor as bound_predictor
model_name, conf = select_model_and_config("bound", criteria, params.bound_select)
params.bound_model = bound_predictor.ModelQuery(model_name, conf)
if params.opt_select:
import gasol_ml.opt_predictor as opt_predictor
model_name, conf = select_model_and_config("opt", criteria, params.opt_select)
params.optimized_predictor_model = opt_predictor.ModelQuery(model_name, conf)
def compute_original_sfs_with_simplifications(block: AsmBlock, params: OptimizationParams):
stack_size = block.source_stack
block_name = block.block_name
block_id = block.block_id
instructions = block.to_plain()
instructions_to_optimize = block.instructions_to_optimize_plain()
revert_flag = ("REVERT" in instructions or "RETURN" in instructions) and params.terminal
block_data = {"instructions": instructions_to_optimize, "input": stack_size}
fname = params.input_file.split("/")[-1].split(".")[0]
exit_code, subblocks_list = \
ir_block.evm2rbr_compiler(file_name=fname, block=block_data, block_name=block_name, block_id=block_id,
simplification=params.rules_enabled, storage=params.split_storage,
size=params.size_rules_enabled, part=params.split_partition,
pop=params.pop_uninterpreted, push=not params.push_basic, revert=revert_flag,
debug_info=params.verbose)
sfs_dict = get_sfs_dict()
return sfs_dict, subblocks_list
# Assume optimized criteria and optimized greedy have returned a valid model
def compare_best_block(original_seq: List[AsmBytecode], optimized_superopt: List[AsmBytecode],
optimized_greedy: List[AsmBytecode], criterion: str) -> Tuple[List[AsmBytecode], str]:
if criterion == 'size':
saved_optimized = sum([instr.bytes_required for instr in original_seq]) - sum([instr.bytes_required for instr in optimized_superopt])
saved_greedy = sum([instr.bytes_required for instr in original_seq]) - sum([instr.bytes_required for instr in optimized_greedy])
elif criterion == 'gas':
saved_optimized = sum([instr.gas_spent for instr in original_seq]) - sum([instr.gas_spent for instr in optimized_superopt])
saved_greedy = sum([instr.gas_spent for instr in original_seq]) - sum([instr.gas_spent for instr in optimized_greedy])
else:
saved_optimized = len(original_seq) - len(optimized_superopt)
saved_greedy = len(original_seq) - len(optimized_greedy)
if saved_optimized <= 0 and saved_greedy <= 0:
return optimized_superopt, 'both_worse_or_equal'
elif saved_optimized == saved_greedy:
return optimized_superopt, 'tie'
elif saved_optimized < saved_greedy:
return optimized_greedy, 'greedy'
else:
return optimized_superopt, 'superopt'
def search_optimal(sfs_block: Dict, params: OptimizationParams, tout: int,
block_name: str) -> Tuple[OptimizeOutcome, float, List[str], Optional[List[str]]]:
"""
Decides which superoptimization algorithm (or greedy standalone) is applied, using the bound by the greedy if
the corresponding option is enabled. Returns the optimization outcome, the time spent in the search, the ids
from the optimized sequence and from the greedy algorithm
"""
# Must come before extended json with instr dep and bounds to apply the new ub to the bounds generation
if params.ub_greedy and not params.greedy:
try:
new_data, _, _, greedy_ids, error = greedy_from_json(sfs_block)
# Only choose the greedy if the obtained ub is better
print("GREEDY IDS BEF", greedy_ids)
# Check there is no error in the generated solution (as to consider it for the greedy)
if error == 0:
# For smt, we are only interested in improving the bound
if len(greedy_ids) < sfs_block['init_progr_len']:
sfs_block['init_progr_len'] = len(greedy_ids)
else:
greedy_ids = None
except:
greedy_ids = None
else:
greedy_ids = None
# Greedy standalone configuration
if params.greedy:
optimization_outcome_str, solver_time, optimized_ids = greedy_standalone(sfs_block)
optimization_outcome = OptimizeOutcome.non_optimal if optimization_outcome_str == "non_optimal" else OptimizeOutcome.error
# Otherwise, SMT superoptimization
else:
optimizer = BlockOptimizer(block_name, sfs_block, params, tout)
optimization_outcome, solver_time, optimized_ids = optimizer.optimize_block()
return optimization_outcome, solver_time, optimized_ids, greedy_ids
def choose_best_solution(original_asm: List[AsmBytecode], optimized_asm: List[AsmBytecode],
greedy_asm: Optional[List[AsmBytecode]], optimization_outcome: OptimizeOutcome,
params: OptimizationParams):
chosen_solution_tag = None
if params.ub_greedy:
criterion = params.criteria
# Case: no solution found by the superoptimizer and the greedy has found a solution
if (optimization_outcome == OptimizeOutcome.no_model or optimization_outcome == OptimizeOutcome.unsat) \
and greedy_asm is not None:
optimized_asm, chosen_solution_tag = greedy_asm, 'greedy_no_model'
# Case: both the greedy and the superoptimizer has found no solution
elif optimization_outcome == OptimizeOutcome.no_model or optimization_outcome == OptimizeOutcome.unsat:
optimized_asm, chosen_solution_tag = [], 'both_worse_or_equal'
# Case: no solution found by the greedy algorithm but the superoptimizer has found a solution
elif greedy_asm is None:
optimized_asm, chosen_solution_tag = compare_best_block(original_asm, optimized_asm,
original_asm, criterion)
# Case: both the greedy and the superoptimizer have returned a sequence
else:
optimized_asm, chosen_solution_tag = compare_best_block(original_asm, optimized_asm,
greedy_asm, criterion)
# We just return the corresponding optimization assembly code and the tag indicating in which situation
# we have fallen into
return optimized_asm, chosen_solution_tag
# Given the sequence of bytecodes, the initial stack size, the contract name and the
# block id, returns the output given by the solver, the name given to that block and current gas associated
# to that sequence.
def optimize_block(sfs_dict, params: OptimizationParams) -> List[Tuple[AsmBlock, OptimizeOutcome, float, \
List[AsmBytecode], Optional[str], int, int, List[str], List[str]]]:
block_solutions = []
# SFS dict of syrup contract contains all sub-blocks derived from a block after splitting
for block_name in sfs_dict:
sfs_block = sfs_dict[block_name]
initial_solver_bound = sfs_block['init_progr_len']
original_instr = sfs_block['original_instrs']
previous_bound = sfs_block['init_progr_len']
original_block = generate_block_from_plain_instructions(original_instr, block_name)
if params.bound_model is not None:
inferred_bound = params.bound_model.eval(sfs_block)
if inferred_bound == 0:
new_bound = previous_bound
else:
new_bound = min(previous_bound, inferred_bound)
sfs_block['init_progr_len'] = new_bound
if params.verbose:
print(f"Previous bound: {previous_bound} Inferred bound: {inferred_bound} Final bound: {new_bound}")
# To match previous results, multiply timeout by number of storage instructions
# TODO devise better heuristics to deal with timeouts
if params.direct_timeout:
tout = params.timeout
else:
tout = params.timeout * (1 + len([True for instr in sfs_block['user_instrs'] if instr["storage"]]))
print(f"Optimizing {block_name}... Timeout:{str(tout)}")
# We have enabled the optimization process (otherwise, we just generate the intermediate SMT files)
if params.dot_generation:
generate_dot_graph_from_sms(sfs_block, block_name)
elif params.optimization_enabled:
optimization_outcome, solver_time, optimized_ids, greedy_ids = search_optimal(sfs_block, params, tout, block_name)
optimized_asm = asm_from_ids(sfs_block, optimized_ids) if optimized_ids is not None else []
greedy_asm = asm_from_ids(sfs_block, greedy_ids) if greedy_ids is not None else None
# We only determine the best solution if we have enabled previously the greedy algorithm
if params.ub_greedy:
chosen_seq, chosen_tag = choose_best_solution(original_block.instructions, optimized_asm, greedy_asm, optimization_outcome, params)
else:
chosen_seq, chosen_tag = optimized_asm, "greedy_not_enabled"
block_solutions.append((original_block, optimization_outcome, solver_time,
chosen_seq, chosen_tag, tout, initial_solver_bound, sfs_block['rules'], optimized_ids))
else:
optimizer = BlockOptimizer(block_name, sfs_block, params, tout)
optimizer.generate_intermediate_files()
return block_solutions
# Given the log file loaded in json format, current block and the contract name, generates three dicts: one that
# contains the sfs from each block, the second one contains the sequence of instructions and
# the third one is a set that contains all block ids.
def generate_sfs_dicts_from_log(block, json_log, params: OptimizationParams):
contracts_dict, sub_block_list = compute_original_sfs_with_simplifications(block, params)
syrup_contracts = contracts_dict["syrup_contract"]
# Contains sfs blocks considered to check the SMT problem. Therefore, a block is added from
# sfs_original iff solver could not find an optimized solution, and from sfs_dict otherwise.
optimized_sfs_dict = {}
# Dict that contains all instr sequences
instr_sequence_dict = {}
# Set that contains all ids
ids = set()
# We need to inspect all sub-blocks in the sfs dict.
for block_id in syrup_contracts:
ids.add(block_id)
# If the id is not at json log, this means it has not been optimized
if block_id not in json_log:
continue
instr_sequence = json_log[block_id]
sfs_block = syrup_contracts[block_id]
optimized_sfs_dict[block_id] = sfs_block
instr_sequence_dict[block_id] = instr_sequence
return syrup_contracts, optimized_sfs_dict, sub_block_list, instr_sequence_dict, ids
# Given a dict with the sfs from each block and another dict that contains whether previous block was optimized or not,
# generates the corresponding solution. All checks are assumed to have been done previously
def optimize_asm_block_from_log(block, sfs_dict, sub_block_list, instr_sequence_dict: Dict[str, List[str]]):
# Optimized blocks. When a block is not optimized, None is pushed to the list.
optimized_blocks = {}
for sub_block_name, sfs_sub_block in sfs_dict.items():
if sub_block_name not in instr_sequence_dict:
optimized_blocks[sub_block_name] = None
else:
new_sub_block = asm_from_ids(sfs_sub_block, instr_sequence_dict[sub_block_name])
optimized_blocks[sub_block_name] = new_sub_block
new_block = rebuild_optimized_asm_block(block, sub_block_list, optimized_blocks)
return new_block
def optimize_asm_from_log(params: OptimizationParams, json_log: Dict):
asm = parse_asm(params.input_file)
# Blocks from all contracts are checked together. Thus, we first will obtain the needed
# information from each block
sfs_dict_to_check, instr_sequence_dict, file_ids = {}, {}, set()
contracts = []
for c in asm.contracts:
new_contract = deepcopy(c)
# If it does not have the asm field, then we skip it, as there are no instructions to optimize
if not c.has_asm_field:
contracts.append(new_contract)
continue
contract_name = c.shortened_name
init_code = c.init_code
init_code_blocks = []
print("\nAnalyzing Init Code of: " + contract_name)
print("-----------------------------------------\n")
for block in init_code:
if block.instructions_to_optimize_plain() == []:
init_code_blocks.append(deepcopy(block))
continue
sfs_all, sfs_optimized, sub_block_list, instr_sequence_dict_block, block_ids = \
generate_sfs_dicts_from_log(block, json_log, params)
new_block = optimize_asm_block_from_log(block, sfs_all, sub_block_list, instr_sequence_dict_block)
eq, reason = compare_asm_block_asm_format(block, new_block, params)
if not eq:
raise ValueError(f"Error parsing the log file. [REASON]: {reason}")
init_code_blocks.append(new_block)
new_contract.init_code = init_code_blocks
print("\nAnalyzing Runtime Code of: " + contract_name)
print("-----------------------------------------\n")
for identifier in c.get_data_ids_with_code():
blocks = c.get_run_code(identifier)
run_code_blocks = []
for block in blocks:
if block.instructions_to_optimize_plain() == []:
run_code_blocks.append(deepcopy(block))
continue
sfs_all, sfs_optimized, sub_block_list, instr_sequence_dict_block, block_ids = \
generate_sfs_dicts_from_log(block, json_log, params)
new_block = optimize_asm_block_from_log(block, sfs_all, sub_block_list, instr_sequence_dict_block)
eq, reason = compare_asm_block_asm_format(block, new_block, params)
if not eq:
raise ValueError(f"Error parsing the log file. [REASON]: {reason}")
run_code_blocks.append(new_block)
new_contract.set_run_code(identifier, run_code_blocks)
contracts.append(new_contract)
print("Solution generated from log file has been verified correctly")
new_asm = deepcopy(asm)
new_asm.contracts = contracts
with open(params.optimized_file, 'w') as f:
f.write(json.dumps(new_asm.to_json()))
print("")
print("Optimized code stored at " + params.optimized_file)
def optimize_isolated_asm_block(params: OptimizationParams):
seqs_rows = []
blocks_rows = []
with open(params.input_file, "r") as f:
instructions = f.read()
blocks = parse_blocks_from_plain_instructions(instructions, params.block_name, params.block_name_prefix)
asm_blocks = []
for old_block in blocks:
asm_block, _, statistics_csv = optimize_asm_block_asm_format(old_block, params)
seqs_rows.extend(statistics_csv)
eq, reason = compare_asm_block_asm_format(old_block, asm_block, params)
if not eq:
print("Comparison failed, so initial block is kept")
print("\t[REASON]: " + reason)
print(old_block.to_plain())
print(asm_block.to_plain())
print("")
asm_block = old_block
update_gas_count(old_block, asm_block)
update_length_count(old_block, asm_block)
update_size_count(old_block, asm_block)
asm_blocks.append(asm_block)
if params.optimization_enabled:
print("")
print("Initial sequence (basic block per line):")
print('\n'.join([old_block.to_plain_with_byte_number() for old_block in blocks]))
print("")
print("Optimized sequence (basic block per line):")
print('\n'.join([asm_block.to_plain_with_byte_number() for asm_block in asm_blocks]))
with open(params.optimized_file, 'w') as f:
f.write('\n'.join([asm_block.to_plain_with_byte_number() for asm_block in asm_blocks]))
pd.DataFrame(blocks_rows).to_csv(params.blocks_file)
pd.DataFrame(seqs_rows).to_csv(params.seqs_file)
def update_gas_count(old_block: AsmBlock, new_block: AsmBlock):
global previous_gas
global new_gas
previous_gas += old_block.gas_spent
new_gas += new_block.gas_spent
def update_size_count(old_block: AsmBlock, new_block: AsmBlock):
global previous_size
global new_size
previous_size += old_block.bytes_required
new_size += new_block.bytes_required
def update_length_count(old_block: AsmBlock, new_block: AsmBlock):
global prev_n_instrs
global new_n_instrs
prev_n_instrs += len([True for instruction in old_block.instructions if instruction.disasm != 'tag'])
new_n_instrs += len([True for instruction in new_block.instructions if instruction.disasm != 'tag'])
def generate_statistics_info(original_block: AsmBlock, outcome: Optional[OptimizeOutcome], solver_time: float,
optimized_block: AsmBlock, chosen_tag: Optional[str], initial_bound: int, tout: int, rules: List[str]) -> Dict:
block_name = original_block.block_name
original_instr = ' '.join(original_block.instructions_to_optimize_plain())
statistics_row = {"block_id": block_name, "previous_solution": original_instr, "timeout": tout,
"initial_n_instrs": initial_bound, 'initial_estimated_size': original_block.bytes_required,
'initial_estimated_gas': original_block.gas_spent, 'rules': ','.join(rules),
'initial_length': len(original_block.instructions_to_optimize_plain()),
'saved_length': 0, 'chosen_block_tag': chosen_tag}
# The outcome of the solver is unsat
if outcome == OptimizeOutcome.unsat:
statistics_row.update(
{"model_found": False, "shown_optimal": False, "solver_time_in_sec": round(solver_time, 3),
"saved_size": 0, "saved_gas": 0, 'outcome': 'unsat'})
# The solver has returned no model
elif outcome == OptimizeOutcome.no_model:
statistics_row.update(
{"model_found": False, "shown_optimal": False, "solver_time_in_sec": round(solver_time, 3),
"saved_size": 0, "saved_gas": 0, 'outcome': 'no_model'})
# The solver has returned a valid model
else:
shown_optimal = outcome == OptimizeOutcome.optimal
optimized_size = optimized_block.bytes_required
optimized_gas = optimized_block.gas_spent
optimized_length = len(optimized_block.instructions_to_optimize_plain())
initial_size = original_block.bytes_required
initial_gas = original_block.gas_spent
initial_length = len(original_block.instructions_to_optimize_plain())
statistics_row.update({"solver_time_in_sec": round(solver_time, 3), "saved_size": initial_size - optimized_size,
"saved_gas": initial_gas - optimized_gas, "model_found": True,
"shown_optimal": shown_optimal,
"solution_found": ' '.join([instr.to_plain() for instr in optimized_block.instructions]),
"optimized_n_instrs": optimized_length, 'optimized_length': optimized_length,
'optimized_estimated_size': optimized_size, 'optimized_estimated_gas': optimized_gas,
'outcome': 'model', 'saved_length': initial_length - optimized_length})
return statistics_row
def improves_criterion(saved_criterion: int, *saved_other):
if saved_criterion > 0:
return True
elif saved_criterion == 0:
any_improves = False
for crit in saved_other:
if crit > 0:
any_improves = True
elif crit < 0:
return False
return any_improves
else:
return False
def block_has_been_optimized(original_block: AsmBlock, optimized_block: AsmBlock,
criteria: str) -> bool:
saved_size = original_block.bytes_required - optimized_block.bytes_required
saved_gas = original_block.gas_spent - optimized_block.gas_spent
saved_length = original_block.length - optimized_block.length
return (criteria == "size" and improves_criterion(saved_size, saved_gas)) or \
(criteria == "length" and improves_criterion(saved_length, saved_gas, saved_size)) or \
(criteria == "gas" and improves_criterion(saved_gas, saved_size))
# Given an asm_block and its contract name, returns the asm block after the optimization
def optimize_asm_block_asm_format(block: AsmBlock, params: OptimizationParams) -> Tuple[AsmBlock, Dict, List[Dict]]:
csv_statistics = []
new_block = deepcopy(block)
# Optimized blocks. When a block is not optimized, None is pushed to the list.
optimized_blocks = {}
log_dicts = {}
instructions = block.instructions_to_optimize_plain()
sfs_dict = {}
# No instructions to optimize
if instructions == []:
return new_block, {}, []
if params.optimized_predictor_model is not None and params.optimization_enabled:
stack_size = block.source_stack
instructions_to_optimize = block.instructions_to_optimize_plain()
block_data = {"instructions": instructions_to_optimize, "input": stack_size}
sub_block_list = ir_block.get_subblocks(block_data, storage=params.split_storage, part=params.split_partition)
subblocks2analyze = [instructions for instructions in process_blocks_split(sub_block_list)]
for i, subblock in enumerate(subblocks2analyze):
# If we have an empty sub block, we just consider it has not been optimized and go on with the new block
if subblock == []:
continue
ins_str = " ".join(subblock)
new_block = parse_blocks_from_plain_instructions(ins_str)[0]
new_block.set_block_name(block.get_block_name())
new_block.set_block_id(i)
out = params.optimized_predictor_model.eval(ins_str)
# The new sub block name is generated as follows. Important to match the format in the sfs_dict
sub_block_name = f'{new_block.get_block_name()}_{new_block.get_block_id()}'
# Out == 1 means the predictors predicts the block won't lead to any optimization, and hence, it's not worthy
# to try optimization
if out == 0:
optimized_blocks[sub_block_name] = None
if params.verbose:
print(f"{block.block_name} has been chosen not to be optimized")
else:
try:
contracts_dict, _ = compute_original_sfs_with_simplifications(new_block, params)
except Exception as e:
failed_row = {'instructions': instructions, 'exception': str(e)}
return new_block, {}, []
old_name = list(contracts_dict["syrup_contract"].keys())[0]
sfs_dict[sub_block_name] = contracts_dict["syrup_contract"][old_name]
else:
try:
contracts_dict, sub_block_list = compute_original_sfs_with_simplifications(block, params)
except Exception as e:
failed_row = {'instructions': instructions, 'exception': str(e)}
return new_block, {}, []
sfs_dict = contracts_dict["syrup_contract"]
if not params.optimization_enabled:
optimize_block(sfs_dict, params)
return new_block, {}, []
for sub_block, optimization_outcome, solver_time, optimized_asm, chosen_tag, tout, initial_solver_bound, rules, optimized_log_rep in optimize_block(
sfs_dict, params):
optimal_block = AsmBlock('optimized', sub_block.block_id, sub_block.block_name, sub_block.is_init_block)
optimal_block.instructions = optimized_asm
statistics_info = generate_statistics_info(sub_block, optimization_outcome, solver_time, optimal_block, chosen_tag,
initial_solver_bound, tout, rules)
if "solution_found" in statistics_info:
statistics_info["forves_checker"] = compare_forves(statistics_info["previous_solution"],
statistics_info["solution_found"],
"size" if params.criteria == "size" else "gas",
params.forves_enabled)
else:
statistics_info["forves_checker"] = "true"
csv_statistics.append(statistics_info)
# Only check if the new block is considered if the solver has generated a new one
if optimization_outcome == OptimizeOutcome.non_optimal or optimization_outcome == OptimizeOutcome.optimal:
sub_block_name = sub_block.block_name
if block_has_been_optimized(sub_block, optimal_block, params.criteria):
optimized_blocks[sub_block_name] = optimized_asm
log_dicts[sub_block_name] = optimized_log_rep
else:
optimized_blocks[sub_block_name] = None
new_block = rebuild_optimized_asm_block(block, sub_block_list, optimized_blocks)
return new_block, log_dicts, csv_statistics
def compare_asm_block_asm_format(old_block: AsmBlock, new_block: AsmBlock, params: OptimizationParams) -> Tuple[bool, str]:
# Change new block name to store the corresponding sfs with the new change
original_block_name = new_block.get_block_name()
new_block.set_block_name("alreadyOptimized_"+ original_block_name)
new_sfs_information, _ = compute_original_sfs_with_simplifications(new_block, params)
new_block.set_block_name(original_block_name)
new_sfs_dict = new_sfs_information["syrup_contract"]
old_sfs_information, _ = compute_original_sfs_with_simplifications(old_block, params)
old_sfs_dict = old_sfs_information["syrup_contract"]
final_comparison, reason = verify_block_from_list_of_sfs(old_sfs_dict, new_sfs_dict)
# We also must check intermediate instructions match i.e those that are not sub blocks
initial_instructions_old = old_block.instructions_initial_bytecode()
initial_instructions_new = new_block.instructions_initial_bytecode()
final_instructions_old = old_block.instructions_final_bytecode()
final_instructions_new = new_block.instructions_final_bytecode()
return final_comparison and (initial_instructions_new == initial_instructions_old) and \
final_instructions_new == final_instructions_old, reason
def csv_from_asm_blocks(old_contract_blocks: List[AsmBlock], new_contract_blocks: List[AsmBlock],
params: OptimizationParams) -> List[Dict]:
return [csv_from_asm_block(old_contract_block, new_contract_block,
*compare_asm_block_asm_format(old_contract_block, new_contract_block, params),
compare_forves(old_contract_block.to_plain(), new_contract_block.to_plain(),
"size" if params.criteria == "size" else "gas",
params.forves_enabled))
for old_contract_block, new_contract_block in zip(old_contract_blocks, new_contract_blocks)]
def optimize_asm_contract(c: AsmContract, params: OptimizationParams) -> Tuple[AsmContract, List[Dict], Dict, List[Dict]]:
seq_rows, log_dicts, blocks_rows = [], {}, []
new_contract = deepcopy(c)
# If it does not have the asm field, then we skip it, as there are no instructions to optimize. Same if a
# contract has been specified and current name does not match.
contract_name = c.shortened_name
init_code = c.init_code
print("\nAnalyzing Init Code of: " + contract_name)
print("-----------------------------------------\n")
init_code_blocks = []
for old_block in init_code:
optimized_block, log_element, csv_statistics = optimize_asm_block_asm_format(old_block, params)
seq_rows.extend(csv_statistics)
eq, reason = compare_asm_block_asm_format(old_block, optimized_block, params)
if not eq:
print("Comparison failed, so initial block is kept")
print("\t[REASON]: " + reason)
print(old_block.to_plain())
print(optimized_block.to_plain())
print("")
optimized_block = old_block
log_element = {}
log_dicts.update(log_element)
init_code_blocks.append(optimized_block)
# Deployment size is not considered when measuring it
update_gas_count(old_block, optimized_block)
update_length_count(old_block, optimized_block)
new_contract.init_code = init_code_blocks
if params.forves_enabled:
print("Checking optimized basic blocks in init_code with forves...")
blocks_rows.extend(csv_from_asm_blocks(init_code, init_code_blocks, params))
print("\nAnalyzing Runtime Code of: " + contract_name)
print("-----------------------------------------\n")
for identifier in c.get_data_ids_with_code():
blocks = c.get_run_code(identifier)
run_code_blocks = []
for old_block in blocks:
optimized_block, log_element, csv_statistics = optimize_asm_block_asm_format(old_block, params)
seq_rows.extend(csv_statistics)
eq, reason = compare_asm_block_asm_format(old_block, optimized_block, params)
print("EQ", eq, reason)
if not eq:
print("Comparison failed, so initial block is kept")
print("\t[REASON]: " + reason)
print(old_block.to_plain())
print(optimized_block.to_plain())
print("")
optimized_block = old_block
log_element = {}
log_dicts.update(log_element)
run_code_blocks.append(optimized_block)
update_gas_count(old_block, optimized_block)
update_length_count(old_block, optimized_block)
update_size_count(old_block, optimized_block)
if params.forves_enabled:
print("Checking optimized basic blocks in init_code with forves...")
blocks_rows.extend(csv_from_asm_blocks(blocks, run_code_blocks, params))
new_contract.set_run_code(identifier, run_code_blocks)
return new_contract, seq_rows, log_dicts, blocks_rows
def optimize_asm_in_asm_format(params: OptimizationParams):
seqs_rows = []
blocks_rows = []
asm = parse_asm(params.input_file)
log_dicts = {}
contracts = []
new_contract = None
for c in asm.contracts:
if not c.has_asm_field or (params.contract is not None and c.shortened_name != params.contract):
contracts.append(c)
else:
new_contract, contract_seq_rows, contract_log_dicts, contract_block_rows = optimize_asm_contract(c, params)
contracts.append(new_contract)
# Update the corresponding information
log_dicts.update(contract_log_dicts)
seqs_rows.extend(contract_seq_rows)
blocks_rows.extend(contract_block_rows)
new_asm = deepcopy(asm)
new_asm.contracts = contracts
if params.generate_log:
with open(params.log_file, "w") as log_f:
json.dump(log_dicts, log_f)
if params.optimization_enabled:
with open(params.optimized_file, 'w') as f:
if params.contract is not None:
if new_contract is None or new_contract.shortened_name != params.contract:
raise ValueError("Specified contract cannot be found")
else:
f.write(json.dumps(new_contract.to_asm_json()))
else:
f.write(json.dumps(new_asm.to_json()))
pd.DataFrame(seqs_rows).to_csv(params.seqs_file)
pd.DataFrame(blocks_rows).to_csv(params.blocks_file)
def optimize_asm_from_asm_json(params: OptimizationParams):
c = parse_json_asm(params.input_file)
new_contract, contract_seq_rows, contract_log_dicts, contract_block_rows = optimize_asm_contract(c, params)
if params.generate_log:
with open(params.log_file, "w") as log_f:
json.dump(contract_log_dicts, log_f)
if params.optimization_enabled:
with open(params.optimized_file, 'w') as f:
f.write(json.dumps(c.to_asm_json()))
pd.DataFrame(contract_seq_rows).to_csv(params.seqs_file)
pd.DataFrame(contract_block_rows).to_csv(params.blocks_file)
def optimize_from_sfs(params: OptimizationParams):
block_name = 'isolated_block_sfs'
with open(params.input_file, 'r') as f:
sfs_dict = json.load(f)
csv_statistics = []
for original_block, optimization_outcome, solver_time, optimized_asm, chosen_tag, tout, initial_solver_bound, rules, optimized_log_rep \
in optimize_block(sfs_dict, params):
optimal_block = AsmBlock('optimized', original_block.block_id, original_block.block_name,
original_block.is_init_block)
optimal_block.instructions = optimized_asm
statistics_info = generate_statistics_info(original_block, optimization_outcome, solver_time, optimal_block, chosen_tag,
initial_solver_bound, tout, rules)
csv_statistics.append(statistics_info)
# print(json.dumps(sfs_dict, indent=4))
if params.optimization_enabled:
pd.DataFrame(csv_statistics).to_csv(params.seqs_file)
print("")
print("Initial sequence (basic block per line):")
print(original_block.to_plain_with_byte_number())
print("")
print("Optimized sequence (basic block per line):")
print(optimal_block.to_plain_with_byte_number())
def modify_file_names(params: OptimizationParams) -> None:
input_file_name = params.input_file.split("/")[-1].split(".")[0]
if params.optimized_file is None:
if params.input_format == "block":
output_file = input_file_name + "_optimized.txt"
elif params.input_format == "sfs":
output_file = input_file_name + "_optimized.json"
elif params.input_format == "single-asm":
output_file = input_file_name + "_optimized.json"
elif params.from_log is not None:
output_file = input_file_name + "_optimized_from_log.json_solc"
else:
output_file = input_file_name + "_optimized.json_solc"
params.optimized_file = output_file
if params.seqs_file is None:
params.seqs_file = input_file_name + "_statistics_seq.csv"
if params.blocks_file is None:
params.blocks_file = input_file_name + "_statistics_blocks.csv"
if params.log_file is None:
params.log_file = input_file_name + ".log"
def options_gasol(ap: ArgumentParser) -> None:
# Unused options
# ap.add_argument("-last-constants", "--last-constants", help="It removes the last instructions of a block when they generate a constant value", dest="last_constants", action = "store_true")
# ap.add_argument("-mem40", "--mem40", help="It assumes that pos 64 in memory is not dependant with variables", action = "store_true")
input = ap.add_argument_group('Input options')
input.add_argument('input_path', help='Path to input file that contains the code to optimize. Can be either asm'
'either from the --combined-json asm or --json-asm options, '
'plain instructions or a json containing the SFS. The corresponding flag'
'must be enabled')
group_input = input.add_mutually_exclusive_group()
group_input.add_argument("-bl", "--block", help="Enable analysis of a single asm block", action="store_true")
group_input.add_argument("-sfs", "--sfs", dest='sfs', help="Enable analysis of a single SFS", action="store_true")
group_input.add_argument("-c", "--contract", dest="contract", action='store',
help='Specify the specific contract in the json_solc from the combined json to be optimized. '
'The name of the contract must match the name that appears in the solc file. Produces'
'a json file that only includes the information of the corresponding contract, as if'
'it was produced using the solc option --asm-json.')
group_input.add_argument("-single-json", "--single-json", dest='json_asm', action="store_true",
help="Enables analysis of a single contract generated by the --json-asm option.")
output = ap.add_argument_group('Output options')
output.add_argument("-o", help="Path for storing the optimized code", dest='output_path', action='store')
output.add_argument("-csv", help="CSV file path for the seqs optimization report", dest='seq_csv_path',
action='store')
output.add_argument("-block-csv", help="CSV file path for the blocks optimization report", dest='block_csv_path',
action='store')
output.add_argument("-backend", "--backend", action="store_false",
help="Disables backend generation, so that only intermediate files are generated")
output.add_argument("-intermediate", "--intermediate", action="store_true",
help="Keeps temporary intermediate files. "
"These files contain the sfs representation, smt encoding...")
output.add_argument("-d", "--debug", help="It prints debugging information", dest='debug_flag', action="store_true")
output.add_argument("-forves", "--forves", help="Enables the verification via the forves checker",
dest='forves_enabled', action="store_true")
output.add_argument("-dot", "--dot", help="Disables the optimization and generates a dot file for each dependency"
"graph induced by the instructions. Red edges mark memory dependencies",
action="store_true", dest="dot_generation")
log_generation = ap.add_argument_group('Log generation options', 'Options for managing the log generation')
log_generation.add_argument("-log", "--generate-log", help="Enable log file for verification",
action="store_true", dest='log')
log_generation.add_argument("-dest-log", help="Log output path", action="store", dest='log_stored_final')
log_generation.add_argument("-optimize-from-log", dest='log_path', action='store', metavar="log_file",
help="Generates the same optimized bytecode than the one associated to the log file")
basic = ap.add_argument_group('SearchOptimal general options',
'Basic options for solving the corresponding constraint solver')
basic.add_argument("-ub-greedy", "--ub-greedy", dest='ub_greedy', help='Enables greedy algorithm to predict the upper bound', action='store_true')
basic.add_argument('-greedy', '--greedy', dest='greedy', help='Uses greedy directly to generate the results', action='store_true')
basic.add_argument("-solver", "--solver", help="Choose the solver", choices=["z3", "barcelogic", "oms"],
default="oms")
basic.add_argument("-tout", metavar='timeout', action='store', type=int,
help="Timeout in seconds. By default, set to 2s per block.", default=2)
basic.add_argument("-direct-tout", dest='direct_timeout', action='store_true',
help="Sets the Max-SMT timeout to -tout directly, "
"without considering the structure of the block")
basic.add_argument("-push0", "--push0", dest='push0_enabled', action='store_false',
help="Assumes PUSH0 opcode cannot be used in the optimizations.")
basic.add_argument('-no-simplification', "--no-simplification", action='store_true', dest='no_simp',
help='Disables the application of simplification rules')
blocks = ap.add_argument_group('Split block options', 'Options for deciding how to split blocks when optimizing')
blocks.add_argument("-storage", "--storage", help="Split using SSTORE, MSTORE and MSTORE8", action="store_true")
blocks.add_argument("-partition", "--partition", help="It enables the partition in blocks of 24 instructions",
action="store_true")
hard = ap.add_argument_group('Hard constraints', 'Options for modifying the hard constraint generation')
hard.add_argument("-memory-encoding", help="Choose the memory encoding model", choices=["l_vars", "direct"],
default="direct", dest='memory_encoding')
hard.add_argument('-push-basic', action='store_true', dest='push_basic',
help='Disables push instruction as uninterpreted functions')
hard.add_argument('-pop-uninterpreted', action='store_true', dest='pop_uninterpreted',
help='Encodes pop instruction as uninterpreted functions')
hard.add_argument('-order-bounds', action='store_false', dest='order_bounds',
help='Disables bounds on the position instructions can appear in the encoding')
hard.add_argument('-empty', action='store_true', dest='empty',
help='Consider "empty" value as part of the encoding to reflect some stack position is empty,'
'instead of using a boolean term')
hard.add_argument('-term-encoding', action='store', dest='encode_terms',
choices=['int', 'stack_vars', 'uninterpreted_uf', 'uninterpreted_int'],
help='Decides how terms are encoded in the SMT encoding: directly as numbers, using stack'
'variables or introducing uninterpreted functions', default='uninterpreted_uf')
hard.add_argument('-terminal', action='store_true', dest='terminal',
help='(UNSUPPORTED) Encoding for terminal blocks that end with REVERT or RETURN. '
'Instead of considering the full stack order, just considers the two top elements')
hard.add_argument('-ac', action='store_true', dest='ac_solver',
help='(UNSUPPORTED) Commutativity in operations is considered as an extension inside the solver. '
'Can only be combined with Z3')
soft = ap.add_argument_group('Soft constraints', 'Options for modifying the soft constraint generation')
group = soft.add_mutually_exclusive_group()
group.add_argument("-size", "--size", action="store_true",
help="It enables size cost model for optimization and disables rules that increase the size"
"The simplification rules are applied only if they reduce the size")
group.add_argument("-length", "--length", action="store_true",
help="It enables the #instructions cost model. Every possible simplification rule is applied")
soft.add_argument("-direct-inequalities", dest='direct_soft', action='store_true',
help="Soft constraints with inequalities instead of equalities and without grouping")
additional = ap.add_argument_group('Additional constraints',
'Constraints that can help to speed up the optimization process, but are not '
'necessary')
additional.add_argument('-at-most', action='store_true', dest='at_most',
help='add a constraint for each uninterpreted function so that they are used at most once')
additional.add_argument('-pushed-once', action='store_true', dest='pushed_once',
help='add a constraint to indicate that each pushed value is pushed at least once')
additional.add_argument("-no-output-before-pop", action='store_false', dest='no_output_before_pop',
help='Remove the constraint representing the fact that the previous instruction'
'of a pop can only be a instruction that does not produce an element')
additional.add_argument('-order-conflicts', action='store_false', dest='order_conflicts',
help='Disable the order among the uninterpreted opcodes in the encoding')
ml_options = ap.add_argument_group('ML Options', 'Options to execute the different ML modules')
ml_options.add_argument('-bound-model', "--bound-model", action='store_true', dest='bound_select',
help="Enable bound regression model")
ml_options.add_argument('-opt-model', "--opt-model", action='store_true', dest='opt_select',
help="Select which representation model is used for the opt classification")
def parse_encoding_args(ap: ArgumentParser):
return ap.parse_args()