Skip to content

Commit ecedf48

Browse files
committed
Merge branch 'develop' of https://github.com/RSGInc/Daysim into develop
2 parents db1cb01 + 4b6dad7 commit ecedf48

File tree

6 files changed

+89
-78
lines changed

6 files changed

+89
-78
lines changed

DaySim.Tests/DaySim.Tests.external/compare_output_directories/compare_output_directories.py

+65-75
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,14 @@
44
import sys
55
import traceback
66
import collections
7-
import time
8-
from enum import Enum
9-
from utilities import *
107
import logging
118

9+
if sys.version_info < (3,0):
10+
print("Sorry, requires Python 3.x, not Python 2.x")
11+
sys.exit(1)
12+
sys.path.append(os.path.realpath(os.path.dirname(sys.argv[0])) + '.utilities')
13+
from utilities import *
14+
1215
#ignore some file extensions
1316
def remove_irrelevant_files(listOfFiles):
1417
return [file for file in listOfFiles if not ( file.endswith('.log')
@@ -24,7 +27,7 @@ def remove_irrelevant_files_from_dcmp(dcmp, filter_function=remove_irrelevant_fi
2427
dcmp.right_only = filter_function(dcmp.right_only)
2528
dcmp.diff_files = filter_function(dcmp.diff_files)
2629
dcmp.funny_files = filter_function(dcmp.funny_files)
27-
dcmp.common_files = filter_function(dcmp.common_files)
30+
dcmp.common_files = filter_function(dcmp.common_files)
2831
dcmp.common_funny = filter_function(dcmp.common_funny)
2932

3033
for sub_dcmp in dcmp.subdirs.values():
@@ -57,7 +60,7 @@ def get_all_common_different_files(dcmp):
5760

5861
def get_hash_sum_of_lines(filename):
    """Return an order-independent fingerprint of the lines of a text file.

    The built-in hash of every line is summed, so two files that contain the
    same lines in a different order produce the same value. The result is a
    nearly unique identifier, not a guaranteed one: different sets of lines
    can in principle sum to the same number.

    The value is only meaningful when compared with other values computed in
    the same Python process, because string hashes are randomized between
    interpreter runs.

    Args:
        filename: path of the text file to read.

    Returns:
        int: the sum of hash(line) over every line (0 for an empty file).
    """
    # latin-1 maps every possible byte to a character, so decoding can never
    # fail on unexpected bytes; it also matches the encoding used when the
    # same files are later compared line by line.
    with open(filename, encoding='latin-1') as infile:
        return sum(hash(line) for line in infile)
@@ -72,7 +75,6 @@ def print_diff_files(dcmp):
7275
print_diff_files(sub_dcmp)
7376

7477
def are_outputs_equal(parameters):
75-
start_time = time.perf_counter()
7678
parser = argparse.ArgumentParser(description='Compare two DaySim output directories')
7779
parser.add_argument('--outputs_reference', help='The reference saved outputs from a successful run [default: %(default)s}')
7880
parser.add_argument('--outputs_new', help='Newly generated result to be compared to reference [default: %(default)s}')
@@ -95,109 +97,97 @@ def are_outputs_equal(parameters):
9597
elif not os.path.isdir(args.outputs_new):
9698
raise Exception('outputs_reference "' + args.outputs_reference + '" exists but not outputs_new "' + args.outputs_new + '"')
9799

98-
100+
print('python ' + os.path.realpath(__file__) + ' --outputs_reference "' + os.path.realpath(args.outputs_reference) + '" --outputs_new "' + os.path.realpath(args.outputs_new) + '"')
99101
dcmp = filecmp.dircmp(args.outputs_reference, args.outputs_new)
100102
remove_irrelevant_files_from_dcmp(dcmp)
101103

102-
#logging.debug('dcmp finished')
103-
#logging.debug('perf_time(): ' + str(time.perf_counter() - start_time))
104-
105104
are_all_files_common = are_all_files_common_func(dcmp)
106-
#logging.debug('are_all_files_common finished: ' + str(are_all_files_common))
107-
#logging.debug('perf_time(): ' + str(time.perf_counter() - start_time))
108105

109106
if not are_all_files_common:
110107
result = False
111108
print("Folders do not have all of the same files so regression fails.")
109+
dcmp.report_full_closure()
112110
else:
113111
all_common_different_files = get_all_common_different_files(dcmp)
114-
result = len(all_common_different_files) == 0 #result is good if all common files are the same
112+
result = True #this will be changed to false if any individual file is different in an important way (other than order)
115113
logging.debug('There are #' + str(len(all_common_different_files)) + ' files which are not binary identical. Will look more deeply.')
116-
#logging.debug('perf_time(): ' + str(time.perf_counter() - start_time))
117114

115+
actuallyDifferentFiles = []
118116
for different_file in all_common_different_files:
119-
result = False #since files are different assume failure unless changed again
120117
#some DaySim files are identical in content but are output in a different line order
121118
reference_file = os.path.join(args.outputs_reference, different_file)
122119
assert os.path.isfile(reference_file), "reference_file is not a file: " + reference_file
123120
filename, file_extension = os.path.splitext(reference_file)
124121
allow_text_comparison = file_extension in ['.csv','.dat','.tsv','.txt']
125122
new_file = os.path.join(args.outputs_new, different_file)
126123
assert os.path.isfile(reference_file), "new_file is not a file: " + new_file
127-
if os.path.getsize(reference_file) != os.path.getsize(new_file):
128-
logging.debug('length of common file: ' + different_file + ' differs so difference must be more than different sort order!')
124+
#could check file size here with os.path.getsize if concerned about speed, but don't bother because we want to give a more detailed diff if possible
125+
filesAreDifferent = not allow_text_comparison
126+
if filesAreDifferent:
127+
print('Files are different: "' + different_file + '" but do not know how to examine this type of file line by line so must assume different in a significant way!')
129128
else:
130-
logging.debug('Common_file that is binary different at least has same file size so, if suitable text file, will check to see if same contents in different order. File: ' + different_file)
131-
if allow_text_comparison:
132-
#since same size need to check if same lines but in different order
133-
134-
#quickest and least memory method is to sum the hash of each line and then compare
135-
hash_sum_reference = get_hash_sum_of_lines(reference_file)
136-
#logging.debug('hash_sum of reference: ' + str(hash_sum_reference))
137-
#logging.debug('perf_time(): ' + str(time.perf_counter() - start_time))
138-
hash_sum_new_file = get_hash_sum_of_lines(new_file)
139-
#logging.debug('hash_sum of new file: ' + str(hash_sum_new_file))
140-
#logging.debug('perf_time(): ' + str(time.perf_counter() - start_time))
141-
142-
if hash_sum_reference == hash_sum_new_file:
143-
print('File "' + different_file + '" has identical content just in different order.')
144-
result = True #files count as same despite different order
145-
#else files are different in more than just sort order!
146-
147-
if result == False:
148-
if not allow_text_comparison:
149-
logging.debug('Files are different but unhandled extension "' + file_extension + '" so cannot check if differ only by line order. Therefore regression fails.')
150-
else:
151-
logging.debug('hash_sum of files is different so going to compare lines. reference_file "' + reference_file + '".')
129+
#quickest and least memory method is to sum the hash of each line and then compare
130+
hash_sum_reference = get_hash_sum_of_lines(reference_file)
131+
hash_sum_new_file = get_hash_sum_of_lines(new_file)
132+
133+
filesAreDifferent = hash_sum_reference != hash_sum_new_file
134+
if not filesAreDifferent:
135+
logging.debug('File "' + different_file + '" has identical content just in different order.')
136+
else: #files are different in more than just sort order!
137+
#print('hash_sum of files is different so going to compare lines. File "' + different_file + '".')
152138
#if the files do not have identical lines get more detailed information of differences
139+
153140
with open(reference_file, encoding='latin-1') as infile:
141+
reference_header = infile.readline()
154142
counts = collections.Counter(l for l in infile)
155143

156144
logging.debug('Finished counting lines in reference folder copy of "' + different_file + '". There are '
157145
+ str(len(counts)) + ' distinct lines')
158-
#logging.debug('perf_time(): ' + str(time.perf_counter() - start_time))
159-
160-
#logging.debug('deep_getsizeof(counts): ' + human_readable_bytes(deep_getsizeof(counts, set())))
161-
#logging.debug('perf_time(): ' + str(time.perf_counter() - start_time))
162146

163147
with open(new_file, encoding='latin-1') as infile:
148+
new_header = infile.readline()
164149
counts.subtract(l for l in infile)
165150
logging.debug('Finished checking new version of "' + different_file + '".')
166-
#logging.debug('perf_time(): ' + str(time.perf_counter() - start_time))
167-
168-
missing_from_reference = []
169-
missing_from_new = []
170-
for line, count in counts.items():
171-
if count < 0:
172-
missing_from_reference.append((line,count))
173-
elif count > 0:
174-
missing_from_new.append((line,count))
175-
176-
assert len(missing_from_reference) != 0 or len(missing_from_new) != 0, "hash_sum was different but the counts of each distinct are identical!"
177-
178-
print('File "' + different_file + '" with ' + str(len(counts)) + ' distinct lines has '
179-
+ str(len(missing_from_new)) + ' distinct lines that were not found in the new and '
180-
+ str(len(missing_from_reference)) + ' distinct lines that were not found in the reference file')
181-
182-
def print_line_and_counts_to_string(identifier, counted_strings):
183-
#sort the missing lines so that the ones shown in reference and new will likely be similar which will make differences easier to spot
184-
counted_strings.sort(key=lambda line_count_tuple : line_count_tuple[0])
185-
if len(counted_strings) > 0:
186-
message = ('All ' if len(counted_strings) <= args.max_different_lines_to_show else (' Sample ' + str(args.max_different_lines_to_show))) + ' lines that are ' + identifier + '.\n'
187-
message += '\n'.join(str(abs(count)) + ': ' + str(line) for line, count in counted_strings[:args.max_different_lines_to_show])
188-
print(message)
189-
190-
print_line_and_counts_to_string('missing from new file', missing_from_new)
191-
print_line_and_counts_to_string('missing from reference', missing_from_reference)
192-
193-
#logging.debug('perf_time(): ' + str(time.perf_counter() - start_time))
194-
#STOP!
195-
break
196151

152+
if reference_header != new_header:
153+
print('File headers are different!\nref: ' + reference_header + '\nnew: ' + new_header)
154+
else:
155+
missing_from_reference = []
156+
missing_from_new = []
157+
for line, count in counts.items():
158+
if count < 0:
159+
missing_from_reference.append((line,count))
160+
elif count > 0:
161+
missing_from_new.append((line,count))
162+
163+
assert len(missing_from_reference) != 0 or len(missing_from_new) != 0, "hash_sum was different but the counts of each distinct are identical!"
164+
165+
print('File "' + different_file + '" with ' + str(len(counts)) + ' distinct lines has '
166+
+ str(len(missing_from_new)) + ' distinct lines that were not found in the new and '
167+
+ str(len(missing_from_reference)) + ' distinct lines that were not found in the reference file')
168+
169+
#sort list and only keep top few lines
170+
missing_from_reference.sort(key=lambda line_count_tuple : line_count_tuple[0])
171+
missing_from_reference = missing_from_reference[:args.max_different_lines_to_show]
172+
173+
missing_from_new.sort(key=lambda line_count_tuple : line_count_tuple[0])
174+
missing_from_new = missing_from_new[:args.max_different_lines_to_show]
175+
176+
print('hdr: ' + reference_header.strip('\n'))
177+
for missing_line_index in range(0, min(len(missing_from_reference), len(missing_from_new))):
178+
print('ref: ' + missing_from_reference[missing_line_index][0].strip('\n') + '\tmissing count: ' + str(abs(missing_from_reference[missing_line_index][1])))
179+
print('new: ' + missing_from_new[missing_line_index][0].strip('\n') + '\tmissing count: ' + str(abs(missing_from_new[missing_line_index][1])))
180+
print('------')
181+
if filesAreDifferent:
182+
actuallyDifferentFiles.append(different_file)
183+
result = result and not filesAreDifferent
184+
#print('Is "' + different_file + '" actually different?: ' + str(filesAreDifferent) + '. Is regression still passing?: ' + str(result))
185+
186+
print('There were ' + str(len(all_common_different_files)) + ' that were binary different. Of those, ' + str(len(actuallyDifferentFiles)) + ' files differed in ways that mattered: ' + str(actuallyDifferentFiles))
197187
if result:
198-
print('Tests passed. Number of order different files: ' + str(len(all_common_different_files)))
188+
print('PASSED! :-)')
199189
else:
200-
dcmp.report_full_closure()
190+
print('FAILED! :-(')
201191
return result
202192

203193
if __name__ == "__main__":

DaySim.Tests/DaySim.Tests.external/compare_output_directories/regress_model.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,19 @@
66
import traceback
77
import collections
88
import time
9-
from enum import Enum
109
import shutil
1110
import xml.etree.ElementTree as ET
1211
import logging
1312
import subprocess
13+
from string import Template
14+
15+
if sys.version_info < (3,0):
16+
print("Sorry, requires Python 3.x, not Python 2.x")
17+
sys.exit(1)
18+
sys.path.append(os.path.realpath(os.path.dirname(sys.argv[0])) + '.run_process_with_realtime_output')
1419
import run_process_with_realtime_output
20+
sys.path.append(os.path.realpath(os.path.dirname(sys.argv[0])) + '.utilities')
1521
import utilities
16-
from string import Template
1722

1823
def compare_directories(old_dir, new_dir, isVerbose):
1924
import compare_output_directories

DaySim.Tests/DaySim.Tests.external/compare_output_directories/regress_subfolders.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,15 @@
44
import sys
55
import traceback
66
import time
7-
import regress_model
87
import glob
98
import logging
9+
10+
if sys.version_info < (3,0):
11+
print("Sorry, requires Python 3.x, not Python 2.x")
12+
sys.exit(1)
13+
sys.path.append(os.path.realpath(os.path.dirname(sys.argv[0])) + '.regress_model')
14+
import regress_model
15+
sys.path.append(os.path.realpath(os.path.dirname(sys.argv[0])) + '.run_process_with_realtime_output')
1016
import run_process_with_realtime_output
1117

1218
def parse_bool(v):

DaySim.Tests/DaySim.Tests.external/compare_output_directories/run_process_with_realtime_output.py

+5
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22
from subprocess import Popen, PIPE
33
from threading import Thread
44
from queue import Queue, Empty
5+
import sys
6+
7+
if sys.version_info < (3,0):
8+
print("Sorry, requires Python 3.x, not Python 2.x")
9+
sys.exit(1)
510

611
def run_process_with_realtime_output(cmd):
712
io_q = Queue()

DaySim.Tests/DaySim.Tests.external/compare_output_directories/utilities.py

+5
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@
44
import time
55
import os
66
import re
7+
import sys
8+
9+
if sys.version_info < (3,0):
10+
print("Sorry, requires Python 3.x, not Python 2.x")
11+
sys.exit(1)
712

813
def delete_matching_files(directory, pattern):
914
for root, dirs, files in os.walk(directory):
Binary file not shown.

0 commit comments

Comments
 (0)