Skip to content

Commit 60e3c1e

Browse files
committed
Computing metrics for glottochronology and morphology
1 parent 26af455 commit 60e3c1e

File tree

1 file changed

+42
-0
lines changed

1 file changed

+42
-0
lines changed

lingvodoc/schema/gql_cognate.py

+42
Original file line numberDiff line numberDiff line change
@@ -4371,6 +4371,11 @@ class Arguments:
43714371
embedding_3d = graphene.List(graphene.List(graphene.Float))
43724372
perspective_name_list = graphene.List(graphene.String)
43734373

4374+
dictionary_count = graphene.Int()
4375+
group_count = graphene.Int()
4376+
not_enough_count = graphene.Int()
4377+
transcription_count = graphene.Int()
4378+
43744379
@staticmethod
43754380
def get_entry_text(entry):
43764381
return f"{entry['swadesh']} [ {entry['transcription']} ] {entry['translation']}"
@@ -4773,19 +4778,31 @@ def split_lex(lex):
47734778
# GC
47744779
del data_query
47754780

4781+
group_counter = [0] * len(group_list)
4782+
total_transcription_count = 0
4783+
47764784
# Checking if found entries have links
47774785
means = collections.OrderedDict()
47784786
for perspective_id, entries in entries_set.items():
47794787
means[perspective_id] = collections.defaultdict(set)
47804788
for group_index, group in enumerate(group_list):
4789+
47814790
# Select etymologically linked entries
47824791
linked = entries & group
4792+
# Count non-empty 'linked'
4793+
group_counter[group_index] += (len(linked) > 0)
4794+
47834795
for entry_id in linked:
47844796
result_pool[perspective_id][entry_id]['group'] = group_index
47854797
swadesh = result_pool[perspective_id][entry_id]['swadesh']
47864798
# Store the correspondence: perspective { meanings(1/2/3) { etymological_groups(1.1/1.2/2.1/3.1)
47874799
if not result_pool[perspective_id][entry_id]['borrowed']:
47884800
means[perspective_id][swadesh].add(group_index)
4801+
total_transcription_count += 1
4802+
4803+
not_enough_count = 0
4804+
for count in group_counter:
4805+
not_enough_count += (count < 2)
47894806

47904807
dictionary_count = len(means)
47914808
distance_data_array = numpy.full((dictionary_count, dictionary_count), 50, dtype='float')
@@ -4863,6 +4880,10 @@ def split_lex(lex):
48634880

48644881
result = html_result,
48654882
xlsx_url = xlsx_url,
4883+
dictionary_count = len(perspective_info_list),
4884+
group_count = len(group_list),
4885+
not_enough_count = not_enough_count,
4886+
transcription_count = total_transcription_count,
48664887
minimum_spanning_tree = mst_list,
48674888
embedding_2d = embedding_2d_pca,
48684889
embedding_3d = embedding_3d_pca,
@@ -5024,6 +5045,11 @@ class Arguments:
50245045
embedding_3d = graphene.List(graphene.List(graphene.Float))
50255046
perspective_name_list = graphene.List(graphene.String)
50265047

5048+
dictionary_count = graphene.Int()
5049+
group_count = graphene.Int()
5050+
not_enough_count = graphene.Int()
5051+
transcription_count = graphene.Int()
5052+
50275053
@staticmethod
50285054
def get_entry_text(entry):
50295055
return f"{'; '.join(entry['affix'])} ( {'; '.join(entry['meaning'])} )"
@@ -5207,17 +5233,29 @@ def morph_cognate_statistics(
52075233
# GC
52085234
del data_query
52095235

5236+
group_counter = [0] * len(group_list)
5237+
total_transcription_count = 0
5238+
52105239
# Checking if found entries have links
52115240
for perspective_id, entries in result_pool.items():
52125241
for group_index, group in enumerate(group_list):
5242+
52135243
# Select etymologically linked entries
52145244
linked = entries.keys() & group
5245+
# Count non-empty 'linked'
5246+
group_counter[group_index] += (len(linked) > 0)
5247+
52155248
for entry_id in linked:
52165249
result_pool[perspective_id][entry_id]['group'] = group_index
52175250
meaning = result_pool[perspective_id][entry_id]['meaning']
5251+
total_transcription_count += 1
52185252
for sub_meaning in meaning:
52195253
meaning_to_links[perspective_id][sub_meaning].add(group_index)
52205254

5255+
not_enough_count = 0
5256+
for count in group_counter:
5257+
not_enough_count += (count < 2)
5258+
52215259
dictionary_count = len(result_pool)
52225260
distance_data_array = numpy.full((dictionary_count, dictionary_count), 50, dtype='float')
52235261
complex_data_array = numpy.full((dictionary_count, dictionary_count), "n/a", dtype='object')
@@ -5302,6 +5340,10 @@ def morph_cognate_statistics(
53025340

53035341
result = html_result,
53045342
xlsx_url = xlsx_url,
5343+
dictionary_count=len(perspective_info_list),
5344+
group_count=len(group_list),
5345+
not_enough_count = not_enough_count,
5346+
transcription_count=total_transcription_count,
53055347
minimum_spanning_tree = mst_list,
53065348
embedding_2d = embedding_2d_pca,
53075349
embedding_3d = embedding_3d_pca,

0 commit comments

Comments (0)