Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes
File renamed without changes
File renamed without changes
Binary file added analysis/plots/doculects.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
46 changes: 46 additions & 0 deletions analysis/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
appdirs==1.4.4
attrs==23.2.0
Babel==2.15.0
bibtexparser==2.0.0b7
certifi==2024.7.4
charset-normalizer==3.3.2
clldutils==3.22.2
colorama==0.4.6
colorlog==6.8.2
commonnexus==1.9.2
csvw==3.3.0
idna==3.7
importlib_metadata==8.0.0
isodate==0.6.1
jmespath==1.0.1
jsonschema==4.23.0
jsonschema-specifications==2023.12.1
language-tags==1.2.0
latexcodec==3.0.0
lingpy==2.6.13
lxml==5.2.2
Markdown==3.6
MarkupSafe==2.1.5
networkx==3.2.1
newick==1.9.0
numpy==2.0.0
pybtex==0.24.0
pycldf==1.38.1
pylatexenc==2.10
pyparsing==3.1.2
pysem==0.8
python-dateutil==2.9.0.post0
python-frontmatter==1.1.0
PyYAML==6.0.1
rdflib==7.0.0
referencing==0.35.1
requests==2.32.3
rfc3986==1.5.0
rpds-py==0.19.0
six==1.16.0
tabulate==0.9.0
termcolor==2.4.0
tqdm==4.66.4
uritemplate==4.1.1
urllib3==2.2.2
zipp==3.19.2
2,264 changes: 2,264 additions & 0 deletions analysis/soundclass_queries/coordinates.js

Large diffs are not rendered by default.

171 changes: 171 additions & 0 deletions analysis/soundclass_queries/data.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
ka= all
kaki= dry, thirsty
kali= skin, bark, leather, clothes
kalu= hand/arm, wing, side
kama= squeeze/press
kapa= oar, paddle
kapu= head, top, begin, start, chieftain, (French chef), first
kati= bone, leg/foot
katu= seize, take, win
kaγu= own, have, hold
ki= small, little, narrow, few
kika= spear, pierce, impale, sting
kila= tail, back, behind
kilu= against, war, struggle, fight, argument
kima= sad(ness), remorse
kipa= cleave(r), destroy, break
kiγa= face, forehead, front, countenance
ku= what/who, something/someone, part/member
kuka= 4-legged reptiles such as crocodile, alligator, lizard, crawl
kula= hole, cave
kuli= claw, nail
kulu= hunt, look for, pursue
kupa= run, flow, flee
kuta= bad, wrong, ugly, left
kutu= louse, flea, insect, bug
kuγa= dig, hollow out
kuγu= color, dye/pigment, paint
la= I/we 1st person
laka= ear, hear, smell, listen
laki= up, above, high, ascend
laku= person, people
lala= good, beautiful, lucky, easy
lali= swim, bathe, wash
lalu= road, path, way, manner, how
lama= name, kind/type
lamu= together, unite, agree
lapi= leaf, hair, feather
lapu= mouth, lips
lata= long, far, tall, wide, long time
laγa= sleep, lie (down), rest
li= thou/you, 2nd person
lika= other, different, strange, foreign, alien, unknown
lila= woman, female, wife
lili= rope, thread, fishing line
lima= tongue, language, word, speak, say
limi= bird
lipa= joy, happy, happiness, bliss
lipi= know, understand
lita= night, dark, black
liti= sharp(en), point
liγa= carve, mark (on stone, skin, etc.)
luka= neck, throat
luki= tool, weapon
lula= south
luli= same, self, spontaneous
luma= tie, bind, knot
lumi= god, heaven, holy
lumu= eat, bite, food, taste, ingest
lupa= medicine, cure, drug
lupu= remove, cut off, take off, take away
luta= girl, daughter
luti= wind, air, breath(e), weather
luγa= rise, stand, climb
ma= come, go, walk, arrive, happen
maki= gather, pick up
mala= mind, think, remember
mali= weak, tired, soft, smooth
malu= stone, rock, hill, mountain
mama= sing, song, music
mapa= milk, breast, udder, chest
mata= eye, see, dream
matu= ship, canoe, boat, raft
maγa= white, light (color), clear, pure
maγu= (go) in, at, enter, inside
mi= not, no, nothing, -less, in-, un-, without (cf. ʻaʻole in Hawaiian, bâ in Kanuri)
miki= son, boy
miku= heavy, difficult, pregnant
mila= permit, allow, may, maybe, can, be able to, possible
mili= new, young
mimi= flat, plain, smooth
mipi= short, low, near
mitu= container, trencher (plate), pot, cup(ule)
miγa= deer
miγi= raw, unripe, green
mu= mother, aunt
muki= man, male, husband
mula= fruit, grain
mulu= ash, dust, fog, smoke
muma= big, many, much, important, mighty, very
mupu= belly, stomach, intestines, liver
muti= grease, fat, oil, mutter
muγa= knee, kneel, elbow
pa= give, lend
paka= knife, use a knife, cut
paki= small mammal, prey
pala= fly, jump
pali= two, pair, double, twin
papu= wave, rough (of sea)
patu= end (time, space), finish, complete, after (as in Mayan, see Bohnemeyer, 1998)
paγa= (go) out, leave, outside
pi= blood, red
piki= sour, bitter, salty
pila= moon, month
pilu= fire, flame, burn(t)
pima= let go, free, open, untie
pipu= sibling, cousin
pita= from, cause, because, therefore, why, source
piti= dance, twinkle (astral bodies)
piγa= (have) sex
pu= full, complete, swell, teem
puka= vomit, spit
puli= hit, strike, beat
puma= calm, serene, peace(ful), silence, be silent
pupa= pack, gang, family, relatives
puti= female genitals
puγi= house, home
ta= this, here, now
taka= east, dawn
taku= father, uncle
tali= sand, shore
tama= throw, shoot
tami= only, (al)one
tapi= stick, shaft, club
tatu= ripe, cooked, yellow
taγi= cold, fresh, chill
ti= fish, sea creature
tiki= fear, anxiety
tila= sun, day
tili= root, blood vessel
tilu= tooth
tima= heart, soul
timu= cloud, sky
tipa= rain, storm
tipi= male genitals
titi= star, twinkle, blink, flash
titu= north
tiγa= tree, wood
tiγu= hammer (flexible), pelt
tu= that, there, then, 3rd person (cf. 那 in Mandarin, káa in Mayan, see Bohnemeyer, 1998)
tuka= touch, feel
tuku= jaguar, genus Panthera, genus Canis, predator (as in Tupian languages)
tula= pain, sick, wound, disease
tuli= nose, beak
tulu= warm, hot, heat, fever
tumi= nut
tumu= flesh, body
tupa= set, place, put
tuta= straight, direct, right/correct
tutu= excrement
tuγu= strong, hard
γala= be born, beget, birth
γapi= below, under, down, descend/fall
γatu= old, past, age, ex-, former
γi= egg, testicle(s), seed, bean, pea
γika= similar, like, seem
γila= squat, sit, reside, lie, remain, be, exist(ence), live
γima= cover, shut
γita= friend, guest
γiγa= shout, howl, shriek, call
γu= earth, land, ground, soil
γuku= do, make, change, prepare, become, action
γula= horn
γuli= want, desire, hunger
γulu= sweet, tasty, fragrant (cf. suave in Latin, Romance, and lekker in Germanic)
γuma= water, sea, river, lake, tear (eye)
γumu= die, kill, dead
γupa= west, evening
γupu= urine
γuti= plant, herb, grass, flower
γutu= sound, noise
γuγi= snake, worm, slither
27 changes: 27 additions & 0 deletions analysis/soundclass_queries/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<!DOCTYPE html>
<!--
  Leaflet map page for the sound-class query results.
  The page creates a global `map` object; coordinates.js (loaded last) adds
  one circle marker per matching language to that object, so the script
  order below must be kept: Leaflet -> map setup -> coordinates.js.
-->
<html lang="en">
<head>
  <!-- coordinates.js is written as UTF-8 and contains language names. -->
  <meta charset="utf-8">
  <title>Sound-class query hits</title>
  <link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css"
        integrity="sha256-p4NxAoJBhIIN+hmNHrzRCf9tD/miZyoHS5obTRR9BMY="
        crossorigin=""/>
  <!-- Make sure you put this AFTER Leaflet's CSS -->
  <script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"
          integrity="sha256-20nQCchB9co0qIjJZRGuk2/Z9VM+kNiyxNV1lvTlZBo="
          crossorigin=""></script>
  <style>
    #map { height: 400px; width: 800px; }
  </style>
</head>
<body>
  <div id="map"></div>

  <script>
    // World view: zoom level 1 shows all continents at once.
    var map = L.map('map').setView([51.505, -0.09], 1);
    L.tileLayer('https://tile.openstreetmap.org/{z}/{x}/{y}.png', {
      maxZoom: 19,
      attribution: '&copy; <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a>'
    }).addTo(map);
  </script>
  <!-- Generated by match_soundclasses.py; needs the global `map` defined above. -->
  <script src="coordinates.js"></script>
</body>
</html>
117 changes: 117 additions & 0 deletions analysis/soundclass_queries/match_soundclasses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
"""
Query with the Lexibank Database.
"""
import argparse
import csv
import sqlite3
from clldutils.misc import slug
from lingpy import tokens2class, ipa2tokens
from pysem import to_concepticon
from tabulate import tabulate


# Load the Lexibank database. The path is relative to the current working
# directory, so the script has to be started from the directory that makes
# "../lexibank.sqlite3" resolve to the database file.
# NOTE: sqlite3.connect() creates a new, empty database file when the path
# does not exist, so a wrong working directory only surfaces later as
# missing-table errors.
db = sqlite3.connect("../lexibank.sqlite3")
# Module-level cursor; run_query() executes all of its statements on it.
cursor = db.cursor()


def _read_proto_forms(path):
    """Parse a ``word= gloss, gloss, ...`` file into FormTable value tuples.

    Returns a list of ``(word, segments, concept_slug, sound_classes)``
    tuples, one per gloss that ``to_concepticon`` could map. Blank lines,
    lines without ``=`` and empty glosses (e.g. after a trailing comma) are
    skipped instead of raising.
    """
    forms = []
    with open(path, encoding='utf8') as f:
        for line in f:
            word, sep, glosses = line.partition("=")
            word = word.strip()
            if not sep or not word:
                continue
            # Tokenize once per word; both derived columns reuse the tokens.
            tokens = ipa2tokens(word)
            segments = " ".join(tokens)
            sound_classes = "".join(tokens2class(tokens, "dolgo"))
            for gloss in glosses.split(","):
                gloss = gloss.strip()
                if not gloss:
                    continue
                mappings = to_concepticon([{"gloss": gloss}])[gloss]
                if mappings:
                    # Only the best (first) mapping is used; its second
                    # element is slugged into the parameter reference.
                    forms.append(
                        (word, segments, slug(mappings[0][1]), sound_classes)
                    )
    return forms


def _write_map_markers(table, max_hits, path="coordinates.js", scale=0.50):
    """Write one Leaflet circle per row that reaches half of *max_hits*.

    Rows are written in reverse order. Columns 4 and 5 are used as the
    coordinates, column 0 as the popup label and the last column as the
    hit count.
    """
    # Ordered from the strictest threshold to the weakest; first match wins.
    shades = (
        (max_hits, "black"),
        (max_hits - (0.25*max_hits), "darkgray"),
        (max_hits - (0.50*max_hits), "lightgray"),
    )
    cutoff = max_hits - (0.5*max_hits)

    with open(path, "w", encoding='utf8') as f:
        for row in table[::-1]:
            hits = row[-1]
            if hits < cutoff:
                continue
            if row[4] is None or row[5] is None:
                # A NULL coordinate would be emitted as "None" and break the
                # generated JavaScript for every marker.
                continue
            # hits >= cutoff == last threshold, so a shade is always found.
            fill = next(color for limit, color in shades if limit <= hits)
            # Escape characters that would terminate the JS string literal.
            label = str(row[0]).replace("\\", "\\\\").replace('"', '\\"')
            f.write(
                f"L.circle([{row[4]}, {row[5]}], {{color: 'black', fillOpacity: 1, weight: 1, fillColor: '{fill}', radius: {hits * scale * 50000}}}).addTo(map)\n"
            )
            f.write(f'.bindPopup("<b>{label}: {hits} Hits</b>");\n')


def run_query(setting, gcode):
    """Run the SQL query stored in *setting* and export the results.

    Parameters:
        setting: Path of the SQL file to execute. The names 'q_base.sql',
            'q_extended.sql' and 'q_proto.sql' additionally select the table
            header; 'q_proto.sql' also loads the forms from ``data.txt`` into
            the database as language 'proto' before the query runs.
        gcode: Glottocode stored as the default value of the temporary
            ``filter`` column of LanguageTable (not used for 'q_proto.sql').

    Side effects:
        Executes statements on the module-level ``cursor`` (no commit is
        issued here), writes ``coordinates.js`` and ``matches.tsv`` to the
        current directory and prints the first ten rows. If the query
        returns no rows, a notice is printed and no file is written.
    """
    if setting == 'q_proto.sql':
        proto_forms = _read_proto_forms("data.txt")
        cursor.execute("insert into LanguageTable(cldf_id) values ('proto');")
        # Bound parameters instead of string concatenation: forms or slugs
        # containing quotes can no longer break (or alter) the statement.
        cursor.executemany(
            "insert into FormTable(cldf_id, cldf_form, cldf_value, cldf_languageReference, "
            "cldf_Segments, cldf_parameterReference, "
            "Dolgo_Sound_Classes) values (?, ?, ?, 'proto', ?, ?, ?);",
            [
                (f"proto-{i}", word, word, segments, concept, classes)
                for i, (word, segments, concept, classes)
                in enumerate(proto_forms, start=1)
            ],
        )

    try:
        # Remove the column left behind by a previous run.
        cursor.execute("ALTER TABLE LanguageTable DROP COLUMN filter")
    except sqlite3.OperationalError:
        # Deliberate best effort: the column does not exist on a first run.
        # NOTE(review): SQLite older than 3.35 has no DROP COLUMN and also
        # ends up here; the ADD COLUMN below then fails on a second run.
        pass

    if setting != 'q_proto.sql':
        # DDL cannot take bound parameters, so quote the value as an SQL
        # string literal (single quotes doubled).
        quoted_gcode = "'" + str(gcode).replace("'", "''") + "'"
        cursor.execute(
            f"ALTER TABLE LanguageTable ADD COLUMN filter VARCHAR DEFAULT {quoted_gcode}"
        )

    with open(setting, encoding='utf8') as f:
        query = f.read()

    cursor.execute(query)
    table = cursor.fetchall()

    if not table:
        print("[i] Query returned no rows; nothing was written.")
        return

    # NOTE(review): assumes the query returns rows sorted by hits in
    # descending order, so the first row carries the maximum.
    max_hits = table[0][-1]
    _write_map_markers(table, max_hits)

    print(f"[i] Saved file with maximal number of hits at {max_hits}.")

    headers_by_setting = {
        'q_base.sql': [
            "Name", "ID", "Glottocode", "Family", "Latitude", "Longitude", "Hits"
        ],
        'q_proto.sql': [
            "Name", "ID", "Glottocode", "Family", "Latitude", "Longitude", "Hits"
        ],
        'q_extended.sql': [
            "Name", "ID", "Glottocode", "Family", "Latitude", "Longitude", "Concepticon",
            "Core Concept", "Dolgopolsky", "Segments A", "Segments B", "Hits"
        ],
    }
    # Any other query file is printed without column headers.
    header = headers_by_setting.get(setting, [])

    print(tabulate(
        table[:10],
        tablefmt="pipe",
        headers=header
    ))

    with open('matches.tsv', 'w', encoding='utf8', newline='') as f:
        writer = csv.writer(f, delimiter='\t')
        writer.writerows(table)


if __name__ == '__main__':
    # Command-line entry point: parse the options and run a single query.
    parser = argparse.ArgumentParser()
    # Required: run_query() opens this value as a file, so a missing option
    # previously ended in a TypeError from open(None).
    parser.add_argument('--setting', type=str, required=True,
                        help='SQL query file to run: q_base.sql, '
                             'q_extended.sql or q_proto.sql')
    parser.add_argument('--glottocode', type=str, default='kusu1250',
                        help='Choose which glottocode to use for the query')
    args = parser.parse_args()

    run_query(
        setting=args.setting,
        gcode=args.glottocode
    )
Loading