Skip to content

Commit

Permalink
git: initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
nanoskript committed Aug 31, 2023
0 parents commit c259147
Show file tree
Hide file tree
Showing 11 changed files with 1,001 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/.venv
/.pdm.toml
/onomatopoeia.json
/public/onomatopoeia.json
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "data/nihongoresources.com"]
path = data/nihongoresources.com
url = https://github.com/Pomax/nihongoresources.com.git
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# jp-onomatopoeia

Compiled dataset of Japanese onomatopoeia and sound effects.

## Data sources

- [nihongoresources.com](https://github.com/Pomax/nihongoresources.com)
- [Japanese Manga SFX spreadsheet](https://docs.google.com/spreadsheets/d/1iXFIVmSzukN1PYD1JakdqMmrNNlc7TtkutDKevuKM0c/edit)
122 changes: 122 additions & 0 deletions compile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import csv
import dataclasses
import glob
import json
import re
from collections import defaultdict
from itertools import islice
from pathlib import Path
from typing import Optional


@dataclasses.dataclass
class RawEntry:
    """A single onomatopoeia sense as read from a data source, before grouping."""

    # Japanese spelling of the sound effect; compile_entries groups by it.
    katakana: str
    # English rendering of the sound effect.
    english: str
    # Optional explanatory text; may be None.
    details: Optional[str]


def compile_nihongoresources_com(entries: list[RawEntry]):
    """Append the entries of the nihongoresources.com giongo list.

    Reads ``data/nihongoresources.com/giongo.txt`` as tab-separated text.
    The first line is skipped (presumably a header row).  Every other
    non-blank line is unpacked as ``ignored, katakana, translation,
    *details``; the remaining columns are joined with ``", "`` to form the
    details text.

    Args:
        entries: List that receives one ``RawEntry`` per data line; it is
            mutated in place.

    Raises:
        ValueError: If a non-blank data line has fewer than three columns.
    """
    path = Path("data") / "nihongoresources.com" / "giongo.txt"
    # Decode explicitly: the file holds Japanese text and the platform's
    # default encoding is not guaranteed to handle it.
    # NOTE(review): assumes the file is UTF-8 - confirm against the submodule.
    with open(path, encoding="utf-8") as f:
        for line in islice(f, 1, None):
            # Drop the line terminator first so it cannot leak into the last
            # column (the translation, when a line has no detail columns).
            line = line.rstrip("\r\n")
            if not line.strip():
                continue
            _unused, katakana, translation, *details = line.split("\t")
            entries.append(RawEntry(
                katakana=katakana,
                english=translation,
                # Use None rather than "" for missing details, matching the
                # entries produced by compile_japanese_manga_sfx_sheet.
                details=", ".join(details).strip() or None,
            ))


# Matches a sense marker such as "(1)" and captures its number.
_NUMBERING_RE = re.compile(r"\((\d+)\)")
_WHITESPACE_RE = re.compile(r"\s+")


def _manga_sfx_column_entries(
    rows: list[list[str]], column: int
) -> list[tuple[str, str, str]]:
    """Collect raw ``(katakana, english, details)`` triples for one column group.

    ``column`` is the index of a "Japanese" header cell; the romaji, English
    and explanation cells are read from the three columns to its right.
    """
    def cell(row: list[str], offset: int) -> str:
        # Rows may be shorter than the header row (e.g. blank lines).
        position = column + offset
        return row[position] if position < len(row) else ""

    collected = []
    katakana, english_parts, details_parts = None, [], []
    # The first two rows are headers.
    for row in rows[2:]:
        if cell(row, 1).strip():
            # Assume each romaji row starts a new entry.
            if katakana is not None:
                collected.append(
                    (katakana, " ".join(english_parts), " ".join(details_parts))
                )
                english_parts, details_parts = [], []
            katakana = cell(row, 0).removesuffix(",")

        # Continuation rows extend the text of the current entry.
        english_parts.append(cell(row, 2))
        details_parts.append(cell(row, 3))

    # Some columns have no entries.
    if katakana is not None:
        collected.append(
            (katakana, " ".join(english_parts), " ".join(details_parts))
        )
    return collected


def _split_numbered_senses(
    english: str, details: str
) -> list[tuple[str, Optional[str]]]:
    """Split ``"(1) ...; (2) ..."`` text into one ``(english, details)`` pair per sense."""
    english_parts = _NUMBERING_RE.split(english)
    if len(english_parts) == 1:
        # No numbering.
        return [(english, details or None)]

    # Splitting on a capturing group alternates text and captured numbers,
    # so numbers sit at odd indices, each followed by its text.
    details_parts = _NUMBERING_RE.split(details)
    details_by_number = {
        int(number): text.strip().removesuffix(";")
        for number, text in zip(details_parts[1::2], details_parts[2::2])
    }
    # An explanation without any numbering applies to every sense instead of
    # being dropped.
    shared = (details or None) if len(details_parts) == 1 else None

    return [
        (text.strip().removesuffix(";"), details_by_number.get(int(number), shared))
        for number, text in zip(english_parts[1::2], english_parts[2::2])
    ]


def compile_japanese_manga_sfx_sheet(entries: list[RawEntry]):
    """Append the entries of every Japanese Manga SFX spreadsheet export.

    Each ``data/japanese-manga-sfx/*.csv`` file has two header rows.  The
    second one labels repeated groups of columns; every cell equal to
    ``"Japanese"`` marks a group whose next three columns hold the romaji,
    the English text and the explanation.  An entry spans several rows: it
    starts on a row with a non-empty romaji cell and continues until the next
    such row.

    English and explanation text is whitespace-normalised and the
    ``"More »"`` markers are removed from explanations.  English text
    numbered like ``"(1) ...; (2) ..."`` yields one ``RawEntry`` per number,
    paired with the explanation carrying the same number.

    Args:
        entries: List that receives the parsed ``RawEntry`` objects; it is
            mutated in place.
    """
    pattern = Path("data") / "japanese-manga-sfx" / "*.csv"
    # Sort so the output order does not depend on directory listing order.
    for file in sorted(glob.glob(str(pattern))):
        # newline="" lets the csv module handle line breaks inside quoted
        # cells itself.
        # NOTE(review): assumes the exports are UTF-8 - confirm.
        with open(file, encoding="utf-8", newline="") as f:
            rows = list(csv.reader(f))
        if len(rows) < 2:
            # No column-label row, so there is nothing to parse.
            continue

        # Find all columns of entries.
        katakana_columns = [
            index for index, label in enumerate(rows[1]) if label == "Japanese"
        ]

        for column in katakana_columns:
            for katakana, english, details in _manga_sfx_column_entries(rows, column):
                english = _WHITESPACE_RE.sub(" ", english.strip())
                details = details.replace("More »", "")
                details = _WHITESPACE_RE.sub(" ", details.strip())
                for sense, explanation in _split_numbered_senses(english, details):
                    entries.append(RawEntry(katakana, sense, explanation))


def compile_raw_entries() -> list[RawEntry]:
    """Collect the raw entries of every data source into one new list.

    The nihongoresources.com entries come first, followed by the Japanese
    Manga SFX spreadsheet entries.
    """
    collected: list[RawEntry] = []
    for compiler in (compile_nihongoresources_com, compile_japanese_manga_sfx_sheet):
        # Each compiler appends its entries to the list it is given.
        compiler(collected)
    return collected


def compile_entries(raw_entries: list[RawEntry]):
    """Group raw entries by their katakana.

    Args:
        raw_entries: Entries to group; their order is preserved within each
            group.

    Returns:
        A ``defaultdict(list)`` mapping each katakana string to a list of
        ``{"english": ..., "details": ...}`` dicts.
    """
    grouped = defaultdict(list)
    for raw in raw_entries:
        record = {"english": raw.english, "details": raw.details}
        grouped[raw.katakana].append(record)
    return grouped


def main():
    """Compile all data sources and write the result to ``onomatopoeia.json``.

    The file is created in the current working directory as JSON indented by
    two spaces.
    """
    compiled = compile_entries(compile_raw_entries())
    with open("onomatopoeia.json", "w") as output:
        json.dump(compiled, output, indent=2)


# Run the compilation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
95 changes: 95 additions & 0 deletions data/japanese-manga-sfx/Japanese Manga SFX - Sheet1 (3).csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
,,,,ラ,,,,,レ,,,,,ワ,,,,,ン,,,,,リ,,,,,ル,,,,,ロ,
,,Japanese,Romaji*,English,Explanation,,Japanese,Romaji*,English,Explanation,,Japanese,Romaji*,English,Explanation,,Japanese,Romaji*,English,Explanation,,Japanese,Romaji*,English,Explanation,,Japanese,Romaji*,English,Explanation,,Japanese,Romaji*,English,Explanation
,,"ラララ,",ra ra ra,*la di da*,SFX for skipping and humming a happy tune. ,,"レロレロ,",rero rero,*bleh bleh*,(1) Rolling something with/on the tongue ,,"ワ,",wa,*wow!* *ack!* *whoa!*,More »,,"ンッ,",n,*grunt*,"As of surprise, effort, sleepiness, pain, or passion.",,,,,,,,,,,,,,,
,,ららら,,*la la la*,,,れろれろ,,*roll roll*,,,わ,,,,,んっ,,,Usually in context of exerting physical effort. ,,,,,,,,,,,,,,,
,,,,,More »,,,,,More »,,"ワ,",wa,(1) *boo!*,More »,,,,,,,,,,,,,,,,,,,,
,,"ラブラブ,",rabu rabu,*romantic*,"SFX that literally derives from the Japanese pronunciation of ""love-love"" as ""rah-bu rah-bu."" ",,,,,,,わ,,(2) *ack!*,,,,,,More »,,,,,,,,,,,,,,,
,,らぶらぶ,,*love love*,,,,,,,,"ワッ,",wa,*wah!* *yay!* *yeeahh!* *cheer* *woot!*,"SFX for a cheer, or a happy atmosphere. ",,"ン?,",n,*hey?*,"Grunt, usually coming out in the form of a question. ",,,,,,,,,,,,,,,
,,,,,More »,,,,,,,わっ,,,,,ん?,,*what?*,,,,,,,,,,,,,,,,
,,"ラクラク,",raku raku,*too easy*,"SFX derived from the adjective ""easy"" or 「楽な」. ",,,,,,,,,,More »,,,,*hm?*,More »,,,,,,,,,,,,,,,
,,らくらく,,,,,,,,,,"ワー,",wa-,(1) *roar*;,(1) (A crowd's) excited roar. See also *wai wai* and *wara wara*;,,,,*hmm?*,,,,,,,,,,,,,,,,
,,,,,More »,,,,,,,わー,,(2) *wow*;,(4) SFX for cheering or SFX for cry of dismay or despair. ,,"ン,",n,(1) *uhn*; ,"Crying out, usually when exerting physical effort. ",,,,,,,,,,,,,,,
,,"ランッ,",ran,(1) *beam* *intensely*,"(1) SFX for sudden attention in one's gaze, like snapping to attention dramatically (can be used for comedic effect) ",,,,,,,,,(3) *yeaahh* *cheer* *applause*;,,,ん,,(2) *ughn*,,,,,,,,,,,,,,,,
,,らんっ,,,,,,,,,,,,(4) *waaah*,More »,,,,,More »,,,,,,,,,,,,,,,
,,,,,More »,,,,,,,"ワーッ,",wa-,*wow* *yay* *delight*,More »,,"ンボーッ,",nbo-,*spaced out* *daydreaming* *dazed*,"Kind of a slow reaction state, when one is very tired or groggy from lack of sleep or exhaustion. (Even the SFX is exhausted with the ""n"" sound before the *bo-* SFX.) See also *bo-*. ",,,,,,,,,,,,,,,
,,"ランラン,",ran ran,*la la*,SFX for skipping and humming a happy tune. ,,,,,,,わーっ,,,,,んぼーっ,,,,,,,,,,,,,,,,,,
,,らんらん,,,,,,,,,,"ワーイ,",wa-i,*yay* *woohoo* *yes* *yeah*,"Positive cheering, expression of cheerfulness or happiness. ",,,,,More »,,,,,,,,,,,,,,,
,,,,,More »,,,,,,,わーい,,,,,"ンチュ,",nchu,*kiss*,"See also *buchu*, *chu*, and *uchu*. ",,,,,,,,,,,,,,,
,,"ランラン,",ranran,*intense* *fiery*,"SFX for an eye expression, for intense, fiery eyes. ",,,,,,,,,,More »,,んちゅ,,,,,,,,,,,,,,,,,,
,,らんらん,,,,,,,,,,"ワーンッ,",wa-n,(1) *bawling* *waahh*;,"(1) SFX for a long, wailing cry of despair;",,,,,More »,,,,,,,,,,,,,,,
,,,,,More »,,,,,,,わーんっ,,(2) *waahh* *grrr*,(2) SFX for a loud cry of outrage or unhappiness with something ,,"ンゴーンガー,",ngo- nga-,*hrrm mrmm*,"SFX for unintelligible muttering or murmuring sounds, such as the sounds made when one is tossing and turning in one's sleep. ",,,,,,,,,,,,,,,
,,"ララリ,",rarari,*ching ching*,"SFX for a light tinkling, ringing, bell-like, chime-like sound. Can be mechanical or digital, like from a video game. ",,,,,,,,,,,,んごーんがー,,*mph mmm*,,,,,,,,,,,,,,,,
,,ららり,,*cha-ching ching*,,,,,,,,,,,More »,,,,,More »,,,,,,,,,,,,,,,
,,,,,More »,,,,,,,"ワーンワーン,",wa-n wa-n,*waah waah*,SFX for crying loudly and hysterically. ,,"ンク,",nku,*sniff* ,"See also *funka*, *hunka*, and *kunka*. ",,,,,,,,,,,,,,,
,,,,,,,,,,,,わーんわーん,,*booh hoo*,,,んく,,*inhale*,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,*bawling*,More »,,,,,More »,,,,,,,,,,,,,,,
,,,,,,,,,,,,"ワーワー,",wa-wa-,(1) *roar*;,(1) (A crowd's) excited roar. See also *wai wai* and *wara wara*;,,"ンマッ,",nma,*geh* *gah*,SFX for shock. ,,,,,,,,,,,,,,,
,,,,,,,,,,,,わーわー,,(2) *chatter* *rustle*,(2) SFX for background noise of pedestrians on the street. ,,んまっ,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,More »,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,More »,,"ンモ,",nmo,*munch* ,SFX for eating with mouth closed. ,,,,,,,,,,,,,,,
,,,,,,,,,,,,"ワア,",waa,(1) *wow*;,"(2) Like the war cries of soldiers in battle, or cry of protest. ",,んも,,*mmm munch*,,,,,,,,,,,,,,,,
,,,,,,,,,,,,わあ,,(2) *roar* *yaahh!*;,,,,,,More »,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,(3) *cheer* *applause*,More »,,"ンン,",nn,(1) *grunt* *urgh* *ugh* *argh* *arg* *umph* *mmph* *nn*;,"(1) SFX for a grunt of effort, stress, or pain;",,,,,,,,,,,,,,,
,,,,,,,,,,,,"ワッフン,",waffun,*grunt*,Light SFX...strange grunt. ,,んん,,(2) *hm?*,(2) Can be used as a question 「んん?」 ,,,,,,,,,,,,,,,
,,,,,,,,,,,,わっふん,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,More »,,,,,More »,,,,,,,,,,,,,,,
,,,,,,,,,,,,"ワフ,",wafu,(1) *grunt*;,(1) Light SFX...strange grunt;,,"ンヌー,",nnu-,*menacing*,More »,,,,,,,,,,,,,,,
,,,,,,,,,,,,わふ,,(2) *woof*,"(2) SFX for dog ""woof-ing"", see also *Wafu wafu*. ",,んぬー,,*scary*,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,*rawr*,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,More »,,"ンショ,",nsho,(1) *grunt*;,Sound made when exerting physical effort or strain. ,,,,,,,,,,,,,,,
,,,,,,,,,,,,"ワフワフ,",wafu wafu,(1) *grunt grunt*,(1) Light SFX...strange grunt;,,んしょ,,(2) oof! umph!,,,,,,,,,,,,,,,,
,,,,,,,,,,,,わふわふ,,*mumble mumble*;,"(2) SFX for dog ""woof-ing"", see also *Wafu*. ",,,,,Same as *Yoisho*; see also *nisho* and *unsho*. ,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,(2) *woof woof*,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,More »,,,,,More »,,,,,,,,,,,,,,,
,,,,,,,,,,,,"ワフーン,",wafu-n,*whine*,"High-pitched, sad whine of a dog. ",,"ンッショ,",nsho,*grunt*,Sound made when exerting physical effort.,,,,,,,,,,,,,,,
,,,,,,,,,,,,わふーん,,,,,んっしょ,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,More »,,,,,More »,,,,,,,,,,,,,,,
,,,,,,,,,,,,"ワハハ,",wahaha,*hahaha*,"Full-on, loud laughter.",,"ンショット,",nshotto,(1) *effort*; ,"(1) See also *nisho*, *unsho*, and *yoisho*;",,,,,,,,,,,,,,,
,,,,,,,,,,,,わはは,,*bwahaha*,,,んしょっと,,(2) oof! umph!,"(2) Effort, strain. ",,,,,,,,,,,,,,,
,,,,,,,,,,,,,,*ahaha*,See also *hahaha* (ハハハ). ,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,More »,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,More »,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,"ワイ,",wai,*yay!* *delight*,"Feminine exclamation of delight, spoken by a character. ",,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,わい,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,More »,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,"ワイワイ,",wai wai,*chatter chatter*,"To explain lively action/noise, like *Gaya gaya* ",,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,わいわい,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,More »,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,"ワク,",waku,*excitement* *happy*,Cute anticipation. ,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,わく,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,More »,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,"ワクワク,",waku waku,*excited* ,"Happy, cute anticipation.",,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,わくわく,,*nervous* ,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,*trembling*,See also *baku baku*. ,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,More »,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,"ワン!,",wan,*woof!*,SFX for the call of a dog or wolf. ,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,わん!,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,More »,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,"ワンワン,",wan wan,*bow-wow*,SFX for the sound of a dog calling. ,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,わんわん,,*woof woof*,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,*wuff wuff*,More »,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,*ruff ruff*,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,"ワナワナ,",wana wana,*tremble tremble* ,Trembling from fear or anger; trembling all over. ,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,わなわな,,*shake shake*,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,More »,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,"ワラワラ,",wara wara,*chatter chatter*,"Crowded people, usually small kids. See also *waa* and *wai wai*. ",,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,わらわら,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,More »,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,"ワサワサ,",wasa wasa,*rustle rustle*,More »,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,わさわさ,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,"ワッシ,",wasshi,*one-two*,SFX for the sound one makes while motivating oneself to use physical effort or to exert oneself physically.,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,わっし,,*one-and-a-two*,"Ex: Counting out ""one-two-three, and a one-two-three"" while doing jumping jacks. ",,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,*one-two-three*,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,*hee haa*,More »,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,*huff huff*,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,"ワタワタ,",wata wata,(1) *panic*;,(1) Moving busily;,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,わたわた,,(2) *flap flap*,"(2) SFX for arms flailing around, trying to cover a mistake. ",,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,More »,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,"ワワ,",wawa,*whoa*,"In a ""what the heck"" way. ",,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,わわ,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,More »,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,"ワザワザ,",waza waza,*expressly* *intentionally*,To go through the trouble of doing something. ,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,わざわざ,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,More »,,,,,,,,,,,,,,,,,,,,
Loading

0 comments on commit c259147

Please sign in to comment.