-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathearpatch.py
executable file
·170 lines (149 loc) · 6.31 KB
/
earpatch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#!/usr/bin/python3
import ctypes
import json
import random
import struct
import wave
import snxrom
import click
def sizeof(obj):
    """Return the size in bytes of any object exposing the buffer protocol.

    The object is re-viewed as a flat run of unsigned bytes first, so the
    answer is a byte count whatever the object's own element format is.
    """
    as_bytes = memoryview(obj).cast('B')
    return as_bytes.nbytes
def getFirstAU(content, assetTablePointers):
    """Find the first asset whose leading little-endian uint16 equals snxrom.AU.

    content            -- the whole file as a bytes-like object
    assetTablePointers -- list of byte offsets into content, one per asset

    Returns (index, chunk): the asset's position in assetTablePointers and a
    memoryview over a *copy* of its bytes.  Raises ValueError when no asset
    carries the AU type code.
    """
    # An asset runs up to the start of the next one; the final asset runs to
    # the end of the file.
    chunk_ends = assetTablePointers[1:] + [len(content)]
    for index, (begin, end) in enumerate(zip(assetTablePointers, chunk_ends)):
        (type_code,) = struct.unpack('<H', content[begin:begin + 2])
        if type_code != snxrom.AU:
            continue
        return index, memoryview(content[begin:end])
    raise ValueError("No AU chunk found in file")
# Mouth position codes.  On my bear, code 2 goes past "mouth open" and starts
# to close again, so I'm calling that "middle".
CLOSED = 0
OPEN = 1
MIDDLE = 2

# To go from state X to state Y, start LATENCY[X][Y] ms earlier than what the
# rhubarb data says.
LATENCY = {
    CLOSED: {CLOSED: 0, OPEN: 200, MIDDLE: 200},
    OPEN: {CLOSED: 200, OPEN: 0, MIDDLE: 0},
    MIDDLE: {CLOSED: 200, OPEN: 150, MIDDLE: 0},
}

# Rhubarb mouth-shape letter -> mouth position code.
rhubarb_mouth_map = {
    **dict.fromkeys('AFX', CLOSED),
    **dict.fromkeys('BCGH', MIDDLE),
    **dict.fromkeys('DE', OPEN),
}
def rhubarb_to_timestamp(rhubarb_json):
    """Turn Rhubarb mouth cues into a list of (timestamp_ms, mouth_state).

    rhubarb_json -- parsed Rhubarb output: a dict whose 'mouthCues' entry is
                    a list of rows with 'start' (seconds) and 'value' (a
                    mouth-shape letter found in rhubarb_mouth_map).

    The list always begins with (0, CLOSED).  A new entry is emitted only
    when the mapped mouth state changes.  Each change is moved earlier by
    LATENCY[old][new] ms, but never earlier than the previously emitted
    entry, so the returned timestamps are non-decreasing and the states stay
    in the order the cues were given.

    Raises KeyError if 'mouthCues', 'start' or 'value' is missing, or if a
    cue value is not in rhubarb_mouth_map.
    """
    last = 0  # timestamp of the most recently emitted entry
    result = [(0, CLOSED)]
    old_value = CLOSED
    for row in rhubarb_json['mouthCues']:
        start = round(row['start'] * 1000)
        value = rhubarb_mouth_map[row['value']]
        if value == old_value:
            continue
        # Lead the cue by the mechanical latency, clamped so that it cannot
        # jump ahead of the transition emitted just before it.
        start = max(last, start - LATENCY[old_value][value])
        result.append((start, value))
        last = start
        old_value = value
    return result
def random_eye_timestamp(duration_ms, random_eyes_median, random_eyes_std_dev):
    """Generate randomly spaced eye-animation events as (timestamp_ms, code).

    duration_ms         -- length of the audio in milliseconds
    random_eyes_median  -- centre of the gap between animations, in seconds
    random_eyes_std_dev -- standard deviation of that gap, in seconds

    The first event is always at 0 ms.  Further events are added, each one
    Gaussian-distributed gap after the previous, until an event lands at or
    beyond duration_ms; that final event is included in the result.

    Every gap is at least 1 ms.  Without that floor, a non-positive draw
    would not advance time at all, and a non-positive median combined with a
    zero deviation would loop forever.
    """
    now = 0
    result = [(0, random.randint(12, 14))]  # Start with an animation
    while now < duration_ms:
        delta_ms = random.gauss(random_eyes_median, random_eyes_std_dev) * 1000
        now += max(1, int(delta_ms))
        # Assume 12..14 are the valid animation numbers (true for Intro.bin)
        result.append((now, random.randint(12, 14)))
    return result
def timestamp_to_delay(seq):
    """Convert absolute (timestamp, action) pairs into (delay, action) pairs.

    Each delay is the gap since the preceding entry's timestamp; the first
    entry's delay is measured from 0.  seq may be any iterable of pairs.
    """
    events = list(seq)
    # Timestamp that precedes each event: 0 for the first, then every
    # event's own timestamp shifted along by one place.
    preceding = [0] + [when for when, _ in events[:-1]]
    return [
        (when - before, action)
        for before, (when, action) in zip(preceding, events)
    ]
@click.command
@click.option("--au", type=click.File('rb'), default=None, help="Previously converted sound file with 'AU' magic header")
@click.option("--wav", type=wave.open, default=None, help="Wave file, 16-bit mono at 16kHz or 32kHz")
@click.option("--rhubarb-json", type=click.File('r'), default=None, help="Rhubarb json file for mouth positions")
@click.option("--no-mouth", default=False, is_flag=True, help="Just keep your mouth shut")
@click.option("--random-eyes/--no-random-eyes", default=True, is_flag=True, help="Use random eye animations (default: enabled)")
@click.option("--random-eyes-median", default=30., help="The median time between eye animations")
@click.option("--random-eyes-std-dev", default=6., help="The standard deviation of the animation time")
@click.argument("input-file", type=click.File('rb'))
@click.argument("output-file", type=click.File('wb'))
def earpatch(au, wav, rhubarb_json, no_mouth, random_eyes, random_eyes_median, random_eyes_std_dev, input_file, output_file):
    """Replace the first audio asset of INPUT_FILE and write OUTPUT_FILE.

    The audio is taken from --au if given, else from --wav, else the audio
    already present in INPUT_FILE is kept.  The mark table is regenerated
    when --rhubarb-json or --no-mouth is given.
    """
    content = bytearray(input_file.read())
    file_header, assetTablePointers = snxrom.parseHeaderAndPointers(content)
    AUIdx, au_chunk = getFirstAU(content, assetTablePointers)
    AUoffset = assetTablePointers[AUIdx]
    file_header.assetTableLength = 1 + AUIdx  # Remove any following items from asset table (assumed to be audio chunks)

    # --- Audio: header, payload and duration --------------------------------
    if au is not None:
        au_chunk = bytearray(au.read())

    if au is not None or wav is None:
        # Either an explicit AU file, or (neither --au nor --wav) the chunk
        # that getFirstAU extracted from the input file.
        au_header = snxrom.AudioHeader.from_buffer(au_chunk)
        # headerSize and sizeOfAudioBinary are counted in uint16 units.
        payload_start = au_header.headerSize * 2
        payload_end = (au_header.headerSize + au_header.sizeOfAudioBinary) * 2
        au_payload = au_chunk[payload_start:payload_end]
        print(au_header, au_header.padding)
        duration_ms = round(au_header.sizeOfAudioBinary * 16 * 100 / au_header.bitRate)
    else:
        import g722_1_mod as m

        # Read everything up front so the wave reader is closed promptly,
        # including when validation rejects the file.
        with wav:
            wav_params = wav.getparams()
            if wav_params.framerate not in (16000, 32000):
                raise click.BadParameter(
                    f"sample rate must be 16000 or 32000 Hz, got {wav_params.framerate}",
                    param_hint="--wav")
            if wav_params.sampwidth != 2:
                raise click.BadParameter(
                    f"samples must be 16-bit, got {wav_params.sampwidth * 8}-bit",
                    param_hint="--wav")
            if wav_params.nchannels != 1:
                raise click.BadParameter(
                    f"audio must be mono, got {wav_params.nchannels} channels",
                    param_hint="--wav")
            nframes = wav.getnframes()
            samples = wav.readframes(nframes)

        duration_ms = nframes * 1000 / wav_params.framerate
        if wav_params.framerate >= 24000:
            bitRate = 3200
            inchunksize = 640
            outchunksize = 80
        else:
            bitRate = 1600
            inchunksize = 320
            outchunksize = 40
        au_payload = m.encode(samples, inchunksize, outchunksize)
        print(f"{duration_ms=}")
        au_header = snxrom.AudioHeader(snxrom.AU, sampleRate=wav_params.framerate, bitRate=bitRate, channels=1, totalAudioFrames=len(au_payload) // outchunksize, sizeOfAudioBinary = len(au_payload) // 2, markFlag=True, silenceFlag=False, headerSize=16, audio32Type=0xffff, padding=0xffff)

    # --- Marks: mouth and eye events ----------------------------------------
    if rhubarb_json is not None:
        rhubarb_timings = json.load(rhubarb_json)
    elif no_mouth:
        # Must use the same key rhubarb_to_timestamp reads.
        rhubarb_timings = {'mouthCues': []}
    else:
        rhubarb_timings = None

    if rhubarb_timings is not None:
        mouth_timings = rhubarb_to_timestamp(rhubarb_timings)
        print(f"{mouth_timings=}")
        if random_eyes:
            eye_timings = random_eye_timestamp(duration_ms, random_eyes_median, random_eyes_std_dev)
        else:
            eye_timings = []
        print(f"{eye_timings=}")
        # Stable sort on the timestamp alone: events sharing a timestamp keep
        # their sequence order (mouth before eyes) instead of being reordered
        # by action code.
        merged = sorted(mouth_timings + eye_timings, key=lambda event: event[0])
        mark_timings = timestamp_to_delay(merged)
        print(f"{mark_timings=}")
        mark_data = snxrom.encodeMarkTable(mark_timings)
        print(f"{mark_data=}")
    else:
        # Keep whatever lies between the fixed 32-byte header and the
        # payload.  For --wav the header was built with headerSize=16, so
        # this slice is empty.
        mark_data = au_chunk[32:au_header.headerSize*2]

    au_header.markFlag = bool(mark_data)
    au_header.headerSize = (sizeof(au_header) + sizeof(mark_data)) // 2  # in units of uint16

    # --- Assemble and write -------------------------------------------------
    newChunk = bytearray()
    newChunk.extend(memoryview(au_header).cast('B'))
    newChunk.extend(memoryview(mark_data).cast('B'))
    newChunk.extend(memoryview(au_payload).cast('B'))
    print(f"Old chunk length {len(au_chunk)}")
    print(f"New chunk length {len(newChunk)}")

    # Everything before the AU asset is kept; the new chunk replaces the rest.
    content = bytearray(content[:AUoffset])
    content.extend(newChunk)

    # Pad with 0xff up to a multiple of 512 bytes.
    remainder = len(content) % 512
    if remainder:
        content.extend(b'\xff' * (512 - remainder))
    output_file.write(content)
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    earpatch()