Skip to content

Commit d5e8abf

Browse files
committed
optimized __llmul_b and strtoll
1 parent 61ddf63 commit d5e8abf

File tree

2 files changed

+49
-82
lines changed

2 files changed

+49
-82
lines changed

src/crt/llmulu_b.src

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ __llmulu_b:
88
; Multiplies BC:UDE:UHL by (SP) and returns the 64-bit unsigned product bc:ude:uhl.
99
; I: (SP) = 8-bit multiplier, BC:UDE:UHL = multiplicand, ADL=1
1010
; O: bc:ude:uhl = BC:UDE:UHL * (SP)
11-
; CC: 101*r(PC)+21*r(SPL)+18*w(SPL)+33
12-
; CC: 100 bytes | 101F + 21R + 18W + 33
11+
; CC: 99*r(PC)+20*r(SPL)+18*w(SPL)+33
12+
; CC: 98 bytes | 99F + 20R + 18W + 33
1313
push iy
1414
ld iy, 0
1515
add iy, sp
@@ -18,11 +18,11 @@ __llmulu_b:
1818
push bc
1919
ld b, (iy + 6)
2020
ld c, 0
21+
ld e, h ; ld e, (iy - 5)
2122
ld h, b
2223
mlt hl
2324
ld (iy - 6), l
2425
ld d, b
25-
ld e, (iy - 5)
2626
mlt de
2727
ld l, h
2828
ld h, c

src/libc/strtoll.src

Lines changed: 46 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,19 @@ _strtoll:
3232
.out_of_range:
3333
.overflow:
3434
ld b, $80
35-
ld de, 0
36-
ld c, e
3735
ld hl, 5 ; ERANGE
36+
ld c, h
3837
ld (_errno), hl
3938
ld l, h ; ld hl, 0
39+
push hl
40+
pop de
41+
; BC:UDE:UHL = LLONG_MIN
4042
ret z ; underflow
4143
; overflow
42-
dec bc
4344
dec hl
4445
dec de
46+
dec bc
47+
; BC:UDE:UHL = LLONG_MAX
4548
ret
4649

4750
;-------------------------------------------------------------------------------
@@ -60,14 +63,15 @@ _strtoull:
6063
jp __llneg
6164

6265
.out_of_range:
63-
sbc hl, hl
64-
ex de, hl
6566
ld hl, 5 ; ERANGE
6667
ld (_errno), hl
67-
ld l, h
68+
ld l, h ; ld hl, 0
6869
dec hl
69-
ld b, e
70-
ld c, e
70+
push hl
71+
pop de
72+
ld b, l
73+
ld c, l
74+
; BC:UDE:UHL = ULLONG_MAX
7175
ret
7276

7377
;-------------------------------------------------------------------------------
@@ -214,15 +218,15 @@ __strtoll_common:
214218
; Setting B (base) to zero ensures that cp a, b will never set carry,
215219
; forcing the function to return.
216220
push af
217-
; sets E:UHL to zero
221+
; sets BC:UDE:UHL to zero
218222
jr .invalid_base_hijack
219223
;-------------------------------------------------------------------------------
220224
; CC per non-decimal digit:
221225
; minimum : 100F + 20R + 18W + 35
222226
; low average : 102F + 20R + 18W + 36
223227
; high average : 112F + 20R + 18W + 37 ; an over-estimate of the average CC
224228
; maximum : 127F + 20R + 18W + 40
225-
; overflow max : 131F + 21R + 19W + 42
229+
; overflow max : 133F + 24R + 22W + 42
226230
.check_decimal:
227231
cp a, (ix - 1)
228232
jr nc, .end_loop
@@ -267,50 +271,57 @@ __llmul_add_b_overflow:
267271
; bit 0, (ix - 3) is set if overflow has occurred
268272
.__llmulu_b_overflow:
269273
; inlined/modified __llmulu_b
270-
; CC if no overflow: 70F + 15R + 15W + 32
271-
push de ; (ix - 9)
272-
push hl ; (ix - 12)
274+
; CC no overflow : 69F + 16R + 15W + 33
275+
; CC overflow max : 73F + 17R + 16W + 34
276+
push hl ; (ix - 9)
277+
push de ; (ix - 12)
273278
push bc ; (ix - 15)
274279
ld b, (ix - 1)
275-
ld c, 0
280+
ld c, e ; ld c, (ix - 12)
281+
ld e, h ; ld e, (ix - 8)
276282
ld h, b
277283
mlt hl
278-
ld (ix - 12), l
284+
285+
; (255 * 255) + 255 < 65535 so no 16bit carry can occur
286+
add a, l
287+
ld (ix - 9), a ; L
279288
ld d, b
280-
ld e, (ix - 11)
289+
; ld e, (ix - 8) ; H
281290
mlt de
282291
ld l, h
283-
ld h, c
284-
add hl, de
285-
ld (ix - 11), l
292+
ld h, 0
293+
adc hl, de ; handles carry from adding A
294+
xor a, a
295+
296+
ld (ix - 8), l ; H
286297
ld d, b
287-
ld e, (ix - 10)
298+
ld e, (ix - 7) ; UHL
288299
mlt de
289300
ld l, h
290-
ld h, c
301+
ld h, a
291302
add hl, de
292-
ld (ix - 10), l
303+
ld (ix - 7), l ; UHL
293304
ld d, b
294-
ld e, (ix - 9)
305+
ld e, c ; ld e, (ix - 12) ; E
295306
mlt de
296307
ld l, h
297-
ld h, c
308+
ld h, a
298309
add hl, de
299-
ld (ix - 9), l
310+
ld (ix - 12), l ; E
300311
ld d, b
301-
ld e, (ix - 8)
312+
ld e, (ix - 11) ; D
302313
mlt de
303314
ld l, h
304-
ld h, c
315+
ld h, a
305316
add hl, de
306-
ld (ix - 8), l
317+
ld (ix - 11), l ; D
307318
ld d, b
308-
ld e, (ix - 7)
319+
ld e, (ix - 10) ; UDE
309320
mlt de
310321
ld l, h
311-
ld h, c
322+
ld h, a
312323
add hl, de
313-
ld (ix - 7), l
324+
ld (ix - 10), l ; UDE
314325

315326
pop de
316327
ld l, h
@@ -321,60 +332,16 @@ __llmul_add_b_overflow:
321332
mlt de
322333

323334
add.s hl, de
335+
pop de
324336
jr c, .set_overflow_bit
325-
inc b
326-
djnz .set_overflow_bit
337+
sub a, b ; set carry if B is non-zero
327338
ld b, h
328339
ld c, l
340+
.set_overflow_bit:
329341
pop hl
330-
pop de
331-
.__lladd_b_overflow:
332-
; inlined/modified __lladd_b_fast
333-
; NC L: 3F + 3R + 2 (49.8046875%)
334-
; NC H: 5F + 3R + 3 (49.9992370605%)
335-
; All other cases: (0.196075439453%)
336-
; NC UHL: 15F + 3R + 0W + 4
337-
; NC E: 17F + 3R + 0W + 5
338-
; NC D: 19F + 3R + 0W + 6
339-
; NC UDE: 26F + 3R + 0W + 7
340-
; NC BC: 30F + 3R + 0W + 7
341-
; Overflow: 34F + 4R + 1W + 9
342-
; Average : 5F + 3R + 0W + 3 (Rounded up and assuming no overflow)
343-
add a, l ; a=L+A
344-
ld l, a ; l=L+A
345-
ret nc ; cf=1
346-
inc h ; h=H+1
347-
ret nz ; h=0
348-
dec h
349-
ld l, h ; uhl=(HLU<<16)+0xFFFF
350-
inc hl ; uhl=HLU+1<<16
351-
add hl, bc
352-
or a, a
353-
sbc hl, bc
354-
ld l, a ; uhl=(HLU+1<<16)+(L+A&0xFF)
355-
ret nz ; uhl=L+A&0xFF, cf=0
356-
inc e ; e=E+1
357-
ret nz ; e=0
358-
inc d ; d=D+1
359-
ret nz ; d=0
360-
dec d
361-
ld e, d ; ude=(DEU<<16)+0xFFFF
362-
inc de ; ude=DEU+1<<16
363-
sbc hl, de
364-
add hl, de
365-
ret c ; ude=0
366-
inc bc ; ubc=UBC+1
367-
; test for overflow
368-
ld a, b
369-
or a, c
370-
ret nz
371-
.finish_overflow:
342+
ret nc
372343
set 0, (ix - 3) ; set carry
373344
ret
374-
.set_overflow_bit:
375-
pop hl
376-
pop de
377-
jr .finish_overflow
378345

379346
extern _errno
380347
extern __llneg

0 commit comments

Comments
 (0)