Skip to content

Commit

Permalink
feat(stdlib): Add Bytes.setChar and Bytes.getChar (#2215)
Browse files Browse the repository at this point in the history
Co-authored-by: Oscar Spencer <[email protected]>
  • Loading branch information
spotandjake and ospencer authored Jan 2, 2025
1 parent 3134504 commit 4919ba3
Show file tree
Hide file tree
Showing 11 changed files with 516 additions and 457 deletions.
19 changes: 19 additions & 0 deletions compiler/test/stdlib/bytes.test.gr
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,25 @@ assert Bytes.length(Bytes.empty) == 0
let bytes = Bytes.make(64)
assert Bytes.length(bytes) == 64

// Bytes.getChar
// "ab©✨🍞" encodes to 11 UTF-8 bytes: 'a' at byte 0, 'b' at byte 1,
// '©' at bytes 2-3, '✨' at bytes 4-6 and '🍞' at bytes 7-10.
// Every index below is therefore the byte offset at which a character starts,
// not a character position.
let bytes = Bytes.fromString("ab©✨🍞")
assert Bytes.getChar(0, bytes) == 'a'
assert Bytes.getChar(1, bytes) == 'b'
assert Bytes.getChar(2, bytes) == '©'
assert Bytes.getChar(4, bytes) == '✨'
assert Bytes.getChar(7, bytes) == '🍞'

// Bytes.setChar
// Writes one character of each UTF-8 width (1, 2, 3 and 4 bytes) into the same
// 16-byte buffer and reads it back from the offset it was written at.
// Resulting layout: 'a' at byte 0, '©' at bytes 1-2, '✨' at bytes 3-5,
// '🍞' at bytes 7-10 (byte 6 is never written by this test).
let bytes = Bytes.make(16)
Bytes.setChar(0, 'a', bytes)
assert Bytes.getChar(0, bytes) == 'a'
Bytes.setChar(1, '©', bytes)
assert Bytes.getChar(1, bytes) == '©'
Bytes.setChar(3, '✨', bytes)
assert Bytes.getChar(3, bytes) == '✨'
Bytes.setChar(7, '🍞', bytes)
assert Bytes.getChar(7, bytes) == '🍞'

// Bytes.setInt8, Bytes.setUint8, Bytes.getInt8, Bytes.getUint8
let bytes = Bytes.make(1)
Bytes.setInt8(0, 0xffs, bytes)
Expand Down
50 changes: 8 additions & 42 deletions stdlib/buffer.gr
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@ from "runtime/unsafe/wasmi32" include WasmI32
from "runtime/unsafe/conv" include Conv
from "runtime/exception" include Exception
from "runtime/dataStructures" include DataStructures
use DataStructures.{ untagChar }
use DataStructures.{ untagChar, tagSimpleNumber }
from "int32" include Int32
from "bytes" include Bytes
from "string" include String
from "char" include Char
from "runtime/numbers" include Numbers
use Numbers.{ coerceNumberToWasmI32 }
from "runtime/utf8" include Utf8
use Utf8.{ usvEncodeLength }

abstract record Buffer {
mut len: Number,
Expand Down Expand Up @@ -389,48 +391,12 @@ provide let addString = (string, buffer) => {
*/
@unsafe
provide let addChar = (char, buffer) => {
  // Unicode scalar value of the character being appended
  let usv = untagChar(char)
  // `usvEncodeLength` returns an untagged length; tag it so it can be combined
  // with `buffer.len`, which is a `Number`
  let byteCount = tagSimpleNumber(usvEncodeLength(usv))
  // Make room for the encoded character before writing
  // NOTE(review): assumes `autogrow` only ensures capacity and does not change `buffer.len` — confirm
  autogrow(byteCount, buffer)
  // The character is written at the current end of the buffer contents
  let index = buffer.len
  buffer.len += byteCount
  // UTF-8 encoding is delegated to `Bytes.setChar` rather than hand-rolled here,
  // so the character is written (and the length advanced) exactly once
  Bytes.setChar(index, char, buffer.data)
}

/**
Expand Down
76 changes: 75 additions & 1 deletion stdlib/bytes.gr
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@ from "runtime/unsafe/wasmf64" include WasmF64
from "runtime/unsafe/conv" include Conv
from "runtime/dataStructures" include DataStructures
use DataStructures.{
tagChar,
tagInt8,
tagUint8,
tagInt16,
tagUint16,
untagChar,
untagInt8,
untagUint8,
untagInt16,
Expand All @@ -33,6 +35,14 @@ from "runtime/exception" include Exception
from "int32" include Int32
from "runtime/numbers" include Numbers
use Numbers.{ coerceNumberToWasmI32 }
from "runtime/utf8" include Utf8
use Utf8.{
utf8ByteCount,
getCodePoint,
usvEncodeLength,
writeUtf8CodePoint,
exception MalformedUnicode,
}

@unsafe
let _SIZE_OFFSET = 4n
Expand Down Expand Up @@ -396,6 +406,70 @@ provide let clear = (bytes: Bytes) => {
ignore(bytes)
}

/**
 * Gets the UTF-8 encoded character at the given byte index.
 *
 * @param index: The byte index to access
 * @param bytes: The byte sequence to access
 * @returns The character that starts at the given index
 *
 * @throws IndexOutOfBounds: When `index` is negative
 * @throws IndexOutOfBounds: When `index + 1` is greater than the bytes size
 * @throws MalformedUnicode: When the requested character is not a valid UTF-8 sequence
 *
 * @example
 * let bytes = Bytes.fromString("Hello")
 * assert Bytes.getChar(0, bytes) == 'H'
 *
 * @since v0.7.0
 */
@unsafe
provide let getChar = (index: Number, bytes: Bytes) => {
  // Only addition and comparison are needed from WasmI32 in this function
  use WasmI32.{ (+), (>) }
  let ptr = WasmI32.fromGrain(bytes)
  let size = getSize(ptr)
  let offset = coerceNumberToWasmI32(index)
  // At least the leading byte of the character must lie inside the sequence
  checkIndexIsInBounds(offset, 1n, size)
  // The leading byte determines how many bytes the encoded character occupies
  let byte = WasmI32.load8U(ptr + offset, _VALUE_OFFSET)
  let charSize = utf8ByteCount(byte)
  // A character whose encoding would run past the end of the data is truncated
  if (offset + charSize > size) {
    throw MalformedUnicode
  }
  let codePoint = getCodePoint(ptr + offset + _VALUE_OFFSET)
  ignore(bytes)
  tagChar(codePoint)
}

/**
* UTF-8 encodes a character starting at the given byte index.
*
* @param index: The byte index to update
* @param value: The value to set
* @param bytes: The byte sequence to mutate
*
* @throws IndexOutOfBounds: When `index` is negative
* @throws IndexOutOfBounds: When `index + charSize` is greater than the bytes size, `charSize` is the number of bytes in the character ranging from 1 to 4
*
* @example
* let bytes = Bytes.make(1)
* Bytes.setChar(0, 'a', bytes)
* assert Bytes.getChar(0, bytes) == 'a'
*
* @since v0.7.0
*/
@unsafe
provide let setChar = (index: Number, value: Char, bytes: Bytes) => {
  use WasmI32.{ (+) }
  // Work out how many bytes the character needs before touching memory
  let scalar = untagChar(value)
  let encodedLength = usvEncodeLength(scalar)
  let base = WasmI32.fromGrain(bytes)
  let start = coerceNumberToWasmI32(index)
  // The whole encoded character must fit inside the byte sequence
  checkIndexIsInBounds(start, encodedLength, getSize(base))
  // Data begins `_VALUE_OFFSET` bytes past the start of the Bytes value
  writeUtf8CodePoint(base + start + _VALUE_OFFSET, scalar)
  ignore(bytes)
}

/**
* Gets a signed 8-bit integer starting at the given byte index.
*
Expand Down Expand Up @@ -452,8 +526,8 @@ provide let setInt8 = (index: Number, value: Int8, bytes: Bytes) => {
let offset = coerceNumberToWasmI32(index)
checkIndexIsInBounds(offset, _INT8_BYTE_SIZE, size)
let v = untagInt8(value)
ignore(bytes)
WasmI32.store8(ptr + offset, v, _VALUE_OFFSET)
ignore(bytes)
}

/**
Expand Down
79 changes: 79 additions & 0 deletions stdlib/bytes.md
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,85 @@ Bytes.clear(bytes)
assert bytes == b"\x00\x00\x00\x00\x00"
```

### Bytes.**getChar**

<details disabled>
<summary tabindex="-1">Added in <code>next</code></summary>
No other changes yet.
</details>

```grain
getChar : (index: Number, bytes: Bytes) => Char
```

Gets the UTF-8 encoded character at the given byte index.

Parameters:

|param|type|description|
|-----|----|-----------|
|`index`|`Number`|The byte index to access|
|`bytes`|`Bytes`|The byte sequence to access|

Returns:

|type|description|
|----|-----------|
|`Char`|The character that starts at the given index|

Throws:

`IndexOutOfBounds`

* When `index` is negative
* When `index + 1` is greater than the bytes size

`MalformedUnicode`

* When the requested character is not a valid UTF-8 sequence

Examples:

```grain
let bytes = Bytes.fromString("Hello")
assert Bytes.getChar(0, bytes) == 'H'
```

### Bytes.**setChar**

<details disabled>
<summary tabindex="-1">Added in <code>next</code></summary>
No other changes yet.
</details>

```grain
setChar : (index: Number, value: Char, bytes: Bytes) => Void
```

UTF-8 encodes a character starting at the given byte index.

Parameters:

|param|type|description|
|-----|----|-----------|
|`index`|`Number`|The byte index to update|
|`value`|`Char`|The value to set|
|`bytes`|`Bytes`|The byte sequence to mutate|

Throws:

`IndexOutOfBounds`

* When `index` is negative
* When `index + charSize` is greater than the bytes size, `charSize` is the number of bytes in the character ranging from 1 to 4

Examples:

```grain
let bytes = Bytes.make(1)
Bytes.setChar(0, 'a', bytes)
assert Bytes.getChar(0, bytes) == 'a'
```

### Bytes.**getInt8**

<details>
Expand Down
54 changes: 7 additions & 47 deletions stdlib/char.gr
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ module Char
from "runtime/unsafe/wasmi32" include WasmI32
from "runtime/dataStructures" include DataStructures
use DataStructures.{ tagSimpleNumber, tagChar, untagChar, allocateString }

exception MalformedUtf8
from "runtime/utf8" include Utf8
use Utf8.{ usvEncodeLength, writeUtf8CodePoint }

/**
* The minimum valid Unicode scalar value.
Expand Down Expand Up @@ -164,52 +164,12 @@ provide let pred = char => {
*/
@unsafe
provide let toString = (char: Char) => {
  use WasmI32.{ (+) }
  // Unicode scalar value of the character
  let usv = untagChar(char)
  // Allocate a string exactly as long as the character's UTF-8 encoding
  let byteCount = usvEncodeLength(usv)
  let string = allocateString(byteCount)
  // The string's byte data starts 8 bytes past the allocation pointer; encoding
  // is delegated to the shared UTF-8 writer instead of being hand-rolled here
  writeUtf8CodePoint(string + 8n, usv)
  WasmI32.toGrain(string): String
}

/**
Expand Down
Loading

0 comments on commit 4919ba3

Please sign in to comment.