Skip to content

Commit 591fec2

Browse files
authored
[stdlib][SR-9438] Re-implement integer-to-string conversion (redux) (#85180)
Inspired by #84826, I've dusted off and completely reworked a native implementation of integer-to-string conversion. Besides existing tests in this repository, the core of the implementation has been comprehensively tested in a separate package for all bases from 2 through 36 to demonstrate identical output for all 8-bit and 16-bit values, and for randomly generated 32-bit, 64-bit, and 128-bit values. Resolves #51902.
1 parent 46b41a3 commit 591fec2

File tree

4 files changed

+330
-344
lines changed

4 files changed

+330
-344
lines changed

stdlib/public/core/Integers.swift

Lines changed: 325 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1377,64 +1377,341 @@ extension BinaryInteger {
13771377
//===--- CustomStringConvertible conformance ------------------------------===//
13781378
//===----------------------------------------------------------------------===//
13791379

1380-
extension BinaryInteger {
1381-
internal func _description(radix: Int, uppercase: Bool) -> String {
1382-
_precondition(2...36 ~= radix, "Radix must be between 2 and 36")
1383-
1384-
if bitWidth <= 64 {
1385-
let radix_ = Int64(radix)
1386-
return Self.isSigned
1387-
? _int64ToString(
1388-
Int64(truncatingIfNeeded: self), radix: radix_, uppercase: uppercase)
1389-
: _uint64ToString(
1390-
UInt64(truncatingIfNeeded: self), radix: radix_, uppercase: uppercase)
1380+
/// This internal function does not validate `radix` or the size of `buffer`.
1381+
/// It is an unsafe operation.
1382+
///
1383+
/// The behavior is undefined if `radix` is not between `2` and `36` (inclusive)
1384+
/// or if there are insufficient bytes in `buffer` for the output.
1385+
@_specialize(where T == UInt64)
1386+
@_specialize(where T == Int64)
1387+
@unsafe
1388+
internal func _BinaryIntegerToASCII<T: BinaryInteger>(
1389+
negative: Bool,
1390+
magnitude: T.Magnitude,
1391+
radix: T,
1392+
uppercase: Bool,
1393+
buffer utf8Buffer: inout MutableSpan<UTF8.CodeUnit>
1394+
) -> Range<Int> {
1395+
var value = magnitude
1396+
let radix = radix.magnitude
1397+
1398+
// We need a `MutableRawSpan` to use wide store/load operations.
1399+
var buffer = utf8Buffer.mutableBytes
1400+
var offset = buffer.byteCount
1401+
1402+
if value == (0 as T.Magnitude) {
1403+
unsafe buffer.storeBytes(
1404+
of: 0x30 /* "0" */,
1405+
toUncheckedByteOffset: 0,
1406+
as: UInt8.self)
1407+
// Unlike the C++ implementation, we'll never return "-0".
1408+
return 0..<1
1409+
}
1410+
1411+
if radix == (10 as T.Magnitude) {
1412+
// Look up two digits at once.
1413+
let lookup: _InlineArray<100, (UInt8, UInt8)> = [
1414+
(0x30, 0x30), (0x30, 0x31), (0x30, 0x32), (0x30, 0x33), (0x30, 0x34),
1415+
(0x30, 0x35), (0x30, 0x36), (0x30, 0x37), (0x30, 0x38), (0x30, 0x39),
1416+
(0x31, 0x30), (0x31, 0x31), (0x31, 0x32), (0x31, 0x33), (0x31, 0x34),
1417+
(0x31, 0x35), (0x31, 0x36), (0x31, 0x37), (0x31, 0x38), (0x31, 0x39),
1418+
(0x32, 0x30), (0x32, 0x31), (0x32, 0x32), (0x32, 0x33), (0x32, 0x34),
1419+
(0x32, 0x35), (0x32, 0x36), (0x32, 0x37), (0x32, 0x38), (0x32, 0x39),
1420+
(0x33, 0x30), (0x33, 0x31), (0x33, 0x32), (0x33, 0x33), (0x33, 0x34),
1421+
(0x33, 0x35), (0x33, 0x36), (0x33, 0x37), (0x33, 0x38), (0x33, 0x39),
1422+
(0x34, 0x30), (0x34, 0x31), (0x34, 0x32), (0x34, 0x33), (0x34, 0x34),
1423+
(0x34, 0x35), (0x34, 0x36), (0x34, 0x37), (0x34, 0x38), (0x34, 0x39),
1424+
(0x35, 0x30), (0x35, 0x31), (0x35, 0x32), (0x35, 0x33), (0x35, 0x34),
1425+
(0x35, 0x35), (0x35, 0x36), (0x35, 0x37), (0x35, 0x38), (0x35, 0x39),
1426+
(0x36, 0x30), (0x36, 0x31), (0x36, 0x32), (0x36, 0x33), (0x36, 0x34),
1427+
(0x36, 0x35), (0x36, 0x36), (0x36, 0x37), (0x36, 0x38), (0x36, 0x39),
1428+
(0x37, 0x30), (0x37, 0x31), (0x37, 0x32), (0x37, 0x33), (0x37, 0x34),
1429+
(0x37, 0x35), (0x37, 0x36), (0x37, 0x37), (0x37, 0x38), (0x37, 0x39),
1430+
(0x38, 0x30), (0x38, 0x31), (0x38, 0x32), (0x38, 0x33), (0x38, 0x34),
1431+
(0x38, 0x35), (0x38, 0x36), (0x38, 0x37), (0x38, 0x38), (0x38, 0x39),
1432+
(0x39, 0x30), (0x39, 0x31), (0x39, 0x32), (0x39, 0x33), (0x39, 0x34),
1433+
(0x39, 0x35), (0x39, 0x36), (0x39, 0x37), (0x39, 0x38), (0x39, 0x39)
1434+
]
1435+
while value >= (10 as T.Magnitude) {
1436+
offset &-= 2
1437+
unsafe buffer.storeBytes(
1438+
of: lookup[unchecked: Int(truncatingIfNeeded: value % 100)],
1439+
toUncheckedByteOffset: offset,
1440+
as: (UInt8, UInt8).self)
1441+
value /= 100
1442+
}
1443+
if value != (0 as T.Magnitude) {
1444+
offset &-= 1
1445+
unsafe buffer.storeBytes(
1446+
of: UInt8(truncatingIfNeeded: value) | 0x30,
1447+
toUncheckedByteOffset: offset,
1448+
as: UInt8.self)
1449+
}
1450+
} else if radix == (16 as T.Magnitude) {
1451+
let lookup: _InlineArray<16, UInt8> = [
1452+
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
1453+
0x41, 0x42, 0x43, 0x44, 0x45, 0x46
1454+
]
1455+
let adjustment: UInt8 = uppercase ? 0 : 0x20
1456+
1457+
while value != (0 as T.Magnitude) {
1458+
offset &-= 1
1459+
unsafe buffer.storeBytes(
1460+
of: lookup[unchecked: Int(truncatingIfNeeded: value & 0xf)] | adjustment,
1461+
toUncheckedByteOffset: offset,
1462+
as: UInt8.self)
1463+
value >>= 4
1464+
}
1465+
} else if radix == (8 as T.Magnitude) {
1466+
while value != (0 as T.Magnitude) {
1467+
offset &-= 1
1468+
unsafe buffer.storeBytes(
1469+
of: UInt8(truncatingIfNeeded: value & 0x7) | 0x30,
1470+
toUncheckedByteOffset: offset,
1471+
as: UInt8.self)
1472+
value >>= 3
1473+
}
1474+
} else if radix == (2 as T.Magnitude) {
1475+
while value != (0 as T.Magnitude) {
1476+
offset &-= 1
1477+
unsafe buffer.storeBytes(
1478+
of: UInt8(truncatingIfNeeded: value & 1) | 0x30,
1479+
toUncheckedByteOffset: offset,
1480+
as: UInt8.self)
1481+
value >>= 1
13911482
}
1483+
} else {
1484+
let lookup: _InlineArray<36, UInt8> = [
1485+
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
1486+
0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a,
1487+
0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54,
1488+
0x55, 0x56, 0x57, 0x58, 0x59, 0x5a
1489+
]
1490+
let adjustment: UInt8 = uppercase ? 0 : 0x20
1491+
1492+
while value != (0 as T.Magnitude) {
1493+
offset &-= 1
1494+
unsafe buffer.storeBytes(
1495+
of: lookup[Int(truncatingIfNeeded: value % radix)] | adjustment,
1496+
toUncheckedByteOffset: offset,
1497+
as: UInt8.self)
1498+
value /= radix
1499+
}
1500+
}
13921501

1393-
if self == (0 as Self) { return "0" }
1502+
if negative {
1503+
offset &-= 1
1504+
unsafe buffer.storeBytes(
1505+
of: 0x2d /* "-" */,
1506+
toUncheckedByteOffset: offset,
1507+
as: UInt8.self)
1508+
}
13941509

1395-
// Bit shifting can be faster than division when `radix` is a power of two
1396-
// (although not necessarily the case for builtin types).
1397-
let isRadixPowerOfTwo = radix.nonzeroBitCount == 1
1398-
let radix_ = Magnitude(radix)
1399-
func _quotientAndRemainder(_ value: Magnitude) -> (Magnitude, Magnitude) {
1400-
return isRadixPowerOfTwo
1401-
? (value >> radix.trailingZeroBitCount, value & (radix_ - 1))
1402-
: value.quotientAndRemainder(dividingBy: radix_)
1403-
}
1510+
return offset..<buffer.byteCount
1511+
}
14041512

1405-
let hasLetters = radix > 10
1406-
func _ascii(_ digit: UInt8) -> UInt8 {
1407-
let base: UInt8
1408-
if !hasLetters || digit < 10 {
1409-
base = UInt8(("0" as Unicode.Scalar).value)
1410-
} else if uppercase {
1411-
base = UInt8(("A" as Unicode.Scalar).value) &- 10
1412-
} else {
1413-
base = UInt8(("a" as Unicode.Scalar).value) &- 10
1414-
}
1415-
return base &+ digit
1416-
}
1513+
// Support legacy ABI on top of new implementation:
1514+
// ================================================
1515+
1516+
// Returns a UInt64, but that value is the length of the string, so it's
1517+
// guaranteed to fit into an Int. This is part of the ABI, so we can't
1518+
// trivially change it to Int. Callers can safely convert the result
1519+
// to any integer type without checks, however.
1520+
@_silgen_name("swift_int64ToString")
1521+
@usableFromInline
1522+
internal func _int64ToStringImpl(
1523+
_ textBuffer: UnsafeMutablePointer<UTF8.CodeUnit>,
1524+
_ bufferLength: UInt,
1525+
_ value: Int64,
1526+
_ radix: Int64,
1527+
_ uppercase: Bool
1528+
) -> UInt64 {
1529+
_precondition(radix >= 2 && radix <= 36, "Radix must be between 2 and 36")
1530+
_precondition(bufferLength >= (radix >= 10 ? 21 : 65), "Insufficient buffer size")
1531+
unsafe textBuffer.initialize(repeating: 0x30, count: Int(bufferLength))
1532+
1533+
var buffer = unsafe MutableSpan<UTF8.CodeUnit>(
1534+
_unchecked: textBuffer,
1535+
count: Int(bufferLength))
1536+
let textRange = unsafe _BinaryIntegerToASCII(
1537+
negative: value < 0,
1538+
magnitude: value.magnitude,
1539+
radix: radix,
1540+
uppercase: uppercase,
1541+
buffer: &buffer)
1542+
_ = consume buffer
1543+
let byteCount = textRange.upperBound &- textRange.lowerBound
1544+
1545+
// Move text to start of buffer.
1546+
if textRange.lowerBound != 0 {
1547+
unsafe _memmove(
1548+
dest: textBuffer,
1549+
src: textBuffer + textRange.lowerBound,
1550+
size: UInt(truncatingIfNeeded: byteCount))
1551+
}
1552+
return UInt64(truncatingIfNeeded: byteCount)
1553+
}
14171554

1418-
let isNegative = Self.isSigned && self < (0 as Self)
1419-
var value = magnitude
1555+
// Returns a UInt64, but that value is the length of the string, so it's
1556+
// guaranteed to fit into an Int. This is part of the ABI, so we can't
1557+
// trivially change it to Int. Callers can safely convert the result
1558+
// to any integer type without checks, however.
1559+
@_silgen_name("swift_uint64ToString")
1560+
@usableFromInline
1561+
internal func _uint64ToStringImpl(
1562+
_ textBuffer: UnsafeMutablePointer<UTF8.CodeUnit>,
1563+
_ bufferLength: UInt,
1564+
_ value: UInt64,
1565+
_ radix: Int64,
1566+
_ uppercase: Bool
1567+
) -> UInt64 {
1568+
_precondition(radix >= 2 && radix <= 36, "Radix must be between 2 and 36")
1569+
_precondition(bufferLength >= (radix >= 10 ? 20 : 64), "Insufficient buffer size")
1570+
unsafe textBuffer.initialize(repeating: 0x30, count: Int(bufferLength))
1571+
1572+
var buffer = unsafe MutableSpan<UTF8.CodeUnit>(
1573+
_unchecked: textBuffer,
1574+
count: Int(bufferLength))
1575+
let textRange = unsafe _BinaryIntegerToASCII(
1576+
negative: false,
1577+
magnitude: value,
1578+
radix: radix,
1579+
uppercase: uppercase,
1580+
buffer: &buffer)
1581+
_ = consume buffer
1582+
let byteCount = textRange.upperBound &- textRange.lowerBound
1583+
1584+
// Move text to start of buffer.
1585+
if textRange.lowerBound != 0 {
1586+
unsafe _memmove(
1587+
dest: textBuffer,
1588+
src: textBuffer + textRange.lowerBound,
1589+
size: UInt(truncatingIfNeeded: byteCount))
1590+
}
1591+
return UInt64(truncatingIfNeeded: byteCount)
1592+
}
14201593

1421-
// TODO(FIXME JIRA): All current stdlib types fit in small. Use a stack
1422-
// buffer instead of an array on the heap.
1594+
public // @testable
1595+
func _uint64ToString(
1596+
_ value: UInt64,
1597+
radix: Int64 = 10,
1598+
uppercase: Bool = false
1599+
) -> String {
1600+
_precondition(radix >= 2 && radix <= 36, "Radix must be between 2 and 36")
1601+
1602+
if radix >= 10 {
1603+
var buffer = _InlineArray<20, UTF8.CodeUnit>(repeating: 0x30)
1604+
var span = buffer.mutableSpan
1605+
let textRange = unsafe _BinaryIntegerToASCII(
1606+
negative: false,
1607+
magnitude: value,
1608+
radix: radix,
1609+
uppercase: uppercase,
1610+
buffer: &span)
1611+
let textStart =
1612+
unsafe span._start().assumingMemoryBound(to: UTF8.CodeUnit.self)
1613+
+ textRange.lowerBound
1614+
let byteCount = textRange.upperBound &- textRange.lowerBound
1615+
let textBuffer =
1616+
unsafe UnsafeBufferPointer<UTF8.CodeUnit>(
1617+
_uncheckedStart: textStart, count: byteCount)
1618+
return unsafe String._fromASCII(textBuffer)
1619+
}
1620+
1621+
var buffer = _InlineArray<64, UTF8.CodeUnit>(repeating: 0x30)
1622+
var span = buffer.mutableSpan
1623+
let textRange = unsafe _BinaryIntegerToASCII(
1624+
negative: false,
1625+
magnitude: value,
1626+
radix: radix,
1627+
uppercase: false, // When radix < 10, case is irrelevant.
1628+
buffer: &span)
1629+
let textStart =
1630+
unsafe span._start().assumingMemoryBound(to: UTF8.CodeUnit.self)
1631+
+ textRange.lowerBound
1632+
let byteCount = textRange.upperBound &- textRange.lowerBound
1633+
let textBuffer =
1634+
unsafe UnsafeBufferPointer<UTF8.CodeUnit>(
1635+
_uncheckedStart: textStart, count: byteCount)
1636+
return unsafe String._fromASCII(textBuffer)
1637+
}
14231638

1424-
var result: [UInt8] = []
1425-
while value != 0 {
1426-
let (quotient, remainder) = _quotientAndRemainder(value)
1427-
result.append(_ascii(UInt8(truncatingIfNeeded: remainder)))
1428-
value = quotient
1429-
}
1639+
extension BinaryInteger {
1640+
internal func _description(radix: Int, uppercase: Bool) -> String {
1641+
_precondition(radix >= 2 && radix <= 36, "Radix must be between 2 and 36")
1642+
1643+
if _fastPath(bitWidth <= 64 || magnitude < UInt64.max) {
1644+
if radix >= 10 {
1645+
var buffer = _InlineArray<21, UTF8.CodeUnit>(repeating: 0x30)
1646+
var span = buffer.mutableSpan
1647+
let textRange = unsafe _BinaryIntegerToASCII(
1648+
negative: Self.isSigned && self < 0,
1649+
magnitude: UInt64(truncatingIfNeeded: magnitude),
1650+
radix: UInt64(truncatingIfNeeded: radix),
1651+
uppercase: uppercase,
1652+
buffer: &span)
1653+
let textStart =
1654+
unsafe span._start().assumingMemoryBound(to: UTF8.CodeUnit.self)
1655+
+ textRange.lowerBound
1656+
let byteCount = textRange.upperBound &- textRange.lowerBound
1657+
let textBuffer =
1658+
unsafe UnsafeBufferPointer<UTF8.CodeUnit>(
1659+
_uncheckedStart: textStart, count: byteCount)
1660+
return unsafe String._fromASCII(textBuffer)
1661+
}
14301662

1431-
if isNegative {
1432-
result.append(UInt8(("-" as Unicode.Scalar).value))
1663+
var buffer = _InlineArray<65, UTF8.CodeUnit>(repeating: 0x30)
1664+
var span = buffer.mutableSpan
1665+
let textRange = unsafe _BinaryIntegerToASCII(
1666+
negative: Self.isSigned && self < 0,
1667+
magnitude: UInt64(truncatingIfNeeded: magnitude),
1668+
radix: UInt64(truncatingIfNeeded: radix),
1669+
uppercase: false, // When radix < 10, case is irrelevant.
1670+
buffer: &span)
1671+
let textStart =
1672+
unsafe span._start().assumingMemoryBound(to: UTF8.CodeUnit.self)
1673+
+ textRange.lowerBound
1674+
let byteCount = textRange.upperBound &- textRange.lowerBound
1675+
let textBuffer =
1676+
unsafe UnsafeBufferPointer<UTF8.CodeUnit>(
1677+
_uncheckedStart: textStart, count: byteCount)
1678+
return unsafe String._fromASCII(textBuffer)
14331679
}
14341680

1435-
result.reverse()
1436-
return unsafe result.withUnsafeBufferPointer {
1437-
return unsafe String._fromASCII($0)
1681+
// The decimal representation of an unsigned value of bit width `i` requires
1682+
// `ceil(log10(2) * i)` bytes. Here, we use 5/16 as a known overestimate,
1683+
// with the division computed using a bit shift. Since integer division or
1684+
// bit shift is a truncating (flooring) operation, we add 15 to adjust for
1685+
// off-by-one results when bit width isn't a multiple of 16. Finally, we add
1686+
// 1 to leave room for the '-' sign or, in the case of zero, '0'.
1687+
let capacity = radix >= 10 ? (bitWidth * 5 + 15) &>> 4 + 1 : bitWidth + 1
1688+
return unsafe withUnsafeTemporaryAllocation(
1689+
of: UTF8.CodeUnit.self,
1690+
capacity: capacity
1691+
) {
1692+
// It's our responsibility to initialize and deinitialize memory.
1693+
// A larger buffer pointer than requested may be allocated, but it's
1694+
// undefined behavior to access the excess allocation.
1695+
let buffer =
1696+
unsafe UnsafeMutableBufferPointer<UTF8.CodeUnit>(
1697+
start: $0.baseAddress, count: capacity)
1698+
unsafe buffer.initialize(repeating: 0x30)
1699+
defer { unsafe buffer.deinitialize() }
1700+
1701+
var span = unsafe buffer.mutableSpan
1702+
let textRange = unsafe _BinaryIntegerToASCII(
1703+
negative: Self.isSigned && self < 0,
1704+
magnitude: magnitude,
1705+
radix: Magnitude(truncatingIfNeeded: radix),
1706+
uppercase: uppercase,
1707+
buffer: &span)
1708+
_ = consume span
1709+
let textStart = unsafe buffer.baseAddress! + textRange.lowerBound
1710+
let byteCount = textRange.upperBound &- textRange.lowerBound
1711+
let textBuffer =
1712+
unsafe UnsafeBufferPointer<UTF8.CodeUnit>(
1713+
_uncheckedStart: textStart, count: byteCount)
1714+
return unsafe String._fromASCII(textBuffer)
14381715
}
14391716
}
14401717

0 commit comments

Comments
 (0)