Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/core/src/zig/bench/utf8_bench.zig
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ fn benchCalculateTextWidth(results_alloc: std.mem.Allocator, iterations: usize)
var stats = BenchStats{};
for (0..iterations) |_| {
var timer = try std.time.Timer.start();
_ = utf8.calculateTextWidth(text.items, 4, true, .unicode);
_ = utf8.calculateTextWidth(text.items, 4, false, .unicode);
stats.record(timer.read());
}

Expand Down
50 changes: 25 additions & 25 deletions packages/core/src/zig/tests/utf8_test.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1157,7 +1157,7 @@ test "wrap breaks: mixed graphemes and ASCII" {
// ============================================================================

test "wrap by width: empty string" {
const result = utf8.findWrapPosByWidth("", 10, 4, true, .unicode);
const result = utf8.findWrapPosByWidth("", 10, 4, false, .unicode);
try testing.expectEqual(@as(u32, 0), result.byte_offset);
try testing.expectEqual(@as(u32, 0), result.grapheme_count);
try testing.expectEqual(@as(u32, 0), result.columns_used);
Expand Down Expand Up @@ -1199,7 +1199,7 @@ test "wrap by width: combining mark" {
}

test "wrap by width: tab handling" {
const result = utf8.findWrapPosByWidth("a\tb", 5, 4, true, .unicode);
const result = utf8.findWrapPosByWidth("a\tb", 5, 4, false, .unicode);
try testing.expectEqual(@as(u32, 2), result.byte_offset); // After "a\t"
try testing.expectEqual(@as(u32, 2), result.grapheme_count); // 'a' + tab
try testing.expectEqual(@as(u32, 5), result.columns_used); // 'a' (1) + tab (4) = 5
Expand Down Expand Up @@ -1235,7 +1235,7 @@ test "wrap by width: consistency - Unicode text" {

test "wrap by width: consistency - edge cases" {
const edge_cases = [_]struct { text: []const u8, ascii: bool }{
.{ .text = "", .ascii = true },
.{ .text = "", .ascii = false },
.{ .text = " ", .ascii = true },
.{ .text = "a", .ascii = true },
.{ .text = "abc", .ascii = true },
Expand All @@ -1244,7 +1244,7 @@ test "wrap by width: consistency - edge cases" {
.{ .text = "no-spaces-here", .ascii = true },
.{ .text = "/usr/local/bin", .ascii = true },
.{ .text = "世界", .ascii = false },
.{ .text = "\t\t\t", .ascii = true },
.{ .text = "\t\t\t", .ascii = false },
};

for (edge_cases) |input| {
Expand Down Expand Up @@ -1402,7 +1402,7 @@ test "find pos by width: selection boundaries with multiple wide chars" {
}

test "find pos by width: empty string" {
const result = utf8.findPosByWidth("", 10, 4, true, true, .unicode);
const result = utf8.findPosByWidth("", 10, 4, false, true, .unicode);
try testing.expectEqual(@as(u32, 0), result.byte_offset);
try testing.expectEqual(@as(u32, 0), result.grapheme_count);
try testing.expectEqual(@as(u32, 0), result.columns_used);
Expand Down Expand Up @@ -1470,7 +1470,7 @@ test "find pos by width: combining mark" {
}

test "find pos by width: tab handling" {
const result = utf8.findPosByWidth("a\tb", 5, 4, true, true, .unicode);
const result = utf8.findPosByWidth("a\tb", 5, 4, false, true, .unicode);
try testing.expectEqual(@as(u32, 2), result.byte_offset); // After "a\t"
try testing.expectEqual(@as(u32, 2), result.grapheme_count); // 'a' + tab
try testing.expectEqual(@as(u32, 5), result.columns_used); // 'a' (1) + tab (4) = 5
Expand Down Expand Up @@ -1577,9 +1577,9 @@ test "split at weight: tab character" {
const input = "a\tbc"; // a(1) tab(4 fixed) b(1) c(1) = 7 columns total

// Split at column 4 - should stop before tab since it would exceed limit
const result4 = utf8.findPosByWidth(input, 4, 4, true, false, .unicode);
try testing.expectEqual(@as(u32, 2), result4.byte_offset); // After "a\t"
try testing.expectEqual(@as(u32, 5), result4.columns_used); // a(1) + tab(4) = 5
const result4 = utf8.findPosByWidth(input, 4, 4, false, false, .unicode);
try testing.expectEqual(@as(u32, 1), result4.byte_offset); // After "a"
try testing.expectEqual(@as(u32, 1), result4.columns_used); // a(1)
}

test "split at weight: complex mixed content" {
Expand Down Expand Up @@ -1959,7 +1959,7 @@ test "getPrevGraphemeStart: consecutive wide chars" {
// ============================================================================

test "calculateTextWidth: empty string" {
const result = utf8.calculateTextWidth("", 4, true, .unicode);
const result = utf8.calculateTextWidth("", 4, false, .unicode);
try testing.expectEqual(@as(u32, 0), result);
}

Expand All @@ -1969,38 +1969,38 @@ test "calculateTextWidth: simple ASCII" {
}

test "calculateTextWidth: single tab" {
const result = utf8.calculateTextWidth("\t", 4, true, .unicode);
const result = utf8.calculateTextWidth("\t", 4, false, .unicode);
try testing.expectEqual(@as(u32, 4), result);
}

test "calculateTextWidth: tab with different widths" {
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth("\t", 2, true, .unicode));
try testing.expectEqual(@as(u32, 4), utf8.calculateTextWidth("\t", 4, true, .unicode));
try testing.expectEqual(@as(u32, 8), utf8.calculateTextWidth("\t", 8, true, .unicode));
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth("\t", 2, false, .unicode));
try testing.expectEqual(@as(u32, 4), utf8.calculateTextWidth("\t", 4, false, .unicode));
try testing.expectEqual(@as(u32, 8), utf8.calculateTextWidth("\t", 8, false, .unicode));
}

test "calculateTextWidth: multiple tabs" {
const result = utf8.calculateTextWidth("\t\t\t", 4, true, .unicode);
const result = utf8.calculateTextWidth("\t\t\t", 4, false, .unicode);
try testing.expectEqual(@as(u32, 12), result); // 3 tabs * 4 = 12
}

test "calculateTextWidth: text with tabs" {
const result = utf8.calculateTextWidth("a\tb", 4, true, .unicode);
const result = utf8.calculateTextWidth("a\tb", 4, false, .unicode);
try testing.expectEqual(@as(u32, 6), result); // a(1) + tab(4) + b(1) = 6
}

test "calculateTextWidth: multiple tabs between text" {
const result = utf8.calculateTextWidth("a\t\tb", 2, true, .unicode);
const result = utf8.calculateTextWidth("a\t\tb", 2, false, .unicode);
try testing.expectEqual(@as(u32, 6), result); // a(1) + tab(2) + tab(2) + b(1) = 6
}

test "calculateTextWidth: tab at start" {
const result = utf8.calculateTextWidth("\tabc", 4, true, .unicode);
const result = utf8.calculateTextWidth("\tabc", 4, false, .unicode);
try testing.expectEqual(@as(u32, 7), result); // tab(4) + a(1) + b(1) + c(1) = 7
}

test "calculateTextWidth: tab at end" {
const result = utf8.calculateTextWidth("abc\t", 4, true, .unicode);
const result = utf8.calculateTextWidth("abc\t", 4, false, .unicode);
try testing.expectEqual(@as(u32, 7), result); // a(1) + b(1) + c(1) + tab(4) = 7
}

Expand All @@ -2021,7 +2021,7 @@ test "calculateTextWidth: mixed ASCII and Unicode with tabs" {

test "calculateTextWidth: realistic code with tabs" {
const text = "\tif (x > 5) {\n\t\treturn true;\n\t}";
const result = utf8.calculateTextWidth(text, 2, true, .unicode);
const result = utf8.calculateTextWidth(text, 2, false, .unicode);
// tab(2) + "if (x > 5) {" (12) + newline(0) + tab(2) + tab(2) + "return true;" (12) + newline(0) + tab(2) + "}" (1)
// = 2 + 12 + 2 + 2 + 12 + 2 + 1 = 33
try testing.expectEqual(@as(u32, 33), result);
Expand All @@ -2033,12 +2033,12 @@ test "calculateTextWidth: only spaces" {
}

test "calculateTextWidth: tabs and spaces mixed" {
const result = utf8.calculateTextWidth(" \t \t ", 4, true, .unicode);
const result = utf8.calculateTextWidth(" \t \t ", 4, false, .unicode);
try testing.expectEqual(@as(u32, 14), result); // 2 + 4 + 2 + 4 + 2 = 14
}

test "calculateTextWidth: control characters" {
const result = utf8.calculateTextWidth("a\x00b\x1Fc", 4, true, .unicode);
const result = utf8.calculateTextWidth("a\x00b\x1Fc", 4, false, .unicode);
try testing.expectEqual(@as(u32, 3), result); // Only printable chars: a, b, c
}

Expand Down Expand Up @@ -2103,7 +2103,7 @@ test "findGraphemeInfo: empty string" {
var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
defer result.deinit(testing.allocator);

try utf8.findGraphemeInfo("", 4, true, .unicode, testing.allocator, &result);
try utf8.findGraphemeInfo("", 4, false, .unicode, testing.allocator, &result);
try testing.expectEqual(@as(usize, 0), result.items.len);
}

Expand Down Expand Up @@ -2390,7 +2390,7 @@ test "calculateTextWidth: fullwidth forms with tab" {
}

test "calculateTextWidth: ASCII fast path consistency" {
const text_ascii = "hello\tworld";
const text_ascii = "hello world";
const result_fast = utf8.calculateTextWidth(text_ascii, 4, true, .unicode);
const result_slow = utf8.calculateTextWidth(text_ascii, 4, false, .unicode);
try testing.expectEqual(result_fast, result_slow);
Expand All @@ -2412,7 +2412,7 @@ test "calculateTextWidth: large text with many tabs" {
}
}

const result = utf8.calculateTextWidth(buf, 4, true, .unicode);
const result = utf8.calculateTextWidth(buf, 4, false, .unicode);
try testing.expectEqual(expected, result);
}

Expand Down
2 changes: 1 addition & 1 deletion packages/core/src/zig/tests/utf8_wcwidth_test.zig
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ test "findGraphemeInfo wcwidth: empty string" {
var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
defer result.deinit(testing.allocator);

try utf8.findGraphemeInfo("", 4, true, .wcwidth, testing.allocator, &result);
try utf8.findGraphemeInfo("", 4, false, .wcwidth, testing.allocator, &result);
try testing.expectEqual(@as(usize, 0), result.items.len);
}

Expand Down
2 changes: 1 addition & 1 deletion packages/core/src/zig/text-buffer-segment.zig
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ pub const TextChunk = struct {
wrap_offsets: ?[]utf8.WrapBreak = null,

pub const Flags = struct {
pub const ASCII_ONLY: u8 = 0b00000001;
pub const ASCII_ONLY: u8 = 0b00000001; // Printable ASCII only (32..126).
};

pub fn isAsciiOnly(self: *const TextChunk) bool {
Expand Down
124 changes: 18 additions & 106 deletions packages/core/src/zig/utf8.zig
Original file line number Diff line number Diff line change
Expand Up @@ -971,37 +971,11 @@ fn findWrapPosByWidthUnicode(

// ASCII-only fast path
if (isASCIIOnly) {
const vector_len = 16;
var pos: usize = 0;
var columns_used: u32 = 0;

while (pos + vector_len <= text.len) {
var i: usize = 0;
while (i < vector_len) : (i += 1) {
const b = text[pos + i];
const width = asciiCharWidth(b, tab_width);
columns_used += width;

if (columns_used > max_columns) {
return .{ .byte_offset = @intCast(pos + i), .grapheme_count = @intCast(pos + i), .columns_used = columns_used - width };
}
}
pos += vector_len;
}

// Tail
while (pos < text.len) {
const b = text[pos];
const width = asciiCharWidth(b, tab_width);
columns_used += width;

if (columns_used > max_columns) {
return .{ .byte_offset = @intCast(pos), .grapheme_count = @intCast(pos), .columns_used = columns_used - width };
}
pos += 1;
if (max_columns >= text.len) {
return .{ .byte_offset = @intCast(text.len), .grapheme_count = @intCast(text.len), .columns_used = @intCast(text.len) };
} else {
return .{ .byte_offset = max_columns, .grapheme_count = max_columns, .columns_used = max_columns };
}

return .{ .byte_offset = @intCast(text.len), .grapheme_count = @intCast(text.len), .columns_used = columns_used };
}

const vector_len = 16;
Expand Down Expand Up @@ -1120,22 +1094,11 @@ fn findWrapPosByWidthWCWidth(

// ASCII-only fast path
if (isASCIIOnly) {
var pos: usize = 0;
var columns_used: u32 = 0;

while (pos < text.len) {
const b = text[pos];
const width = asciiCharWidth(b, tab_width);

if (columns_used + width > max_columns) {
return .{ .byte_offset = @intCast(pos), .grapheme_count = @intCast(pos), .columns_used = columns_used };
}

columns_used += width;
pos += 1;
if (max_columns >= text.len) {
return .{ .byte_offset = @intCast(text.len), .grapheme_count = @intCast(text.len), .columns_used = @intCast(text.len) };
} else {
return .{ .byte_offset = max_columns, .grapheme_count = max_columns, .columns_used = max_columns };
}

return .{ .byte_offset = @intCast(text.len), .grapheme_count = @intCast(text.len), .columns_used = columns_used };
}

// Unicode path - process each codepoint independently
Expand Down Expand Up @@ -1209,40 +1172,11 @@ fn findPosByWidthUnicode(

// ASCII-only fast path
if (isASCIIOnly) {
const vector_len = 16;
var pos: usize = 0;
var columns_used: u32 = 0;

while (pos + vector_len <= text.len) {
var i: usize = 0;
while (i < vector_len) : (i += 1) {
const b = text[pos + i];
const prev_columns = columns_used;

columns_used += asciiCharWidth(b, tab_width);

// Check if this character starts at or after max_columns
if (prev_columns >= max_columns) {
return .{ .byte_offset = @intCast(pos + i), .grapheme_count = @intCast(pos + i), .columns_used = prev_columns };
}
}
pos += vector_len;
}

// Tail
while (pos < text.len) {
const b = text[pos];
const prev_columns = columns_used;

columns_used += asciiCharWidth(b, tab_width);

if (prev_columns >= max_columns) {
return .{ .byte_offset = @intCast(pos), .grapheme_count = @intCast(pos), .columns_used = prev_columns };
}
pos += 1;
if (max_columns >= text.len) {
return .{ .byte_offset = @intCast(text.len), .grapheme_count = @intCast(text.len), .columns_used = @intCast(text.len) };
} else {
return .{ .byte_offset = max_columns, .grapheme_count = max_columns, .columns_used = max_columns };
}

return .{ .byte_offset = @intCast(text.len), .grapheme_count = @intCast(text.len), .columns_used = columns_used };
}

const vector_len = 16;
Expand Down Expand Up @@ -1364,25 +1298,11 @@ fn findPosByWidthWCWidth(

// ASCII-only fast path
if (isASCIIOnly) {
var pos: usize = 0;
var columns_used: u32 = 0;

while (pos < text.len) {
const b = text[pos];
const prev_columns = columns_used;
const width = asciiCharWidth(b, tab_width);

columns_used += width;

// Check if this character starts at or after max_columns
if (prev_columns >= max_columns) {
return .{ .byte_offset = @intCast(pos), .grapheme_count = @intCast(pos), .columns_used = prev_columns };
}

pos += 1;
if (max_columns >= text.len) {
return .{ .byte_offset = @intCast(text.len), .grapheme_count = @intCast(text.len), .columns_used = @intCast(text.len) };
} else {
return .{ .byte_offset = max_columns, .grapheme_count = max_columns, .columns_used = max_columns };
}

return .{ .byte_offset = @intCast(text.len), .grapheme_count = @intCast(text.len), .columns_used = columns_used };
}

// Unicode path - process each codepoint independently
Expand Down Expand Up @@ -1602,11 +1522,7 @@ fn calculateTextWidthUnicode(text: []const u8, tab_width: u8, isASCIIOnly: bool,

// ASCII-only fast path
if (isASCIIOnly) {
var width: u32 = 0;
for (text) |b| {
width += asciiCharWidth(b, tab_width);
}
return width;
return @intCast(text.len);
}

// General case with Unicode support and grapheme cluster handling
Expand Down Expand Up @@ -1655,11 +1571,7 @@ fn calculateTextWidthWCWidth(text: []const u8, tab_width: u8, isASCIIOnly: bool)

// ASCII-only fast path
if (isASCIIOnly) {
var width: u32 = 0;
for (text) |b| {
width += asciiCharWidth(b, tab_width);
}
return width;
return @intCast(text.len);
}

// Unicode path - sum width of all codepoints
Expand Down
Loading