diff --git a/packages/core/src/zig/bench/utf8_bench.zig b/packages/core/src/zig/bench/utf8_bench.zig index 3e2baae67..44b0b550c 100644 --- a/packages/core/src/zig/bench/utf8_bench.zig +++ b/packages/core/src/zig/bench/utf8_bench.zig @@ -603,7 +603,7 @@ fn benchCalculateTextWidth(results_alloc: std.mem.Allocator, iterations: usize) var stats = BenchStats{}; for (0..iterations) |_| { var timer = try std.time.Timer.start(); - _ = utf8.calculateTextWidth(text.items, 4, true, .unicode); + _ = utf8.calculateTextWidth(text.items, 4, false, .unicode); stats.record(timer.read()); } diff --git a/packages/core/src/zig/tests/utf8_test.zig b/packages/core/src/zig/tests/utf8_test.zig index 1bddeee53..08e8e4110 100644 --- a/packages/core/src/zig/tests/utf8_test.zig +++ b/packages/core/src/zig/tests/utf8_test.zig @@ -1157,7 +1157,7 @@ test "wrap breaks: mixed graphemes and ASCII" { // ============================================================================ test "wrap by width: empty string" { - const result = utf8.findWrapPosByWidth("", 10, 4, true, .unicode); + const result = utf8.findWrapPosByWidth("", 10, 4, false, .unicode); try testing.expectEqual(@as(u32, 0), result.byte_offset); try testing.expectEqual(@as(u32, 0), result.grapheme_count); try testing.expectEqual(@as(u32, 0), result.columns_used); @@ -1199,7 +1199,7 @@ test "wrap by width: combining mark" { } test "wrap by width: tab handling" { - const result = utf8.findWrapPosByWidth("a\tb", 5, 4, true, .unicode); + const result = utf8.findWrapPosByWidth("a\tb", 5, 4, false, .unicode); try testing.expectEqual(@as(u32, 2), result.byte_offset); // After "a\t" try testing.expectEqual(@as(u32, 2), result.grapheme_count); // 'a' + tab try testing.expectEqual(@as(u32, 5), result.columns_used); // 'a' (1) + tab (4) = 5 @@ -1235,7 +1235,7 @@ test "wrap by width: consistency - Unicode text" { test "wrap by width: consistency - edge cases" { const edge_cases = [_]struct { text: []const u8, ascii: bool }{ - .{ .text = "", .ascii = true }, + .{ .text = "", .ascii = false }, .{ .text = " ", .ascii = true }, .{ .text = "a", .ascii = true }, .{ .text = "abc", .ascii = true }, @@ -1244,7 +1244,7 @@ test "wrap by width: consistency - edge cases" { .{ .text = "no-spaces-here", .ascii = true }, .{ .text = "/usr/local/bin", .ascii = true }, .{ .text = "世界", .ascii = false }, - .{ .text = "\t\t\t", .ascii = true }, + .{ .text = "\t\t\t", .ascii = false }, }; for (edge_cases) |input| { @@ -1402,7 +1402,7 @@ test "find pos by width: selection boundaries with multiple wide chars" { } test "find pos by width: empty string" { - const result = utf8.findPosByWidth("", 10, 4, true, true, .unicode); + const result = utf8.findPosByWidth("", 10, 4, false, true, .unicode); try testing.expectEqual(@as(u32, 0), result.byte_offset); try testing.expectEqual(@as(u32, 0), result.grapheme_count); try testing.expectEqual(@as(u32, 0), result.columns_used); @@ -1470,7 +1470,7 @@ test "find pos by width: combining mark" { } test "find pos by width: tab handling" { - const result = utf8.findPosByWidth("a\tb", 5, 4, true, true, .unicode); + const result = utf8.findPosByWidth("a\tb", 5, 4, false, true, .unicode); try testing.expectEqual(@as(u32, 2), result.byte_offset); // After "a\t" try testing.expectEqual(@as(u32, 2), result.grapheme_count); // 'a' + tab try testing.expectEqual(@as(u32, 5), result.columns_used); // 'a' (1) + tab (4) = 5 @@ -1577,9 +1577,9 @@ test "split at weight: tab character" { const input = "a\tbc"; // a(1) tab(4 fixed) b(1) c(1) = 7 columns total // Split at column 4 - should stop before tab since it would exceed limit - const result4 = utf8.findPosByWidth(input, 4, 4, true, false, .unicode); - try testing.expectEqual(@as(u32, 2), result4.byte_offset); // After "a\t" - try testing.expectEqual(@as(u32, 5), result4.columns_used); // a(1) + tab(4) = 5 + const result4 = utf8.findPosByWidth(input, 4, 4, false, false, .unicode); + try testing.expectEqual(@as(u32, 1), result4.byte_offset); // After "a" + try testing.expectEqual(@as(u32, 1), result4.columns_used); // a(1) } test "split at weight: complex mixed content" { @@ -1959,7 +1959,7 @@ test "getPrevGraphemeStart: consecutive wide chars" { // ============================================================================ test "calculateTextWidth: empty string" { - const result = utf8.calculateTextWidth("", 4, true, .unicode); + const result = utf8.calculateTextWidth("", 4, false, .unicode); try testing.expectEqual(@as(u32, 0), result); } @@ -1969,38 +1969,38 @@ test "calculateTextWidth: simple ASCII" { } test "calculateTextWidth: single tab" { - const result = utf8.calculateTextWidth("\t", 4, true, .unicode); + const result = utf8.calculateTextWidth("\t", 4, false, .unicode); try testing.expectEqual(@as(u32, 4), result); } test "calculateTextWidth: tab with different widths" { - try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth("\t", 2, true, .unicode)); - try testing.expectEqual(@as(u32, 4), utf8.calculateTextWidth("\t", 4, true, .unicode)); - try testing.expectEqual(@as(u32, 8), utf8.calculateTextWidth("\t", 8, true, .unicode)); + try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth("\t", 2, false, .unicode)); + try testing.expectEqual(@as(u32, 4), utf8.calculateTextWidth("\t", 4, false, .unicode)); + try testing.expectEqual(@as(u32, 8), utf8.calculateTextWidth("\t", 8, false, .unicode)); } test "calculateTextWidth: multiple tabs" { - const result = utf8.calculateTextWidth("\t\t\t", 4, true, .unicode); + const result = utf8.calculateTextWidth("\t\t\t", 4, false, .unicode); try testing.expectEqual(@as(u32, 12), result); // 3 tabs * 4 = 12 } test "calculateTextWidth: text with tabs" { - const result = utf8.calculateTextWidth("a\tb", 4, true, .unicode); + const result = utf8.calculateTextWidth("a\tb", 4, false, .unicode); try testing.expectEqual(@as(u32, 6), result); // a(1) + tab(4) + b(1) = 6 } test "calculateTextWidth: multiple tabs between text" { - const result = utf8.calculateTextWidth("a\t\tb", 2, true, .unicode); + const result = utf8.calculateTextWidth("a\t\tb", 2, false, .unicode); try testing.expectEqual(@as(u32, 6), result); // a(1) + tab(2) + tab(2) + b(1) = 6 } test "calculateTextWidth: tab at start" { - const result = utf8.calculateTextWidth("\tabc", 4, true, .unicode); + const result = utf8.calculateTextWidth("\tabc", 4, false, .unicode); try testing.expectEqual(@as(u32, 7), result); // tab(4) + a(1) + b(1) + c(1) = 7 } test "calculateTextWidth: tab at end" { - const result = utf8.calculateTextWidth("abc\t", 4, true, .unicode); + const result = utf8.calculateTextWidth("abc\t", 4, false, .unicode); try testing.expectEqual(@as(u32, 7), result); // a(1) + b(1) + c(1) + tab(4) = 7 } @@ -2021,7 +2021,7 @@ test "calculateTextWidth: mixed ASCII and Unicode with tabs" { test "calculateTextWidth: realistic code with tabs" { const text = "\tif (x > 5) {\n\t\treturn true;\n\t}"; - const result = utf8.calculateTextWidth(text, 2, true, .unicode); + const result = utf8.calculateTextWidth(text, 2, false, .unicode); // tab(2) + "if (x > 5) {" (12) + newline(0) + tab(2) + tab(2) + "return true;" (12) + newline(0) + tab(2) + "}" (1) // = 2 + 12 + 2 + 2 + 12 + 2 + 1 = 33 try testing.expectEqual(@as(u32, 33), result); @@ -2033,12 +2033,12 @@ test "calculateTextWidth: only spaces" { } test "calculateTextWidth: tabs and spaces mixed" { - const result = utf8.calculateTextWidth(" \t \t ", 4, true, .unicode); + const result = utf8.calculateTextWidth(" \t \t ", 4, false, .unicode); try testing.expectEqual(@as(u32, 14), result); // 2 + 4 + 2 + 4 + 2 = 14 } test "calculateTextWidth: control characters" { - const result = utf8.calculateTextWidth("a\x00b\x1Fc", 4, true, .unicode); + const result = utf8.calculateTextWidth("a\x00b\x1Fc", 4, false, .unicode); try testing.expectEqual(@as(u32, 3), result); // Only printable chars: a, b, c } @@ -2103,7 +2103,7 @@ test "findGraphemeInfo: empty string" { var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{}; defer result.deinit(testing.allocator); - try utf8.findGraphemeInfo("", 4, true, .unicode, testing.allocator, &result); + try utf8.findGraphemeInfo("", 4, false, .unicode, testing.allocator, &result); try testing.expectEqual(@as(usize, 0), result.items.len); } @@ -2390,7 +2390,7 @@ test "calculateTextWidth: fullwidth forms with tab" { } test "calculateTextWidth: ASCII fast path consistency" { - const text_ascii = "hello\tworld"; + const text_ascii = "hello world"; const result_fast = utf8.calculateTextWidth(text_ascii, 4, true, .unicode); const result_slow = utf8.calculateTextWidth(text_ascii, 4, false, .unicode); try testing.expectEqual(result_fast, result_slow); @@ -2412,7 +2412,7 @@ test "calculateTextWidth: large text with many tabs" { } } - const result = utf8.calculateTextWidth(buf, 4, true, .unicode); + const result = utf8.calculateTextWidth(buf, 4, false, .unicode); try testing.expectEqual(expected, result); } diff --git a/packages/core/src/zig/tests/utf8_wcwidth_test.zig b/packages/core/src/zig/tests/utf8_wcwidth_test.zig index aec6338f6..29d6251b7 100644 --- a/packages/core/src/zig/tests/utf8_wcwidth_test.zig +++ b/packages/core/src/zig/tests/utf8_wcwidth_test.zig @@ -6,7 +6,7 @@ test "findGraphemeInfo wcwidth: empty string" { var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{}; defer result.deinit(testing.allocator); - try utf8.findGraphemeInfo("", 4, true, .wcwidth, testing.allocator, &result); + try utf8.findGraphemeInfo("", 4, false, .wcwidth, testing.allocator, &result); try testing.expectEqual(@as(usize, 0), result.items.len); } diff --git a/packages/core/src/zig/text-buffer-segment.zig b/packages/core/src/zig/text-buffer-segment.zig index 23ac61500..476985ce0 100644 --- a/packages/core/src/zig/text-buffer-segment.zig +++ b/packages/core/src/zig/text-buffer-segment.zig @@ -45,7 +45,7 @@ pub const TextChunk = struct { wrap_offsets: ?[]utf8.WrapBreak = null, pub const Flags = struct { - pub const ASCII_ONLY: u8 = 0b00000001; + pub const ASCII_ONLY: u8 = 0b00000001; // Printable ASCII only (32..126). }; pub fn isAsciiOnly(self: *const TextChunk) bool { diff --git a/packages/core/src/zig/utf8.zig b/packages/core/src/zig/utf8.zig index e4d8d1f24..30683f345 100644 --- a/packages/core/src/zig/utf8.zig +++ b/packages/core/src/zig/utf8.zig @@ -971,37 +971,11 @@ fn findWrapPosByWidthUnicode( // ASCII-only fast path if (isASCIIOnly) { - const vector_len = 16; - var pos: usize = 0; - var columns_used: u32 = 0; - - while (pos + vector_len <= text.len) { - var i: usize = 0; - while (i < vector_len) : (i += 1) { - const b = text[pos + i]; - const width = asciiCharWidth(b, tab_width); - columns_used += width; - - if (columns_used > max_columns) { - return .{ .byte_offset = @intCast(pos + i), .grapheme_count = @intCast(pos + i), .columns_used = columns_used - width }; - } - } - pos += vector_len; - } - - // Tail - while (pos < text.len) { - const b = text[pos]; - const width = asciiCharWidth(b, tab_width); - columns_used += width; - - if (columns_used > max_columns) { - return .{ .byte_offset = @intCast(pos), .grapheme_count = @intCast(pos), .columns_used = columns_used - width }; - } - pos += 1; + if (max_columns >= text.len) { + return .{ .byte_offset = @intCast(text.len), .grapheme_count = @intCast(text.len), .columns_used = @intCast(text.len) }; + } else { + return .{ .byte_offset = max_columns, .grapheme_count = max_columns, .columns_used = max_columns }; } - - return .{ .byte_offset = @intCast(text.len), .grapheme_count = @intCast(text.len), .columns_used = columns_used }; } const vector_len = 16; @@ -1120,22 +1094,11 @@ fn findWrapPosByWidthWCWidth( // ASCII-only fast path if (isASCIIOnly) { - var pos: usize = 0; - var columns_used: u32 = 0; - - while (pos < text.len) { - const b = text[pos]; - const width = asciiCharWidth(b, tab_width); - - if (columns_used + width > max_columns) { - return .{ .byte_offset = @intCast(pos), .grapheme_count = @intCast(pos), .columns_used = columns_used }; - } - - columns_used += width; - pos += 1; + if (max_columns >= text.len) { + return .{ .byte_offset = @intCast(text.len), .grapheme_count = @intCast(text.len), .columns_used = @intCast(text.len) }; + } else { + return .{ .byte_offset = max_columns, .grapheme_count = max_columns, .columns_used = max_columns }; } - - return .{ .byte_offset = @intCast(text.len), .grapheme_count = @intCast(text.len), .columns_used = columns_used }; } // Unicode path - process each codepoint independently @@ -1209,40 +1172,11 @@ fn findPosByWidthUnicode( // ASCII-only fast path if (isASCIIOnly) { - const vector_len = 16; - var pos: usize = 0; - var columns_used: u32 = 0; - - while (pos + vector_len <= text.len) { - var i: usize = 0; - while (i < vector_len) : (i += 1) { - const b = text[pos + i]; - const prev_columns = columns_used; - - columns_used += asciiCharWidth(b, tab_width); - - // Check if this character starts at or after max_columns - if (prev_columns >= max_columns) { - return .{ .byte_offset = @intCast(pos + i), .grapheme_count = @intCast(pos + i), .columns_used = prev_columns }; - } - } - pos += vector_len; - } - - // Tail - while (pos < text.len) { - const b = text[pos]; - const prev_columns = columns_used; - - columns_used += asciiCharWidth(b, tab_width); - - if (prev_columns >= max_columns) { - return .{ .byte_offset = @intCast(pos), .grapheme_count = @intCast(pos), .columns_used = prev_columns }; - } - pos += 1; + if (max_columns >= text.len) { + return .{ .byte_offset = @intCast(text.len), .grapheme_count = @intCast(text.len), .columns_used = @intCast(text.len) }; + } else { + return .{ .byte_offset = max_columns, .grapheme_count = max_columns, .columns_used = max_columns }; } - - return .{ .byte_offset = @intCast(text.len), .grapheme_count = @intCast(text.len), .columns_used = columns_used }; } const vector_len = 16; @@ -1364,25 +1298,11 @@ fn findPosByWidthWCWidth( // ASCII-only fast path if (isASCIIOnly) { - var pos: usize = 0; - var columns_used: u32 = 0; - - while (pos < text.len) { - const b = text[pos]; - const prev_columns = columns_used; - const width = asciiCharWidth(b, tab_width); - - columns_used += width; - - // Check if this character starts at or after max_columns - if (prev_columns >= max_columns) { - return .{ .byte_offset = @intCast(pos), .grapheme_count = @intCast(pos), .columns_used = prev_columns }; - } - - pos += 1; + if (max_columns >= text.len) { + return .{ .byte_offset = @intCast(text.len), .grapheme_count = @intCast(text.len), .columns_used = @intCast(text.len) }; + } else { + return .{ .byte_offset = max_columns, .grapheme_count = max_columns, .columns_used = max_columns }; } - - return .{ .byte_offset = @intCast(text.len), .grapheme_count = @intCast(text.len), .columns_used = columns_used }; } // Unicode path - process each codepoint independently @@ -1602,11 +1522,7 @@ fn calculateTextWidthUnicode(text: []const u8, tab_width: u8, isASCIIOnly: bool, // ASCII-only fast path if (isASCIIOnly) { - var width: u32 = 0; - for (text) |b| { - width += asciiCharWidth(b, tab_width); - } - return width; + return @intCast(text.len); } // General case with Unicode support and grapheme cluster handling @@ -1655,11 +1571,7 @@ fn calculateTextWidthWCWidth(text: []const u8, tab_width: u8, isASCIIOnly: bool) // ASCII-only fast path if (isASCIIOnly) { - var width: u32 = 0; - for (text) |b| { - width += asciiCharWidth(b, tab_width); - } - return width; + return @intCast(text.len); } // Unicode path - sum width of all codepoints