diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 60bff02c0b6..0a5ec0faf03 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -847,6 +847,9 @@ importers:
       i18next:
         specifier: ^25.0.0
         version: 25.2.1(typescript@5.8.3)
+      iconv-lite:
+        specifier: ^0.6.3
+        version: 0.6.3
       ignore:
         specifier: ^7.0.3
         version: 7.0.4
@@ -14555,7 +14558,7 @@ snapshots:
       sirv: 3.0.1
       tinyglobby: 0.2.14
       tinyrainbow: 2.0.0
-      vitest: 3.2.4(@types/debug@4.1.12)(@types/node@20.17.50)(@vitest/ui@3.2.4)(jiti@2.4.2)(jsdom@26.1.0)(lightningcss@1.30.1)(tsx@4.19.4)(yaml@2.8.0)
+      vitest: 3.2.4(@types/debug@4.1.12)(@types/node@24.2.1)(@vitest/ui@3.2.4)(jiti@2.4.2)(jsdom@26.1.0)(lightningcss@1.30.1)(tsx@4.19.4)(yaml@2.8.0)
 
   '@vitest/utils@3.2.4':
     dependencies:
diff --git a/src/core/tools/ApplyPatchTool.ts b/src/core/tools/ApplyPatchTool.ts
index 0c3a1765f22..1d66eeba3a5 100644
--- a/src/core/tools/ApplyPatchTool.ts
+++ b/src/core/tools/ApplyPatchTool.ts
@@ -5,6 +5,7 @@ import { type ClineSayTool, DEFAULT_WRITE_DELAY_MS } from "@roo-code/types"
 
 import { getReadablePath } from "../../utils/path"
 import { isPathOutsideWorkspace } from "../../utils/pathUtils"
+import { readFileWithEncoding, writeFileWithEncoding } from "../../utils/fileEncoding"
 import { Task } from "../task/Task"
 import { formatResponse } from "../prompts/responses"
 import { RecordSource } from "../context-tracking/FileContextTrackerTypes"
@@ -59,7 +60,8 @@ export class ApplyPatchTool extends BaseTool<"apply_patch"> {
 			// Process each hunk
 			const readFile = async (filePath: string): Promise<string> => {
 				const absolutePath = path.resolve(task.cwd, filePath)
-				return await fs.readFile(absolutePath, "utf8")
+				const { content } = await readFileWithEncoding(absolutePath)
+				return content
 			}
 
 			let changes: ApplyPatchFileChange[]
@@ -387,10 +389,10 @@ export class ApplyPatchTool extends BaseTool<"apply_patch"> {
 					writeDelayMs,
 				)
 			} else {
-				// Write to new path and delete old file
+				// Write to new path and delete old file with proper encoding
 				const parentDir = path.dirname(moveAbsolutePath)
 				await fs.mkdir(parentDir, { recursive: true })
-				await fs.writeFile(moveAbsolutePath, newContent, "utf8")
+				await writeFileWithEncoding(moveAbsolutePath, newContent)
 			}
 
 			// Delete the original file
diff --git a/src/integrations/editor/DiffViewProvider.ts b/src/integrations/editor/DiffViewProvider.ts
index 94a483706e6..36e61c8cab0 100644
--- a/src/integrations/editor/DiffViewProvider.ts
+++ b/src/integrations/editor/DiffViewProvider.ts
@@ -8,6 +8,7 @@ import delay from "delay"
 import { type ClineSayTool, DEFAULT_WRITE_DELAY_MS } from "@roo-code/types"
 
 import { createDirectoriesForFile } from "../../utils/fs"
+import { readFileWithEncoding, writeFileWithEncoding } from "../../utils/fileEncoding"
 import { arePathsEqual, getReadablePath } from "../../utils/path"
 import { formatResponse } from "../../core/prompts/responses"
 import { diagnosticsToProblemsString, getNewDiagnostics } from "../diagnostics"
@@ -67,7 +68,8 @@ export class DiffViewProvider {
 		this.preDiagnostics = vscode.languages.getDiagnostics()
 
 		if (fileExists) {
-			this.originalContent = await fs.readFile(absolutePath, "utf-8")
+			const { content } = await readFileWithEncoding(absolutePath)
+			this.originalContent = content
 		} else {
 			this.originalContent = ""
 		}
@@ -651,9 +653,9 @@ export class DiffViewProvider {
 		// Get diagnostics before editing the file
 		this.preDiagnostics = vscode.languages.getDiagnostics()
 
-		// Write the content directly to the file
+		// Write the content directly to the file with proper encoding
 		await createDirectoriesForFile(absolutePath)
-		await fs.writeFile(absolutePath, content, "utf-8")
+		await writeFileWithEncoding(absolutePath, content)
 
 		// Open the document to ensure diagnostics are loaded
 		// When openFile is false (PREVENT_FOCUS_DISRUPTION enabled), we only open in memory
diff --git a/src/package.json b/src/package.json
index 97d03858989..ac3cc0188f9 100644
--- a/src/package.json
+++ b/src/package.json
@@ -479,6 +479,7 @@
 		"fastest-levenshtein": "^1.0.16",
 		"fzf": "^0.5.2",
 		"get-folder-size": "^5.0.0",
+		"iconv-lite": "^0.6.3",
 		"global-agent": "^3.0.0",
 		"google-auth-library": "^9.15.1",
 		"gray-matter": "^4.0.3",
diff --git a/src/utils/__tests__/fileEncoding.spec.ts b/src/utils/__tests__/fileEncoding.spec.ts
new file mode 100644
index 00000000000..742005516f7
--- /dev/null
+++ b/src/utils/__tests__/fileEncoding.spec.ts
@@ -0,0 +1,291 @@
+import * as vscode from "vscode"
+import * as fs from "fs/promises"
+import * as iconv from "iconv-lite"
+import {
+	getFileEncoding,
+	normalizeEncoding,
+	isEncodingSupported,
+	readFileWithEncoding,
+	writeFileWithEncoding,
+} from "../fileEncoding"
+
+// Mock vscode module
+vi.mock("vscode", () => ({
+	workspace: {
+		getConfiguration: vi.fn(),
+	},
+	Uri: {
+		file: vi.fn((path: string) => ({ fsPath: path })),
+	},
+}))
+
+// Mock fs/promises module
+vi.mock("fs/promises", () => ({
+	default: {
+		readFile: vi.fn(),
+		writeFile: vi.fn(),
+	},
+	readFile: vi.fn(),
+	writeFile: vi.fn(),
+}))
+
+// Mock iconv-lite module
+vi.mock("iconv-lite", () => ({
+	default: {
+		encodingExists: vi.fn(),
+		decode: vi.fn(),
+		encode: vi.fn(),
+	},
+	encodingExists: vi.fn(),
+	decode: vi.fn(),
+	encode: vi.fn(),
+}))
+
+describe("fileEncoding", () => {
+	const mockedVscode = vi.mocked(vscode)
+	const mockedFs = vi.mocked(fs)
+	const mockedIconv = vi.mocked(iconv)
+
+	beforeEach(() => {
+		vi.clearAllMocks()
+	})
+
+	describe("getFileEncoding", () => {
+		it("should return the configured encoding from VSCode settings", () => {
+			const mockConfig = {
+				get: vi.fn().mockReturnValue("cp852"),
+			}
+			mockedVscode.workspace.getConfiguration = vi.fn().mockReturnValue(mockConfig)
+
+			const encoding = getFileEncoding("/path/to/file.txt")
+
+			expect(mockedVscode.Uri.file).toHaveBeenCalledWith("/path/to/file.txt")
+			expect(mockedVscode.workspace.getConfiguration).toHaveBeenCalledWith("files", {
+				fsPath: "/path/to/file.txt",
+			})
+			expect(mockConfig.get).toHaveBeenCalledWith("encoding", "utf8")
+			expect(encoding).toBe("cp852")
+		})
+
+		it("should return utf8 as default if no encoding is configured", () => {
+			const mockConfig = {
+				get: vi.fn().mockReturnValue("utf8"),
+			}
+			mockedVscode.workspace.getConfiguration = vi.fn().mockReturnValue(mockConfig)
+
+			const encoding = getFileEncoding("/path/to/file.txt")
+
+			expect(encoding).toBe("utf8")
+		})
+	})
+
+	describe("normalizeEncoding", () => {
+		it("should normalize utf-8 to utf8", () => {
+			expect(normalizeEncoding("utf-8")).toBe("utf8")
+			expect(normalizeEncoding("UTF-8")).toBe("utf8")
+		})
+
+		it("should normalize windows code pages", () => {
+			expect(normalizeEncoding("windows1252")).toBe("windows1252")
+			expect(normalizeEncoding("windows-1252")).toBe("windows1252")
+		})
+
+		it("should normalize DOS code pages", () => {
+			expect(normalizeEncoding("cp852")).toBe("cp852")
+			expect(normalizeEncoding("CP852")).toBe("cp852")
+		})
+
+		it("should normalize ISO encodings", () => {
+			expect(normalizeEncoding("iso88591")).toBe("iso88591")
+			expect(normalizeEncoding("iso-8859-1")).toBe("iso88591")
+		})
+
+		it("should return the original encoding if not in the map", () => {
+			expect(normalizeEncoding("unknown-encoding")).toBe("unknown-encoding")
+		})
+
+		it("should not resolve names inherited from Object.prototype", () => {
+			expect(normalizeEncoding("constructor")).toBe("constructor")
+			expect(normalizeEncoding("__proto__")).toBe("__proto__")
+		})
+	})
+
+	describe("isEncodingSupported", () => {
+		it("should return true for supported encodings", () => {
+			mockedIconv.encodingExists = vi.fn().mockReturnValue(true)
+
+			expect(isEncodingSupported("utf8")).toBe(true)
+			expect(mockedIconv.encodingExists).toHaveBeenCalledWith("utf8")
+		})
+
+		it("should return false for unsupported encodings", () => {
+			mockedIconv.encodingExists = vi.fn().mockReturnValue(false)
+
+			expect(isEncodingSupported("unknown")).toBe(false)
+			expect(mockedIconv.encodingExists).toHaveBeenCalledWith("unknown")
+		})
+	})
+
+	describe("readFileWithEncoding", () => {
+		beforeEach(() => {
+			const mockConfig = {
+				get: vi.fn().mockReturnValue("utf8"),
+			}
+			mockedVscode.workspace.getConfiguration = vi.fn().mockReturnValue(mockConfig)
+		})
+
+		it("should read file with UTF-8 encoding directly", async () => {
+			const mockBuffer = Buffer.from("Hello World", "utf8")
+			mockedFs.readFile = vi.fn().mockResolvedValue(mockBuffer)
+
+			const result = await readFileWithEncoding("/path/to/file.txt")
+
+			expect(result.content).toBe("Hello World")
+			expect(result.encoding).toBe("utf8")
+			expect(result.usedFallback).toBe(false)
+		})
+
+		it("should read file with CP852 encoding", async () => {
+			const mockConfig = {
+				get: vi.fn().mockReturnValue("cp852"),
+			}
+			mockedVscode.workspace.getConfiguration = vi.fn().mockReturnValue(mockConfig)
+
+			const mockBuffer = Buffer.from([0x8d, 0x8f, 0xa7]) // Some CP852 bytes
+			mockedFs.readFile = vi.fn().mockResolvedValue(mockBuffer)
+			mockedIconv.encodingExists = vi.fn().mockReturnValue(true)
+			mockedIconv.decode = vi.fn().mockReturnValue("čćž")
+
+			const result = await readFileWithEncoding("/path/to/file.txt")
+
+			expect(mockedIconv.decode).toHaveBeenCalledWith(mockBuffer, "cp852")
+			expect(result.content).toBe("čćž")
+			expect(result.encoding).toBe("cp852")
+			expect(result.usedFallback).toBe(false)
+		})
+
+		it("should fall back to UTF-8 if encoding is not supported", async () => {
+			const mockConfig = {
+				get: vi.fn().mockReturnValue("unsupported-encoding"),
+			}
+			mockedVscode.workspace.getConfiguration = vi.fn().mockReturnValue(mockConfig)
+
+			const mockBuffer = Buffer.from("Hello World", "utf8")
+			mockedFs.readFile = vi.fn().mockResolvedValue(mockBuffer)
+			mockedIconv.encodingExists = vi.fn().mockReturnValue(false)
+
+			const result = await readFileWithEncoding("/path/to/file.txt")
+
+			expect(result.content).toBe("Hello World")
+			expect(result.encoding).toBe("unsupported-encoding")
+			expect(result.usedFallback).toBe(true)
+		})
+
+		it("should fall back to UTF-8 if decoding fails", async () => {
+			const mockConfig = {
+				get: vi.fn().mockReturnValue("cp852"),
+			}
+			mockedVscode.workspace.getConfiguration = vi.fn().mockReturnValue(mockConfig)
+
+			const mockBuffer = Buffer.from("Hello World", "utf8")
+			mockedFs.readFile = vi.fn().mockResolvedValue(mockBuffer)
+			mockedIconv.encodingExists = vi.fn().mockReturnValue(true)
+			mockedIconv.decode = vi.fn().mockImplementation(() => {
+				throw new Error("Decoding failed")
+			})
+
+			const result = await readFileWithEncoding("/path/to/file.txt")
+
+			expect(result.content).toBe("Hello World")
+			expect(result.encoding).toBe("cp852")
+			expect(result.usedFallback).toBe(true)
+		})
+	})
+
+	describe("writeFileWithEncoding", () => {
+		beforeEach(() => {
+			const mockConfig = {
+				get: vi.fn().mockReturnValue("utf8"),
+			}
+			mockedVscode.workspace.getConfiguration = vi.fn().mockReturnValue(mockConfig)
+		})
+
+		it("should write file with UTF-8 encoding directly", async () => {
+			mockedFs.writeFile = vi.fn().mockResolvedValue(undefined)
+
+			const result = await writeFileWithEncoding("/path/to/file.txt", "Hello World")
+
+			expect(mockedFs.writeFile).toHaveBeenCalledWith("/path/to/file.txt", "Hello World", "utf8")
+			expect(result.encoding).toBe("utf8")
+			expect(result.usedFallback).toBe(false)
+		})
+
+		it("should write file with CP852 encoding", async () => {
+			const mockConfig = {
+				get: vi.fn().mockReturnValue("cp852"),
+			}
+			mockedVscode.workspace.getConfiguration = vi.fn().mockReturnValue(mockConfig)
+
+			const mockBuffer = Buffer.from([0x8d, 0x8f, 0xa7])
+			mockedIconv.encodingExists = vi.fn().mockReturnValue(true)
+			mockedIconv.encode = vi.fn().mockReturnValue(mockBuffer)
+			mockedFs.writeFile = vi.fn().mockResolvedValue(undefined)
+
+			const result = await writeFileWithEncoding("/path/to/file.txt", "čćž")
+
+			expect(mockedIconv.encode).toHaveBeenCalledWith("čćž", "cp852")
+			expect(mockedFs.writeFile).toHaveBeenCalledWith("/path/to/file.txt", mockBuffer)
+			expect(result.encoding).toBe("cp852")
+			expect(result.usedFallback).toBe(false)
+		})
+
+		it("should fall back to UTF-8 if encoding is not supported", async () => {
+			const mockConfig = {
+				get: vi.fn().mockReturnValue("unsupported-encoding"),
+			}
+			mockedVscode.workspace.getConfiguration = vi.fn().mockReturnValue(mockConfig)
+
+			mockedIconv.encodingExists = vi.fn().mockReturnValue(false)
+			mockedFs.writeFile = vi.fn().mockResolvedValue(undefined)
+
+			const result = await writeFileWithEncoding("/path/to/file.txt", "Hello World")
+
+			expect(mockedFs.writeFile).toHaveBeenCalledWith("/path/to/file.txt", "Hello World", "utf8")
+			expect(result.encoding).toBe("unsupported-encoding")
+			expect(result.usedFallback).toBe(true)
+		})
+
+		it("should fall back to UTF-8 if encoding fails", async () => {
+			const mockConfig = {
+				get: vi.fn().mockReturnValue("cp852"),
+			}
+			mockedVscode.workspace.getConfiguration = vi.fn().mockReturnValue(mockConfig)
+
+			mockedIconv.encodingExists = vi.fn().mockReturnValue(true)
+			mockedIconv.encode = vi.fn().mockImplementation(() => {
+				throw new Error("Encoding failed")
+			})
+			mockedFs.writeFile = vi.fn().mockResolvedValue(undefined)
+
+			const result = await writeFileWithEncoding("/path/to/file.txt", "Hello World")
+
+			expect(mockedFs.writeFile).toHaveBeenCalledWith("/path/to/file.txt", "Hello World", "utf8")
+			expect(result.encoding).toBe("cp852")
+			expect(result.usedFallback).toBe(true)
+		})
+
+		it("should propagate write errors instead of retrying as UTF-8", async () => {
+			const mockConfig = {
+				get: vi.fn().mockReturnValue("cp852"),
+			}
+			mockedVscode.workspace.getConfiguration = vi.fn().mockReturnValue(mockConfig)
+
+			mockedIconv.encodingExists = vi.fn().mockReturnValue(true)
+			mockedIconv.encode = vi.fn().mockReturnValue(Buffer.from([0x8d, 0x8f, 0xa7]))
+			mockedFs.writeFile = vi.fn().mockRejectedValue(new Error("EACCES"))
+
+			await expect(writeFileWithEncoding("/path/to/file.txt", "čćž")).rejects.toThrow("EACCES")
+			expect(mockedFs.writeFile).toHaveBeenCalledTimes(1)
+		})
+	})
+})
diff --git a/src/utils/fileEncoding.ts b/src/utils/fileEncoding.ts
new file mode 100644
index 00000000000..75e7ed0503a
--- /dev/null
+++ b/src/utils/fileEncoding.ts
@@ -0,0 +1,276 @@
+import * as vscode from "vscode"
+import * as fs from "fs/promises"
+import * as iconv from "iconv-lite"
+
+/**
+ * Encoding mapping from VSCode encoding names to iconv-lite encoding names.
+ * VSCode uses specific encoding names that may differ from iconv-lite's names.
+ *
+ * @see https://code.visualstudio.com/docs/editor/codebasics#_file-encoding-support
+ */
+const ENCODING_MAP: Record<string, string> = {
+	// UTF variants
+	// The BOM variants map to plain UTF-8: the UTF-8 paths in this module neither strip nor add a BOM.
+	utf8: "utf8",
+	"utf-8": "utf8",
+	utf8bom: "utf8",
+	"utf-8-bom": "utf8",
+	utf16le: "utf16le",
+	utf16be: "utf16be",
+
+	// ISO encodings
+	iso88591: "iso88591",
+	iso885915: "iso885915",
+	iso88592: "iso88592",
+	iso88593: "iso88593",
+	iso88594: "iso88594",
+	iso88595: "iso88595",
+	iso88596: "iso88596",
+	iso88597: "iso88597",
+	iso88598: "iso88598",
+	iso88599: "iso88599",
+	iso885910: "iso885910",
+	iso885911: "iso885911",
+	iso885913: "iso885913",
+	iso885914: "iso885914",
+	iso885916: "iso885916",
+
+	// Windows code pages
+	windows1250: "windows1250",
+	windows1251: "windows1251",
+	windows1252: "windows1252",
+	windows1253: "windows1253",
+	windows1254: "windows1254",
+	windows1255: "windows1255",
+	windows1256: "windows1256",
+	windows1257: "windows1257",
+	windows1258: "windows1258",
+
+	// DOS code pages
+	cp437: "cp437",
+	cp850: "cp850",
+	cp852: "cp852",
+	cp855: "cp855",
+	cp857: "cp857",
+	cp860: "cp860",
+	cp861: "cp861",
+	cp862: "cp862",
+	cp863: "cp863",
+	cp864: "cp864",
+	cp865: "cp865",
+	cp866: "cp866",
+	cp869: "cp869",
+	cp874: "cp874",
+	cp932: "cp932",
+	cp936: "cp936",
+	cp949: "cp949",
+	cp950: "cp950",
+
+	// KOI8 variants
+	koi8r: "koi8r",
+	"koi8-r": "koi8r",
+	koi8u: "koi8u",
+	"koi8-u": "koi8u",
+
+	// EUC variants
+	eucjp: "eucjp",
+	"euc-jp": "eucjp",
+	euckr: "euckr",
+	"euc-kr": "euckr",
+	euccn: "euccn",
+	"euc-cn": "euccn",
+
+	// Japanese
+	shiftjis: "shiftjis",
+	"shift-jis": "shiftjis",
+	shift_jis: "shiftjis",
+
+	// Chinese
+	gb2312: "gb2312",
+	gbk: "gbk",
+	gb18030: "gb18030",
+	big5: "big5",
+	"big5-hkscs": "big5hkscs",
+
+	// Classic Mac OS encodings
+	macroman: "macroman",
+	maccyrillic: "maccyrillic",
+}
+
+/**
+ * Result of reading a file through readFileWithEncoding.
+ */
+export interface EncodingResult {
+	/** The decoded file content. */
+	content: string
+	/** The encoding name as configured in VSCode (not the normalized iconv-lite name). */
+	encoding: string
+	/** True when the configured encoding could not be used and the bytes were decoded as UTF-8 instead. */
+	usedFallback: boolean
+}
+
+/**
+ * Get the file encoding setting from VSCode workspace configuration.
+ * This respects the user's `files.encoding` setting.
+ *
+ * @param filePath - The absolute path to the file (used for resource-scoped settings)
+ * @returns The encoding name configured in VSCode settings, defaults to "utf8"
+ */
+export function getFileEncoding(filePath: string): string {
+	const fileUri = vscode.Uri.file(filePath)
+	const config = vscode.workspace.getConfiguration("files", fileUri)
+	const encoding = config.get<string>("encoding", "utf8")
+	return encoding
+}
+
+/**
+ * Convert a VSCode encoding name to an iconv-lite compatible encoding name.
+ *
+ * The hyphen-stripped lower-case name is tried first, then the plain lower-case
+ * name. Only own keys of ENCODING_MAP are consulted, so inputs such as
+ * "constructor" or "__proto__" never resolve to members inherited from Object.prototype.
+ *
+ * @param vscodeEncoding - The encoding name from VSCode settings
+ * @returns The iconv-lite compatible encoding name, or the input unchanged when it is not in the map
+ */
+export function normalizeEncoding(vscodeEncoding: string): string {
+	const lowerCased = vscodeEncoding.toLowerCase()
+	for (const candidate of [lowerCased.replace(/-/g, ""), lowerCased]) {
+		const mapped = Object.prototype.hasOwnProperty.call(ENCODING_MAP, candidate)
+			? ENCODING_MAP[candidate]
+			: undefined
+		if (mapped) {
+			return mapped
+		}
+	}
+	return vscodeEncoding
+}
+
+/**
+ * Check if an encoding is supported by iconv-lite.
+ *
+ * @param encoding - The encoding name to check
+ * @returns true if the encoding is supported
+ */
+export function isEncodingSupported(encoding: string): boolean {
+	return iconv.encodingExists(normalizeEncoding(encoding))
+}
+
+/**
+ * Read a file with the correct encoding from VSCode settings.
+ * Returns the content as a UTF-8 string (for use with AI models).
+ *
+ * If the file's configured encoding is not supported or conversion fails,
+ * falls back to UTF-8 and logs a warning.
+ *
+ * @param filePath - The absolute path to the file to read
+ * @returns Promise containing the file content as UTF-8 string, the original encoding, and whether fallback was used
+ */
+export async function readFileWithEncoding(filePath: string): Promise<EncodingResult> {
+	const configuredEncoding = getFileEncoding(filePath)
+	const normalizedEncoding = normalizeEncoding(configuredEncoding)
+
+	// Read the file as a buffer
+	const buffer = await fs.readFile(filePath)
+
+	// If encoding is UTF-8, just decode directly
+	if (normalizedEncoding === "utf8") {
+		return {
+			content: buffer.toString("utf8"),
+			encoding: configuredEncoding,
+			usedFallback: false,
+		}
+	}
+
+	// Check if the encoding is supported
+	if (!iconv.encodingExists(normalizedEncoding)) {
+		console.warn(
+			`Encoding "${configuredEncoding}" (normalized: "${normalizedEncoding}") is not supported. Falling back to UTF-8.`,
+		)
+		return {
+			content: buffer.toString("utf8"),
+			encoding: configuredEncoding,
+			usedFallback: true,
+		}
+	}
+
+	// Convert from the file's encoding to UTF-8
+	try {
+		const content = iconv.decode(buffer, normalizedEncoding)
+		return {
+			content,
+			encoding: configuredEncoding,
+			usedFallback: false,
+		}
+	} catch (error) {
+		console.warn(`Failed to decode file with encoding "${configuredEncoding}": ${error}. Falling back to UTF-8.`)
+		return {
+			content: buffer.toString("utf8"),
+			encoding: configuredEncoding,
+			usedFallback: true,
+		}
+	}
+}
+
+/**
+ * Write content to a file with the correct encoding from VSCode settings.
+ * The content is expected to be a UTF-8 string (from AI models) and will be
+ * converted to the target encoding.
+ *
+ * If the file's configured encoding is not supported or conversion fails,
+ * falls back to UTF-8 and logs a warning.
+ * Errors from writing the file itself are not caught and propagate to the caller.
+ *
+ * @param filePath - The absolute path to the file to write
+ * @param content - The content to write (as UTF-8 string)
+ * @returns Promise containing the encoding used and whether fallback was used
+ */
+export async function writeFileWithEncoding(
+	filePath: string,
+	content: string,
+): Promise<{ encoding: string; usedFallback: boolean }> {
+	const configuredEncoding = getFileEncoding(filePath)
+	const normalizedEncoding = normalizeEncoding(configuredEncoding)
+
+	// If encoding is UTF-8, just write directly
+	if (normalizedEncoding === "utf8") {
+		await fs.writeFile(filePath, content, "utf8")
+		return {
+			encoding: configuredEncoding,
+			usedFallback: false,
+		}
+	}
+
+	// Check if the encoding is supported
+	if (!iconv.encodingExists(normalizedEncoding)) {
+		console.warn(
+			`Encoding "${configuredEncoding}" (normalized: "${normalizedEncoding}") is not supported for writing. Falling back to UTF-8.`,
+		)
+		await fs.writeFile(filePath, content, "utf8")
+		return {
+			encoding: configuredEncoding,
+			usedFallback: true,
+		}
+	}
+
+	// Convert from UTF-8 to the target encoding. Only the conversion is guarded:
+	// a failing fs.writeFile must reach the caller instead of being reported as an
+	// encoding problem and retried as UTF-8.
+	let buffer: Buffer
+	try {
+		buffer = iconv.encode(content, normalizedEncoding)
+	} catch (error) {
+		console.warn(`Failed to encode content with encoding "${configuredEncoding}": ${error}. Falling back to UTF-8.`)
+		await fs.writeFile(filePath, content, "utf8")
+		return {
+			encoding: configuredEncoding,
+			usedFallback: true,
+		}
+	}
+
+	await fs.writeFile(filePath, buffer)
+	return {
+		encoding: configuredEncoding,
+		usedFallback: false,
+	}
+}