From 1a738362cf4abce33dc431b34c5f3a29afa46486 Mon Sep 17 00:00:00 2001 From: boris Date: Sun, 27 Jun 2021 16:07:42 -0600 Subject: [PATCH] convert bash's ANSI-C quoted strings to their value --- README.md | 24 ++++++++- browser.js | 3 +- deno.ts | 3 +- lib/index.ts | 4 +- lib/string-utils.ts | 69 ++++++++++++++++++++++++ lib/yargs-parser-types.ts | 7 ++- lib/yargs-parser.ts | 25 ++++++--- test/yargs-parser.cjs | 110 ++++++++++++++++++++++++++++++++++++++ 8 files changed, 233 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 26148407..c732f312 100644 --- a/README.md +++ b/README.md @@ -248,7 +248,7 @@ $ node example.js --foo=99.3 * default: `true` * key: `parse-positional-numbers` -Should positional keys that look like numbers be treated as such. +Should positional keys that look like numbers be treated as such? ```console $ node example.js 99.3 @@ -502,6 +502,28 @@ $ node example.js --unknown-option --known-option 2 --string-option --unknown-op { _: ['--unknown-option'], knownOption: 2, stringOption: '--unknown-option2' } ``` +### parse bash ANSI-C strings + +* default: `false` +* key: `parse-bash-ansi-c-strings` + +Should arguments that look like [ANSI-C quoted strings](https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html) (a bash-only feature) be treated as such? + +_if disabled:_ + +```console +> const parser = require('yargs-parser') +> parser("--foo $'hello world'") +{ _: [], foo: "$'hello world'" } +``` + +_if enabled:_ + +```console +> parser("--foo $'hello world'", {configuration: {'parse-bash-ansi-c-strings': true}}) +{ _: [], foo: 'hello world' } +``` + ## Supported Node.js Versions Libraries in this ecosystem make a best effort to track diff --git a/browser.js b/browser.js index 241202c7..16f8ad5c 100644 --- a/browser.js +++ b/browser.js @@ -2,7 +2,7 @@ // specific libraries, such as "path". // // TODO: figure out reasonable web equivalents for "resolve", "normalize", etc. 
-import { camelCase, decamelize, looksLikeNumber } from './build/lib/string-utils.js' +import { camelCase, decamelize, looksLikeNumber, parseAnsiCQuotedString } from './build/lib/string-utils.js' import { YargsParser } from './build/lib/yargs-parser.js' const parser = new YargsParser({ cwd: () => { return '' }, @@ -25,5 +25,6 @@ yargsParser.detailed = function (args, opts) { yargsParser.camelCase = camelCase yargsParser.decamelize = decamelize yargsParser.looksLikeNumber = looksLikeNumber +yargsParser.parseAnsiCQuotedString = parseAnsiCQuotedString export default yargsParser diff --git a/deno.ts b/deno.ts index 1074dc64..0a8736fe 100644 --- a/deno.ts +++ b/deno.ts @@ -3,7 +3,7 @@ // // TODO: find reasonable replacement for require logic. import * as path from 'https://deno.land/std/path/mod.ts' -import { camelCase, decamelize, looksLikeNumber } from './build/lib/string-utils.js' +import { camelCase, decamelize, looksLikeNumber, parseAnsiCQuotedString } from './build/lib/string-utils.js' import { YargsParser } from './build/lib/yargs-parser.js' import type { Arguments, ArgsInput, Parser, Options, DetailedArguments } from './build/lib/yargs-parser-types.d.ts' @@ -34,5 +34,6 @@ yargsParser.detailed = function (args: ArgsInput, opts?: Partial): Deta yargsParser.camelCase = camelCase yargsParser.decamelize = decamelize yargsParser.looksLikeNumber = looksLikeNumber +yargsParser.parseAnsiCQuotedString = parseAnsiCQuotedString export default yargsParser diff --git a/lib/index.ts b/lib/index.ts index c0bfac81..83e9d6e7 100644 --- a/lib/index.ts +++ b/lib/index.ts @@ -11,7 +11,7 @@ import { format } from 'util' import { readFileSync } from 'fs' import { normalize, resolve } from 'path' import { ArgsInput, Arguments, Parser, Options, DetailedArguments } from './yargs-parser-types.js' -import { camelCase, decamelize, looksLikeNumber } from './string-utils.js' +import { camelCase, decamelize, looksLikeNumber, parseAnsiCQuotedString } from './string-utils.js' import { 
YargsParser } from './yargs-parser.js' // See https://github.com/yargs/yargs-parser#supported-nodejs-versions for our @@ -58,4 +58,6 @@ yargsParser.detailed = function (args: ArgsInput, opts?: Partial): Deta yargsParser.camelCase = camelCase yargsParser.decamelize = decamelize yargsParser.looksLikeNumber = looksLikeNumber +yargsParser.parseAnsiCQuotedString = parseAnsiCQuotedString + export default yargsParser diff --git a/lib/string-utils.ts b/lib/string-utils.ts index 5932a4c7..0a84d0bc 100644 --- a/lib/string-utils.ts +++ b/lib/string-utils.ts @@ -61,3 +61,72 @@ export function looksLikeNumber (x: null | undefined | number | string): boolean if (/^0[^.]/.test(x)) return false return /^[-]?(?:\d+(?:\.\d*)?|\.\d+)(e[-+]?\d+)?$/.test(x) } + +// ANSI-C quoted strings are a bash-only feature and have the form $'some text' +// https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html +// +// https://git.savannah.gnu.org/cgit/bash.git/tree/lib/sh/strtrans.c +export function parseAnsiCQuotedString (str: string): string { + function unescapeChar (m: string): string { + switch (m.charAt(1)) { + case '\\': + return '\\' + case 'a': + return '\a' // eslint-disable-line + case 'b': + return '\b' + case 'e': + case 'E': + return '\x1B' + case 'f': + return '\f' + case 'n': + return '\n' + case 'r': + return '\r' + case 't': + return '\t' + case 'v': + return '\v' + case "'": + return "'" + case '"': + return '"' + case '?': + return '?' + case 'c': + // bash handles all characters by considering the first byte + // of its UTF-8 input and can produce invalid UTF-8, whereas + // JavaScript stores strings in UTF-16 + if (m.codePointAt(2)! > 127) { + throw Error("non-ASCII control character in ANSI-C quoted string: '\\u{" + m.codePointAt(2)!.toString(16) + "}'") + } + // If this produces a 0x00 (null) character, it will cause bash to + // terminate the string at that character, but we return the null + // character in the result. + return m[2] === '?' ? 
'\x7F' : String.fromCodePoint(m[2].toUpperCase().codePointAt(0)! & 0b00011111) + case 'x': + case 'u': + case 'U': + // Hexadecimal character literal + // Unlike bash, this will error if the code point is greater than 10FFFF + return String.fromCodePoint(parseInt(m.slice(2), 16)) + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + // Octal character literal + return String.fromCodePoint(parseInt(m.slice(1), 8) % 256) + default: + // There must be a mis-match between ANSI_BACKSLASHES and the switch statement + throw Error('unhandled character in ANSI-C escape code: ' + JSON.stringify(m)) + } + } + + const ANSI_BACKSLASHES = /\\(\\|a|b|e|E|f|n|r|t|v|'|"|\?|[0-7]{1,3}|x[0-9A-Fa-f]{1,2}|u[0-9A-Fa-f]{1,4}|U[0-9A-Fa-f]{1,8}|c.)/gs + return str.substring(2, str.length - 1).replace(ANSI_BACKSLASHES, unescapeChar) +} diff --git a/lib/yargs-parser-types.ts b/lib/yargs-parser-types.ts index 9e03ff2c..f90c6f51 100644 --- a/lib/yargs-parser-types.ts +++ b/lib/yargs-parser-types.ts @@ -72,10 +72,12 @@ export interface Configuration { 'nargs-eats-options': boolean; /** The prefix to use for negated boolean variables. Default is `'no-'` */ 'negation-prefix': string; - /** Should positional values that look like numbers be parsed? Default is `true` */ - 'parse-positional-numbers': boolean; + /** Should positional values that look like ANSI-C strings (a bash-only feature) be parsed? Default is `false` */ + 'parse-bash-ansi-c-strings': boolean; /** Should keys that look like numbers be treated as such? Default is `true` */ 'parse-numbers': boolean; + /** Should positional values that look like numbers be parsed? Default is `true` */ + 'parse-positional-numbers': boolean; /** Should unparsed flags be stored in -- or _? Default is `false` */ 'populate--': boolean; /** Should a placeholder be added for keys not set via the corresponding CLI argument?
Default is `false` */ @@ -155,6 +157,7 @@ export interface Parser { camelCase(str: string): string; decamelize(str: string, joinString?: string): string; looksLikeNumber(x: null | undefined | number | string): boolean; + parseAnsiCQuotedString(str: string): string; } export type StringFlag = Dictionary; diff --git a/lib/yargs-parser.ts b/lib/yargs-parser.ts index 1499fc60..30ac855f 100644 --- a/lib/yargs-parser.ts +++ b/lib/yargs-parser.ts @@ -29,7 +29,12 @@ import type { YargsParserMixin } from './yargs-parser-types.js' import { DefaultValuesForTypeKey } from './yargs-parser-types.js' -import { camelCase, decamelize, looksLikeNumber } from './string-utils.js' +import { + camelCase, + decamelize, + looksLikeNumber, + parseAnsiCQuotedString +} from './string-utils.js' let mixin: YargsParserMixin export class YargsParser { @@ -75,6 +80,7 @@ export class YargsParser { 'negation-prefix': 'no-', 'parse-numbers': true, 'parse-positional-numbers': true, + 'parse-bash-ansi-c-strings': false, 'populate--': false, 'set-placeholder-key': false, 'short-option-groups': true, @@ -607,11 +613,18 @@ export class YargsParser { function processValue (key: string, val: any) { // strings may be quoted, clean this up as we assign values. - if (typeof val === 'string' && - (val[0] === "'" || val[0] === '"') && - val[val.length - 1] === val[0] - ) { - val = val.substring(1, val.length - 1) + if (typeof val === 'string') { + if ((val[0] === "'" || val[0] === '"') && + val[val.length - 1] === val[0] + ) { + val = val.substring(1, val.length - 1) + } else if (configuration['parse-bash-ansi-c-strings'] && val.slice(0, 2) === "$'" && val[val.length - 1] === "'") { + try { + val = parseAnsiCQuotedString(val) + } catch (err) { + error = err + } + } } // handle parsing boolean arguments --foo=true --bar false. 
diff --git a/test/yargs-parser.cjs b/test/yargs-parser.cjs index 95bee24a..cd32d182 100644 --- a/test/yargs-parser.cjs +++ b/test/yargs-parser.cjs @@ -3589,6 +3589,116 @@ describe('yargs-parser', function () { }) }) + // see: https://github.com/yargs/yargs-parser/issues/346 + describe('ANSI-C quoted strings', () => { + it('does not parse ANSI-C quoted strings by default', function () { + const args = parser(["$'\\n'"]) + args._[0].should.equal("$'\\n'") + const args2 = parser("--foo $'\\t'") + args2.foo.should.equal("$'\\t'") + }) + + it('handles bash ANSI-C quoted strings', () => { + const args = parser("--foo $'text with \\n newline'", { + configuration: { + 'parse-bash-ansi-c-strings': true + } + }) + args.foo.should.equal('text with \n newline') + + // Double quotes shouldn't work + const args2 = parser('--foo $"text without \\n newline"', { + configuration: { + 'parse-bash-ansi-c-strings': true + } + }) + args2.foo.should.equal('$"text without \\n newline"') + + const characters = '\\\\' + '\\a' + '\\b' + '\\e' + '\\E' + '\\f' + '\\n' + '\\r' + '\\t' + '\\v' + "\\'" + '\\"' + '\\?' 
+ const args3 = parser("--foo $'" + characters + "'", { + configuration: { + 'parse-bash-ansi-c-strings': true + } + }) + args3.foo.should.equal('\\\a\b\u001b\u001b\f\n\r\t\v\'"?') // eslint-disable-line + + const args4 = parser("--foo $'text \\xFFFF with \\xFF hex'", { + configuration: { + 'parse-bash-ansi-c-strings': true + } + }) + args4.foo.should.equal('text \u00FFFF with \u00FF hex') + const args5 = parser("--foo $'text \\uFFFFFF\\uFFFF with \\uFF hex'", { + configuration: { + 'parse-bash-ansi-c-strings': true + } + }) + args5.foo.should.equal('text \uFFFFFF\uFFFF with \u00FF hex') + const args6 = parser("--foo $'text \\U10FFFF\\UFFFF with \\U00FF hex'", { + configuration: { + 'parse-bash-ansi-c-strings': true + } + }) + const longCodePoint = String.fromCodePoint(0x10FFFF) + args6.foo.should.equal(`text ${longCodePoint}\uFFFF with \u00FF hex`) + + const args7 = parser("--foo $'text \\cAB \\cz with \\c12 control \\c011 chars'", { + configuration: { + 'parse-bash-ansi-c-strings': true + } + }) + args7.foo.should.equal('text \u0001B \u001A with \u00112 control \u001011 chars') + + const args8 = parser("--foo $'text \\0 \\001 with \\12 \\123 \\129 octal'", { + configuration: { + 'parse-bash-ansi-c-strings': true + } + }) + args8.foo.should.equal('text \u0000 \u0001 with \u000A \u0053 \u000A9 octal') + }) + + it('handles edge case characters in control code escapes', () => { + const args = parser("--foo $'\\c\\t'", { + configuration: { + 'parse-bash-ansi-c-strings': true + } + }) + args.foo.should.equal('\x1Ct') + + // Check that the regex matches whitespace characters + const args2 = parser("--foo $'\\c\nt'", { + configuration: { + 'parse-bash-ansi-c-strings': true + } + }) + args2.foo.should.equal('\nt') + + const args3 = parser("--foo $'\\c '", { + configuration: { + 'parse-bash-ansi-c-strings': true + } + }) + args3.foo.should.equal('\x00') + + // This is a special case + const args4 = parser("--foo $'\\c?'", { + configuration: { + 'parse-bash-ansi-c-strings': 
true + } + }) + args4.foo.should.equal('\x7F') + }) + + it('throws error for non-ASCII characters in control code escapes', () => { + const args = parser.detailed("--foo $'\\c\u0080'", { + configuration: { + 'parse-bash-ansi-c-strings': true + } + }) + args.error.message.should.match(/non-ASCII control character in ANSI-C quoted/) + }) + }) + // see: https://github.com/yargs/yargs-parser/issues/144 it('number/string types should use default when no right-hand value', () => { let argv = parser(['--foo'], {