Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support bash's ANSI-C quoted strings #366

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ $ node example.js --foo=99.3
* default: `true`
* key: `parse-positional-numbers`

Should positional keys that look like numbers be treated as such.
Should positional keys that look like numbers be treated as such?

```console
$ node example.js 99.3
Expand Down Expand Up @@ -502,6 +502,28 @@ $ node example.js --unknown-option --known-option 2 --string-option --unknown-op
{ _: ['--unknown-option'], knownOption: 2, stringOption: '--unknown-option2' }
```

### parse bash ANSI-C strings

* default: `false`
* key: `parse-bash-ansi-c-strings`

Should arguments that look like [ANSI-C quoted strings](https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html) (a bash-only feature) be treated as such?

_if disabled:_

```console
> const parser = require('yargs-parser')
> parser("--foo $'hello world'")
{ _: [], foo: "$'hello world'" }
```

_if enabled:_

```console
> parser("--foo $'hello world'", {configuration: {'parse-bash-ansi-c-strings': true}})
{ _: [], foo: 'hello world' }
```

## Supported Node.js Versions

Libraries in this ecosystem make a best effort to track
Expand Down
3 changes: 2 additions & 1 deletion browser.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// specific libraries, such as "path".
//
// TODO: figure out reasonable web equivalents for "resolve", "normalize", etc.
import { camelCase, decamelize, looksLikeNumber } from './build/lib/string-utils.js'
import { camelCase, decamelize, looksLikeNumber, parseAnsiCQuotedString } from './build/lib/string-utils.js'
import { YargsParser } from './build/lib/yargs-parser.js'
const parser = new YargsParser({
cwd: () => { return '' },
Expand All @@ -25,5 +25,6 @@ yargsParser.detailed = function (args, opts) {
yargsParser.camelCase = camelCase
yargsParser.decamelize = decamelize
yargsParser.looksLikeNumber = looksLikeNumber
yargsParser.parseAnsiCQuotedString = parseAnsiCQuotedString

export default yargsParser
3 changes: 2 additions & 1 deletion deno.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
//
// TODO: find reasonable replacement for require logic.
import * as path from 'https://deno.land/std/path/mod.ts'
import { camelCase, decamelize, looksLikeNumber } from './build/lib/string-utils.js'
import { camelCase, decamelize, looksLikeNumber, parseAnsiCQuotedString } from './build/lib/string-utils.js'
import { YargsParser } from './build/lib/yargs-parser.js'
import type { Arguments, ArgsInput, Parser, Options, DetailedArguments } from './build/lib/yargs-parser-types.d.ts'

Expand Down Expand Up @@ -34,5 +34,6 @@ yargsParser.detailed = function (args: ArgsInput, opts?: Partial<Options>): Deta
yargsParser.camelCase = camelCase
yargsParser.decamelize = decamelize
yargsParser.looksLikeNumber = looksLikeNumber
yargsParser.parseAnsiCQuotedString = parseAnsiCQuotedString

export default yargsParser
4 changes: 3 additions & 1 deletion lib/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { format } from 'util'
import { readFileSync } from 'fs'
import { normalize, resolve } from 'path'
import { ArgsInput, Arguments, Parser, Options, DetailedArguments } from './yargs-parser-types.js'
import { camelCase, decamelize, looksLikeNumber } from './string-utils.js'
import { camelCase, decamelize, looksLikeNumber, parseAnsiCQuotedString } from './string-utils.js'
import { YargsParser } from './yargs-parser.js'

// See https://github.com/yargs/yargs-parser#supported-nodejs-versions for our
Expand Down Expand Up @@ -58,4 +58,6 @@ yargsParser.detailed = function (args: ArgsInput, opts?: Partial<Options>): Deta
yargsParser.camelCase = camelCase
yargsParser.decamelize = decamelize
yargsParser.looksLikeNumber = looksLikeNumber
yargsParser.parseAnsiCQuotedString = parseAnsiCQuotedString

export default yargsParser
69 changes: 69 additions & 0 deletions lib/string-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,72 @@ export function looksLikeNumber (x: null | undefined | number | string): boolean
if (/^0[^.]/.test(x)) return false
return /^[-]?(?:\d+(?:\.\d*)?|\.\d+)(e[-+]?\d+)?$/.test(x)
}

/**
 * Decode a bash ANSI-C quoted string of the form `$'some text'`.
 *
 * ANSI-C quoting is a bash-only feature:
 * https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html
 * The reference implementation lives in bash's `lib/sh/strtrans.c`:
 * https://git.savannah.gnu.org/cgit/bash.git/tree/lib/sh/strtrans.c
 *
 * The first two characters and the last character of `str` are dropped
 * without being inspected, so the caller is expected to have verified that
 * `str` starts with `$'` and ends with `'`. Backslash sequences that are not
 * recognised are left in the result untouched.
 *
 * @param str - the quoted string, including the leading `$'` and trailing `'`
 * @returns the text between the quotes with backslash escapes decoded
 * @throws Error if a `\cX` escape names a non-ASCII character
 * @throws RangeError if a `\U` escape names a code point above 0x10FFFF
 */
export function parseAnsiCQuotedString (str: string): string {
  // Without the `u` flag `.` consumes exactly one UTF-16 code unit; the `s`
  // flag lets `\c` be followed by a line terminator.
  const ANSI_BACKSLASHES = /\\(\\|a|b|e|E|f|n|r|t|v|'|"|\?|[0-7]{1,3}|x[0-9A-Fa-f]{1,2}|u[0-9A-Fa-f]{1,4}|U[0-9A-Fa-f]{1,8}|c.)/gs

  // Translate one matched escape sequence (including its leading backslash).
  function unescapeChar (m: string): string {
    switch (m.charAt(1)) {
      case '\\':
        return '\\'
      case 'a':
        // JavaScript has no `\a` escape ('\a' is just the letter "a"), so the
        // alert/bell character has to be spelled numerically.
        return '\x07'
      case 'b':
        return '\b'
      case 'e':
      case 'E':
        return '\x1B'
      case 'f':
        return '\f'
      case 'n':
        return '\n'
      case 'r':
        return '\r'
      case 't':
        return '\t'
      case 'v':
        return '\v'
      case "'":
        return "'"
      case '"':
        return '"'
      case '?':
        return '?'
      case 'c': {
        // bash handles all characters by considering the first byte
        // of its UTF-8 input and can produce invalid UTF-8, whereas
        // JavaScript stores strings in UTF-16
        const code = m.charCodeAt(2)
        if (code > 127) {
          throw new Error("non-ASCII control character in ANSI-C quoted string: '\\u{" + code.toString(16) + "}'")
        }
        // If this produces a 0x00 (null) character, it will cause bash to
        // terminate the string at that character, but we return the null
        // character in the result.
        // ASCII upper- and lower-case letters differ only in bit 0x20, which
        // the mask discards, so no explicit case folding is needed.
        return m[2] === '?' ? '\x7F' : String.fromCharCode(code & 0b00011111)
      }
      case 'x':
      case 'u':
      case 'U':
        // Hexadecimal character literal
        // Unlike bash, this will error if the code point is greater than 10FFFF
        return String.fromCodePoint(parseInt(m.slice(2), 16))
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
        // Octal character literal, reduced modulo 256
        return String.fromCodePoint(parseInt(m.slice(1), 8) % 256)
      default:
        // There must be a mis-match between ANSI_BACKSLASHES and the switch statement
        throw new Error('unhandled character in ANSI-C escape code: ' + JSON.stringify(m))
    }
  }

  return str.substring(2, str.length - 1).replace(ANSI_BACKSLASHES, unescapeChar)
}
7 changes: 5 additions & 2 deletions lib/yargs-parser-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,12 @@ export interface Configuration {
'nargs-eats-options': boolean;
/** The prefix to use for negated boolean variables. Default is `'no-'` */
'negation-prefix': string;
/** Should positional values that look like numbers be parsed? Default is `true` */
'parse-positional-numbers': boolean;
/** Should positional values that look like ANSI-C strings (a bash-only feature) be parsed? Default is `false` */
'parse-bash-ansi-c-strings': boolean;
/** Should keys that look like numbers be treated as such? Default is `true` */
'parse-numbers': boolean;
/** Should positional values that look like numbers be parsed? Default is `true` */
'parse-positional-numbers': boolean;
/** Should unparsed flags be stored in -- or _? Default is `false` */
'populate--': boolean;
/** Should a placeholder be added for keys not set via the corresponding CLI argument? Default is `false` */
Expand Down Expand Up @@ -155,6 +157,7 @@ export interface Parser {
camelCase(str: string): string;
decamelize(str: string, joinString?: string): string;
looksLikeNumber(x: null | undefined | number | string): boolean;
parseAnsiCQuotedString(str: string): string;
}

export type StringFlag = Dictionary<string[]>;
Expand Down
25 changes: 19 additions & 6 deletions lib/yargs-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,12 @@ import type {
YargsParserMixin
} from './yargs-parser-types.js'
import { DefaultValuesForTypeKey } from './yargs-parser-types.js'
import { camelCase, decamelize, looksLikeNumber } from './string-utils.js'
import {
camelCase,
decamelize,
looksLikeNumber,
parseAnsiCQuotedString
} from './string-utils.js'

let mixin: YargsParserMixin
export class YargsParser {
Expand Down Expand Up @@ -75,6 +80,7 @@ export class YargsParser {
'negation-prefix': 'no-',
'parse-numbers': true,
'parse-positional-numbers': true,
'parse-bash-ansi-c-strings': false,
'populate--': false,
'set-placeholder-key': false,
'short-option-groups': true,
Expand Down Expand Up @@ -607,11 +613,18 @@ export class YargsParser {

function processValue (key: string, val: any) {
// strings may be quoted, clean this up as we assign values.
if (typeof val === 'string' &&
(val[0] === "'" || val[0] === '"') &&
val[val.length - 1] === val[0]
) {
val = val.substring(1, val.length - 1)
if (typeof val === 'string') {
if ((val[0] === "'" || val[0] === '"') &&
val[val.length - 1] === val[0]
) {
val = val.substring(1, val.length - 1)
} else if (configuration['parse-bash-ansi-c-strings'] && val.slice(0, 2) === "$'" && val[val.length - 1] === "'") {
try {
val = parseAnsiCQuotedString(val)
} catch (err) {
error = err
}
}
}

// handle parsing boolean arguments --foo=true --bar false.
Expand Down
110 changes: 110 additions & 0 deletions test/yargs-parser.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -3589,6 +3589,116 @@ describe('yargs-parser', function () {
})
})

// see: https://github.com/yargs/yargs-parser/issues/346
// Tests for the opt-in 'parse-bash-ansi-c-strings' configuration flag, which
// decodes values written in bash's $'...' ANSI-C quoting syntax.
describe('ANSI-C quoted strings', () => {
// With no configuration the $'...' wrapper and its escapes are left as-is,
// both for positional arguments and for option values.
it('does not parse ANSI-C quoted strings by default', function () {
const args = parser(["$'\\n'"])
args._[0].should.equal("$'\\n'")
const args2 = parser("--foo $'\\t'")
args2.foo.should.equal("$'\\t'")
})

it('handles bash ANSI-C quoted strings', () => {
// Basic case: \n inside $'...' becomes a real newline.
const args = parser("--foo $'text with \\n newline'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args.foo.should.equal('text with \n newline')

// Double quotes shouldn't work
const args2 = parser('--foo $"text without \\n newline"', {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args2.foo.should.equal('$"text without \\n newline"')

// Every single-character escape, concatenated into one quoted string.
const characters = '\\\\' + '\\a' + '\\b' + '\\e' + '\\E' + '\\f' + '\\n' + '\\r' + '\\t' + '\\v' + "\\'" + '\\"' + '\\?'
const args3 = parser("--foo $'" + characters + "'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
// NOTE(review): `\a` is not a JavaScript escape sequence, so the expected
// string below contains the letter "a" at that position rather than BEL
// (0x07), which is what bash produces for \a. As written this assertion does
// not check for the bell character — confirm whether that is intended.
args3.foo.should.equal('\\\a\b\u001b\u001b\f\n\r\t\v\'"?') // eslint-disable-line

// \x takes at most two hex digits; the trailing "FF" stays literal.
const args4 = parser("--foo $'text \\xFFFF with \\xFF hex'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args4.foo.should.equal('text \u00FFFF with \u00FF hex')
// \u takes at most four hex digits; the trailing "FF" stays literal.
const args5 = parser("--foo $'text \\uFFFFFF\\uFFFF with \\uFF hex'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args5.foo.should.equal('text \uFFFFFF\uFFFF with \u00FF hex')
// \U accepts longer sequences, here reaching the code point U+10FFFF.
const args6 = parser("--foo $'text \\U10FFFF\\UFFFF with \\U00FF hex'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
const longCodePoint = String.fromCodePoint(0x10FFFF)
args6.foo.should.equal(`text ${longCodePoint}\uFFFF with \u00FF hex`)

// \c consumes exactly one following character; anything after it is literal
// (e.g. \cAB is control-A followed by "B").
const args7 = parser("--foo $'text \\cAB \\cz with \\c12 control \\c011 chars'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args7.foo.should.equal('text \u0001B \u001A with \u00112 control \u001011 chars')

// Octal escapes take one to three octal digits; "9" is not an octal digit,
// so \129 is \12 followed by a literal "9".
const args8 = parser("--foo $'text \\0 \\001 with \\12 \\123 \\129 octal'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args8.foo.should.equal('text \u0000 \u0001 with \u000A \u0053 \u000A9 octal')
})

it('handles edge case characters in control code escapes', () => {
// The backslash right after \c is itself the control operand (0x1C), so the
// following "t" is left as a literal character.
const args = parser("--foo $'\\c\\t'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args.foo.should.equal('\x1Ct')

// Check that the regex matches whitespace characters
const args2 = parser("--foo $'\\c\nt'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args2.foo.should.equal('\nt')

// \c followed by a space is expected to yield the NUL character.
const args3 = parser("--foo $'\\c '", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args3.foo.should.equal('\x00')

// This is a special case
const args4 = parser("--foo $'\\c?'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args4.foo.should.equal('\x7F')
})

// The failure is not thrown to the caller here: it is read from the `error`
// property of the result returned by parser.detailed().
it('throws error for non-ASCII characters in control code escapes', () => {
const args = parser.detailed("--foo $'\\c\u0080'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args.error.message.should.match(/non-ASCII control character in ANSI-C quoted/)
})
})

// see: https://github.com/yargs/yargs-parser/issues/144
it('number/string types should use default when no right-hand value', () => {
let argv = parser(['--foo'], {
Expand Down