diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index b35b0a4cf7fcb6..09c3b2a4c26a85 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -143,6 +143,7 @@ # Actions /.github/workflows/* @nodejs/actions +/.github/workflows/create-release-proposal.yml @nodejs/releasers /tools/actions/* @nodejs/actions # Test runner diff --git a/.github/workflows/coverage-linux-without-intl.yml b/.github/workflows/coverage-linux-without-intl.yml index ddd85fb8a4ff0e..1977eda3f97e03 100644 --- a/.github/workflows/coverage-linux-without-intl.yml +++ b/.github/workflows/coverage-linux-without-intl.yml @@ -79,7 +79,6 @@ jobs: - name: Clean tmp run: rm -rf coverage/tmp && rm -rf out - name: Upload - uses: codecov/codecov-action@b9fd7d16f6d7d1b5d2bec1a2887e65ceed900238 # v4.6.0 + uses: codecov/codecov-action@015f24e6818733317a2da2edd6290ab26238649a # v5.0.7 with: directory: ./coverage - token: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/coverage-linux.yml b/.github/workflows/coverage-linux.yml index 153504ba4280d6..164c0b540a9f45 100644 --- a/.github/workflows/coverage-linux.yml +++ b/.github/workflows/coverage-linux.yml @@ -79,7 +79,6 @@ jobs: - name: Clean tmp run: rm -rf coverage/tmp && rm -rf out - name: Upload - uses: codecov/codecov-action@b9fd7d16f6d7d1b5d2bec1a2887e65ceed900238 # v4.6.0 + uses: codecov/codecov-action@015f24e6818733317a2da2edd6290ab26238649a # v5.0.7 with: directory: ./coverage - token: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/coverage-windows.yml b/.github/workflows/coverage-windows.yml index 84feb7b09018de..fada006e321520 100644 --- a/.github/workflows/coverage-windows.yml +++ b/.github/workflows/coverage-windows.yml @@ -71,7 +71,6 @@ jobs: - name: Clean tmp run: npx rimraf ./coverage/tmp - name: Upload - uses: codecov/codecov-action@b9fd7d16f6d7d1b5d2bec1a2887e65ceed900238 # v4.6.0 + uses: codecov/codecov-action@015f24e6818733317a2da2edd6290ab26238649a # v5.0.7 with: directory: ./coverage - token: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/create-release-proposal.yml b/.github/workflows/create-release-proposal.yml index 5f0f80eed24c95..f3add22090cbc0 100644 --- a/.github/workflows/create-release-proposal.yml +++ b/.github/workflows/create-release-proposal.yml @@ -1,7 +1,5 @@ # This action requires the following secrets to be set on the repository: -# GH_USER_NAME: GitHub user whose Jenkins and GitHub token are defined below # GH_USER_TOKEN: GitHub user token, to be used by ncu and to push changes -# JENKINS_TOKEN: Jenkins token, to be used to check CI status name: Create Release Proposal @@ -11,12 +9,10 @@ on: release-line: required: true type: number - default: 23 description: 'The release line (without dots or prefix). e.g: 22' release-date: required: true type: string - default: YYYY-MM-DD description: The release date in YYYY-MM-DD format concurrency: ${{ github.workflow }} @@ -26,6 +22,7 @@ env: permissions: contents: write + pull-requests: write jobs: releasePrepare: @@ -36,16 +33,14 @@ jobs: RELEASE_LINE: ${{ inputs.release-line }} runs-on: ubuntu-latest steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: ref: ${{ env.STAGING_BRANCH }} - # Needs the whole git history for ncu to work - # See https://github.com/nodejs/node-core-utils/pull/486 - fetch-depth: 0 + persist-credentials: false # Install dependencies - name: Install Node.js - uses: actions/setup-node@1e60f620b9541d16bece96c5465dc8ee9832be0b # v4.0.3 + uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0 with: node-version: ${{ env.NODE_VERSION }} @@ -56,23 +51,19 @@ jobs: run: | ncu-config set branch "${RELEASE_BRANCH}" ncu-config set upstream origin - ncu-config set username "$USERNAME" + ncu-config set username "$GITHUB_ACTOR" ncu-config set token "$GH_TOKEN" - ncu-config set jenkins_token "$JENKINS_TOKEN" ncu-config set repo "$(echo "$GITHUB_REPOSITORY" | cut -d/ -f2)" ncu-config set owner "${GITHUB_REPOSITORY_OWNER}" env: - USERNAME: ${{ secrets.JENKINS_USER }} - GH_TOKEN: ${{ secrets.GH_USER_TOKEN }} - JENKINS_TOKEN: ${{ secrets.JENKINS_TOKEN }} + GH_TOKEN: ${{ github.token }} - name: Set up ghauth config (Ubuntu) run: | - mkdir -p ~/.config/changelog-maker/ - echo '{ - "user": "'$(ncu-config get username)'", - "token": "'$(ncu-config get token)'" - }' > ~/.config/changelog-maker/config.json + mkdir -p "${XDG_CONFIG_HOME:-~/.config}/changelog-maker" + echo '{}' | jq '{user: env.GITHUB_ACTOR, token: env.TOKEN}' > "${XDG_CONFIG_HOME:-~/.config}/changelog-maker/config.json" + env: + TOKEN: ${{ github.token }} - name: Setup git author run: | @@ -80,7 +71,12 @@ jobs: git config --local user.name "Node.js GitHub Bot" - name: Start git node release prepare + # The curl command is to make sure we run the version of the script corresponding to the current workflow. run: | - ./tools/actions/create-release.sh "${RELEASE_DATE}" "${RELEASE_LINE}" + git update-index --assume-unchanged tools/actions/create-release.sh + curl -fsSLo tools/actions/create-release.sh https://github.com/${GITHUB_REPOSITORY}/raw/${GITHUB_SHA}/tools/actions/create-release.sh + ./tools/actions/create-release.sh "${RELEASE_DATE}" "${RELEASE_LINE}" "${GITHUB_ACTOR}" env: - GH_TOKEN: ${{ secrets.GH_USER_TOKEN }} + GH_TOKEN: ${{ github.token }} + # We want the bot to push the push the release commit so CI runs on it. + BOT_TOKEN: ${{ secrets.GH_USER_TOKEN }} diff --git a/.github/workflows/lint-release-proposal.yml b/.github/workflows/lint-release-proposal.yml new file mode 100644 index 00000000000000..1ea2b4b1b173e2 --- /dev/null +++ b/.github/workflows/lint-release-proposal.yml @@ -0,0 +1,62 @@ +name: Linters (release proposals) + +on: + push: + branches: + - v[0-9]+.[0-9]+.[0-9]+-proposal + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +env: + PYTHON_VERSION: '3.12' + NODE_VERSION: lts/* + +permissions: + contents: read + +jobs: + lint-release-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + persist-credentials: false + - name: Lint release commit title format + run: | + EXPECTED_TITLE='^[[:digit:]]{4}-[[:digit:]]{2}-[[:digit:]]{2}, Version [[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+ (\(Current|'.+' \(LTS)\)$' + echo "Expected commit title format: $EXPECTED_TITLE" + COMMIT_SUBJECT="$(git --no-pager log -1 --format=%s)" + echo "Actual: $ACTUAL" + echo "$COMMIT_SUBJECT" | grep -q -E "$EXPECTED_TITLE" + echo "COMMIT_SUBJECT=$COMMIT_SUBJECT" >> "$GITHUB_ENV" + - name: Lint release commit message trailers + run: | + EXPECTED_TRAILER="^PR-URL: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/pull/[[:digit:]]+\$" + echo "Expected trailer format: $EXPECTED_TRAILER" + ACTUAL="$(git --no-pager log -1 --format=%b | git interpret-trailers --parse --no-divider)" + echo "Actual: $ACTUAL" + echo "$ACTUAL" | grep -E -q "$EXPECTED_TRAILER" + + PR_URL="${ACTUAL:8}" + PR_HEAD="$(gh pr view "$PR_URL" --json headRefOid -q .headRefOid)" + echo "Head of $PR_URL: $PR_HEAD" + echo "Current commit: $GITHUB_SHA" + [ "$PR_HEAD" = "$GITHUB_SHA" ] + env: + GH_TOKEN: ${{ github.token }} + - name: Validate CHANGELOG + id: releaser-info + run: | + EXPECTED_CHANGELOG_TITLE_INTRO="## $COMMIT_SUBJECT, @" + echo "Expected CHANGELOG section title: $EXPECTED_CHANGELOG_TITLE_INTRO" + CHANGELOG_TITLE="$(grep "$EXPECTED_CHANGELOG_TITLE_INTRO" "doc/changelogs/CHANGELOG_V${COMMIT_SUBJECT:20:2}.md")" + echo "Actual: $CHANGELOG_TITLE" + [ "${CHANGELOG_TITLE%%@*}@" = "$EXPECTED_CHANGELOG_TITLE_INTRO" ] + - name: Verify NODE_VERSION_IS_RELEASE bit is correctly set + run: | + grep -q '^#define NODE_VERSION_IS_RELEASE 1$' src/node_version.h + - name: Check for placeholders in documentation + run: | + ! grep "REPLACEME" doc/api/*.md diff --git a/.github/workflows/major-release.yml b/.github/workflows/major-release.yml new file mode 100644 index 00000000000000..a90be1798fac85 --- /dev/null +++ b/.github/workflows/major-release.yml @@ -0,0 +1,48 @@ +name: Major Release + +on: + schedule: + - cron: 0 0 15 2,8 * # runs at midnight UTC every 15 February and 15 August + +permissions: + contents: read + +jobs: + create-issue: + runs-on: ubuntu-latest + permissions: + issues: write + steps: + - name: Check for release schedule + id: check-date + run: | + # Get the current month and day + MONTH=$(date +'%m') + DAY=$(date +'%d') + # We'll create the reminder issue two months prior the release + if [[ "$MONTH" == "02" || "$MONTH" == "08" ]] && [[ "$DAY" == "15" ]]; then + echo "create_issue=true" >> "$GITHUB_ENV" + fi + - name: Retrieve next major release info from nodejs/Release + if: env.create_issue == 'true' + run: | + curl -L https://github.com/nodejs/Release/raw/HEAD/schedule.json | \ + jq -r 'to_entries | map(select(.value.start | strptime("%Y-%m-%d") | mktime > now)) | first | "VERSION=" + .key + "\nRELEASE_DATE=" + .value.start' >> "$GITHUB_ENV" + - name: Compute max date for landing semver-major PRs + if: env.create_issue == 'true' + run: | + echo "PR_MAX_DATE=$(date -d "$RELEASE_DATE -1 month" +%Y-%m-%d)" >> "$GITHUB_ENV" + - name: Create release announcement issue + if: env.create_issue == 'true' + run: | + gh issue create --repo "${GITHUB_REPOSITORY}" \ + --title "Upcoming Node.js Major Release ($VERSION)" \ + --body-file -< temp-output - # cat temp-output - # tail -n1 temp-output | grep "NEW_VERSION=" >> "$GITHUB_ENV" || true - # rm temp-output + - id: libuv + subsystem: deps + label: dependencies + run: | + ./tools/dep_updaters/update-libuv.sh > temp-output + cat temp-output + tail -n1 temp-output | grep "NEW_VERSION=" >> "$GITHUB_ENV" || true + rm temp-output - id: llhttp subsystem: deps label: dependencies diff --git a/.github/workflows/update-wpt.yml b/.github/workflows/update-wpt.yml new file mode 100644 index 00000000000000..72ec030e9d645a --- /dev/null +++ b/.github/workflows/update-wpt.yml @@ -0,0 +1,83 @@ +name: WPT update + +on: + schedule: + # Run once a week at 12:00 AM UTC on Sunday. + - cron: 0 0 * * * + workflow_dispatch: + inputs: + subsystems: + description: Subsystem to run the update for + required: false + default: '["url", "WebCryptoAPI"]' + +permissions: + contents: read + +env: + NODE_VERSION: lts/* + +jobs: + wpt-subsystem-update: + if: github.repository == 'nodejs/node' || github.event_name == 'workflow_dispatch' + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + subsystem: ${{ fromJSON(github.event.inputs.subsystems || '["url", "WebCryptoAPI"]') }} + + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + persist-credentials: false + + - name: Install Node.js + uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0 + with: + node-version: ${{ env.NODE_VERSION }} + + - name: Install @node-core/utils + run: npm install -g @node-core/utils + + - name: Setup @node-core/utils + run: | + ncu-config set username "$USERNAME" + ncu-config set token "$GH_TOKEN" + ncu-config set owner "${GITHUB_REPOSITORY_OWNER}" + ncu-config set repo "$(echo "$GITHUB_REPOSITORY" | cut -d/ -f2)" + env: + USERNAME: ${{ secrets.JENKINS_USER }} + GH_TOKEN: ${{ secrets.GH_USER_TOKEN }} + + - name: Update WPT for subsystem ${{ matrix.subsystem }} + run: | + git node wpt "$SUBSYSTEM" + env: + SUBSYSTEM: ${{ matrix.subsystem }} + + - name: Retrieve new version commit + run: | + new_version="$( + node -p 'require("./test/fixtures/wpt/versions.json")[process.argv[1]].commit' "$SUBSYSTEM" + )" + { + echo "long_version=$new_version" + echo "short_version=${new_version:0:10}" + } >> "$GITHUB_ENV" + env: + SUBSYSTEM: ${{ matrix.subsystem }} + + - name: Open or update PR for the subsystem update + uses: gr2m/create-or-update-pull-request-action@77596e3166f328b24613f7082ab30bf2d93079d5 + with: + branch: actions/update-wpt-${{ matrix.subsystem }} + author: Node.js GitHub Bot + title: 'test: update WPT for ${{ matrix.subsystem }} to ${{ env.short_version }}' + commit-message: 'test: update WPT for ${{ matrix.subsystem }} to ${{ env.short_version }}' + labels: test + update-pull-request-title-and-body: true + body: > + This is an automated update of the WPT for ${{ matrix.subsystem }} to + https://github.com/web-platform-tests/wpt/commit/${{ env.long_version }}. + env: + GITHUB_TOKEN: ${{ secrets.GH_USER_TOKEN }} diff --git a/BUILDING.md b/BUILDING.md index 1eab935e6c61fd..7dc0e07fd4008c 100644 --- a/BUILDING.md +++ b/BUILDING.md @@ -686,7 +686,7 @@ easily. These files will install the following To install Node.js prerequisites from Powershell Terminal: ```powershell -winget configure .\configuration.dsc.yaml +winget configure .\configurations\configuration.dsc.yaml ``` Alternatively, you can use [Dev Home](https://learn.microsoft.com/en-us/windows/dev-home/) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76ee0d1b59e50b..6896e94b0abd72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,7 +38,8 @@ release. -22.12.0
+22.13.0
+22.12.0
22.11.0
22.10.0
22.9.0
diff --git a/Makefile b/Makefile index 32dc4e5b9452f6..cf9bc749b722b5 100644 --- a/Makefile +++ b/Makefile @@ -171,8 +171,7 @@ with-code-cache test-code-cache: $(warning '$@' target is a noop) out/Makefile: config.gypi common.gypi common_node.gypi node.gyp \ - deps/uv/uv.gyp deps/llhttp/llhttp.gyp deps/zlib/zlib.gyp \ - deps/simdutf/simdutf.gyp deps/ada/ada.gyp deps/nbytes/nbytes.gyp \ + deps/*/*.gyp \ tools/v8_gypfiles/toolchain.gypi \ tools/v8_gypfiles/features.gypi \ tools/v8_gypfiles/inspector.gypi tools/v8_gypfiles/v8.gyp @@ -295,6 +294,7 @@ coverage-report-js: ## Report JavaScript coverage results. cctest: all ## Run the C++ tests using the built `cctest` executable. @out/$(BUILDTYPE)/$@ --gtest_filter=$(GTEST_FILTER) $(NODE) ./test/embedding/test-embedding.js + $(NODE) ./test/sqlite/test-sqlite-extensions.mjs .PHONY: list-gtests list-gtests: ## List all available C++ gtests. @@ -575,6 +575,7 @@ test-ci: | clear-stalled bench-addons-build build-addons build-js-native-api-tes --mode=$(BUILDTYPE_LOWER) --flaky-tests=$(FLAKY_TESTS) \ $(TEST_CI_ARGS) $(CI_JS_SUITES) $(CI_NATIVE_SUITES) $(CI_DOC) $(NODE) ./test/embedding/test-embedding.js + $(NODE) ./test/sqlite/test-sqlite-extensions.mjs $(info Clean up any leftover processes, error if found.) ps awwx | grep Release/node | grep -v grep | cat @PS_OUT=`ps awwx | grep Release/node | grep -v grep | awk '{print $$1}'`; \ @@ -933,6 +934,9 @@ else ifeq ($(findstring riscv64,$(UNAME_M)),riscv64) DESTCPU ?= riscv64 else +ifeq ($(findstring loongarch64,$(UNAME_M)),loongarch64) +DESTCPU ?= loong64 +else DESTCPU ?= x86 endif endif @@ -946,6 +950,7 @@ endif endif endif endif +endif ifeq ($(DESTCPU),x64) ARCH=x64 else @@ -970,6 +975,9 @@ else ifeq ($(DESTCPU),riscv64) ARCH=riscv64 else +ifeq ($(DESTCPU),loong64) +ARCH=loong64 +else ARCH=x86 endif endif @@ -979,6 +987,7 @@ endif endif endif endif +endif # node and v8 use different arch names (e.g. node 'x86' vs v8 'ia32'). # pass the proper v8 arch name to $V8_ARCH based on user-specified $DESTCPU. @@ -1433,6 +1442,7 @@ LINT_CPP_FILES = $(filter-out $(LINT_CPP_EXCLUDE), $(wildcard \ test/cctest/*.h \ test/embedding/*.cc \ test/embedding/*.h \ + test/sqlite/*.c \ test/fixtures/*.c \ test/js-native-api/*/*.cc \ test/node-api/*/*.cc \ diff --git a/README.md b/README.md index 69777ccb2dad40..35a30716eb668c 100644 --- a/README.md +++ b/README.md @@ -379,6 +379,8 @@ For information about the governance of the Node.js project, see **Nitzan Uziely** <> * [LiviaMedeiros](https://github.com/LiviaMedeiros) - **LiviaMedeiros** <> +* [ljharb](https://github.com/ljharb) - + **Jordan Harband** <> * [lpinca](https://github.com/lpinca) - **Luigi Pinca** <> (he/him) * [lukekarrys](https://github.com/lukekarrys) - @@ -757,8 +759,6 @@ maintaining the Node.js project. **Mert Can Altin** <> * [preveen-stack](https://github.com/preveen-stack) - **Preveen Padmanabhan** <> (he/him) -* [RedYetiDev](https://github.com/RedYetiDev) - - **Aviv Keller** <> (they/them) * [VoltrexKeyva](https://github.com/VoltrexKeyva) - **Mohammed Keyvanzadeh** <> (he/him) diff --git a/SECURITY.md b/SECURITY.md index fc95e1941698e6..19e876939f0f55 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -218,7 +218,7 @@ as any other stable feature. Security notifications will be distributed via the following methods. * -* +* ## Comments on this policy diff --git a/benchmark/fs/readfile-permission-enabled.js b/benchmark/fs/readfile-permission-enabled.js index 46f20be6a0b06e..c688e9eecb0e00 100644 --- a/benchmark/fs/readfile-permission-enabled.js +++ b/benchmark/fs/readfile-permission-enabled.js @@ -17,7 +17,7 @@ const bench = common.createBenchmark(main, { concurrent: [1, 10], }, { flags: [ - '--experimental-permission', + '--permission', '--allow-fs-read=*', '--allow-fs-write=*', '--allow-child-process', diff --git a/benchmark/permission/permission-processhas-fs-read.js b/benchmark/permission/permission-processhas-fs-read.js index c2c90636aa1f62..ea06aed4bc27a9 100644 --- a/benchmark/permission/permission-processhas-fs-read.js +++ b/benchmark/permission/permission-processhas-fs-read.js @@ -11,7 +11,7 @@ const rootPath = path.resolve(__dirname, '../../..'); const options = { flags: [ - '--experimental-permission', + '--permission', `--allow-fs-read=${rootPath}`, '--allow-child-process', '--no-warnings', diff --git a/benchmark/permission/permission-startup.js b/benchmark/permission/permission-startup.js index 08326909aa4e41..6a197cdff56111 100644 --- a/benchmark/permission/permission-startup.js +++ b/benchmark/permission/permission-startup.js @@ -48,7 +48,7 @@ function spawnProcess(script, bench, state) { function main({ count, script, nFiles, prefixPath }) { script = path.resolve(__dirname, '../../', `${script}.js`); const optionsWithScript = [ - '--experimental-permission', + '--permission', `--allow-fs-read=${script}`, ...mockFiles(nFiles, prefixPath).map((file) => '--allow-fs-read=' + file), script, diff --git a/benchmark/util/text-decoder.js b/benchmark/util/text-decoder.js index dd4f02016df077..1aa60f2dd0bcd6 100644 --- a/benchmark/util/text-decoder.js +++ b/benchmark/util/text-decoder.js @@ -3,7 +3,7 @@ const common = require('../common.js'); const bench = common.createBenchmark(main, { - encoding: ['utf-8', 'latin1', 'iso-8859-3'], + encoding: ['utf-8', 'windows-1252', 'iso-8859-3'], ignoreBOM: [0, 1], fatal: [0, 1], len: [256, 1024 * 16, 1024 * 128], diff --git a/common.gypi b/common.gypi index de83a566724a36..62f26bb07d27a0 100644 --- a/common.gypi +++ b/common.gypi @@ -36,7 +36,7 @@ # Reset this number to 0 on major V8 upgrades. # Increment by one for each non-official patch applied to deps/v8. - 'v8_embedder_string': '-node.21', + 'v8_embedder_string': '-node.22', ##### V8 defaults for Node.js ##### diff --git a/configure.py b/configure.py index 4ab91cd34fd138..712ed40f77e54d 100755 --- a/configure.py +++ b/configure.py @@ -130,6 +130,12 @@ default=None, help='use the prefix to look for pre-installed headers') +parser.add_argument('--use_clang', + action='store_true', + dest='use_clang', + default=None, + help='use clang instead of gcc') + parser.add_argument('--dest-os', action='store', dest='dest_os', @@ -1405,6 +1411,10 @@ def configure_node(o): o['variables']['target_arch'] = target_arch o['variables']['node_byteorder'] = sys.byteorder + # Allow overriding the compiler - needed by embedders. + if options.use_clang: + o['variables']['clang'] = 1 + cross_compiling = (options.cross_compiling if options.cross_compiling is not None else target_arch != host_arch) @@ -1672,6 +1682,9 @@ def configure_v8(o, configs): raise Exception( 'Only one of the --v8-enable-object-print or --v8-disable-object-print options ' 'can be specified at a time.') + if sys.platform != 'darwin': + if o['variables']['v8_enable_webassembly'] and o['variables']['target_arch'] == 'x64': + o['variables']['v8_enable_wasm_simd256_revec'] = 1 def configure_openssl(o): variables = o['variables'] diff --git a/deps/cares/CMakeLists.txt b/deps/cares/CMakeLists.txt index f6560d56b08ddd..139defd8ffd159 100644 --- a/deps/cares/CMakeLists.txt +++ b/deps/cares/CMakeLists.txt @@ -1,6 +1,6 @@ # Copyright (C) The c-ares project and its contributors # SPDX-License-Identifier: MIT -CMAKE_MINIMUM_REQUIRED (VERSION 3.5.0) +CMAKE_MINIMUM_REQUIRED (VERSION 3.5.0...3.10.0) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/") @@ -12,7 +12,7 @@ INCLUDE (CheckCSourceCompiles) INCLUDE (CheckStructHasMember) INCLUDE (CheckLibraryExists) -PROJECT (c-ares LANGUAGES C VERSION "1.34.3" ) +PROJECT (c-ares LANGUAGES C VERSION "1.34.4" ) # Set this version before release SET (CARES_VERSION "${PROJECT_VERSION}") @@ -30,7 +30,7 @@ INCLUDE (GNUInstallDirs) # include this *AFTER* PROJECT(), otherwise paths are w # For example, a version of 4:0:2 would generate output such as: # libname.so -> libname.so.2 # libname.so.2 -> libname.so.2.2.0 -SET (CARES_LIB_VERSIONINFO "21:2:19") +SET (CARES_LIB_VERSIONINFO "21:3:19") OPTION (CARES_STATIC "Build as a static library" OFF) @@ -271,6 +271,8 @@ ELSEIF (CMAKE_SYSTEM_NAME STREQUAL "AIX") LIST (APPEND SYSFLAGS -D_ALL_SOURCE -D_XOPEN_SOURCE=700 -D_USE_IRS) ELSEIF (CMAKE_SYSTEM_NAME STREQUAL "FreeBSD") # Don't define _XOPEN_SOURCE on FreeBSD, it actually reduces visibility instead of increasing it +ELSEIF (CMAKE_SYSTEM_NAME STREQUAL "QNX") + LIST (APPEND SYSFLAGS -D_QNX_SOURCE) ELSEIF (WIN32) LIST (APPEND SYSFLAGS -DWIN32_LEAN_AND_MEAN -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE -D_WIN32_WINNT=0x0602) ENDIF () @@ -406,6 +408,7 @@ ENDIF () CHECK_STRUCT_HAS_MEMBER("struct sockaddr_in6" sin6_scope_id "${CMAKE_EXTRA_INCLUDE_FILES}" HAVE_STRUCT_SOCKADDR_IN6_SIN6_SCOPE_ID LANGUAGE C) +CHECK_SYMBOL_EXISTS (strnlen "${CMAKE_EXTRA_INCLUDE_FILES}" HAVE_STRNLEN) CHECK_SYMBOL_EXISTS (memmem "${CMAKE_EXTRA_INCLUDE_FILES}" HAVE_MEMMEM) CHECK_SYMBOL_EXISTS (closesocket "${CMAKE_EXTRA_INCLUDE_FILES}" HAVE_CLOSESOCKET) CHECK_SYMBOL_EXISTS (CloseSocket "${CMAKE_EXTRA_INCLUDE_FILES}" HAVE_CLOSESOCKET_CAMEL) diff --git a/deps/cares/Makefile.am b/deps/cares/Makefile.am index e99161a45f7883..51b5f6be32be78 100644 --- a/deps/cares/Makefile.am +++ b/deps/cares/Makefile.am @@ -3,17 +3,24 @@ # Copyright (C) the Massachusetts Institute of Technology. # Copyright (C) Daniel Stenberg # -# Permission to use, copy, modify, and distribute this -# software and its documentation for any purpose and without -# fee is hereby granted, provided that the above copyright -# notice appear in all copies and that both that copyright -# notice and this permission notice appear in supporting -# documentation, and that the name of M.I.T. not be used in -# advertising or publicity pertaining to distribution of the -# software without specific, written prior permission. -# M.I.T. makes no representations about the suitability of -# this software for any purpose. It is provided "as is" -# without express or implied warranty. +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. # # SPDX-License-Identifier: MIT # diff --git a/deps/cares/Makefile.in b/deps/cares/Makefile.in index ba78cb77cbe335..2342125d136526 100644 --- a/deps/cares/Makefile.in +++ b/deps/cares/Makefile.in @@ -19,17 +19,24 @@ # Copyright (C) the Massachusetts Institute of Technology. # Copyright (C) Daniel Stenberg # -# Permission to use, copy, modify, and distribute this -# software and its documentation for any purpose and without -# fee is hereby granted, provided that the above copyright -# notice appear in all copies and that both that copyright -# notice and this permission notice appear in supporting -# documentation, and that the name of M.I.T. not be used in -# advertising or publicity pertaining to distribution of the -# software without specific, written prior permission. -# M.I.T. makes no representations about the suitability of -# this software for any purpose. It is provided "as is" -# without express or implied warranty. +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. # # SPDX-License-Identifier: MIT # @@ -111,7 +118,9 @@ build_triplet = @build@ host_triplet = @host@ subdir = . ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/m4/ax_ac_append_to_file.m4 \ +am__aclocal_m4_deps = $(top_srcdir)/m4/ares_check_user_namespace.m4 \ + $(top_srcdir)/m4/ares_check_uts_namespace.m4 \ + $(top_srcdir)/m4/ax_ac_append_to_file.m4 \ $(top_srcdir)/m4/ax_ac_print_to_file.m4 \ $(top_srcdir)/m4/ax_add_am_macro_static.m4 \ $(top_srcdir)/m4/ax_am_macros_static.m4 \ @@ -121,8 +130,6 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/ax_ac_append_to_file.m4 \ $(top_srcdir)/m4/ax_check_compile_flag.m4 \ $(top_srcdir)/m4/ax_check_gnu_make.m4 \ $(top_srcdir)/m4/ax_check_link_flag.m4 \ - $(top_srcdir)/m4/ax_check_user_namespace.m4 \ - $(top_srcdir)/m4/ax_check_uts_namespace.m4 \ $(top_srcdir)/m4/ax_code_coverage.m4 \ $(top_srcdir)/m4/ax_compiler_vendor.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ diff --git a/deps/cares/Makefile.msvc b/deps/cares/Makefile.msvc index 8395d1a7d67728..3266db415e09fe 100644 --- a/deps/cares/Makefile.msvc +++ b/deps/cares/Makefile.msvc @@ -1,17 +1,24 @@ # Copyright (C) 2009-2013 by Daniel Stenberg # -# Permission to use, copy, modify, and distribute this -# software and its documentation for any purpose and without -# fee is hereby granted, provided that the above copyright -# notice appear in all copies and that both that copyright -# notice and this permission notice appear in supporting -# documentation, and that the name of M.I.T. not be used in -# advertising or publicity pertaining to distribution of the -# software without specific, written prior permission. -# M.I.T. makes no representations about the suitability of -# this software for any purpose. It is provided "as is" -# without express or implied warranty. +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. # # SPDX-License-Identifier: MIT diff --git a/deps/cares/RELEASE-NOTES.md b/deps/cares/RELEASE-NOTES.md index f9d58d278432f1..19a204b3ea96bd 100644 --- a/deps/cares/RELEASE-NOTES.md +++ b/deps/cares/RELEASE-NOTES.md @@ -1,97 +1,25 @@ -## c-ares version 1.34.3 - November 9 2024 +## c-ares version 1.34.4 - December 14 2024 This is a bugfix release. Changes: -* Build the release package in an automated way so we can provide - provenance as per [SLSA3](https://slsa.dev/). - [PR #906](https://github.com/c-ares/c-ares/pull/906) +* QNX Port: Port to QNX 8, add primary config reading support, add CI build. [PR #934](https://github.com/c-ares/c-ares/pull/934), [PR #937](https://github.com/c-ares/c-ares/pull/937), [PR #938](https://github.com/c-ares/c-ares/pull/938) Bugfixes: -* Some upstream servers are non-compliant with EDNS options, resend queries - without EDNS. [Issue #911](https://github.com/c-ares/c-ares/issues/911) -* Android: <=7 needs sys/system_properties.h - [a70637c](https://github.com/c-ares/c-ares/commit/a70637c) -* Android: CMake needs `-D_GNU_SOURCE` and others. - [PR #915](https://github.com/c-ares/c-ares/pull/914) -* TSAN warns on missing lock, but lock isn't actually necessary. - [PR #915](https://github.com/c-ares/c-ares/pull/915) -* `ares_getaddrinfo()` for `AF_UNSPEC` should retry IPv4 if only IPv6 is - received. [765d558](https://github.com/c-ares/c-ares/commit/765d558) -* `ares_send()` shouldn't return `ARES_EBADRESP`, its `ARES_EBADQUERY`. - [91519e7](https://github.com/c-ares/c-ares/commit/91519e7) -* Fix typos in man pages. [PR #905](https://github.com/c-ares/c-ares/pull/905) +* Empty TXT records were not being preserved. [PR #922](https://github.com/c-ares/c-ares/pull/922) +* docs: update deprecation notices for `ares_create_query()` and `ares_mkquery()`. [PR #910](https://github.com/c-ares/c-ares/pull/910) +* license: some files weren't properly updated. [PR #920](https://github.com/c-ares/c-ares/pull/920) +* Fix bind local device regression from 1.34.0. [PR #929](https://github.com/c-ares/c-ares/pull/929), [PR #931](https://github.com/c-ares/c-ares/pull/931), [PR #935](https://github.com/c-ares/c-ares/pull/935) +* CMake: set policy version to prevent deprecation warnings. [PR #932](https://github.com/c-ares/c-ares/pull/932) +* CMake: shared and static library names should be the same on unix platforms like autotools uses. [PR #933](https://github.com/c-ares/c-ares/pull/933) +* Update to latest autoconf archive macros for enhanced system compatibility. [PR #936](https://github.com/c-ares/c-ares/pull/936) Thanks go to these friendly people for their efforts and contributions for this release: * Brad House (@bradh352) -* Jiwoo Park (@jimmy-park) - - -## c-ares version 1.34.2 - October 15 2024 - -This release contains a fix for downstream packages detecting the c-ares -version based on the contents of the header file rather than the -distributed pkgconf or cmake files. - -## c-ares version 1.34.1 - October 9 2024 - -This release fixes a packaging issue. - - -## c-ares version 1.34.0 - October 9 2024 - -This is a feature and bugfix release. - -Features: -* adig: read arguments from adigrc. - [PR #856](https://github.com/c-ares/c-ares/pull/856) -* Add new pending write callback optimization via `ares_set_pending_write_cb`. - [PR #857](https://github.com/c-ares/c-ares/pull/857) -* New function `ares_process_fds()`. - [PR #875](https://github.com/c-ares/c-ares/pull/875) -* Failed servers should be probed rather than redirecting queries which could - cause unexpected latency. - [PR #877](https://github.com/c-ares/c-ares/pull/877) -* adig: rework command line arguments to mimic dig from bind. - [PR #890](https://github.com/c-ares/c-ares/pull/890) -* Add new method for overriding network functions - `ares_set_socket_function_ex()` to properly support all new functionality. - [PR #894](https://github.com/c-ares/c-ares/pull/894) -* Fix regression with custom socket callbacks due to DNS cookie support. - [PR #895](https://github.com/c-ares/c-ares/pull/895) -* ares_socket: set IP_BIND_ADDRESS_NO_PORT on ares_set_local_ip* tcp sockets - [PR #887](https://github.com/c-ares/c-ares/pull/887) -* URI parser/writer for ares_set_servers_csv()/ares_get_servers_csv(). - [PR #882](https://github.com/c-ares/c-ares/pull/882) - -Changes: -* Connection handling modularization. - [PR #857](https://github.com/c-ares/c-ares/pull/857), - [PR #876](https://github.com/c-ares/c-ares/pull/876) -* Expose library/utility functions to tools. - [PR #860](https://github.com/c-ares/c-ares/pull/860) -* Remove `ares__` prefix, just use `ares_` for internal functions. - [PR #872](https://github.com/c-ares/c-ares/pull/872) - - -Bugfixes: -* fix: potential WIN32_LEAN_AND_MEAN redefinition. - [PR #869](https://github.com/c-ares/c-ares/pull/869) -* Fix googletest v1.15 compatibility. - [PR #874](https://github.com/c-ares/c-ares/pull/874) -* Fix pkgconfig thread dependencies. - [PR #884](https://github.com/c-ares/c-ares/pull/884) - - -Thanks go to these friendly people for their efforts and contributions for this -release: - -* Brad House (@bradh352) -* Cristian Rodríguez (@crrodriguez) -* Georg (@tacerus) -* @lifenjoiner -* Shelley Vohr (@codebytere) -* 前进,前进,进 (@leleliu008) - +* Daniel Stenberg (@bagder) +* Gregor Jasny (@gjasny) +* @marcovsz +* Nikolaos Chatzikonstantinou (@createyourpersonalaccount) +* @vlasovsoft1979 diff --git a/deps/cares/aclocal.m4 b/deps/cares/aclocal.m4 index ce7ad1c8a86a43..04f8786c9c0c89 100644 --- a/deps/cares/aclocal.m4 +++ b/deps/cares/aclocal.m4 @@ -1221,6 +1221,8 @@ AC_SUBST([am__tar]) AC_SUBST([am__untar]) ]) # _AM_PROG_TAR +m4_include([m4/ares_check_user_namespace.m4]) +m4_include([m4/ares_check_uts_namespace.m4]) m4_include([m4/ax_ac_append_to_file.m4]) m4_include([m4/ax_ac_print_to_file.m4]) m4_include([m4/ax_add_am_macro_static.m4]) @@ -1231,8 +1233,6 @@ m4_include([m4/ax_append_link_flags.m4]) m4_include([m4/ax_check_compile_flag.m4]) m4_include([m4/ax_check_gnu_make.m4]) m4_include([m4/ax_check_link_flag.m4]) -m4_include([m4/ax_check_user_namespace.m4]) -m4_include([m4/ax_check_uts_namespace.m4]) m4_include([m4/ax_code_coverage.m4]) m4_include([m4/ax_compiler_vendor.m4]) m4_include([m4/ax_cxx_compile_stdcxx.m4]) diff --git a/deps/cares/aminclude_static.am b/deps/cares/aminclude_static.am index b83549f81adde4..ec7a86a43e6829 100644 --- a/deps/cares/aminclude_static.am +++ b/deps/cares/aminclude_static.am @@ -1,6 +1,6 @@ # aminclude_static.am generated automatically by Autoconf -# from AX_AM_MACROS_STATIC on Sat Nov 9 17:40:37 UTC 2024 +# from AX_AM_MACROS_STATIC on Sat Dec 14 15:15:44 UTC 2024 # Code coverage @@ -66,7 +66,7 @@ code_coverage_v_lcov_cap_ = $(code_coverage_v_lcov_cap_$(AM_DEFAULT_VERBOSITY)) code_coverage_v_lcov_cap_0 = @echo " LCOV --capture" $(CODE_COVERAGE_OUTPUT_FILE); code_coverage_v_lcov_ign = $(code_coverage_v_lcov_ign_$(V)) code_coverage_v_lcov_ign_ = $(code_coverage_v_lcov_ign_$(AM_DEFAULT_VERBOSITY)) -code_coverage_v_lcov_ign_0 = @echo " LCOV --remove /tmp/*" $(CODE_COVERAGE_IGNORE_PATTERN); +code_coverage_v_lcov_ign_0 = @echo " LCOV --remove" "$(CODE_COVERAGE_OUTPUT_FILE).tmp" $(CODE_COVERAGE_IGNORE_PATTERN); code_coverage_v_genhtml = $(code_coverage_v_genhtml_$(V)) code_coverage_v_genhtml_ = $(code_coverage_v_genhtml_$(AM_DEFAULT_VERBOSITY)) code_coverage_v_genhtml_0 = @echo " GEN " "$(CODE_COVERAGE_OUTPUT_DIRECTORY)"; @@ -85,7 +85,7 @@ check-code-coverage: # Capture code coverage data code-coverage-capture: code-coverage-capture-hook $(code_coverage_v_lcov_cap)$(LCOV) $(code_coverage_quiet) $(addprefix --directory ,$(CODE_COVERAGE_DIRECTORY)) --capture --output-file "$(CODE_COVERAGE_OUTPUT_FILE).tmp" --test-name "$(call code_coverage_sanitize,$(PACKAGE_NAME)-$(PACKAGE_VERSION))" --no-checksum --compat-libtool $(CODE_COVERAGE_LCOV_SHOPTS) $(CODE_COVERAGE_LCOV_OPTIONS) - $(code_coverage_v_lcov_ign)$(LCOV) $(code_coverage_quiet) $(addprefix --directory ,$(CODE_COVERAGE_DIRECTORY)) --remove "$(CODE_COVERAGE_OUTPUT_FILE).tmp" "/tmp/*" $(CODE_COVERAGE_IGNORE_PATTERN) --output-file "$(CODE_COVERAGE_OUTPUT_FILE)" $(CODE_COVERAGE_LCOV_SHOPTS) $(CODE_COVERAGE_LCOV_RMOPTS) + $(code_coverage_v_lcov_ign)$(LCOV) $(code_coverage_quiet) $(addprefix --directory ,$(CODE_COVERAGE_DIRECTORY)) --remove "$(CODE_COVERAGE_OUTPUT_FILE).tmp" $(CODE_COVERAGE_IGNORE_PATTERN) --output-file "$(CODE_COVERAGE_OUTPUT_FILE)" $(CODE_COVERAGE_LCOV_SHOPTS) $(CODE_COVERAGE_LCOV_RMOPTS) -@rm -f "$(CODE_COVERAGE_OUTPUT_FILE).tmp" $(code_coverage_v_genhtml)LANG=C $(GENHTML) $(code_coverage_quiet) $(addprefix --prefix ,$(CODE_COVERAGE_DIRECTORY)) --output-directory "$(CODE_COVERAGE_OUTPUT_DIRECTORY)" --title "$(PACKAGE_NAME)-$(PACKAGE_VERSION) Code Coverage" --legend --show-details "$(CODE_COVERAGE_OUTPUT_FILE)" $(CODE_COVERAGE_GENHTML_OPTIONS) @echo "file://$(abs_builddir)/$(CODE_COVERAGE_OUTPUT_DIRECTORY)/index.html" diff --git a/deps/cares/configure b/deps/cares/configure index 76b0ddf39c136a..d02f117d2f0b64 100755 --- a/deps/cares/configure +++ b/deps/cares/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for c-ares 1.34.3. +# Generated by GNU Autoconf 2.71 for c-ares 1.34.4. # # Report bugs to . # @@ -621,8 +621,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='c-ares' PACKAGE_TARNAME='c-ares' -PACKAGE_VERSION='1.34.3' -PACKAGE_STRING='c-ares 1.34.3' +PACKAGE_VERSION='1.34.4' +PACKAGE_STRING='c-ares 1.34.4' PACKAGE_BUGREPORT='c-ares mailing list: http://lists.haxx.se/listinfo/c-ares' PACKAGE_URL='' @@ -853,6 +853,7 @@ with_gcov enable_code_coverage enable_largefile enable_libgcc +enable_tests_crossbuild ' ac_precious_vars='build_alias host_alias @@ -1423,7 +1424,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures c-ares 1.34.3 to adapt to many kinds of systems. +\`configure' configures c-ares 1.34.4 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1494,7 +1495,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of c-ares 1.34.3:";; + short | recursive ) echo "Configuration of c-ares 1.34.4:";; esac cat <<\_ACEOF @@ -1525,6 +1526,8 @@ Optional Features: --enable-code-coverage Whether to enable code coverage support --disable-largefile omit support for large files --enable-libgcc use libgcc when linking + --enable-tests-crossbuild + Enable test building even when cross building Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] @@ -1634,7 +1637,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -c-ares configure 1.34.3 +c-ares configure 1.34.4 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. @@ -2258,7 +2261,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by c-ares $as_me 1.34.3, which was +It was created by c-ares $as_me 1.34.4, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -3232,7 +3235,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu -CARES_VERSION_INFO="21:2:19" +CARES_VERSION_INFO="21:3:19" @@ -4891,7 +4894,17 @@ else $as_nop // MSVC always sets __cplusplus to 199711L in older versions; newer versions // only set it correctly if /Zc:__cplusplus is specified as well as a // /std:c++NN switch: +// // https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/ +// +// The value __cplusplus ought to have is available in _MSVC_LANG since +// Visual Studio 2015 Update 3: +// +// https://learn.microsoft.com/en-us/cpp/preprocessor/predefined-macros +// +// This was also the first MSVC version to support C++14 so we can't use the +// value of either __cplusplus or _MSVC_LANG to quickly rule out MSVC having +// C++11 or C++14 support, but we can check _MSVC_LANG for C++17 and later. #elif __cplusplus < 201103L && !defined _MSC_VER #error "This is not a C++11 compiler" @@ -5914,7 +5927,7 @@ fi # Define the identity of the package. PACKAGE='c-ares' - VERSION='1.34.3' + VERSION='1.34.4' printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h @@ -19525,10 +19538,52 @@ then : fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for _gcov_init in -lgcov" >&5 +printf %s "checking for _gcov_init in -lgcov... " >&6; } +if test ${ac_cv_lib_gcov__gcov_init+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgcov $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char _gcov_init (); +int +main (void) +{ +return _gcov_init (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_gcov__gcov_init=yes +else $as_nop + ac_cv_lib_gcov__gcov_init=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_gcov__gcov_init" >&5 +printf "%s\n" "$ac_cv_lib_gcov__gcov_init" >&6; } +if test "x$ac_cv_lib_gcov__gcov_init" = xyes +then : + CODE_COVERAGE_LIBS="-lgcov" +else $as_nop + CODE_COVERAGE_LIBS="" +fi + + CODE_COVERAGE_CPPFLAGS="-DNDEBUG" CODE_COVERAGE_CFLAGS="-O0 -g -fprofile-arcs -ftest-coverage" CODE_COVERAGE_CXXFLAGS="-O0 -g -fprofile-arcs -ftest-coverage" - CODE_COVERAGE_LIBS="-lgcov" @@ -19805,27 +19860,37 @@ eval ac_res=\$$as_CACHEVAR printf "%s\n" "$ac_res" >&6; } if eval test \"x\$"$as_CACHEVAR"\" = x"yes" then : - if test ${LDFLAGS+y} + +if test ${LDFLAGS+y} then : - case " $LDFLAGS " in - *" $flag "*) - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : LDFLAGS already contains \$flag"; } >&5 + + case " $LDFLAGS " in #( + *" $flag "*) : + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : LDFLAGS already contains \$flag"; } >&5 (: LDFLAGS already contains $flag) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } - ;; - *) - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : LDFLAGS=\"\$LDFLAGS \$flag\""; } >&5 - (: LDFLAGS="$LDFLAGS $flag") 2>&5 + test $ac_status = 0; } ;; #( + *) : + + as_fn_append LDFLAGS " $flag" + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : LDFLAGS=\"\$LDFLAGS\""; } >&5 + (: LDFLAGS="$LDFLAGS") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } - LDFLAGS="$LDFLAGS $flag" - ;; - esac + ;; +esac + else $as_nop - LDFLAGS="$flag" + + LDFLAGS=$flag + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : LDFLAGS=\"\$LDFLAGS\""; } >&5 + (: LDFLAGS="$LDFLAGS") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + fi else $as_nop @@ -19870,27 +19935,37 @@ if test "x$enable_shared" = "xno" -a "x$enable_static" = "xyes" ; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we need CARES_STATICLIB definition" >&5 printf %s "checking whether we need CARES_STATICLIB definition... " >&6; } if test "$ac_cv_native_windows" = "yes" ; then - if test ${AM_CPPFLAGS+y} + +if test ${AM_CPPFLAGS+y} then : - case " $AM_CPPFLAGS " in - *" -DCARES_STATICLIB "*) - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CPPFLAGS already contains -DCARES_STATICLIB"; } >&5 + + case " $AM_CPPFLAGS " in #( + *" -DCARES_STATICLIB "*) : + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CPPFLAGS already contains -DCARES_STATICLIB"; } >&5 (: AM_CPPFLAGS already contains -DCARES_STATICLIB) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } - ;; - *) - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CPPFLAGS=\"\$AM_CPPFLAGS -DCARES_STATICLIB\""; } >&5 - (: AM_CPPFLAGS="$AM_CPPFLAGS -DCARES_STATICLIB") 2>&5 + test $ac_status = 0; } ;; #( + *) : + + as_fn_append AM_CPPFLAGS " -DCARES_STATICLIB" + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CPPFLAGS=\"\$AM_CPPFLAGS\""; } >&5 + (: AM_CPPFLAGS="$AM_CPPFLAGS") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } - AM_CPPFLAGS="$AM_CPPFLAGS -DCARES_STATICLIB" - ;; - esac + ;; +esac + else $as_nop - AM_CPPFLAGS="-DCARES_STATICLIB" + + AM_CPPFLAGS=-DCARES_STATICLIB + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CPPFLAGS=\"\$AM_CPPFLAGS\""; } >&5 + (: AM_CPPFLAGS="$AM_CPPFLAGS") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + fi PKGCONFIG_CFLAGS="-DCARES_STATICLIB" @@ -19910,57 +19985,24 @@ if test "$symbol_hiding" != "no" ; then else case "$ax_cv_c_compiler_vendor" in clang|gnu|intel) - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts " >&5 -printf %s "checking whether C compiler accepts ... " >&6; } -if test ${ax_cv_check_cflags__+y} -then : - printf %s "(cached) " >&6 -else $as_nop - ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS " - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ax_cv_check_cflags__=yes -else $as_nop - ax_cv_check_cflags__=no -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__" >&5 -printf "%s\n" "$ax_cv_check_cflags__" >&6; } -if test x"$ax_cv_check_cflags__" = xyes -then : - : -else $as_nop - : -fi for flag in -fvisibility=hidden; do as_CACHEVAR=`printf "%s\n" "ax_cv_check_cflags__$flag" | $as_tr_sh` -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $flag" >&5 -printf %s "checking whether C compiler accepts $flag... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler accepts $flag" >&5 +printf %s "checking whether the C compiler accepts $flag... " >&6; } if eval test \${$as_CACHEVAR+y} then : printf %s "(cached) " >&6 else $as_nop ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS $flag" + if test x"$GCC" = xyes ; then + add_gnu_werror="-Werror" + fi + CFLAGS="$CFLAGS $flag $add_gnu_werror" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -19984,29 +20026,39 @@ fi eval ac_res=\$$as_CACHEVAR { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 printf "%s\n" "$ac_res" >&6; } -if test x"`eval 'as_val=${'$as_CACHEVAR'};printf "%s\n" "$as_val"'`" = xyes +if eval test \"x\$"$as_CACHEVAR"\" = x"yes" then : - if test ${CARES_SYMBOL_HIDING_CFLAG+y} + +if test ${CARES_SYMBOL_HIDING_CFLAG+y} then : - case " $CARES_SYMBOL_HIDING_CFLAG " in - *" $flag "*) - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : CARES_SYMBOL_HIDING_CFLAG already contains \$flag"; } >&5 + + case " $CARES_SYMBOL_HIDING_CFLAG " in #( + *" $flag "*) : + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : CARES_SYMBOL_HIDING_CFLAG already contains \$flag"; } >&5 (: CARES_SYMBOL_HIDING_CFLAG already contains $flag) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } - ;; - *) - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : CARES_SYMBOL_HIDING_CFLAG=\"\$CARES_SYMBOL_HIDING_CFLAG \$flag\""; } >&5 - (: CARES_SYMBOL_HIDING_CFLAG="$CARES_SYMBOL_HIDING_CFLAG $flag") 2>&5 + test $ac_status = 0; } ;; #( + *) : + + as_fn_append CARES_SYMBOL_HIDING_CFLAG " $flag" + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : CARES_SYMBOL_HIDING_CFLAG=\"\$CARES_SYMBOL_HIDING_CFLAG\""; } >&5 + (: CARES_SYMBOL_HIDING_CFLAG="$CARES_SYMBOL_HIDING_CFLAG") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } - CARES_SYMBOL_HIDING_CFLAG="$CARES_SYMBOL_HIDING_CFLAG $flag" - ;; - esac + ;; +esac + else $as_nop - CARES_SYMBOL_HIDING_CFLAG="$flag" + + CARES_SYMBOL_HIDING_CFLAG=$flag + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : CARES_SYMBOL_HIDING_CFLAG=\"\$CARES_SYMBOL_HIDING_CFLAG\""; } >&5 + (: CARES_SYMBOL_HIDING_CFLAG="$CARES_SYMBOL_HIDING_CFLAG") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + fi else $as_nop @@ -20022,17 +20074,22 @@ done sun) + + for flag in -xldscope=hidden; do as_CACHEVAR=`printf "%s\n" "ax_cv_check_cflags__$flag" | $as_tr_sh` -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $flag" >&5 -printf %s "checking whether C compiler accepts $flag... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler accepts $flag" >&5 +printf %s "checking whether the C compiler accepts $flag... " >&6; } if eval test \${$as_CACHEVAR+y} then : printf %s "(cached) " >&6 else $as_nop ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS $flag" + if test x"$GCC" = xyes ; then + add_gnu_werror="-Werror" + fi + CFLAGS="$CFLAGS $flag $add_gnu_werror" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -20056,29 +20113,39 @@ fi eval ac_res=\$$as_CACHEVAR { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 printf "%s\n" "$ac_res" >&6; } -if test x"`eval 'as_val=${'$as_CACHEVAR'};printf "%s\n" "$as_val"'`" = xyes +if eval test \"x\$"$as_CACHEVAR"\" = x"yes" then : - if test ${CARES_SYMBOL_HIDING_CFLAG+y} + +if test ${CARES_SYMBOL_HIDING_CFLAG+y} then : - case " $CARES_SYMBOL_HIDING_CFLAG " in - *" $flag "*) - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : CARES_SYMBOL_HIDING_CFLAG already contains \$flag"; } >&5 + + case " $CARES_SYMBOL_HIDING_CFLAG " in #( + *" $flag "*) : + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : CARES_SYMBOL_HIDING_CFLAG already contains \$flag"; } >&5 (: CARES_SYMBOL_HIDING_CFLAG already contains $flag) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } - ;; - *) - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : CARES_SYMBOL_HIDING_CFLAG=\"\$CARES_SYMBOL_HIDING_CFLAG \$flag\""; } >&5 - (: CARES_SYMBOL_HIDING_CFLAG="$CARES_SYMBOL_HIDING_CFLAG $flag") 2>&5 + test $ac_status = 0; } ;; #( + *) : + + as_fn_append CARES_SYMBOL_HIDING_CFLAG " $flag" + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : CARES_SYMBOL_HIDING_CFLAG=\"\$CARES_SYMBOL_HIDING_CFLAG\""; } >&5 + (: CARES_SYMBOL_HIDING_CFLAG="$CARES_SYMBOL_HIDING_CFLAG") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } - CARES_SYMBOL_HIDING_CFLAG="$CARES_SYMBOL_HIDING_CFLAG $flag" - ;; - esac + ;; +esac + else $as_nop - CARES_SYMBOL_HIDING_CFLAG="$flag" + + CARES_SYMBOL_HIDING_CFLAG=$flag + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : CARES_SYMBOL_HIDING_CFLAG=\"\$CARES_SYMBOL_HIDING_CFLAG\""; } >&5 + (: CARES_SYMBOL_HIDING_CFLAG="$CARES_SYMBOL_HIDING_CFLAG") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + fi else $as_nop @@ -20120,17 +20187,22 @@ fi if test "$enable_warnings" = "yes"; then + + for flag in -Wall -Wextra -Waggregate-return -Wcast-align -Wcast-qual -Wconversion -Wdeclaration-after-statement -Wdouble-promotion -Wfloat-equal -Wformat-security -Winit-self -Wjump-misses-init -Wlogical-op -Wmissing-braces -Wmissing-declarations -Wmissing-format-attribute -Wmissing-include-dirs -Wmissing-prototypes -Wnested-externs -Wno-coverage-mismatch -Wold-style-definition -Wpacked -Wpedantic -Wpointer-arith -Wredundant-decls -Wshadow -Wsign-conversion -Wstrict-overflow -Wstrict-prototypes -Wtrampolines -Wundef -Wunreachable-code -Wunused -Wvariadic-macros -Wvla -Wwrite-strings -Werror=implicit-int -Werror=implicit-function-declaration -Werror=partial-availability -Wno-long-long ; do as_CACHEVAR=`printf "%s\n" "ax_cv_check_cflags_-Werror_$flag" | $as_tr_sh` -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $flag" >&5 -printf %s "checking whether C compiler accepts $flag... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler accepts $flag" >&5 +printf %s "checking whether the C compiler accepts $flag... " >&6; } if eval test \${$as_CACHEVAR+y} then : printf %s "(cached) " >&6 else $as_nop ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -Werror $flag" + if test x"$GCC" = xyes ; then + add_gnu_werror="-Werror" + fi + CFLAGS="$CFLAGS -Werror $flag $add_gnu_werror" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -20154,29 +20226,39 @@ fi eval ac_res=\$$as_CACHEVAR { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 printf "%s\n" "$ac_res" >&6; } -if test x"`eval 'as_val=${'$as_CACHEVAR'};printf "%s\n" "$as_val"'`" = xyes +if eval test \"x\$"$as_CACHEVAR"\" = x"yes" then : - if test ${AM_CFLAGS+y} + +if test ${AM_CFLAGS+y} then : - case " $AM_CFLAGS " in - *" $flag "*) - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CFLAGS already contains \$flag"; } >&5 + + case " $AM_CFLAGS " in #( + *" $flag "*) : + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CFLAGS already contains \$flag"; } >&5 (: AM_CFLAGS already contains $flag) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } - ;; - *) - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CFLAGS=\"\$AM_CFLAGS \$flag\""; } >&5 - (: AM_CFLAGS="$AM_CFLAGS $flag") 2>&5 + test $ac_status = 0; } ;; #( + *) : + + as_fn_append AM_CFLAGS " $flag" + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CFLAGS=\"\$AM_CFLAGS\""; } >&5 + (: AM_CFLAGS="$AM_CFLAGS") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } - AM_CFLAGS="$AM_CFLAGS $flag" - ;; - esac + ;; +esac + else $as_nop - AM_CFLAGS="$flag" + + AM_CFLAGS=$flag + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CFLAGS=\"\$AM_CFLAGS\""; } >&5 + (: AM_CFLAGS="$AM_CFLAGS") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + fi else $as_nop @@ -20185,22 +20267,28 @@ fi done +fi + +case $host_os in + *qnx*|*android*) + - case $host_os in - *android*) for flag in -std=c99; do as_CACHEVAR=`printf "%s\n" "ax_cv_check_cflags_-Werror_$flag" | $as_tr_sh` -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $flag" >&5 -printf %s "checking whether C compiler accepts $flag... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler accepts $flag" >&5 +printf %s "checking whether the C compiler accepts $flag... " >&6; } if eval test \${$as_CACHEVAR+y} then : printf %s "(cached) " >&6 else $as_nop ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -Werror $flag" + if test x"$GCC" = xyes ; then + add_gnu_werror="-Werror" + fi + CFLAGS="$CFLAGS -Werror $flag $add_gnu_werror" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -20224,29 +20312,39 @@ fi eval ac_res=\$$as_CACHEVAR { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 printf "%s\n" "$ac_res" >&6; } -if test x"`eval 'as_val=${'$as_CACHEVAR'};printf "%s\n" "$as_val"'`" = xyes +if eval test \"x\$"$as_CACHEVAR"\" = x"yes" then : - if test ${AM_CFLAGS+y} + +if test ${AM_CFLAGS+y} then : - case " $AM_CFLAGS " in - *" $flag "*) - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CFLAGS already contains \$flag"; } >&5 + + case " $AM_CFLAGS " in #( + *" $flag "*) : + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CFLAGS already contains \$flag"; } >&5 (: AM_CFLAGS already contains $flag) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } - ;; - *) - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CFLAGS=\"\$AM_CFLAGS \$flag\""; } >&5 - (: AM_CFLAGS="$AM_CFLAGS $flag") 2>&5 + test $ac_status = 0; } ;; #( + *) : + + as_fn_append AM_CFLAGS " $flag" + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CFLAGS=\"\$AM_CFLAGS\""; } >&5 + (: AM_CFLAGS="$AM_CFLAGS") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } - AM_CFLAGS="$AM_CFLAGS $flag" - ;; - esac + ;; +esac + else $as_nop - AM_CFLAGS="$flag" + + AM_CFLAGS=$flag + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CFLAGS=\"\$AM_CFLAGS\""; } >&5 + (: AM_CFLAGS="$AM_CFLAGS") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + fi else $as_nop @@ -20255,21 +20353,26 @@ fi done - ;; - *) + ;; + *) + + for flag in -std=c90; do as_CACHEVAR=`printf "%s\n" "ax_cv_check_cflags_-Werror_$flag" | $as_tr_sh` -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $flag" >&5 -printf %s "checking whether C compiler accepts $flag... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler accepts $flag" >&5 +printf %s "checking whether the C compiler accepts $flag... " >&6; } if eval test \${$as_CACHEVAR+y} then : printf %s "(cached) " >&6 else $as_nop ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -Werror $flag" + if test x"$GCC" = xyes ; then + add_gnu_werror="-Werror" + fi + CFLAGS="$CFLAGS -Werror $flag $add_gnu_werror" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -20293,29 +20396,39 @@ fi eval ac_res=\$$as_CACHEVAR { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 printf "%s\n" "$ac_res" >&6; } -if test x"`eval 'as_val=${'$as_CACHEVAR'};printf "%s\n" "$as_val"'`" = xyes +if eval test \"x\$"$as_CACHEVAR"\" = x"yes" then : - if test ${AM_CFLAGS+y} + +if test ${AM_CFLAGS+y} then : - case " $AM_CFLAGS " in - *" $flag "*) - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CFLAGS already contains \$flag"; } >&5 + + case " $AM_CFLAGS " in #( + *" $flag "*) : + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CFLAGS already contains \$flag"; } >&5 (: AM_CFLAGS already contains $flag) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } - ;; - *) - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CFLAGS=\"\$AM_CFLAGS \$flag\""; } >&5 - (: AM_CFLAGS="$AM_CFLAGS $flag") 2>&5 + test $ac_status = 0; } ;; #( + *) : + + as_fn_append AM_CFLAGS " $flag" + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CFLAGS=\"\$AM_CFLAGS\""; } >&5 + (: AM_CFLAGS="$AM_CFLAGS") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } - AM_CFLAGS="$AM_CFLAGS $flag" - ;; - esac + ;; +esac + else $as_nop - AM_CFLAGS="$flag" + + AM_CFLAGS=$flag + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CFLAGS=\"\$AM_CFLAGS\""; } >&5 + (: AM_CFLAGS="$AM_CFLAGS") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + fi else $as_nop @@ -20324,24 +20437,115 @@ fi done - ;; - esac + ;; +esac + +case $host_os in + *qnx*) + + + + +for flag in -D_QNX_SOURCE; do + as_CACHEVAR=`printf "%s\n" "ax_cv_check_cflags_-Werror_$flag" | $as_tr_sh` +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler accepts $flag" >&5 +printf %s "checking whether the C compiler accepts $flag... " >&6; } +if eval test \${$as_CACHEVAR+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + if test x"$GCC" = xyes ; then + add_gnu_werror="-Werror" + fi + CFLAGS="$CFLAGS -Werror $flag $add_gnu_werror" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + eval "$as_CACHEVAR=yes" +else $as_nop + eval "$as_CACHEVAR=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +eval ac_res=\$$as_CACHEVAR + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } +if eval test \"x\$"$as_CACHEVAR"\" = x"yes" +then : + +if test ${AM_CPPFLAGS+y} +then : + + case " $AM_CPPFLAGS " in #( + *" $flag "*) : + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CPPFLAGS already contains \$flag"; } >&5 + (: AM_CPPFLAGS already contains $flag) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } ;; #( + *) : + + as_fn_append AM_CPPFLAGS " $flag" + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CPPFLAGS=\"\$AM_CPPFLAGS\""; } >&5 + (: AM_CPPFLAGS="$AM_CPPFLAGS") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + ;; +esac + +else $as_nop + + AM_CPPFLAGS=$flag + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CPPFLAGS=\"\$AM_CPPFLAGS\""; } >&5 + (: AM_CPPFLAGS="$AM_CPPFLAGS") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + +fi + +else $as_nop + : fi +done + + ;; +esac + if test "$ax_cv_c_compiler_vendor" = "intel"; then + + for flag in -shared-intel; do as_CACHEVAR=`printf "%s\n" "ax_cv_check_cflags__$flag" | $as_tr_sh` -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $flag" >&5 -printf %s "checking whether C compiler accepts $flag... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler accepts $flag" >&5 +printf %s "checking whether the C compiler accepts $flag... " >&6; } if eval test \${$as_CACHEVAR+y} then : printf %s "(cached) " >&6 else $as_nop ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS $flag" + if test x"$GCC" = xyes ; then + add_gnu_werror="-Werror" + fi + CFLAGS="$CFLAGS $flag $add_gnu_werror" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -20365,29 +20569,39 @@ fi eval ac_res=\$$as_CACHEVAR { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 printf "%s\n" "$ac_res" >&6; } -if test x"`eval 'as_val=${'$as_CACHEVAR'};printf "%s\n" "$as_val"'`" = xyes +if eval test \"x\$"$as_CACHEVAR"\" = x"yes" then : - if test ${AM_CFLAGS+y} + +if test ${AM_CFLAGS+y} then : - case " $AM_CFLAGS " in - *" $flag "*) - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CFLAGS already contains \$flag"; } >&5 + + case " $AM_CFLAGS " in #( + *" $flag "*) : + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CFLAGS already contains \$flag"; } >&5 (: AM_CFLAGS already contains $flag) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } - ;; - *) - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CFLAGS=\"\$AM_CFLAGS \$flag\""; } >&5 - (: AM_CFLAGS="$AM_CFLAGS $flag") 2>&5 + test $ac_status = 0; } ;; #( + *) : + + as_fn_append AM_CFLAGS " $flag" + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CFLAGS=\"\$AM_CFLAGS\""; } >&5 + (: AM_CFLAGS="$AM_CFLAGS") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } - AM_CFLAGS="$AM_CFLAGS $flag" - ;; - esac + ;; +esac + else $as_nop - AM_CFLAGS="$flag" + + AM_CFLAGS=$flag + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : AM_CFLAGS=\"\$AM_CFLAGS\""; } >&5 + (: AM_CFLAGS="$AM_CFLAGS") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + fi else $as_nop @@ -20708,27 +20922,37 @@ eval ac_res=\$$as_CACHEVAR printf "%s\n" "$ac_res" >&6; } if eval test \"x\$"$as_CACHEVAR"\" = x"yes" then : - if test ${XNET_LIBS+y} + +if test ${XNET_LIBS+y} then : - case " $XNET_LIBS " in - *" $flag "*) - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : XNET_LIBS already contains \$flag"; } >&5 + + case " $XNET_LIBS " in #( + *" $flag "*) : + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : XNET_LIBS already contains \$flag"; } >&5 (: XNET_LIBS already contains $flag) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } - ;; - *) - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : XNET_LIBS=\"\$XNET_LIBS \$flag\""; } >&5 - (: XNET_LIBS="$XNET_LIBS $flag") 2>&5 + test $ac_status = 0; } ;; #( + *) : + + as_fn_append XNET_LIBS " $flag" + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : XNET_LIBS=\"\$XNET_LIBS\""; } >&5 + (: XNET_LIBS="$XNET_LIBS") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } - XNET_LIBS="$XNET_LIBS $flag" - ;; - esac + ;; +esac + else $as_nop - XNET_LIBS="$flag" + + XNET_LIBS=$flag + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: : XNET_LIBS=\"\$XNET_LIBS\""; } >&5 + (: XNET_LIBS="$XNET_LIBS") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + fi else $as_nop @@ -22131,6 +22355,14 @@ fi +ac_fn_check_decl "$LINENO" "strnlen" "ac_cv_have_decl_strnlen" "$cares_all_includes +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_strnlen" = xyes +then : + +printf "%s\n" "#define HAVE_STRNLEN 1" >>confdefs.h + +fi ac_fn_check_decl "$LINENO" "memmem" "ac_cv_have_decl_memmem" "$cares_all_includes " "$ac_c_undeclared_builtin_options" "CFLAGS" if test "x$ac_cv_have_decl_memmem" = xyes @@ -23708,6 +23940,15 @@ printf "%s\n" "$as_me: WARNING: cannot build tests when cross compiling" >&2;} as_fn_error $? "*** Tests not supported when cross compiling" "$LINENO" 5 fi fi + +# Check whether --enable-tests-crossbuild was given. +if test ${enable_tests_crossbuild+y} +then : + enableval=$enable_tests_crossbuild; build_tests="$enableval" + +fi + + if test "x$build_tests" != "xno" ; then @@ -23993,7 +24234,7 @@ fi if test "x$have_gmock_v112" = "xyes" ; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether user namespaces are supported" >&5 printf %s "checking whether user namespaces are supported... " >&6; } -if test ${ax_cv_user_namespace+y} +if test ${ares_cv_user_namespace+y} then : printf %s "(cached) " >&6 else $as_nop @@ -24006,7 +24247,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu if test "$cross_compiling" = yes then : - ax_cv_user_namespace=no + ares_cv_user_namespace=no else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -24046,9 +24287,9 @@ int main() { _ACEOF if ac_fn_c_try_run "$LINENO" then : - ax_cv_user_namespace=yes + ares_cv_user_namespace=yes else $as_nop - ax_cv_user_namespace=no + ares_cv_user_namespace=no fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext @@ -24062,9 +24303,9 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_user_namespace" >&5 -printf "%s\n" "$ax_cv_user_namespace" >&6; } - if test "$ax_cv_user_namespace" = yes; then +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ares_cv_user_namespace" >&5 +printf "%s\n" "$ares_cv_user_namespace" >&6; } + if test "$ares_cv_user_namespace" = yes; then printf "%s\n" "#define HAVE_USER_NAMESPACE 1" >>confdefs.h @@ -24072,7 +24313,7 @@ printf "%s\n" "#define HAVE_USER_NAMESPACE 1" >>confdefs.h { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether UTS namespaces are supported" >&5 printf %s "checking whether UTS namespaces are supported... " >&6; } -if test ${ax_cv_uts_namespace+y} +if test ${ares_cv_uts_namespace+y} then : printf %s "(cached) " >&6 else $as_nop @@ -24085,7 +24326,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu if test "$cross_compiling" = yes then : - ax_cv_uts_namespace=no + ares_cv_uts_namespace=no else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -24145,9 +24386,9 @@ int main() { _ACEOF if ac_fn_c_try_run "$LINENO" then : - ax_cv_uts_namespace=yes + ares_cv_uts_namespace=yes else $as_nop - ax_cv_uts_namespace=no + ares_cv_uts_namespace=no fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext @@ -24161,9 +24402,9 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_uts_namespace" >&5 -printf "%s\n" "$ax_cv_uts_namespace" >&6; } - if test "$ax_cv_uts_namespace" = yes; then +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ares_cv_uts_namespace" >&5 +printf "%s\n" "$ares_cv_uts_namespace" >&6; } + if test "$ares_cv_uts_namespace" = yes; then printf "%s\n" "#define HAVE_UTS_NAMESPACE 1" >>confdefs.h @@ -24218,7 +24459,17 @@ else $as_nop // MSVC always sets __cplusplus to 199711L in older versions; newer versions // only set it correctly if /Zc:__cplusplus is specified as well as a // /std:c++NN switch: +// // https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/ +// +// The value __cplusplus ought to have is available in _MSVC_LANG since +// Visual Studio 2015 Update 3: +// +// https://learn.microsoft.com/en-us/cpp/preprocessor/predefined-macros +// +// This was also the first MSVC version to support C++14 so we can't use the +// value of either __cplusplus or _MSVC_LANG to quickly rule out MSVC having +// C++11 or C++14 support, but we can check _MSVC_LANG for C++17 and later. #elif __cplusplus < 201103L && !defined _MSC_VER #error "This is not a C++11 compiler" @@ -26007,7 +26258,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by c-ares $as_me 1.34.3, which was +This file was extended by c-ares $as_me 1.34.4, which was generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -26075,7 +26326,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -c-ares config.status 1.34.3 +c-ares config.status 1.34.4 configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" diff --git a/deps/cares/configure.ac b/deps/cares/configure.ac index 5f848c28598a95..9dacf1fb2e4a40 100644 --- a/deps/cares/configure.ac +++ b/deps/cares/configure.ac @@ -2,10 +2,10 @@ dnl Copyright (C) The c-ares project and its contributors dnl SPDX-License-Identifier: MIT AC_PREREQ([2.69]) -AC_INIT([c-ares], [1.34.3], +AC_INIT([c-ares], [1.34.4], [c-ares mailing list: http://lists.haxx.se/listinfo/c-ares]) -CARES_VERSION_INFO="21:2:19" +CARES_VERSION_INFO="21:3:19" dnl This flag accepts an argument of the form current[:revision[:age]]. So, dnl passing -version-info 3:12:1 sets current to 3, revision to 12, and age to dnl 1. @@ -245,18 +245,25 @@ AC_SUBST(CARES_SYMBOL_HIDING_CFLAG) if test "$enable_warnings" = "yes"; then AX_APPEND_COMPILE_FLAGS([-Wall -Wextra -Waggregate-return -Wcast-align -Wcast-qual -Wconversion -Wdeclaration-after-statement -Wdouble-promotion -Wfloat-equal -Wformat-security -Winit-self -Wjump-misses-init -Wlogical-op -Wmissing-braces -Wmissing-declarations -Wmissing-format-attribute -Wmissing-include-dirs -Wmissing-prototypes -Wnested-externs -Wno-coverage-mismatch -Wold-style-definition -Wpacked -Wpedantic -Wpointer-arith -Wredundant-decls -Wshadow -Wsign-conversion -Wstrict-overflow -Wstrict-prototypes -Wtrampolines -Wundef -Wunreachable-code -Wunused -Wvariadic-macros -Wvla -Wwrite-strings -Werror=implicit-int -Werror=implicit-function-declaration -Werror=partial-availability -Wno-long-long ], [AM_CFLAGS], [-Werror]) - - dnl Android requires c99, all others should use c90 - case $host_os in - *android*) - AX_APPEND_COMPILE_FLAGS([-std=c99], [AM_CFLAGS], [-Werror]) - ;; - *) - AX_APPEND_COMPILE_FLAGS([-std=c90], [AM_CFLAGS], [-Werror]) - ;; - esac fi +dnl Android and QNX require c99, all others should use c90 +case $host_os in + *qnx*|*android*) + AX_APPEND_COMPILE_FLAGS([-std=c99], [AM_CFLAGS], [-Werror]) + ;; + *) + AX_APPEND_COMPILE_FLAGS([-std=c90], [AM_CFLAGS], [-Werror]) + ;; +esac + +dnl QNX needs -D_QNX_SOURCE +case $host_os in + *qnx*) + AX_APPEND_COMPILE_FLAGS([-D_QNX_SOURCE], [AM_CPPFLAGS], [-Werror]) + ;; +esac + if test "$ax_cv_c_compiler_vendor" = "intel"; then AX_APPEND_COMPILE_FLAGS([-shared-intel], [AM_CFLAGS]) fi @@ -543,6 +550,7 @@ dnl https://mailman.videolan.org/pipermail/vlc-devel/2015-March/101802.html dnl which would require we check each individually and provide function arguments dnl for the test. +AC_CHECK_DECL(strnlen, [AC_DEFINE([HAVE_STRNLEN], 1, [Define to 1 if you have `strnlen`] )], [], $cares_all_includes) AC_CHECK_DECL(memmem, [AC_DEFINE([HAVE_MEMMEM], 1, [Define to 1 if you have `memmem`] )], [], $cares_all_includes) AC_CHECK_DECL(recv, [AC_DEFINE([HAVE_RECV], 1, [Define to 1 if you have `recv`] )], [], $cares_all_includes) AC_CHECK_DECL(recvfrom, [AC_DEFINE([HAVE_RECVFROM], 1, [Define to 1 if you have `recvfrom`] )], [], $cares_all_includes) @@ -813,6 +821,13 @@ if test "x$build_tests" != "xno" -a "x$cross_compiling" = "xyes" ; then AC_MSG_ERROR([*** Tests not supported when cross compiling]) fi fi + +dnl Forces compiling of tests even when cross-compiling. +AC_ARG_ENABLE(tests-crossbuild, + AS_HELP_STRING([--enable-tests-crossbuild], [Enable test building even when cross building]), + [build_tests="$enableval"] +) + if test "x$build_tests" != "xno" ; then PKG_CHECK_MODULES([GMOCK], [gmock], [ have_gmock=yes ], [ have_gmock=no ]) if test "x$have_gmock" = "xno" ; then @@ -825,8 +840,8 @@ if test "x$build_tests" != "xno" ; then else PKG_CHECK_MODULES([GMOCK112], [gmock >= 1.12.0], [ have_gmock_v112=yes ], [ have_gmock_v112=no ]) if test "x$have_gmock_v112" = "xyes" ; then - AX_CHECK_USER_NAMESPACE - AX_CHECK_UTS_NAMESPACE + ARES_CHECK_USER_NAMESPACE + ARES_CHECK_UTS_NAMESPACE fi fi fi diff --git a/deps/cares/docs/Makefile.in b/deps/cares/docs/Makefile.in index 6b7bb8e30d1a20..0d1873c9662c92 100644 --- a/deps/cares/docs/Makefile.in +++ b/deps/cares/docs/Makefile.in @@ -92,7 +92,9 @@ build_triplet = @build@ host_triplet = @host@ subdir = docs ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/m4/ax_ac_append_to_file.m4 \ +am__aclocal_m4_deps = $(top_srcdir)/m4/ares_check_user_namespace.m4 \ + $(top_srcdir)/m4/ares_check_uts_namespace.m4 \ + $(top_srcdir)/m4/ax_ac_append_to_file.m4 \ $(top_srcdir)/m4/ax_ac_print_to_file.m4 \ $(top_srcdir)/m4/ax_add_am_macro_static.m4 \ $(top_srcdir)/m4/ax_am_macros_static.m4 \ @@ -102,8 +104,6 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/ax_ac_append_to_file.m4 \ $(top_srcdir)/m4/ax_check_compile_flag.m4 \ $(top_srcdir)/m4/ax_check_gnu_make.m4 \ $(top_srcdir)/m4/ax_check_link_flag.m4 \ - $(top_srcdir)/m4/ax_check_user_namespace.m4 \ - $(top_srcdir)/m4/ax_check_uts_namespace.m4 \ $(top_srcdir)/m4/ax_code_coverage.m4 \ $(top_srcdir)/m4/ax_compiler_vendor.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ diff --git a/deps/cares/docs/ares_create_query.3 b/deps/cares/docs/ares_create_query.3 index a54eec3e2a6bd1..3af6ba4cc3dc5b 100644 --- a/deps/cares/docs/ares_create_query.3 +++ b/deps/cares/docs/ares_create_query.3 @@ -19,6 +19,9 @@ int ares_create_query(const char *\fIname\fP, int \fImax_udp_size\fP) .fi .SH DESCRIPTION +This function is deprecated as of c-ares 1.22, please use +\fIares_dns_record_create(3)\fP instead. + The \fIares_create_query(3)\fP function composes a DNS query with a single question. The parameter \fIname\fP gives the query name as a NUL-terminated C string of period-separated labels optionally ending with a period; periods and diff --git a/deps/cares/docs/ares_mkquery.3 b/deps/cares/docs/ares_mkquery.3 index 0e7b5edbb89353..2f42d169210fef 100644 --- a/deps/cares/docs/ares_mkquery.3 +++ b/deps/cares/docs/ares_mkquery.3 @@ -14,7 +14,8 @@ int ares_mkquery(const char *\fIname\fP, int \fIdnsclass\fP, int \fItype\fP, int *\fIbuflen\fP) .fi .SH DESCRIPTION -Deprecated function. See \fIares_create_query(3)\fP instead! +This function is deprecated as of c-ares 1.10, please use +\fIares_dns_record_create(3)\fP instead. The .B ares_mkquery diff --git a/deps/cares/docs/ares_send.3 b/deps/cares/docs/ares_send.3 index f6ea9140e2510c..df3e3bbe4136b0 100644 --- a/deps/cares/docs/ares_send.3 +++ b/deps/cares/docs/ares_send.3 @@ -113,6 +113,9 @@ is being destroyed; the query will not be completed. .B ARES_ENOSERVER The query will not be completed because no DNS servers were configured on the channel. +.TP 19 +.B ARES_EBADQUERY +Misformatted DNS query. .PP The callback argument diff --git a/deps/cares/include/Makefile.in b/deps/cares/include/Makefile.in index 0beee44a22bb22..7dc40eb08fab9c 100644 --- a/deps/cares/include/Makefile.in +++ b/deps/cares/include/Makefile.in @@ -90,7 +90,9 @@ build_triplet = @build@ host_triplet = @host@ subdir = include ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/m4/ax_ac_append_to_file.m4 \ +am__aclocal_m4_deps = $(top_srcdir)/m4/ares_check_user_namespace.m4 \ + $(top_srcdir)/m4/ares_check_uts_namespace.m4 \ + $(top_srcdir)/m4/ax_ac_append_to_file.m4 \ $(top_srcdir)/m4/ax_ac_print_to_file.m4 \ $(top_srcdir)/m4/ax_add_am_macro_static.m4 \ $(top_srcdir)/m4/ax_am_macros_static.m4 \ @@ -100,8 +102,6 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/ax_ac_append_to_file.m4 \ $(top_srcdir)/m4/ax_check_compile_flag.m4 \ $(top_srcdir)/m4/ax_check_gnu_make.m4 \ $(top_srcdir)/m4/ax_check_link_flag.m4 \ - $(top_srcdir)/m4/ax_check_user_namespace.m4 \ - $(top_srcdir)/m4/ax_check_uts_namespace.m4 \ $(top_srcdir)/m4/ax_code_coverage.m4 \ $(top_srcdir)/m4/ax_compiler_vendor.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ diff --git a/deps/cares/include/ares.h b/deps/cares/include/ares.h index 139c6d66ee90df..7fe3ec78f4e651 100644 --- a/deps/cares/include/ares.h +++ b/deps/cares/include/ares.h @@ -74,7 +74,7 @@ #if defined(_AIX) || defined(__NOVELL_LIBC__) || defined(__NetBSD__) || \ defined(__minix) || defined(__SYMBIAN32__) || defined(__INTEGRITY) || \ defined(ANDROID) || defined(__ANDROID__) || defined(__OpenBSD__) || \ - defined(__QNXNTO__) || defined(__MVS__) || defined(__HAIKU__) + defined(__QNX__) || defined(__MVS__) || defined(__HAIKU__) # include #endif diff --git a/deps/cares/include/ares_version.h b/deps/cares/include/ares_version.h index 9cb8084dd56bc9..782046bd79d844 100644 --- a/deps/cares/include/ares_version.h +++ b/deps/cares/include/ares_version.h @@ -32,8 +32,8 @@ #define ARES_VERSION_MAJOR 1 #define ARES_VERSION_MINOR 34 -#define ARES_VERSION_PATCH 3 -#define ARES_VERSION_STR "1.34.3" +#define ARES_VERSION_PATCH 4 +#define ARES_VERSION_STR "1.34.4" /* NOTE: We cannot make the version string a C preprocessor stringify operation * due to assumptions made by integrators that aren't properly using diff --git a/deps/cares/m4/ax_check_user_namespace.m4 b/deps/cares/m4/ares_check_user_namespace.m4 similarity index 82% rename from deps/cares/m4/ax_check_user_namespace.m4 rename to deps/cares/m4/ares_check_user_namespace.m4 index aca721626f2e89..a26b384fda5c54 100644 --- a/deps/cares/m4/ax_check_user_namespace.m4 +++ b/deps/cares/m4/ares_check_user_namespace.m4 @@ -2,7 +2,7 @@ # SYNOPSIS # -# AX_CHECK_USER_NAMESPACE +# ARES_CHECK_USER_NAMESPACE # # DESCRIPTION # @@ -12,9 +12,9 @@ # Copyright (C) The c-ares team # SPDX-License-Identifier: MIT -AC_DEFUN([AX_CHECK_USER_NAMESPACE],[dnl +AC_DEFUN([ARES_CHECK_USER_NAMESPACE],[dnl AC_CACHE_CHECK([whether user namespaces are supported], - ax_cv_user_namespace,[ + ares_cv_user_namespace,[ AC_LANG_PUSH([C]) AC_RUN_IFELSE([AC_LANG_SOURCE([[ #define _GNU_SOURCE @@ -48,10 +48,10 @@ int main() { if (!WIFEXITED(status)) return 1; return WEXITSTATUS(status); } - ]])],[ax_cv_user_namespace=yes],[ax_cv_user_namespace=no],[ax_cv_user_namespace=no]) + ]])],[ares_cv_user_namespace=yes],[ares_cv_user_namespace=no],[ares_cv_user_namespace=no]) AC_LANG_POP([C]) ]) - if test "$ax_cv_user_namespace" = yes; then + if test "$ares_cv_user_namespace" = yes; then AC_DEFINE([HAVE_USER_NAMESPACE],[1],[Whether user namespaces are available]) fi -]) # AX_CHECK_USER_NAMESPACE +]) # ARES_CHECK_USER_NAMESPACE diff --git a/deps/cares/m4/ax_check_uts_namespace.m4 b/deps/cares/m4/ares_check_uts_namespace.m4 similarity index 87% rename from deps/cares/m4/ax_check_uts_namespace.m4 rename to deps/cares/m4/ares_check_uts_namespace.m4 index 5708acf1b9f376..0aeefe4a9b7b8b 100644 --- a/deps/cares/m4/ax_check_uts_namespace.m4 +++ b/deps/cares/m4/ares_check_uts_namespace.m4 @@ -2,7 +2,7 @@ # SYNOPSIS # -# AX_CHECK_UTS_NAMESPACE +# ARES_CHECK_UTS_NAMESPACE # # DESCRIPTION # @@ -14,9 +14,9 @@ # Copyright (C) The c-ares team # SPDX-License-Identifier: MIT -AC_DEFUN([AX_CHECK_UTS_NAMESPACE],[dnl +AC_DEFUN([ARES_CHECK_UTS_NAMESPACE],[dnl AC_CACHE_CHECK([whether UTS namespaces are supported], - ax_cv_uts_namespace,[ + ares_cv_uts_namespace,[ AC_LANG_PUSH([C]) AC_RUN_IFELSE([AC_LANG_SOURCE([[ #define _GNU_SOURCE @@ -70,10 +70,10 @@ int main() { return WEXITSTATUS(status); } ]]) - ],[ax_cv_uts_namespace=yes],[ax_cv_uts_namespace=no],[ax_cv_uts_namespace=no]) + ],[ares_cv_uts_namespace=yes],[ares_cv_uts_namespace=no],[ares_cv_uts_namespace=no]) AC_LANG_POP([C]) ]) - if test "$ax_cv_uts_namespace" = yes; then + if test "$ares_cv_uts_namespace" = yes; then AC_DEFINE([HAVE_UTS_NAMESPACE],[1],[Whether UTS namespaces are available]) fi -]) # AX_CHECK_UTS_NAMESPACE +]) # ARES_CHECK_UTS_NAMESPACE diff --git a/deps/cares/m4/ax_append_compile_flags.m4 b/deps/cares/m4/ax_append_compile_flags.m4 index 1f8e70845c20d9..9c856356c0cda6 100644 --- a/deps/cares/m4/ax_append_compile_flags.m4 +++ b/deps/cares/m4/ax_append_compile_flags.m4 @@ -1,10 +1,10 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_append_compile_flags.html -# =========================================================================== +# ============================================================================ +# https://www.gnu.org/software/autoconf-archive/ax_append_compile_flags.html +# ============================================================================ # # SYNOPSIS # -# AX_APPEND_COMPILE_FLAGS([FLAG1 FLAG2 ...], [FLAGS-VARIABLE], [EXTRA-FLAGS]) +# AX_APPEND_COMPILE_FLAGS([FLAG1 FLAG2 ...], [FLAGS-VARIABLE], [EXTRA-FLAGS], [INPUT]) # # DESCRIPTION # @@ -20,6 +20,8 @@ # the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to # force the compiler to issue an error when a bad flag is given. # +# INPUT gives an alternative input source to AC_COMPILE_IFELSE. +# # NOTE: This macro depends on the AX_APPEND_FLAG and # AX_CHECK_COMPILE_FLAG. Please keep this macro in sync with # AX_APPEND_LINK_FLAGS. @@ -28,38 +30,17 @@ # # Copyright (c) 2011 Maarten Bosmans # -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see . -# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. -#serial 3 +#serial 7 AC_DEFUN([AX_APPEND_COMPILE_FLAGS], -[AC_REQUIRE([AX_CHECK_COMPILE_FLAG]) -AC_REQUIRE([AX_APPEND_FLAG]) +[AX_REQUIRE_DEFINED([AX_CHECK_COMPILE_FLAG]) +AX_REQUIRE_DEFINED([AX_APPEND_FLAG]) for flag in $1; do - AX_CHECK_COMPILE_FLAG([$flag], [AX_APPEND_FLAG([$flag], [$2])], [], [$3]) + AX_CHECK_COMPILE_FLAG([$flag], [AX_APPEND_FLAG([$flag], [$2])], [], [$3], [$4]) done ])dnl AX_APPEND_COMPILE_FLAGS diff --git a/deps/cares/m4/ax_append_flag.m4 b/deps/cares/m4/ax_append_flag.m4 index 1d38b76fb8e157..dd6d8b61406c32 100644 --- a/deps/cares/m4/ax_append_flag.m4 +++ b/deps/cares/m4/ax_append_flag.m4 @@ -1,5 +1,5 @@ # =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_append_flag.html +# https://www.gnu.org/software/autoconf-archive/ax_append_flag.html # =========================================================================== # # SYNOPSIS @@ -23,47 +23,28 @@ # Copyright (c) 2008 Guido U. Draheim # Copyright (c) 2011 Maarten Bosmans # -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see . -# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. -#serial 2 +#serial 8 AC_DEFUN([AX_APPEND_FLAG], -[AC_PREREQ(2.59)dnl for _AC_LANG_PREFIX -AS_VAR_PUSHDEF([FLAGS], [m4_default($2,_AC_LANG_PREFIX[FLAGS])])dnl -AS_VAR_SET_IF(FLAGS, - [case " AS_VAR_GET(FLAGS) " in - *" $1 "*) - AC_RUN_LOG([: FLAGS already contains $1]) - ;; - *) - AC_RUN_LOG([: FLAGS="$FLAGS $1"]) - AS_VAR_SET(FLAGS, ["AS_VAR_GET(FLAGS) $1"]) - ;; - esac], - [AS_VAR_SET(FLAGS,["$1"])]) +[dnl +AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_SET_IF +AS_VAR_PUSHDEF([FLAGS], [m4_default($2,_AC_LANG_PREFIX[FLAGS])]) +AS_VAR_SET_IF(FLAGS,[ + AS_CASE([" AS_VAR_GET(FLAGS) "], + [*" $1 "*], [AC_RUN_LOG([: FLAGS already contains $1])], + [ + AS_VAR_APPEND(FLAGS,[" $1"]) + AC_RUN_LOG([: FLAGS="$FLAGS"]) + ]) + ], + [ + AS_VAR_SET(FLAGS,[$1]) + AC_RUN_LOG([: FLAGS="$FLAGS"]) + ]) AS_VAR_POPDEF([FLAGS])dnl ])dnl AX_APPEND_FLAG diff --git a/deps/cares/m4/ax_check_compile_flag.m4 b/deps/cares/m4/ax_check_compile_flag.m4 index c3a8d695a1bcda..54191c55353ee5 100644 --- a/deps/cares/m4/ax_check_compile_flag.m4 +++ b/deps/cares/m4/ax_check_compile_flag.m4 @@ -1,10 +1,10 @@ # =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html +# https://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html # =========================================================================== # # SYNOPSIS # -# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS]) +# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT]) # # DESCRIPTION # @@ -19,6 +19,8 @@ # the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to # force the compiler to issue an error when a bad flag is given. # +# INPUT gives an alternative input source to AC_COMPILE_IFELSE. +# # NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this # macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG. # @@ -27,45 +29,34 @@ # Copyright (c) 2008 Guido U. Draheim # Copyright (c) 2011 Maarten Bosmans # -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see . -# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. -#serial 2 +#serial 11 AC_DEFUN([AX_CHECK_COMPILE_FLAG], -[AC_PREREQ(2.59)dnl for _AC_LANG_PREFIX +[AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl -AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [ +AC_CACHE_CHECK([whether the _AC_LANG compiler accepts $1], CACHEVAR, [ ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS - _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1" - AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], + if test x"m4_case(_AC_LANG, + [C], [$GCC], + [C++], [$GXX], + [Fortran], [$GFC], + [Fortran 77], [$G77], + [Objective C], [$GOBJC], + [Objective C++], [$GOBJCXX], + [no])" = xyes ; then + add_gnu_werror="-Werror" + fi + _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1 $add_gnu_werror" + AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])], [AS_VAR_SET(CACHEVAR,[yes])], [AS_VAR_SET(CACHEVAR,[no])]) _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags]) -AS_IF([test x"AS_VAR_GET(CACHEVAR)" = xyes], +AS_VAR_IF(CACHEVAR,yes, [m4_default([$2], :)], [m4_default([$3], :)]) AS_VAR_POPDEF([CACHEVAR])dnl diff --git a/deps/cares/m4/ax_code_coverage.m4 b/deps/cares/m4/ax_code_coverage.m4 index ad4063305ebcdd..216708a41f10c9 100644 --- a/deps/cares/m4/ax_code_coverage.m4 +++ b/deps/cares/m4/ax_code_coverage.m4 @@ -74,7 +74,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . -#serial 34 +#serial 37 m4_define(_AX_CODE_COVERAGE_RULES,[ AX_ADD_AM_MACRO_STATIC([ @@ -144,7 +144,7 @@ code_coverage_v_lcov_cap_ = \$(code_coverage_v_lcov_cap_\$(AM_DEFAULT_VERBOSITY) code_coverage_v_lcov_cap_0 = @echo \" LCOV --capture\" \$(CODE_COVERAGE_OUTPUT_FILE); code_coverage_v_lcov_ign = \$(code_coverage_v_lcov_ign_\$(V)) code_coverage_v_lcov_ign_ = \$(code_coverage_v_lcov_ign_\$(AM_DEFAULT_VERBOSITY)) -code_coverage_v_lcov_ign_0 = @echo \" LCOV --remove /tmp/*\" \$(CODE_COVERAGE_IGNORE_PATTERN); +code_coverage_v_lcov_ign_0 = @echo \" LCOV --remove\" \"\$(CODE_COVERAGE_OUTPUT_FILE).tmp\" \$(CODE_COVERAGE_IGNORE_PATTERN); code_coverage_v_genhtml = \$(code_coverage_v_genhtml_\$(V)) code_coverage_v_genhtml_ = \$(code_coverage_v_genhtml_\$(AM_DEFAULT_VERBOSITY)) code_coverage_v_genhtml_0 = @echo \" GEN \" \"\$(CODE_COVERAGE_OUTPUT_DIRECTORY)\"; @@ -163,7 +163,7 @@ check-code-coverage: # Capture code coverage data code-coverage-capture: code-coverage-capture-hook \$(code_coverage_v_lcov_cap)\$(LCOV) \$(code_coverage_quiet) \$(addprefix --directory ,\$(CODE_COVERAGE_DIRECTORY)) --capture --output-file \"\$(CODE_COVERAGE_OUTPUT_FILE).tmp\" --test-name \"\$(call code_coverage_sanitize,\$(PACKAGE_NAME)-\$(PACKAGE_VERSION))\" --no-checksum --compat-libtool \$(CODE_COVERAGE_LCOV_SHOPTS) \$(CODE_COVERAGE_LCOV_OPTIONS) - \$(code_coverage_v_lcov_ign)\$(LCOV) \$(code_coverage_quiet) \$(addprefix --directory ,\$(CODE_COVERAGE_DIRECTORY)) --remove \"\$(CODE_COVERAGE_OUTPUT_FILE).tmp\" \"/tmp/*\" \$(CODE_COVERAGE_IGNORE_PATTERN) --output-file \"\$(CODE_COVERAGE_OUTPUT_FILE)\" \$(CODE_COVERAGE_LCOV_SHOPTS) \$(CODE_COVERAGE_LCOV_RMOPTS) + \$(code_coverage_v_lcov_ign)\$(LCOV) \$(code_coverage_quiet) \$(addprefix --directory ,\$(CODE_COVERAGE_DIRECTORY)) --remove \"\$(CODE_COVERAGE_OUTPUT_FILE).tmp\" \$(CODE_COVERAGE_IGNORE_PATTERN) --output-file \"\$(CODE_COVERAGE_OUTPUT_FILE)\" \$(CODE_COVERAGE_LCOV_SHOPTS) \$(CODE_COVERAGE_LCOV_RMOPTS) -@rm -f \"\$(CODE_COVERAGE_OUTPUT_FILE).tmp\" \$(code_coverage_v_genhtml)LANG=C \$(GENHTML) \$(code_coverage_quiet) \$(addprefix --prefix ,\$(CODE_COVERAGE_DIRECTORY)) --output-directory \"\$(CODE_COVERAGE_OUTPUT_DIRECTORY)\" --title \"\$(PACKAGE_NAME)-\$(PACKAGE_VERSION) Code Coverage\" --legend --show-details \"\$(CODE_COVERAGE_OUTPUT_FILE)\" \$(CODE_COVERAGE_GENHTML_OPTIONS) @echo \"file://\$(abs_builddir)/\$(CODE_COVERAGE_OUTPUT_DIRECTORY)/index.html\" @@ -206,14 +206,14 @@ code-coverage-capture-hook: ]) AC_DEFUN([_AX_CODE_COVERAGE_ENABLED],[ - AX_CHECK_GNU_MAKE([],AC_MSG_ERROR([not using GNU make that is needed for coverage])) + AX_CHECK_GNU_MAKE([],[AC_MSG_ERROR([not using GNU make that is needed for coverage])]) AC_REQUIRE([AX_ADD_AM_MACRO_STATIC]) # check for gcov AC_CHECK_TOOL([GCOV], [$_AX_CODE_COVERAGE_GCOV_PROG_WITH], [:]) AS_IF([test "X$GCOV" = "X:"], - AC_MSG_ERROR([gcov is needed to do coverage])) + [AC_MSG_ERROR([gcov is needed to do coverage])]) AC_SUBST([GCOV]) dnl Check if gcc is being used @@ -232,12 +232,13 @@ AC_DEFUN([_AX_CODE_COVERAGE_ENABLED],[ AC_MSG_ERROR([Could not find genhtml from the lcov package]) ]) + AC_CHECK_LIB([gcov], [_gcov_init], [CODE_COVERAGE_LIBS="-lgcov"], [CODE_COVERAGE_LIBS=""]) + dnl Build the code coverage flags dnl Define CODE_COVERAGE_LDFLAGS for backwards compatibility CODE_COVERAGE_CPPFLAGS="-DNDEBUG" CODE_COVERAGE_CFLAGS="-O0 -g -fprofile-arcs -ftest-coverage" CODE_COVERAGE_CXXFLAGS="-O0 -g -fprofile-arcs -ftest-coverage" - CODE_COVERAGE_LIBS="-lgcov" AC_SUBST([CODE_COVERAGE_CPPFLAGS]) AC_SUBST([CODE_COVERAGE_CFLAGS]) diff --git a/deps/cares/m4/ax_cxx_compile_stdcxx.m4 b/deps/cares/m4/ax_cxx_compile_stdcxx.m4 index 8edf5152ec7a91..fe6ae17e6c4d32 100644 --- a/deps/cares/m4/ax_cxx_compile_stdcxx.m4 +++ b/deps/cares/m4/ax_cxx_compile_stdcxx.m4 @@ -10,8 +10,8 @@ # # Check for baseline language coverage in the compiler for the specified # version of the C++ standard. If necessary, add switches to CXX and -# CXXCPP to enable support. VERSION may be '11', '14', '17', or '20' for -# the respective C++ standard version. +# CXXCPP to enable support. VERSION may be '11', '14', '17', '20', or +# '23' for the respective C++ standard version. # # The second argument, if specified, indicates whether you insist on an # extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g. @@ -36,14 +36,15 @@ # Copyright (c) 2016, 2018 Krzesimir Nowak # Copyright (c) 2019 Enji Cooper # Copyright (c) 2020 Jason Merrill -# Copyright (c) 2021 Jörn Heusipp +# Copyright (c) 2021, 2024 Jörn Heusipp +# Copyright (c) 2015, 2022, 2023, 2024 Olly Betts # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty. -#serial 18 +#serial 25 dnl This macro is based on the code from the AX_CXX_COMPILE_STDCXX_11 macro dnl (serial version number 13). @@ -53,6 +54,7 @@ AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl [$1], [14], [ax_cxx_compile_alternatives="14 1y"], [$1], [17], [ax_cxx_compile_alternatives="17 1z"], [$1], [20], [ax_cxx_compile_alternatives="20"], + [$1], [23], [ax_cxx_compile_alternatives="23"], [m4_fatal([invalid first argument `$1' to AX_CXX_COMPILE_STDCXX])])dnl m4_if([$2], [], [], [$2], [ext], [], @@ -159,31 +161,41 @@ AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl dnl Test body for checking C++11 support m4_define([_AX_CXX_COMPILE_STDCXX_testbody_11], - _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 + [_AX_CXX_COMPILE_STDCXX_testbody_new_in_11] ) dnl Test body for checking C++14 support m4_define([_AX_CXX_COMPILE_STDCXX_testbody_14], - _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 - _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 + [_AX_CXX_COMPILE_STDCXX_testbody_new_in_11 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_14] ) dnl Test body for checking C++17 support m4_define([_AX_CXX_COMPILE_STDCXX_testbody_17], - _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 - _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 - _AX_CXX_COMPILE_STDCXX_testbody_new_in_17 + [_AX_CXX_COMPILE_STDCXX_testbody_new_in_11 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_17] ) dnl Test body for checking C++20 support m4_define([_AX_CXX_COMPILE_STDCXX_testbody_20], - _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 - _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 - _AX_CXX_COMPILE_STDCXX_testbody_new_in_17 - _AX_CXX_COMPILE_STDCXX_testbody_new_in_20 + [_AX_CXX_COMPILE_STDCXX_testbody_new_in_11 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_17 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_20] +) + +dnl Test body for checking C++23 support + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_23], + [_AX_CXX_COMPILE_STDCXX_testbody_new_in_11 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_17 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_20 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_23] ) @@ -201,7 +213,17 @@ m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_11], [[ // MSVC always sets __cplusplus to 199711L in older versions; newer versions // only set it correctly if /Zc:__cplusplus is specified as well as a // /std:c++NN switch: +// // https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/ +// +// The value __cplusplus ought to have is available in _MSVC_LANG since +// Visual Studio 2015 Update 3: +// +// https://learn.microsoft.com/en-us/cpp/preprocessor/predefined-macros +// +// This was also the first MSVC version to support C++14 so we can't use the +// value of either __cplusplus or _MSVC_LANG to quickly rule out MSVC having +// C++11 or C++14 support, but we can check _MSVC_LANG for C++17 and later. #elif __cplusplus < 201103L && !defined _MSC_VER #error "This is not a C++11 compiler" @@ -617,7 +639,7 @@ m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_17], [[ #error "This is not a C++ compiler" -#elif __cplusplus < 201703L && !defined _MSC_VER +#elif (defined _MSVC_LANG ? _MSVC_LANG : __cplusplus) < 201703L #error "This is not a C++17 compiler" @@ -983,7 +1005,7 @@ namespace cxx17 } // namespace cxx17 -#endif // __cplusplus < 201703L && !defined _MSC_VER +#endif // (defined _MSVC_LANG ? _MSVC_LANG : __cplusplus) < 201703L ]]) @@ -996,7 +1018,7 @@ m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_20], [[ #error "This is not a C++ compiler" -#elif __cplusplus < 202002L && !defined _MSC_VER +#elif (defined _MSVC_LANG ? _MSVC_LANG : __cplusplus) < 202002L #error "This is not a C++20 compiler" @@ -1013,6 +1035,36 @@ namespace cxx20 } // namespace cxx20 -#endif // __cplusplus < 202002L && !defined _MSC_VER +#endif // (defined _MSVC_LANG ? _MSVC_LANG : __cplusplus) < 202002L + +]]) + + +dnl Tests for new features in C++23 + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_23], [[ + +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +#elif (defined _MSVC_LANG ? _MSVC_LANG : __cplusplus) < 202302L + +#error "This is not a C++23 compiler" + +#else + +#include + +namespace cxx23 +{ + +// As C++23 supports feature test macros in the standard, there is no +// immediate need to actually test for feature availability on the +// Autoconf side. + +} // namespace cxx23 + +#endif // (defined _MSVC_LANG ? _MSVC_LANG : __cplusplus) < 202302L ]]) diff --git a/deps/cares/src/Makefile.in b/deps/cares/src/Makefile.in index 0c3c0864d4460a..1f286880247aa1 100644 --- a/deps/cares/src/Makefile.in +++ b/deps/cares/src/Makefile.in @@ -89,7 +89,9 @@ build_triplet = @build@ host_triplet = @host@ subdir = src ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/m4/ax_ac_append_to_file.m4 \ +am__aclocal_m4_deps = $(top_srcdir)/m4/ares_check_user_namespace.m4 \ + $(top_srcdir)/m4/ares_check_uts_namespace.m4 \ + $(top_srcdir)/m4/ax_ac_append_to_file.m4 \ $(top_srcdir)/m4/ax_ac_print_to_file.m4 \ $(top_srcdir)/m4/ax_add_am_macro_static.m4 \ $(top_srcdir)/m4/ax_am_macros_static.m4 \ @@ -99,8 +101,6 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/ax_ac_append_to_file.m4 \ $(top_srcdir)/m4/ax_check_compile_flag.m4 \ $(top_srcdir)/m4/ax_check_gnu_make.m4 \ $(top_srcdir)/m4/ax_check_link_flag.m4 \ - $(top_srcdir)/m4/ax_check_user_namespace.m4 \ - $(top_srcdir)/m4/ax_check_uts_namespace.m4 \ $(top_srcdir)/m4/ax_code_coverage.m4 \ $(top_srcdir)/m4/ax_compiler_vendor.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ diff --git a/deps/cares/src/lib/CMakeLists.txt b/deps/cares/src/lib/CMakeLists.txt index 9956fd625b2ad6..9d4e10924d0adb 100644 --- a/deps/cares/src/lib/CMakeLists.txt +++ b/deps/cares/src/lib/CMakeLists.txt @@ -92,11 +92,23 @@ IF (CARES_STATIC) SET_TARGET_PROPERTIES (${LIBNAME} PROPERTIES EXPORT_NAME cares${STATIC_SUFFIX} - OUTPUT_NAME cares${STATIC_SUFFIX} COMPILE_PDB_NAME cares${STATIC_SUFFIX} C_STANDARD 90 ) + # On Windows, the output name should have a static suffix since otherwise + # we would have conflicting output names (libcares.lib) for the link + # library. + # However on Unix-like systems, we typically have something like + # libcares.so for shared libraries and libcares.a for static + # libraries, so these don't conflict. + # This behavior better emulates what happens with autotools builds + IF (WIN32) + SET_TARGET_PROPERTIES(${LIBNAME} PROPERTIES OUTPUT_NAME cares${STATIC_SUFFIX}) + ELSE () + SET_TARGET_PROPERTIES(${LIBNAME} PROPERTIES OUTPUT_NAME cares) + ENDIF() + IF (ANDROID) SET_TARGET_PROPERTIES (${LIBNAME} PROPERTIES C_STANDARD 99) ENDIF () diff --git a/deps/cares/src/lib/Makefile.in b/deps/cares/src/lib/Makefile.in index 4aff043b26a310..a45fc10b544755 100644 --- a/deps/cares/src/lib/Makefile.in +++ b/deps/cares/src/lib/Makefile.in @@ -15,7 +15,7 @@ @SET_MAKE@ # aminclude_static.am generated automatically by Autoconf -# from AX_AM_MACROS_STATIC on Sat Nov 9 17:40:37 UTC 2024 +# from AX_AM_MACROS_STATIC on Sat Dec 14 15:15:44 UTC 2024 # Copyright (C) The c-ares project and its contributors # SPDX-License-Identifier: MIT @@ -100,7 +100,9 @@ host_triplet = @host@ subdir = src/lib SUBDIRS = ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/m4/ax_ac_append_to_file.m4 \ +am__aclocal_m4_deps = $(top_srcdir)/m4/ares_check_user_namespace.m4 \ + $(top_srcdir)/m4/ares_check_uts_namespace.m4 \ + $(top_srcdir)/m4/ax_ac_append_to_file.m4 \ $(top_srcdir)/m4/ax_ac_print_to_file.m4 \ $(top_srcdir)/m4/ax_add_am_macro_static.m4 \ $(top_srcdir)/m4/ax_am_macros_static.m4 \ @@ -110,8 +112,6 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/ax_ac_append_to_file.m4 \ $(top_srcdir)/m4/ax_check_compile_flag.m4 \ $(top_srcdir)/m4/ax_check_gnu_make.m4 \ $(top_srcdir)/m4/ax_check_link_flag.m4 \ - $(top_srcdir)/m4/ax_check_user_namespace.m4 \ - $(top_srcdir)/m4/ax_check_uts_namespace.m4 \ $(top_srcdir)/m4/ax_code_coverage.m4 \ $(top_srcdir)/m4/ax_compiler_vendor.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ @@ -629,7 +629,7 @@ libcares_la_CPPFLAGS_EXTRA = -DCARES_BUILDING_LIBRARY $(am__append_3) \ @CODE_COVERAGE_ENABLED_TRUE@code_coverage_v_lcov_cap_0 = @echo " LCOV --capture" $(CODE_COVERAGE_OUTPUT_FILE); @CODE_COVERAGE_ENABLED_TRUE@code_coverage_v_lcov_ign = $(code_coverage_v_lcov_ign_$(V)) @CODE_COVERAGE_ENABLED_TRUE@code_coverage_v_lcov_ign_ = $(code_coverage_v_lcov_ign_$(AM_DEFAULT_VERBOSITY)) -@CODE_COVERAGE_ENABLED_TRUE@code_coverage_v_lcov_ign_0 = @echo " LCOV --remove /tmp/*" $(CODE_COVERAGE_IGNORE_PATTERN); +@CODE_COVERAGE_ENABLED_TRUE@code_coverage_v_lcov_ign_0 = @echo " LCOV --remove" "$(CODE_COVERAGE_OUTPUT_FILE).tmp" $(CODE_COVERAGE_IGNORE_PATTERN); @CODE_COVERAGE_ENABLED_TRUE@code_coverage_v_genhtml = $(code_coverage_v_genhtml_$(V)) @CODE_COVERAGE_ENABLED_TRUE@code_coverage_v_genhtml_ = $(code_coverage_v_genhtml_$(AM_DEFAULT_VERBOSITY)) @CODE_COVERAGE_ENABLED_TRUE@code_coverage_v_genhtml_0 = @echo " GEN " "$(CODE_COVERAGE_OUTPUT_DIRECTORY)"; @@ -2328,7 +2328,7 @@ uninstall-am: uninstall-libLTLIBRARIES # Capture code coverage data @CODE_COVERAGE_ENABLED_TRUE@code-coverage-capture: code-coverage-capture-hook @CODE_COVERAGE_ENABLED_TRUE@ $(code_coverage_v_lcov_cap)$(LCOV) $(code_coverage_quiet) $(addprefix --directory ,$(CODE_COVERAGE_DIRECTORY)) --capture --output-file "$(CODE_COVERAGE_OUTPUT_FILE).tmp" --test-name "$(call code_coverage_sanitize,$(PACKAGE_NAME)-$(PACKAGE_VERSION))" --no-checksum --compat-libtool $(CODE_COVERAGE_LCOV_SHOPTS) $(CODE_COVERAGE_LCOV_OPTIONS) -@CODE_COVERAGE_ENABLED_TRUE@ $(code_coverage_v_lcov_ign)$(LCOV) $(code_coverage_quiet) $(addprefix --directory ,$(CODE_COVERAGE_DIRECTORY)) --remove "$(CODE_COVERAGE_OUTPUT_FILE).tmp" "/tmp/*" $(CODE_COVERAGE_IGNORE_PATTERN) --output-file "$(CODE_COVERAGE_OUTPUT_FILE)" $(CODE_COVERAGE_LCOV_SHOPTS) $(CODE_COVERAGE_LCOV_RMOPTS) +@CODE_COVERAGE_ENABLED_TRUE@ $(code_coverage_v_lcov_ign)$(LCOV) $(code_coverage_quiet) $(addprefix --directory ,$(CODE_COVERAGE_DIRECTORY)) --remove "$(CODE_COVERAGE_OUTPUT_FILE).tmp" $(CODE_COVERAGE_IGNORE_PATTERN) --output-file "$(CODE_COVERAGE_OUTPUT_FILE)" $(CODE_COVERAGE_LCOV_SHOPTS) $(CODE_COVERAGE_LCOV_RMOPTS) @CODE_COVERAGE_ENABLED_TRUE@ -@rm -f "$(CODE_COVERAGE_OUTPUT_FILE).tmp" @CODE_COVERAGE_ENABLED_TRUE@ $(code_coverage_v_genhtml)LANG=C $(GENHTML) $(code_coverage_quiet) $(addprefix --prefix ,$(CODE_COVERAGE_DIRECTORY)) --output-directory "$(CODE_COVERAGE_OUTPUT_DIRECTORY)" --title "$(PACKAGE_NAME)-$(PACKAGE_VERSION) Code Coverage" --legend --show-details "$(CODE_COVERAGE_OUTPUT_FILE)" $(CODE_COVERAGE_GENHTML_OPTIONS) @CODE_COVERAGE_ENABLED_TRUE@ @echo "file://$(abs_builddir)/$(CODE_COVERAGE_OUTPUT_DIRECTORY)/index.html" diff --git a/deps/cares/src/lib/ares_config.h.cmake b/deps/cares/src/lib/ares_config.h.cmake index 051b97f494fd32..51744fe143868c 100644 --- a/deps/cares/src/lib/ares_config.h.cmake +++ b/deps/cares/src/lib/ares_config.h.cmake @@ -257,6 +257,9 @@ /* Define to 1 if you have the header file. */ #cmakedefine HAVE_SIGNAL_H 1 +/* Define to 1 if you have the strnlen function. */ +#cmakedefine HAVE_STRNLEN 1 + /* Define to 1 if your struct sockaddr_in6 has sin6_scope_id. */ #cmakedefine HAVE_STRUCT_SOCKADDR_IN6_SIN6_SCOPE_ID 1 diff --git a/deps/cares/src/lib/ares_config.h.in b/deps/cares/src/lib/ares_config.h.in index d1f09d694db68e..a62e17089358aa 100644 --- a/deps/cares/src/lib/ares_config.h.in +++ b/deps/cares/src/lib/ares_config.h.in @@ -309,6 +309,9 @@ /* Define to 1 if you have `strnicmp` */ #undef HAVE_STRNICMP +/* Define to 1 if you have `strnlen` */ +#undef HAVE_STRNLEN + /* Define to 1 if the system has the type `struct addrinfo'. */ #undef HAVE_STRUCT_ADDRINFO diff --git a/deps/cares/src/lib/ares_private.h b/deps/cares/src/lib/ares_private.h index ce8c3f2ddc2f6c..e6d44e8b8640f9 100644 --- a/deps/cares/src/lib/ares_private.h +++ b/deps/cares/src/lib/ares_private.h @@ -388,8 +388,23 @@ ares_status_t ares_sysconfig_set_options(ares_sysconfig_t *sysconfig, ares_status_t ares_init_by_environment(ares_sysconfig_t *sysconfig); + +typedef ares_status_t (*ares_sysconfig_line_cb_t)(const ares_channel_t *channel, + ares_sysconfig_t *sysconfig, + ares_buf_t *line); + +ares_status_t ares_sysconfig_parse_resolv_line(const ares_channel_t *channel, + ares_sysconfig_t *sysconfig, + ares_buf_t *line); + +ares_status_t ares_sysconfig_process_buf(const ares_channel_t *channel, + ares_sysconfig_t *sysconfig, + ares_buf_t *buf, + ares_sysconfig_line_cb_t cb); + ares_status_t ares_init_sysconfig_files(const ares_channel_t *channel, - ares_sysconfig_t *sysconfig); + ares_sysconfig_t *sysconfig, + ares_bool_t process_resolvconf); #ifdef __APPLE__ ares_status_t ares_init_sysconfig_macos(const ares_channel_t *channel, ares_sysconfig_t *sysconfig); diff --git a/deps/cares/src/lib/ares_set_socket_functions.c b/deps/cares/src/lib/ares_set_socket_functions.c index 143c491174fdba..7216ffa933fc07 100644 --- a/deps/cares/src/lib/ares_set_socket_functions.c +++ b/deps/cares/src/lib/ares_set_socket_functions.c @@ -288,7 +288,9 @@ static int default_asetsockopt(ares_socket_t sock, ares_socket_opt_t opt, return setsockopt(sock, SOL_SOCKET, SO_RCVBUF, val, val_size); case ARES_SOCKET_OPT_BIND_DEVICE: - if (!ares_str_isprint(val, (size_t)val_size)) { + /* Count the number of characters before NULL terminator then + * validate those are all printable */ + if (!ares_str_isprint(val, ares_strnlen(val, (size_t)val_size))) { SET_SOCKERRNO(EINVAL); return -1; } diff --git a/deps/cares/src/lib/ares_socket.c b/deps/cares/src/lib/ares_socket.c index df02fd61b60b14..516852a84abfb8 100644 --- a/deps/cares/src/lib/ares_socket.c +++ b/deps/cares/src/lib/ares_socket.c @@ -263,7 +263,8 @@ ares_status_t ares_socket_configure(ares_channel_t *channel, int family, * compatibility */ (void)channel->sock_funcs.asetsockopt( fd, ARES_SOCKET_OPT_BIND_DEVICE, channel->local_dev_name, - sizeof(channel->local_dev_name), channel->sock_func_cb_data); + (ares_socklen_t)ares_strlen(channel->local_dev_name), + channel->sock_func_cb_data); } /* Bind to ip address if configured */ diff --git a/deps/cares/src/lib/ares_sysconfig.c b/deps/cares/src/lib/ares_sysconfig.c index 9f0d7e5061ffe0..286db60328f45b 100644 --- a/deps/cares/src/lib/ares_sysconfig.c +++ b/deps/cares/src/lib/ares_sysconfig.c @@ -260,6 +260,94 @@ static ares_status_t ares_init_sysconfig_android(const ares_channel_t *channel, } #endif +#if defined(__QNX__) +static ares_status_t + ares_init_sysconfig_qnx(const ares_channel_t *channel, + ares_sysconfig_t *sysconfig) +{ + /* QNX: + * 1. use confstr(_CS_RESOLVE, ...) as primary resolv.conf data, replacing + * "_" with " ". If that is empty, then do normal /etc/resolv.conf + * processing. + * 2. We want to process /etc/nsswitch.conf as normal. + * 3. if confstr(_CS_DOMAIN, ...) this is the domain name. Use this as + * preference over anything else found. + */ + ares_buf_t *buf = ares_buf_create(); + unsigned char *data = NULL; + size_t data_size = 0; + ares_bool_t process_resolvconf = ARES_TRUE; + ares_status_t status = ARES_SUCCESS; + + /* Prefer confstr(_CS_RESOLVE, ...) */ + buf = ares_buf_create(); + if (buf == NULL) { + status = ARES_ENOMEM; + goto done; + } + + data_size = 1024; + data = ares_buf_append_start(buf, &data_size); + if (data == NULL) { + status = ARES_ENOMEM; + goto done; + } + + data_size = confstr(_CS_RESOLVE, (char *)data, data_size); + if (data_size > 1) { + /* confstr returns byte for NULL terminator, strip */ + data_size--; + + ares_buf_append_finish(buf, data_size); + /* Its odd, this uses _ instead of " " between keywords, otherwise the + * format is the same as resolv.conf, replace. */ + ares_buf_replace(buf, (const unsigned char *)"_", 1, + (const unsigned char *)" ", 1); + + status = ares_sysconfig_process_buf(channel, sysconfig, buf, + ares_sysconfig_parse_resolv_line); + if (status != ARES_SUCCESS) { + /* ENOMEM is really the only error we'll get here */ + goto done; + } + + /* don't read resolv.conf if we processed *any* nameservers */ + if (ares_llist_len(sysconfig->sconfig) != 0) { + process_resolvconf = ARES_FALSE; + } + } + + /* Process files */ + status = ares_init_sysconfig_files(channel, sysconfig, process_resolvconf); + if (status != ARES_SUCCESS) { + goto done; + } + + /* Read confstr(_CS_DOMAIN, ...), but if we had a search path specified with + * more than one domain, lets prefer that instead. Its not exactly clear + * the best way to handle this. */ + if (sysconfig->ndomains <= 1) { + char domain[256]; + size_t domain_len; + + domain_len = confstr(_CS_DOMAIN, domain, sizeof(domain_len)); + if (domain_len != 0) { + ares_strsplit_free(sysconfig->domains, sysconfig->ndomains); + sysconfig->domains = ares_strsplit(domain, ", ", &sysconfig->ndomains); + if (sysconfig->domains == NULL) { + status = ARES_ENOMEM; + goto done; + } + } + } + +done: + ares_buf_destroy(buf); + + return status; +} +#endif + #if defined(CARES_USE_LIBRESOLV) static ares_status_t ares_init_sysconfig_libresolv(const ares_channel_t *channel, @@ -516,8 +604,10 @@ ares_status_t ares_init_by_sysconfig(ares_channel_t *channel) status = ares_init_sysconfig_macos(channel, &sysconfig); #elif defined(CARES_USE_LIBRESOLV) status = ares_init_sysconfig_libresolv(channel, &sysconfig); +#elif defined(__QNX__) + status = ares_init_sysconfig_qnx(channel, &sysconfig); #else - status = ares_init_sysconfig_files(channel, &sysconfig); + status = ares_init_sysconfig_files(channel, &sysconfig, ARES_TRUE); #endif if (status != ARES_SUCCESS) { diff --git a/deps/cares/src/lib/ares_sysconfig_files.c b/deps/cares/src/lib/ares_sysconfig_files.c index 49bc330d9d346d..a6c2a8e62bb34f 100644 --- a/deps/cares/src/lib/ares_sysconfig_files.c +++ b/deps/cares/src/lib/ares_sysconfig_files.c @@ -549,9 +549,9 @@ ares_status_t ares_init_by_environment(ares_sysconfig_t *sysconfig) /* This function will only return ARES_SUCCESS or ARES_ENOMEM. Any other * conditions are ignored. Users may mess up config files, but we want to * process anything we can. */ -static ares_status_t parse_resolvconf_line(const ares_channel_t *channel, - ares_sysconfig_t *sysconfig, - ares_buf_t *line) +ares_status_t ares_sysconfig_parse_resolv_line(const ares_channel_t *channel, + ares_sysconfig_t *sysconfig, + ares_buf_t *line) { char option[32]; char value[512]; @@ -726,9 +726,38 @@ static ares_status_t parse_svcconf_line(const ares_channel_t *channel, return status; } -typedef ares_status_t (*line_callback_t)(const ares_channel_t *channel, - ares_sysconfig_t *sysconfig, - ares_buf_t *line); + +ares_status_t ares_sysconfig_process_buf(const ares_channel_t *channel, + ares_sysconfig_t *sysconfig, + ares_buf_t *buf, + ares_sysconfig_line_cb_t cb) +{ + ares_array_t *lines = NULL; + size_t num; + size_t i; + ares_status_t status; + + status = ares_buf_split(buf, (const unsigned char *)"\n", 1, + ARES_BUF_SPLIT_TRIM, 0, &lines); + if (status != ARES_SUCCESS) { + goto done; + } + + num = ares_array_len(lines); + for (i = 0; i < num; i++) { + ares_buf_t **bufptr = ares_array_at(lines, i); + ares_buf_t *line = *bufptr; + + status = cb(channel, sysconfig, line); + if (status != ARES_SUCCESS) { + goto done; + } + } + +done: + ares_array_destroy(lines); + return status; +} /* Should only return: * ARES_ENOTFOUND - file not found @@ -737,16 +766,13 @@ typedef ares_status_t (*line_callback_t)(const ares_channel_t *channel, * ARES_SUCCESS - file processed, doesn't necessarily mean it was a good * file, but we're not erroring out if we can't parse * something (or anything at all) */ -static ares_status_t process_config_lines(const ares_channel_t *channel, - const char *filename, - ares_sysconfig_t *sysconfig, - line_callback_t cb) +static ares_status_t process_config_lines(const ares_channel_t *channel, + const char *filename, + ares_sysconfig_t *sysconfig, + ares_sysconfig_line_cb_t cb) { ares_status_t status = ARES_SUCCESS; - ares_array_t *lines = NULL; ares_buf_t *buf = NULL; - size_t num; - size_t i; buf = ares_buf_create(); if (buf == NULL) { @@ -759,43 +785,30 @@ static ares_status_t process_config_lines(const ares_channel_t *channel, goto done; } - status = ares_buf_split(buf, (const unsigned char *)"\n", 1, - ARES_BUF_SPLIT_TRIM, 0, &lines); - if (status != ARES_SUCCESS) { - goto done; - } - - num = ares_array_len(lines); - for (i = 0; i < num; i++) { - ares_buf_t **bufptr = ares_array_at(lines, i); - ares_buf_t *line = *bufptr; - - status = cb(channel, sysconfig, line); - if (status != ARES_SUCCESS) { - goto done; - } - } + status = ares_sysconfig_process_buf(channel, sysconfig, buf, cb); done: ares_buf_destroy(buf); - ares_array_destroy(lines); return status; } ares_status_t ares_init_sysconfig_files(const ares_channel_t *channel, - ares_sysconfig_t *sysconfig) + ares_sysconfig_t *sysconfig, + ares_bool_t process_resolvconf) { ares_status_t status = ARES_SUCCESS; /* Resolv.conf */ - status = process_config_lines(channel, - (channel->resolvconf_path != NULL) - ? channel->resolvconf_path - : PATH_RESOLV_CONF, - sysconfig, parse_resolvconf_line); - if (status != ARES_SUCCESS && status != ARES_ENOTFOUND) { - goto done; + if (process_resolvconf) { + status = process_config_lines(channel, + (channel->resolvconf_path != NULL) + ? channel->resolvconf_path + : PATH_RESOLV_CONF, + sysconfig, ares_sysconfig_parse_resolv_line); + if (status != ARES_SUCCESS && status != ARES_ENOTFOUND) { + goto done; + } } /* Nsswitch.conf */ diff --git a/deps/cares/src/lib/event/ares_event_configchg.c b/deps/cares/src/lib/event/ares_event_configchg.c index e3e665bd165523..5ecc6888ab719f 100644 --- a/deps/cares/src/lib/event/ares_event_configchg.c +++ b/deps/cares/src/lib/event/ares_event_configchg.c @@ -558,14 +558,24 @@ static ares_status_t config_change_check(ares_htable_strvp_t *filestat, const char *resolvconf_path) { size_t i; - const char *configfiles[5]; + const char *configfiles[16]; ares_bool_t changed = ARES_FALSE; + size_t cnt = 0; - configfiles[0] = resolvconf_path; - configfiles[1] = "/etc/nsswitch.conf"; - configfiles[2] = "/etc/netsvc.conf"; - configfiles[3] = "/etc/svc.conf"; - configfiles[4] = NULL; + memset(configfiles, 0, sizeof(configfiles)); + + configfiles[cnt++] = resolvconf_path; + configfiles[cnt++] = "/etc/nsswitch.conf"; +#ifdef _AIX + configfiles[cnt++] = "/etc/netsvc.conf"; +#endif +#ifdef __osf /* Tru64 */ + configfiles[cnt++] = "/etc/svc.conf"; +#endif +#ifdef __QNX__ + configfiles[cnt++] = "/etc/net.cfg"; +#endif + configfiles[cnt++] = NULL; for (i = 0; configfiles[i] != NULL; i++) { fileinfo_t *fi = ares_htable_strvp_get_direct(filestat, configfiles[i]); diff --git a/deps/cares/src/lib/include/ares_buf.h b/deps/cares/src/lib/include/ares_buf.h index 7836a313e066d1..10d29eaf83bd8e 100644 --- a/deps/cares/src/lib/include/ares_buf.h +++ b/deps/cares/src/lib/include/ares_buf.h @@ -219,6 +219,26 @@ CARES_EXTERN unsigned char *ares_buf_finish_bin(ares_buf_t *buf, size_t *len); */ CARES_EXTERN char *ares_buf_finish_str(ares_buf_t *buf, size_t *len); +/*! Replace the given search byte sequence with the replacement byte sequence. + * This is only valid for allocated buffers, not const buffers. Will replace + * all byte sequences starting at the current offset to the end of the buffer. + * + * \param[in] buf Initialized buffer object. Can not be a "const" buffer. + * \param[in] srch Search byte sequence, must not be NULL. + * \param[in] srch_size Size of byte sequence, must not be zero. + * \param[in] rplc Byte sequence to use as replacement. May be NULL if + * rplc_size is zero. + * \param[in] rplc_size Size of replacement byte sequence, may be 0. + * \return ARES_SUCCESS on success, otherwise on may return failure only on + * memory allocation failure or misuse. Will not return indication + * if any replacements occurred + */ +CARES_EXTERN ares_status_t ares_buf_replace(ares_buf_t *buf, + const unsigned char *srch, + size_t srch_size, + const unsigned char *rplc, + size_t rplc_size); + /*! Tag a position to save in the buffer in case parsing needs to rollback, * such as if insufficient data is available, but more data may be added in * the future. Only a single tag can be set per buffer object. Setting a diff --git a/deps/cares/src/lib/include/ares_str.h b/deps/cares/src/lib/include/ares_str.h index ea75b3b3e7441d..4ee339510bf026 100644 --- a/deps/cares/src/lib/include/ares_str.h +++ b/deps/cares/src/lib/include/ares_str.h @@ -29,6 +29,20 @@ CARES_EXTERN char *ares_strdup(const char *s1); +/*! Scan up to maxlen bytes for the first NULL character and return + * its index, or maxlen if not found. The function only returns + * maxlen if the first maxlen bytes were not NULL characters; it + * makes no guarantee for what \c str[maxlen] (if defined) is, and + * does not access it. It is behaving like the POSIX \c strnlen() + * function, except that it returns 0 if the \p str pointer is \c + * NULL. + * + * \param[in] str The string to scan for the NULL character + * \param[in] maxlen The maximum number of bytes to scan + * \return Index of first NULL byte. Between 0 and maxlen (inclusive). + */ +CARES_EXTERN size_t ares_strnlen(const char *str, size_t maxlen); + CARES_EXTERN size_t ares_strlen(const char *str); /*! Copy string from source to destination with destination buffer size diff --git a/deps/cares/src/lib/record/ares_dns_multistring.c b/deps/cares/src/lib/record/ares_dns_multistring.c index 57c0d1c0a803ec..44fcaccd65bb6a 100644 --- a/deps/cares/src/lib/record/ares_dns_multistring.c +++ b/deps/cares/src/lib/record/ares_dns_multistring.c @@ -146,6 +146,18 @@ ares_status_t ares_dns_multistring_add_own(ares_dns_multistring_t *strs, return status; } + /* Issue #921, ares_dns_multistring_get() doesn't have a way to indicate + * success or fail on a zero-length string which is actually valid. So we + * are going to allocate a 1-byte buffer to use as a placeholder in this + * case */ + if (str == NULL) { + str = ares_malloc_zero(1); + if (str == NULL) { + ares_array_remove_last(strs->strs); + return ARES_ENOMEM; + } + } + data->data = str; data->len = len; @@ -252,36 +264,38 @@ ares_status_t ares_dns_multistring_parse_buf(ares_buf_t *buf, break; /* LCOV_EXCL_LINE: DefensiveCoding */ } - if (len) { - /* When used by the _str() parser, it really needs to be validated to - * be a valid printable ascii string. Do that here */ - if (validate_printable && ares_buf_len(buf) >= len) { - size_t mylen; - const char *data = (const char *)ares_buf_peek(buf, &mylen); - if (!ares_str_isprint(data, len)) { - status = ARES_EBADSTR; - break; - } + + /* When used by the _str() parser, it really needs to be validated to + * be a valid printable ascii string. Do that here */ + if (len && validate_printable && ares_buf_len(buf) >= len) { + size_t mylen; + const char *data = (const char *)ares_buf_peek(buf, &mylen); + if (!ares_str_isprint(data, len)) { + status = ARES_EBADSTR; + break; } + } - if (strs != NULL) { - unsigned char *data = NULL; + if (strs != NULL) { + unsigned char *data = NULL; + if (len) { status = ares_buf_fetch_bytes_dup(buf, len, ARES_TRUE, &data); if (status != ARES_SUCCESS) { break; } - status = ares_dns_multistring_add_own(*strs, data, len); - if (status != ARES_SUCCESS) { - ares_free(data); - break; - } - } else { - status = ares_buf_consume(buf, len); - if (status != ARES_SUCCESS) { - break; - } + } + status = ares_dns_multistring_add_own(*strs, data, len); + if (status != ARES_SUCCESS) { + ares_free(data); + break; + } + } else { + status = ares_buf_consume(buf, len); + if (status != ARES_SUCCESS) { + break; } } + } if (status != ARES_SUCCESS && strs != NULL) { diff --git a/deps/cares/src/lib/str/ares_buf.c b/deps/cares/src/lib/str/ares_buf.c index 69e6b38aac849e..63acc6cf7714d3 100644 --- a/deps/cares/src/lib/str/ares_buf.c +++ b/deps/cares/src/lib/str/ares_buf.c @@ -1104,6 +1104,72 @@ const unsigned char *ares_buf_peek(const ares_buf_t *buf, size_t *len) return ares_buf_fetch(buf, len); } +ares_status_t ares_buf_replace(ares_buf_t *buf, const unsigned char *srch, + size_t srch_size, const unsigned char *rplc, + size_t rplc_size) +{ + size_t processed_len = 0; + ares_status_t status; + + if (buf->alloc_buf == NULL || srch == NULL || srch_size == 0 || + (rplc == NULL && rplc_size != 0)) { + return ARES_EFORMERR; + } + + while (1) { + unsigned char *ptr = buf->alloc_buf + buf->offset + processed_len; + size_t remaining_len = buf->data_len - buf->offset - processed_len; + size_t found_offset = 0; + size_t move_data_len; + + /* Find pattern */ + ptr = ares_memmem(ptr, remaining_len, srch, srch_size); + if (ptr == NULL) { + break; + } + + /* Store the offset this was found because our actual pointer might be + * switched out from under us by the call to ensure_space() if the + * replacement pattern is larger than the search pattern */ + found_offset = (size_t)(ptr - (size_t)(buf->alloc_buf + buf->offset)); + if (rplc_size > srch_size) { + status = ares_buf_ensure_space(buf, rplc_size - srch_size); + if (status != ARES_SUCCESS) { + return status; + } + } + + /* Impossible, but silence clang */ + if (buf->alloc_buf == NULL) { + return ARES_ENOMEM; + } + + /* Recalculate actual pointer */ + ptr = buf->alloc_buf + buf->offset + found_offset; + + /* Move the data */ + move_data_len = buf->data_len - buf->offset - found_offset - srch_size; + memmove(ptr + rplc_size, + ptr + srch_size, + move_data_len); + + /* Copy in the replacement data */ + if (rplc != NULL && rplc_size > 0) { + memcpy(ptr, rplc, rplc_size); + } + + if (rplc_size > srch_size) { + buf->data_len += rplc_size - srch_size; + } else { + buf->data_len -= srch_size - rplc_size; + } + + processed_len = found_offset + rplc_size; + } + + return ARES_SUCCESS; +} + ares_status_t ares_buf_peek_byte(const ares_buf_t *buf, unsigned char *b) { size_t remaining_len = 0; diff --git a/deps/cares/src/lib/str/ares_str.c b/deps/cares/src/lib/str/ares_str.c index f6bfabf11f4467..0eda1ab9f15783 100644 --- a/deps/cares/src/lib/str/ares_str.c +++ b/deps/cares/src/lib/str/ares_str.c @@ -32,6 +32,23 @@ # include #endif +size_t ares_strnlen(const char *str, size_t maxlen) { + const char *p = NULL; + if (str == NULL) { + return 0; + } +#ifdef HAVE_STRNLEN + (void)p; + return strnlen(str, maxlen); +#else + if ((p = memchr(str, 0, maxlen)) == NULL) { + return maxlen; + } else { + return (size_t)(p - str); + } +#endif /* HAVE_STRNLEN */ +} + size_t ares_strlen(const char *str) { if (str == NULL) { diff --git a/deps/cares/src/tools/Makefile.in b/deps/cares/src/tools/Makefile.in index 9a96a74fa6957d..19e99a253378c7 100644 --- a/deps/cares/src/tools/Makefile.in +++ b/deps/cares/src/tools/Makefile.in @@ -91,7 +91,9 @@ host_triplet = @host@ noinst_PROGRAMS = $(am__EXEEXT_1) subdir = src/tools ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/m4/ax_ac_append_to_file.m4 \ +am__aclocal_m4_deps = $(top_srcdir)/m4/ares_check_user_namespace.m4 \ + $(top_srcdir)/m4/ares_check_uts_namespace.m4 \ + $(top_srcdir)/m4/ax_ac_append_to_file.m4 \ $(top_srcdir)/m4/ax_ac_print_to_file.m4 \ $(top_srcdir)/m4/ax_add_am_macro_static.m4 \ $(top_srcdir)/m4/ax_am_macros_static.m4 \ @@ -101,8 +103,6 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/ax_ac_append_to_file.m4 \ $(top_srcdir)/m4/ax_check_compile_flag.m4 \ $(top_srcdir)/m4/ax_check_gnu_make.m4 \ $(top_srcdir)/m4/ax_check_link_flag.m4 \ - $(top_srcdir)/m4/ax_check_user_namespace.m4 \ - $(top_srcdir)/m4/ax_check_uts_namespace.m4 \ $(top_srcdir)/m4/ax_code_coverage.m4 \ $(top_srcdir)/m4/ax_compiler_vendor.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ diff --git a/deps/corepack/CHANGELOG.md b/deps/corepack/CHANGELOG.md index 7de934c0d2c0db..941d0b6b7e5e25 100644 --- a/deps/corepack/CHANGELOG.md +++ b/deps/corepack/CHANGELOG.md @@ -1,5 +1,17 @@ # Changelog +## [0.30.0](https://github.com/nodejs/corepack/compare/v0.29.4...v0.30.0) (2024-11-23) + + +### Features + +* update package manager versions ([#578](https://github.com/nodejs/corepack/issues/578)) ([a286c8f](https://github.com/nodejs/corepack/commit/a286c8f5537ea9ecf9b6ff53c7bc3e8da4e3c8bb)) + + +### Performance Improvements + +* prefer `module.enableCompileCache` over `v8-compile-cache` ([#574](https://github.com/nodejs/corepack/issues/574)) ([cba6905](https://github.com/nodejs/corepack/commit/cba690575bd606faeee54bd512ccb8797d49055f)) + ## [0.29.4](https://github.com/nodejs/corepack/compare/v0.29.3...v0.29.4) (2024-09-07) diff --git a/deps/corepack/dist/corepack.js b/deps/corepack/dist/corepack.js index b1b22662466f86..6179b11c083cb5 100755 --- a/deps/corepack/dist/corepack.js +++ b/deps/corepack/dist/corepack.js @@ -1,3 +1,4 @@ #!/usr/bin/env node process.env.COREPACK_ENABLE_DOWNLOAD_PROMPT??='0'; +require('module').enableCompileCache?.(); require('./lib/corepack.cjs').runMain(process.argv.slice(2)); \ No newline at end of file diff --git a/deps/corepack/dist/lib/corepack.cjs b/deps/corepack/dist/lib/corepack.cjs index 2978fc336232e0..e1919339dc38bd 100644 --- a/deps/corepack/dist/lib/corepack.cjs +++ b/deps/corepack/dist/lib/corepack.cjs @@ -21260,7 +21260,7 @@ function String2(descriptor, ...args) { } // package.json -var version = "0.29.4"; +var version = "0.30.0"; // sources/Engine.ts var import_fs9 = __toESM(require("fs")); @@ -21274,7 +21274,7 @@ var import_valid3 = __toESM(require_valid2()); var config_default = { definitions: { npm: { - default: "10.8.3+sha1.e6085b2864fcfd9b1aad7b602601b5a2fc116699", + default: "10.9.1+sha1.ab141c1229765c11c8c59060fc9cf450a2207bd6", fetchLatestFrom: { type: "npm", package: "npm" @@ -21311,7 +21311,7 @@ var config_default = { } }, pnpm: { - default: "9.9.0+sha1.3edbe440f4e570aa8f049adbd06b9483d55cc2d2", + default: "9.14.2+sha1.5202b50ab92394b3c922d2e293f196e2df6d441b", fetchLatestFrom: { type: "npm", package: "pnpm" @@ -21375,7 +21375,7 @@ var config_default = { package: "yarn" }, transparent: { - default: "4.4.1+sha224.fd21d9eb5fba020083811af1d4953acc21eeb9f6ff97efd1b3f9d4de", + default: "4.5.2+sha224.c2e2e9ed3cdadd6ec250589b3393f71ae56d5ec297af11cec1eba3b4", commands: [ [ "yarn", @@ -21965,8 +21965,11 @@ async function runVersion(locator, installSpec, binName, args) { } if (!binPath) throw new Error(`Assertion failed: Unable to locate path for bin '${binName}'`); - if (locator.name !== `npm` || (0, import_lt.default)(locator.reference, `9.7.0`)) - await Promise.resolve().then(() => __toESM(require_v8_compile_cache())); + if (!import_module.default.enableCompileCache) { + if (locator.name !== `npm` || (0, import_lt.default)(locator.reference, `9.7.0`)) { + await Promise.resolve().then(() => __toESM(require_v8_compile_cache())); + } + } process.env.COREPACK_ROOT = import_path7.default.dirname(require.resolve("corepack/package.json")); process.argv = [ process.execPath, @@ -21976,6 +21979,9 @@ async function runVersion(locator, installSpec, binName, args) { process.execArgv = []; process.mainModule = void 0; process.nextTick(import_module.default.runMain, binPath); + if (import_module.default.flushCompileCache) { + setImmediate(import_module.default.flushCompileCache); + } } function shouldSkipIntegrityCheck() { return process.env.COREPACK_INTEGRITY_KEYS === `` || process.env.COREPACK_INTEGRITY_KEYS === `0`; @@ -22553,7 +22559,7 @@ var EnableCommand = class extends Command { [`enable`] ]; static usage = Command.Usage({ - description: `Add the Corepack shims to the install directories`, + description: `Add the Corepack shims to the install directory`, details: ` When run, this command will check whether the shims for the specified package managers can be found with the correct values inside the install directory. If not, or if they don't exist, they will be created. diff --git a/deps/corepack/dist/npm.js b/deps/corepack/dist/npm.js index 7d10ba5bdf36b2..75f68b058f2dd6 100755 --- a/deps/corepack/dist/npm.js +++ b/deps/corepack/dist/npm.js @@ -1,3 +1,4 @@ #!/usr/bin/env node process.env.COREPACK_ENABLE_DOWNLOAD_PROMPT??='1' +require('module').enableCompileCache?.(); require('./lib/corepack.cjs').runMain(['npm', ...process.argv.slice(2)]); \ No newline at end of file diff --git a/deps/corepack/dist/npx.js b/deps/corepack/dist/npx.js index a8bd3e69014313..b1138bb48e1a82 100755 --- a/deps/corepack/dist/npx.js +++ b/deps/corepack/dist/npx.js @@ -1,3 +1,4 @@ #!/usr/bin/env node process.env.COREPACK_ENABLE_DOWNLOAD_PROMPT??='1' +require('module').enableCompileCache?.(); require('./lib/corepack.cjs').runMain(['npx', ...process.argv.slice(2)]); \ No newline at end of file diff --git a/deps/corepack/dist/pnpm.js b/deps/corepack/dist/pnpm.js index a0a87263435562..56ba509405033d 100755 --- a/deps/corepack/dist/pnpm.js +++ b/deps/corepack/dist/pnpm.js @@ -1,3 +1,4 @@ #!/usr/bin/env node process.env.COREPACK_ENABLE_DOWNLOAD_PROMPT??='1' +require('module').enableCompileCache?.(); require('./lib/corepack.cjs').runMain(['pnpm', ...process.argv.slice(2)]); \ No newline at end of file diff --git a/deps/corepack/dist/pnpx.js b/deps/corepack/dist/pnpx.js index 57ad4842631cd7..ee36be2e99c686 100755 --- a/deps/corepack/dist/pnpx.js +++ b/deps/corepack/dist/pnpx.js @@ -1,3 +1,4 @@ #!/usr/bin/env node process.env.COREPACK_ENABLE_DOWNLOAD_PROMPT??='1' +require('module').enableCompileCache?.(); require('./lib/corepack.cjs').runMain(['pnpx', ...process.argv.slice(2)]); \ No newline at end of file diff --git a/deps/corepack/dist/yarn.js b/deps/corepack/dist/yarn.js index eaed8596eabaa3..ce628c82b6a782 100755 --- a/deps/corepack/dist/yarn.js +++ b/deps/corepack/dist/yarn.js @@ -1,3 +1,4 @@ #!/usr/bin/env node process.env.COREPACK_ENABLE_DOWNLOAD_PROMPT??='1' +require('module').enableCompileCache?.(); require('./lib/corepack.cjs').runMain(['yarn', ...process.argv.slice(2)]); \ No newline at end of file diff --git a/deps/corepack/dist/yarnpkg.js b/deps/corepack/dist/yarnpkg.js index aada6032fa67ff..9541ed726aaa3b 100755 --- a/deps/corepack/dist/yarnpkg.js +++ b/deps/corepack/dist/yarnpkg.js @@ -1,3 +1,4 @@ #!/usr/bin/env node process.env.COREPACK_ENABLE_DOWNLOAD_PROMPT??='1' +require('module').enableCompileCache?.(); require('./lib/corepack.cjs').runMain(['yarnpkg', ...process.argv.slice(2)]); \ No newline at end of file diff --git a/deps/corepack/package.json b/deps/corepack/package.json index 571c359407e07a..c9c6662e99e6c9 100644 --- a/deps/corepack/package.json +++ b/deps/corepack/package.json @@ -1,6 +1,6 @@ { "name": "corepack", - "version": "0.29.4", + "version": "0.30.0", "homepage": "https://github.com/nodejs/corepack#readme", "bugs": { "url": "https://github.com/nodejs/corepack/issues" diff --git a/deps/ncrypto/unofficial.gni b/deps/ncrypto/unofficial.gni index ea024af73e215b..7cb27d22b9b8e0 100644 --- a/deps/ncrypto/unofficial.gni +++ b/deps/ncrypto/unofficial.gni @@ -27,6 +27,6 @@ template("ncrypto_gn_build") { forward_variables_from(invoker, "*") public_configs = [ ":ncrypto_config" ] sources = gypi_values.ncrypto_sources - deps = [ "../openssl" ] + deps = [ "$node_openssl_path" ] } } diff --git a/deps/ngtcp2/nghttp3/lib/includes/nghttp3/nghttp3.h b/deps/ngtcp2/nghttp3/lib/includes/nghttp3/nghttp3.h index 77eb1fbf263815..6cf947487bf899 100644 --- a/deps/ngtcp2/nghttp3/lib/includes/nghttp3/nghttp3.h +++ b/deps/ngtcp2/nghttp3/lib/includes/nghttp3/nghttp3.h @@ -31,11 +31,11 @@ libcurl) */ #if (defined(_WIN32) || defined(__WIN32__)) && !defined(WIN32) # define WIN32 -#endif +#endif /* (defined(_WIN32) || defined(__WIN32__)) && !defined(WIN32) */ #ifdef __cplusplus extern "C" { -#endif +#endif /* defined(__cplusplus) */ #include #if defined(_MSC_VER) && (_MSC_VER < 1800) @@ -43,9 +43,9 @@ extern "C" { compliant. See compiler macros and version number in https://sourceforge.net/p/predef/wiki/Compilers/ */ # include -#else /* !defined(_MSC_VER) || (_MSC_VER >= 1800) */ +#else /* !(defined(_MSC_VER) && (_MSC_VER < 1800)) */ # include -#endif /* !defined(_MSC_VER) || (_MSC_VER >= 1800) */ +#endif /* !(defined(_MSC_VER) && (_MSC_VER < 1800)) */ #include #include #include @@ -57,22 +57,22 @@ extern "C" { #elif defined(WIN32) # ifdef BUILDING_NGHTTP3 # define NGHTTP3_EXTERN __declspec(dllexport) -# else /* !BUILDING_NGHTTP3 */ +# else /* !defined(BUILDING_NGHTTP3) */ # define NGHTTP3_EXTERN __declspec(dllimport) -# endif /* !BUILDING_NGHTTP3 */ -#else /* !defined(WIN32) */ +# endif /* !defined(BUILDING_NGHTTP3) */ +#else /* !(defined(NGHTTP3_STATICLIB) || defined(WIN32)) */ # ifdef BUILDING_NGHTTP3 # define NGHTTP3_EXTERN __attribute__((visibility("default"))) -# else /* !BUILDING_NGHTTP3 */ +# else /* !defined(BUILDING_NGHTTP3) */ # define NGHTTP3_EXTERN -# endif /* !BUILDING_NGHTTP3 */ -#endif /* !defined(WIN32) */ +# endif /* !defined(BUILDING_NGHTTP3) */ +#endif /* !(defined(NGHTTP3_STATICLIB) || defined(WIN32)) */ #ifdef _MSC_VER # define NGHTTP3_ALIGN(N) __declspec(align(N)) -#else /* !_MSC_VER */ +#else /* !defined(_MSC_VER) */ # define NGHTTP3_ALIGN(N) __attribute__((aligned(N))) -#endif /* !_MSC_VER */ +#endif /* !defined(_MSC_VER) */ /** * @typedef @@ -624,7 +624,7 @@ typedef struct nghttp3_buf { */ uint8_t *end; /** - * :member:`pos` pointers to the start of data. Typically, this + * :member:`pos` points to the start of data. Typically, this * points to the address that next data should be read. Initially, * it points to :member:`begin`. */ @@ -1161,8 +1161,8 @@ NGHTTP3_EXTERN void nghttp3_qpack_encoder_del(nghttp3_qpack_encoder *encoder); * anymore. */ NGHTTP3_EXTERN int nghttp3_qpack_encoder_encode( - nghttp3_qpack_encoder *encoder, nghttp3_buf *pbuf, nghttp3_buf *rbuf, - nghttp3_buf *ebuf, int64_t stream_id, const nghttp3_nv *nva, size_t nvlen); + nghttp3_qpack_encoder *encoder, nghttp3_buf *pbuf, nghttp3_buf *rbuf, + nghttp3_buf *ebuf, int64_t stream_id, const nghttp3_nv *nva, size_t nvlen); /** * @function @@ -1182,7 +1182,7 @@ NGHTTP3_EXTERN int nghttp3_qpack_encoder_encode( * |encoder| is unable to process input because it is malformed. */ NGHTTP3_EXTERN nghttp3_ssize nghttp3_qpack_encoder_read_decoder( - nghttp3_qpack_encoder *encoder, const uint8_t *src, size_t srclen); + nghttp3_qpack_encoder *encoder, const uint8_t *src, size_t srclen); /** * @function @@ -1343,7 +1343,7 @@ NGHTTP3_EXTERN void nghttp3_qpack_decoder_del(nghttp3_qpack_decoder *decoder); * Could not interpret encoder stream instruction. */ NGHTTP3_EXTERN nghttp3_ssize nghttp3_qpack_decoder_read_encoder( - nghttp3_qpack_decoder *decoder, const uint8_t *src, size_t srclen); + nghttp3_qpack_decoder *decoder, const uint8_t *src, size_t srclen); /** * @function @@ -1436,9 +1436,9 @@ nghttp3_qpack_decoder_get_icnt(const nghttp3_qpack_decoder *decoder); * HTTP field is too large. */ NGHTTP3_EXTERN nghttp3_ssize nghttp3_qpack_decoder_read_request( - nghttp3_qpack_decoder *decoder, nghttp3_qpack_stream_context *sctx, - nghttp3_qpack_nv *nv, uint8_t *pflags, const uint8_t *src, size_t srclen, - int fin); + nghttp3_qpack_decoder *decoder, nghttp3_qpack_stream_context *sctx, + nghttp3_qpack_nv *nv, uint8_t *pflags, const uint8_t *src, size_t srclen, + int fin); /** * @function @@ -1568,7 +1568,7 @@ typedef void (*nghttp3_debug_vprintf_callback)(const char *format, * times because this is important. */ NGHTTP3_EXTERN void nghttp3_set_debug_vprintf_callback( - nghttp3_debug_vprintf_callback debug_vprintf_callback); + nghttp3_debug_vprintf_callback debug_vprintf_callback); /** * @macrosection @@ -2118,9 +2118,10 @@ NGHTTP3_EXTERN int nghttp3_conn_bind_qpack_streams(nghttp3_conn *conn, * :macro:`NGHTTP3_ERR_CALLBACK_FAILURE` * User callback failed. * - * It may return the other error codes. In general, the negative - * error code means that |conn| encountered a connection error, and - * the connection should be closed. + * It may return the other error codes. The negative error code means + * that |conn| encountered a connection error, and the connection must + * be closed. Calling nghttp3 API other than `nghttp3_conn_del` + * causes undefined behavior. */ NGHTTP3_EXTERN nghttp3_ssize nghttp3_conn_read_stream(nghttp3_conn *conn, int64_t stream_id, @@ -2152,9 +2153,10 @@ NGHTTP3_EXTERN nghttp3_ssize nghttp3_conn_read_stream(nghttp3_conn *conn, * :macro:`NGHTTP3_ERR_CALLBACK_FAILURE` * User callback failed. * - * It may return the other error codes. In general, the negative - * error code means that |conn| encountered a connection error, and - * the connection should be closed. + * It may return the other error codes. The negative error code means + * that |conn| encountered a connection error, and the connection must + * be closed. Calling nghttp3 API other than `nghttp3_conn_del` + * causes undefined behavior. */ NGHTTP3_EXTERN nghttp3_ssize nghttp3_conn_writev_stream(nghttp3_conn *conn, int64_t *pstream_id, @@ -2198,6 +2200,9 @@ NGHTTP3_EXTERN int nghttp3_conn_add_write_offset(nghttp3_conn *conn, * If a stream denoted by |stream_id| is not found, this function * returns 0. * + * Alternatively, `nghttp3_conn_update_ack_offset` can be used to + * accomplish the same thing. + * * This function returns 0 if it succeeds, or one of the following * negative error codes: * @@ -2207,6 +2212,31 @@ NGHTTP3_EXTERN int nghttp3_conn_add_write_offset(nghttp3_conn *conn, NGHTTP3_EXTERN int nghttp3_conn_add_ack_offset(nghttp3_conn *conn, int64_t stream_id, uint64_t n); +/** + * @function + * + * `nghttp3_conn_update_ack_offset` tells |conn| that QUIC stack has + * acknowledged the stream data up to |offset| for a stream denoted by + * |stream_id|. + * + * If a stream denoted by |stream_id| is not found, this function + * returns 0. + * + * Alternatively, `nghttp3_conn_add_ack_offset` can be used to + * accomplish the same thing. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGHTTP3_ERR_INVALID_ARGUMENT` + * |offset| is less than the number of bytes acknowledged so far. + * :macro:`NGHTTP3_ERR_CALLBACK_FAILURE` + * User callback failed. + */ +NGHTTP3_EXTERN int nghttp3_conn_update_ack_offset(nghttp3_conn *conn, + int64_t stream_id, + uint64_t offset); + /** * @function * @@ -2314,9 +2344,9 @@ NGHTTP3_EXTERN int nghttp3_conn_resume_stream(nghttp3_conn *conn, /** * @function * - * `nghttp3_conn_close_stream` closes stream identified by - * |stream_id|. QUIC application error code |app_error_code| is the - * reason of the closure. + * `nghttp3_conn_close_stream` tells the library that a stream + * identified by |stream_id| has been closed. QUIC application error + * code |app_error_code| is the reason of the closure. * * This function returns 0 if it succeeds, or one of the following * negative error codes: @@ -2419,8 +2449,8 @@ nghttp3_conn_set_max_concurrent_streams(nghttp3_conn *conn, * stream. */ typedef nghttp3_ssize (*nghttp3_read_data_callback)( - nghttp3_conn *conn, int64_t stream_id, nghttp3_vec *vec, size_t veccnt, - uint32_t *pflags, void *conn_user_data, void *stream_user_data); + nghttp3_conn *conn, int64_t stream_id, nghttp3_vec *vec, size_t veccnt, + uint32_t *pflags, void *conn_user_data, void *stream_user_data); /** * @struct @@ -2460,8 +2490,8 @@ typedef struct nghttp3_data_reader { * Out of memory. */ NGHTTP3_EXTERN int nghttp3_conn_submit_request( - nghttp3_conn *conn, int64_t stream_id, const nghttp3_nv *nva, size_t nvlen, - const nghttp3_data_reader *dr, void *stream_user_data); + nghttp3_conn *conn, int64_t stream_id, const nghttp3_nv *nva, size_t nvlen, + const nghttp3_data_reader *dr, void *stream_user_data); /** * @function @@ -2667,7 +2697,7 @@ typedef struct NGHTTP3_ALIGN(8) nghttp3_pri { * Stream not found. */ NGHTTP3_EXTERN int nghttp3_conn_get_stream_priority_versioned( - nghttp3_conn *conn, int pri_version, nghttp3_pri *dest, int64_t stream_id); + nghttp3_conn *conn, int pri_version, nghttp3_pri *dest, int64_t stream_id); /** * @function @@ -2720,8 +2750,8 @@ NGHTTP3_EXTERN int nghttp3_conn_set_client_stream_priority(nghttp3_conn *conn, * Out of memory. */ NGHTTP3_EXTERN int nghttp3_conn_set_server_stream_priority_versioned( - nghttp3_conn *conn, int64_t stream_id, int pri_version, - const nghttp3_pri *pri); + nghttp3_conn *conn, int64_t stream_id, int pri_version, + const nghttp3_pri *pri); /** * @function @@ -2884,7 +2914,7 @@ NGHTTP3_EXTERN int nghttp3_err_is_fatal(int liberr); */ #define nghttp3_conn_set_server_stream_priority(CONN, STREAM_ID, PRI) \ nghttp3_conn_set_server_stream_priority_versioned( \ - (CONN), (STREAM_ID), NGHTTP3_PRI_VERSION, (PRI)) + (CONN), (STREAM_ID), NGHTTP3_PRI_VERSION, (PRI)) /* * `nghttp3_conn_get_stream_priority` is a wrapper around @@ -2906,6 +2936,6 @@ NGHTTP3_EXTERN int nghttp3_err_is_fatal(int liberr); #ifdef __cplusplus } -#endif +#endif /* defined(__cplusplus) */ -#endif /* NGHTTP3_H */ +#endif /* !defined(NGHTTP3_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/includes/nghttp3/version.h b/deps/ngtcp2/nghttp3/lib/includes/nghttp3/version.h index bc57eb2cfcf2d6..7f6cb8acffe672 100644 --- a/deps/ngtcp2/nghttp3/lib/includes/nghttp3/version.h +++ b/deps/ngtcp2/nghttp3/lib/includes/nghttp3/version.h @@ -31,7 +31,7 @@ * * Version number of the nghttp3 library release. */ -#define NGHTTP3_VERSION "0.7.0" +#define NGHTTP3_VERSION "1.6.0" /** * @macro @@ -41,6 +41,6 @@ * number, 8 bits for minor and 8 bits for patch. Version 1.2.3 * becomes 0x010203. */ -#define NGHTTP3_VERSION_NUM 0x000700 +#define NGHTTP3_VERSION_NUM 0x010600 -#endif /* NGHTTP3_VERSION_H */ +#endif /* !defined(NGHTTP3_VERSION_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_balloc.c b/deps/ngtcp2/nghttp3/lib/nghttp3_balloc.c index e134d0f4dceb75..544e4fb1306a5f 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_balloc.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_balloc.c @@ -66,8 +66,8 @@ int nghttp3_balloc_get(nghttp3_balloc *balloc, void **pbuf, size_t n) { assert(n <= balloc->blklen); if (nghttp3_buf_left(&balloc->buf) < n) { - p = nghttp3_mem_malloc(balloc->mem, sizeof(nghttp3_memblock_hd) + 0x10u + - balloc->blklen); + p = nghttp3_mem_malloc(balloc->mem, + sizeof(nghttp3_memblock_hd) + 0x8u + balloc->blklen); if (p == NULL) { return NGHTTP3_ERR_NOMEM; } @@ -76,10 +76,10 @@ int nghttp3_balloc_get(nghttp3_balloc *balloc, void **pbuf, size_t n) { hd->next = balloc->head; balloc->head = hd; nghttp3_buf_wrap_init( - &balloc->buf, - (uint8_t *)(((uintptr_t)p + sizeof(nghttp3_memblock_hd) + 0xfu) & - ~(uintptr_t)0xfu), - balloc->blklen); + &balloc->buf, + (uint8_t *)(((uintptr_t)p + sizeof(nghttp3_memblock_hd) + 0xfu) & + ~(uintptr_t)0xfu), + balloc->blklen); } assert(((uintptr_t)balloc->buf.last & 0xfu) == 0); diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_balloc.h b/deps/ngtcp2/nghttp3/lib/nghttp3_balloc.h index e02f61d16b5763..c95f0426a924bf 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_balloc.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_balloc.h @@ -28,7 +28,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -40,7 +40,10 @@ typedef struct nghttp3_memblock_hd nghttp3_memblock_hd; * nghttp3_memblock_hd is the header of memory block. */ struct nghttp3_memblock_hd { - nghttp3_memblock_hd *next; + union { + nghttp3_memblock_hd *next; + uint64_t pad; + }; }; /* @@ -89,4 +92,4 @@ int nghttp3_balloc_get(nghttp3_balloc *balloc, void **pbuf, size_t n); */ void nghttp3_balloc_clear(nghttp3_balloc *balloc); -#endif /* NGHTTP3_BALLOC_H */ +#endif /* !defined(NGHTTP3_BALLOC_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_buf.h b/deps/ngtcp2/nghttp3/lib/nghttp3_buf.h index 472a4b7b14a80e..9fa067de91b949 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_buf.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_buf.h @@ -28,7 +28,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -71,4 +71,4 @@ void nghttp3_typed_buf_init(nghttp3_typed_buf *tbuf, const nghttp3_buf *buf, void nghttp3_typed_buf_free(nghttp3_typed_buf *tbuf); -#endif /* NGHTTP3_BUF_H */ +#endif /* !defined(NGHTTP3_BUF_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_conn.c b/deps/ngtcp2/nghttp3/lib/nghttp3_conn.c index 25aaf685734cb1..f70b4f5472de64 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_conn.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_conn.c @@ -210,9 +210,9 @@ static int conn_call_recv_settings(nghttp3_conn *conn) { static int ricnt_less(const nghttp3_pq_entry *lhsx, const nghttp3_pq_entry *rhsx) { nghttp3_stream *lhs = - nghttp3_struct_of(lhsx, nghttp3_stream, qpack_blocked_pe); + nghttp3_struct_of(lhsx, nghttp3_stream, qpack_blocked_pe); nghttp3_stream *rhs = - nghttp3_struct_of(rhsx, nghttp3_stream, qpack_blocked_pe); + nghttp3_struct_of(rhsx, nghttp3_stream, qpack_blocked_pe); return lhs->qpack_sctx.ricnt < rhs->qpack_sctx.ricnt; } @@ -250,19 +250,19 @@ static int conn_new(nghttp3_conn **pconn, int server, int callbacks_version, nghttp3_objalloc_init(&conn->out_chunk_objalloc, NGHTTP3_STREAM_MIN_CHUNK_SIZE * 16, mem); - nghttp3_objalloc_stream_init(&conn->stream_objalloc, 64, mem); + nghttp3_objalloc_stream_init(&conn->stream_objalloc, 8, mem); nghttp3_map_init(&conn->streams, mem); - rv = nghttp3_qpack_decoder_init(&conn->qdec, - settings->qpack_max_dtable_capacity, - settings->qpack_blocked_streams, mem); + rv = + nghttp3_qpack_decoder_init(&conn->qdec, settings->qpack_max_dtable_capacity, + settings->qpack_blocked_streams, mem); if (rv != 0) { goto qdec_init_fail; } rv = nghttp3_qpack_encoder_init( - &conn->qenc, settings->qpack_encoder_max_dtable_capacity, mem); + &conn->qenc, settings->qpack_encoder_max_dtable_capacity, mem); if (rv != 0) { goto qenc_init_fail; } @@ -273,7 +273,7 @@ static int conn_new(nghttp3_conn **pconn, int server, int callbacks_version, nghttp3_pq_init(&conn->sched[i].spq, cycle_less, mem); } - nghttp3_idtr_init(&conn->remote.bidi.idtr, server, mem); + nghttp3_idtr_init(&conn->remote.bidi.idtr, mem); conn->callbacks = *callbacks; conn->local.settings = *settings; @@ -368,7 +368,7 @@ void nghttp3_conn_del(nghttp3_conn *conn) { nghttp3_qpack_encoder_free(&conn->qenc); nghttp3_qpack_decoder_free(&conn->qdec); - nghttp3_map_each_free(&conn->streams, free_stream, NULL); + nghttp3_map_each(&conn->streams, free_stream, NULL); nghttp3_map_free(&conn->streams); nghttp3_objalloc_free(&conn->stream_objalloc); @@ -419,7 +419,7 @@ nghttp3_ssize nghttp3_conn_read_stream(nghttp3_conn *conn, int64_t stream_id, } conn->rx.max_stream_id_bidi = - nghttp3_max(conn->rx.max_stream_id_bidi, stream_id); + nghttp3_max_int64(conn->rx.max_stream_id_bidi, stream_id); rv = nghttp3_conn_create_stream(conn, &stream, stream_id); if (rv != 0) { return rv; @@ -498,7 +498,7 @@ static nghttp3_ssize conn_read_type(nghttp3_conn *conn, nghttp3_stream *stream, assert(srclen); - nread = nghttp3_read_varint(rvint, src, srclen, fin); + nread = nghttp3_read_varint(rvint, src, src + srclen, fin); if (nread < 0) { return NGHTTP3_ERR_H3_GENERAL_PROTOCOL_ERROR; } @@ -650,7 +650,7 @@ nghttp3_ssize nghttp3_conn_read_control(nghttp3_conn *conn, switch (rstate->state) { case NGHTTP3_CTRL_STREAM_STATE_FRAME_TYPE: assert(end - p > 0); - nread = nghttp3_read_varint(rvint, p, (size_t)(end - p), /* fin = */ 0); + nread = nghttp3_read_varint(rvint, p, end, /* fin = */ 0); if (nread < 0) { return NGHTTP3_ERR_H3_GENERAL_PROTOCOL_ERROR; } @@ -670,7 +670,7 @@ nghttp3_ssize nghttp3_conn_read_control(nghttp3_conn *conn, /* Fall through */ case NGHTTP3_CTRL_STREAM_STATE_FRAME_LENGTH: assert(end - p > 0); - nread = nghttp3_read_varint(rvint, p, (size_t)(end - p), /* fin = */ 0); + nread = nghttp3_read_varint(rvint, p, end, /* fin = */ 0); if (nread < 0) { return NGHTTP3_ERR_H3_FRAME_ERROR; } @@ -767,9 +767,9 @@ nghttp3_ssize nghttp3_conn_read_control(nghttp3_conn *conn, } /* Read Identifier */ - len = (size_t)nghttp3_min(rstate->left, (int64_t)(end - p)); + len = (size_t)nghttp3_min_int64(rstate->left, (int64_t)(end - p)); assert(len > 0); - nread = nghttp3_read_varint(rvint, p, len, frame_fin(rstate, len)); + nread = nghttp3_read_varint(rvint, p, p + len, frame_fin(rstate, len)); if (nread < 0) { return NGHTTP3_ERR_H3_FRAME_ERROR; } @@ -795,7 +795,7 @@ nghttp3_ssize nghttp3_conn_read_control(nghttp3_conn *conn, break; } - nread = nghttp3_read_varint(rvint, p, len, frame_fin(rstate, len)); + nread = nghttp3_read_varint(rvint, p, p + len, frame_fin(rstate, len)); if (nread < 0) { return NGHTTP3_ERR_H3_FRAME_ERROR; } @@ -811,16 +811,16 @@ nghttp3_ssize nghttp3_conn_read_control(nghttp3_conn *conn, nghttp3_varint_read_state_reset(rvint); rv = - nghttp3_conn_on_settings_entry_received(conn, &rstate->fr.settings); + nghttp3_conn_on_settings_entry_received(conn, &rstate->fr.settings); if (rv != 0) { return rv; } } break; case NGHTTP3_CTRL_STREAM_STATE_SETTINGS_ID: - len = (size_t)nghttp3_min(rstate->left, (int64_t)(end - p)); + len = (size_t)nghttp3_min_int64(rstate->left, (int64_t)(end - p)); assert(len > 0); - nread = nghttp3_read_varint(rvint, p, len, frame_fin(rstate, len)); + nread = nghttp3_read_varint(rvint, p, p + len, frame_fin(rstate, len)); if (nread < 0) { return NGHTTP3_ERR_H3_FRAME_ERROR; } @@ -845,9 +845,9 @@ nghttp3_ssize nghttp3_conn_read_control(nghttp3_conn *conn, } /* Fall through */ case NGHTTP3_CTRL_STREAM_STATE_SETTINGS_VALUE: - len = (size_t)nghttp3_min(rstate->left, (int64_t)(end - p)); + len = (size_t)nghttp3_min_int64(rstate->left, (int64_t)(end - p)); assert(len > 0); - nread = nghttp3_read_varint(rvint, p, len, frame_fin(rstate, len)); + nread = nghttp3_read_varint(rvint, p, p + len, frame_fin(rstate, len)); if (nread < 0) { return NGHTTP3_ERR_H3_FRAME_ERROR; } @@ -879,9 +879,9 @@ nghttp3_ssize nghttp3_conn_read_control(nghttp3_conn *conn, nghttp3_stream_read_state_reset(rstate); break; case NGHTTP3_CTRL_STREAM_STATE_GOAWAY: - len = (size_t)nghttp3_min(rstate->left, (int64_t)(end - p)); + len = (size_t)nghttp3_min_int64(rstate->left, (int64_t)(end - p)); assert(len > 0); - nread = nghttp3_read_varint(rvint, p, len, frame_fin(rstate, len)); + nread = nghttp3_read_varint(rvint, p, p + len, frame_fin(rstate, len)); if (nread < 0) { return NGHTTP3_ERR_H3_FRAME_ERROR; } @@ -906,7 +906,7 @@ nghttp3_ssize nghttp3_conn_read_control(nghttp3_conn *conn, if (conn->callbacks.shutdown) { rv = - conn->callbacks.shutdown(conn, conn->rx.goaway_id, conn->user_data); + conn->callbacks.shutdown(conn, conn->rx.goaway_id, conn->user_data); if (rv != 0) { return NGHTTP3_ERR_CALLBACK_FAILURE; } @@ -916,9 +916,9 @@ nghttp3_ssize nghttp3_conn_read_control(nghttp3_conn *conn, break; case NGHTTP3_CTRL_STREAM_STATE_MAX_PUSH_ID: /* server side only */ - len = (size_t)nghttp3_min(rstate->left, (int64_t)(end - p)); + len = (size_t)nghttp3_min_int64(rstate->left, (int64_t)(end - p)); assert(len > 0); - nread = nghttp3_read_varint(rvint, p, len, frame_fin(rstate, len)); + nread = nghttp3_read_varint(rvint, p, p + len, frame_fin(rstate, len)); if (nread < 0) { return NGHTTP3_ERR_H3_FRAME_ERROR; } @@ -941,9 +941,9 @@ nghttp3_ssize nghttp3_conn_read_control(nghttp3_conn *conn, break; case NGHTTP3_CTRL_STREAM_STATE_PRIORITY_UPDATE_PRI_ELEM_ID: /* server side only */ - len = (size_t)nghttp3_min(rstate->left, (int64_t)(end - p)); + len = (size_t)nghttp3_min_int64(rstate->left, (int64_t)(end - p)); assert(len > 0); - nread = nghttp3_read_varint(rvint, p, len, frame_fin(rstate, len)); + nread = nghttp3_read_varint(rvint, p, p + len, frame_fin(rstate, len)); if (nread < 0) { return NGHTTP3_ERR_H3_FRAME_ERROR; } @@ -977,7 +977,7 @@ nghttp3_ssize nghttp3_conn_read_control(nghttp3_conn *conn, case NGHTTP3_CTRL_STREAM_STATE_PRIORITY_UPDATE: /* We need to buffer Priority Field Value because it might be fragmented. */ - len = (size_t)nghttp3_min(rstate->left, (int64_t)(end - p)); + len = (size_t)nghttp3_min_int64(rstate->left, (int64_t)(end - p)); assert(len > 0); if (conn->rx.pri_fieldbuflen == 0 && rstate->left == (int64_t)len) { /* Everything is in the input buffer. Apply same length @@ -1032,7 +1032,7 @@ nghttp3_ssize nghttp3_conn_read_control(nghttp3_conn *conn, nghttp3_stream_read_state_reset(rstate); break; case NGHTTP3_CTRL_STREAM_STATE_IGN_FRAME: - len = (size_t)nghttp3_min(rstate->left, (int64_t)(end - p)); + len = (size_t)nghttp3_min_int64(rstate->left, (int64_t)(end - p)); p += len; nconsumed += len; rstate->left -= (int64_t)len; @@ -1076,7 +1076,7 @@ static int conn_delete_stream(nghttp3_conn *conn, nghttp3_stream *stream) { } rv = - nghttp3_map_remove(&conn->streams, (nghttp3_map_key_type)stream->node.id); + nghttp3_map_remove(&conn->streams, (nghttp3_map_key_type)stream->node.id); assert(0 == rv); @@ -1104,8 +1104,8 @@ static int conn_process_blocked_stream_data(nghttp3_conn *conn, buf = nghttp3_ringbuf_get(&stream->inq, 0); nconsumed = nghttp3_conn_read_bidi( - conn, &nproc, stream, buf->pos, nghttp3_buf_len(buf), - len == 1 && (stream->flags & NGHTTP3_STREAM_FLAG_READ_EOF)); + conn, &nproc, stream, buf->pos, nghttp3_buf_len(buf), + len == 1 && (stream->flags & NGHTTP3_STREAM_FLAG_READ_EOF)); if (nconsumed < 0) { return (int)nconsumed; } @@ -1144,7 +1144,7 @@ nghttp3_ssize nghttp3_conn_read_qpack_encoder(nghttp3_conn *conn, const uint8_t *src, size_t srclen) { nghttp3_ssize nconsumed = - nghttp3_qpack_decoder_read_encoder(&conn->qdec, src, srclen); + nghttp3_qpack_decoder_read_encoder(&conn->qdec, src, srclen); nghttp3_stream *stream; int rv; @@ -1240,7 +1240,7 @@ nghttp3_ssize nghttp3_conn_read_bidi(nghttp3_conn *conn, size_t *pnproc, switch (rstate->state) { case NGHTTP3_REQ_STREAM_STATE_FRAME_TYPE: assert(end - p > 0); - nread = nghttp3_read_varint(rvint, p, (size_t)(end - p), fin); + nread = nghttp3_read_varint(rvint, p, end, fin); if (nread < 0) { return NGHTTP3_ERR_H3_GENERAL_PROTOCOL_ERROR; } @@ -1260,7 +1260,7 @@ nghttp3_ssize nghttp3_conn_read_bidi(nghttp3_conn *conn, size_t *pnproc, /* Fall through */ case NGHTTP3_REQ_STREAM_STATE_FRAME_LENGTH: assert(end - p > 0); - nread = nghttp3_read_varint(rvint, p, (size_t)(end - p), fin); + nread = nghttp3_read_varint(rvint, p, end, fin); if (nread < 0) { return NGHTTP3_ERR_H3_FRAME_ERROR; } @@ -1277,14 +1277,14 @@ nghttp3_ssize nghttp3_conn_read_bidi(nghttp3_conn *conn, size_t *pnproc, switch (rstate->fr.hd.type) { case NGHTTP3_FRAME_DATA: rv = nghttp3_stream_transit_rx_http_state( - stream, NGHTTP3_HTTP_EVENT_DATA_BEGIN); + stream, NGHTTP3_HTTP_EVENT_DATA_BEGIN); if (rv != 0) { return rv; } /* DATA frame might be empty. */ if (rstate->left == 0) { rv = nghttp3_stream_transit_rx_http_state( - stream, NGHTTP3_HTTP_EVENT_DATA_END); + stream, NGHTTP3_HTTP_EVENT_DATA_END); assert(0 == rv); nghttp3_stream_read_state_reset(rstate); @@ -1294,7 +1294,7 @@ nghttp3_ssize nghttp3_conn_read_bidi(nghttp3_conn *conn, size_t *pnproc, break; case NGHTTP3_FRAME_HEADERS: rv = nghttp3_stream_transit_rx_http_state( - stream, NGHTTP3_HTTP_EVENT_HEADERS_BEGIN); + stream, NGHTTP3_HTTP_EVENT_HEADERS_BEGIN); if (rv != 0) { return rv; } @@ -1305,7 +1305,7 @@ nghttp3_ssize nghttp3_conn_read_bidi(nghttp3_conn *conn, size_t *pnproc, } rv = nghttp3_stream_transit_rx_http_state( - stream, NGHTTP3_HTTP_EVENT_HEADERS_END); + stream, NGHTTP3_HTTP_EVENT_HEADERS_END); assert(0 == rv); nghttp3_stream_read_state_reset(rstate); @@ -1351,7 +1351,7 @@ nghttp3_ssize nghttp3_conn_read_bidi(nghttp3_conn *conn, size_t *pnproc, } break; case NGHTTP3_REQ_STREAM_STATE_DATA: - len = (size_t)nghttp3_min(rstate->left, (int64_t)(end - p)); + len = (size_t)nghttp3_min_int64(rstate->left, (int64_t)(end - p)); rv = nghttp3_conn_on_data(conn, stream, p, len); if (rv != 0) { return rv; @@ -1370,7 +1370,7 @@ nghttp3_ssize nghttp3_conn_read_bidi(nghttp3_conn *conn, size_t *pnproc, nghttp3_stream_read_state_reset(rstate); break; case NGHTTP3_REQ_STREAM_STATE_HEADERS: - len = (size_t)nghttp3_min(rstate->left, (int64_t)(end - p)); + len = (size_t)nghttp3_min_int64(rstate->left, (int64_t)(end - p)); nread = nghttp3_conn_on_headers(conn, stream, p, len, (int64_t)len == rstate->left); if (nread < 0) { @@ -1478,7 +1478,7 @@ nghttp3_ssize nghttp3_conn_read_bidi(nghttp3_conn *conn, size_t *pnproc, break; case NGHTTP3_REQ_STREAM_STATE_IGN_FRAME: - len = (size_t)nghttp3_min(rstate->left, (int64_t)(end - p)); + len = (size_t)nghttp3_min_int64(rstate->left, (int64_t)(end - p)); p += len; nconsumed += len; rstate->left -= (int64_t)len; @@ -1590,9 +1590,9 @@ static nghttp3_ssize conn_decode_headers(nghttp3_conn *conn, buf.last = buf.end; for (;;) { - nread = nghttp3_qpack_decoder_read_request(qdec, &stream->qpack_sctx, &nv, - &flags, buf.pos, - nghttp3_buf_len(&buf), fin); + nread = + nghttp3_qpack_decoder_read_request(qdec, &stream->qpack_sctx, &nv, &flags, + buf.pos, nghttp3_buf_len(&buf), fin); if (nread < 0) { return (int)nread; @@ -1625,8 +1625,8 @@ static nghttp3_ssize conn_decode_headers(nghttp3_conn *conn, if (flags & NGHTTP3_QPACK_DECODE_FLAG_EMIT) { rv = nghttp3_http_on_header( - http, &nv, request, trailers, - conn->server && conn->local.settings.enable_connect_protocol); + http, &nv, request, trailers, + conn->server && conn->local.settings.enable_connect_protocol); switch (rv) { case NGHTTP3_ERR_MALFORMED_HTTP_HEADER: break; @@ -1705,7 +1705,7 @@ int nghttp3_conn_on_settings_entry_received(nghttp3_conn *conn, dest->qpack_blocked_streams = (size_t)ent->value; nghttp3_qpack_encoder_set_max_blocked_streams( - &conn->qenc, (size_t)nghttp3_min(100, ent->value)); + &conn->qenc, (size_t)nghttp3_min_uint64(100, ent->value)); break; case NGHTTP3_SETTINGS_ID_ENABLE_CONNECT_PROTOCOL: if (!conn->server) { @@ -1784,7 +1784,7 @@ conn_on_priority_update_stream(nghttp3_conn *conn, } conn->rx.max_stream_id_bidi = - nghttp3_max(conn->rx.max_stream_id_bidi, stream_id); + nghttp3_max_int64(conn->rx.max_stream_id_bidi, stream_id); rv = nghttp3_conn_create_stream(conn, &stream, stream_id); if (rv != 0) { return rv; @@ -1836,7 +1836,7 @@ int nghttp3_conn_create_stream(nghttp3_conn *conn, nghttp3_stream **pstream, nghttp3_stream *stream; int rv; nghttp3_stream_callbacks callbacks = { - conn_stream_acked_data, + conn_stream_acked_data, }; rv = nghttp3_stream_new(&stream, stream_id, &callbacks, @@ -1995,7 +1995,7 @@ nghttp3_ssize nghttp3_conn_writev_stream(nghttp3_conn *conn, if (conn->tx.ctrl && !nghttp3_stream_is_blocked(conn->tx.ctrl)) { ncnt = - conn_writev_stream(conn, pstream_id, pfin, vec, veccnt, conn->tx.ctrl); + conn_writev_stream(conn, pstream_id, pfin, vec, veccnt, conn->tx.ctrl); if (ncnt) { return ncnt; } @@ -2008,7 +2008,7 @@ nghttp3_ssize nghttp3_conn_writev_stream(nghttp3_conn *conn, } ncnt = - conn_writev_stream(conn, pstream_id, pfin, vec, veccnt, conn->tx.qdec); + conn_writev_stream(conn, pstream_id, pfin, vec, veccnt, conn->tx.qdec); if (ncnt) { return ncnt; } @@ -2016,7 +2016,7 @@ nghttp3_ssize nghttp3_conn_writev_stream(nghttp3_conn *conn, if (conn->tx.qenc && !nghttp3_stream_is_blocked(conn->tx.qenc)) { ncnt = - conn_writev_stream(conn, pstream_id, pfin, vec, veccnt, conn->tx.qenc); + conn_writev_stream(conn, pstream_id, pfin, vec, veccnt, conn->tx.qenc); if (ncnt) { return ncnt; } @@ -2095,7 +2095,22 @@ int nghttp3_conn_add_ack_offset(nghttp3_conn *conn, int64_t stream_id, return 0; } - return nghttp3_stream_add_ack_offset(stream, n); + return nghttp3_stream_update_ack_offset(stream, stream->ack_offset + n); +} + +int nghttp3_conn_update_ack_offset(nghttp3_conn *conn, int64_t stream_id, + uint64_t offset) { + nghttp3_stream *stream = nghttp3_conn_find_stream(conn, stream_id); + + if (stream == NULL) { + return 0; + } + + if (stream->ack_offset > offset) { + return NGHTTP3_ERR_INVALID_ARGUMENT; + } + + return nghttp3_stream_update_ack_offset(stream, offset); } static int conn_submit_headers_data(nghttp3_conn *conn, nghttp3_stream *stream, @@ -2304,7 +2319,7 @@ int nghttp3_conn_shutdown(nghttp3_conn *conn) { frent.fr.hd.type = NGHTTP3_FRAME_GOAWAY; if (conn->server) { frent.fr.goaway.id = - nghttp3_min((1ll << 62) - 4, conn->rx.max_stream_id_bidi + 4); + nghttp3_min_int64((1ll << 62) - 4, conn->rx.max_stream_id_bidi + 4); } else { frent.fr.goaway.id = 0; } @@ -2318,7 +2333,7 @@ int nghttp3_conn_shutdown(nghttp3_conn *conn) { conn->tx.goaway_id = frent.fr.goaway.id; conn->flags |= - NGHTTP3_CONN_FLAG_GOAWAY_QUEUED | NGHTTP3_CONN_FLAG_SHUTDOWN_COMMENCED; + NGHTTP3_CONN_FLAG_GOAWAY_QUEUED | NGHTTP3_CONN_FLAG_SHUTDOWN_COMMENCED; return 0; } @@ -2619,5 +2634,5 @@ void nghttp3_settings_default_versioned(int settings_version, memset(settings, 0, sizeof(nghttp3_settings)); settings->max_field_section_size = NGHTTP3_VARINT_MAX; settings->qpack_encoder_max_dtable_capacity = - NGHTTP3_QPACK_ENCODER_MAX_DTABLE_CAPACITY; + NGHTTP3_QPACK_ENCODER_MAX_DTABLE_CAPACITY; } diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_conn.h b/deps/ngtcp2/nghttp3/lib/nghttp3_conn.h index 74f47583ce825c..1218ba508ba46a 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_conn.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_conn.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -204,4 +204,4 @@ int nghttp3_conn_reject_stream(nghttp3_conn *conn, nghttp3_stream *stream); */ nghttp3_stream *nghttp3_conn_get_next_tx_stream(nghttp3_conn *conn); -#endif /* NGHTTP3_CONN_H */ +#endif /* !defined(NGHTTP3_CONN_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_conv.c b/deps/ngtcp2/nghttp3/lib/nghttp3_conv.c index edd0adc4d0ff0a..6439a6d782960c 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_conv.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_conv.c @@ -31,34 +31,39 @@ #include "nghttp3_str.h" #include "nghttp3_unreachable.h" -int64_t nghttp3_get_varint(size_t *plen, const uint8_t *p) { +const uint8_t *nghttp3_get_varint(int64_t *dest, const uint8_t *p) { union { - char b[8]; + uint8_t n8; uint16_t n16; uint32_t n32; uint64_t n64; } n; - *plen = (size_t)(1u << (*p >> 6)); - - switch (*plen) { + switch (*p >> 6) { + case 0: + *dest = *p++; + return p; case 1: - return (int64_t)*p; - case 2: memcpy(&n, p, 2); - n.b[0] &= 0x3f; - return (int64_t)ntohs(n.n16); - case 4: + n.n8 &= 0x3f; + *dest = ntohs(n.n16); + + return p + 2; + case 2: memcpy(&n, p, 4); - n.b[0] &= 0x3f; - return (int64_t)ntohl(n.n32); - case 8: + n.n8 &= 0x3f; + *dest = ntohl(n.n32); + + return p + 4; + case 3: memcpy(&n, p, 8); - n.b[0] &= 0x3f; - return (int64_t)nghttp3_ntohl64(n.n64); - } + n.n8 &= 0x3f; + *dest = (int64_t)nghttp3_ntohl64(n.n64); - nghttp3_unreachable(); + return p + 8; + default: + nghttp3_unreachable(); + } } int64_t nghttp3_get_varint_fb(const uint8_t *p) { return *p & 0x3f; } diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_conv.h b/deps/ngtcp2/nghttp3/lib/nghttp3_conv.h index 5522bc735bfd37..40f5d4de782883 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_conv.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_conv.h @@ -28,69 +28,67 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #ifdef HAVE_ARPA_INET_H # include -#endif /* HAVE_ARPA_INET_H */ +#endif /* defined(HAVE_ARPA_INET_H) */ #ifdef HAVE_NETINET_IN_H # include -#endif /* HAVE_NETINET_IN_H */ +#endif /* defined(HAVE_NETINET_IN_H) */ #ifdef HAVE_BYTESWAP_H # include -#endif /* HAVE_BYTESWAP_H */ +#endif /* defined(HAVE_BYTESWAP_H) */ #ifdef HAVE_ENDIAN_H # include -#endif /* HAVE_ENDIAN_H */ +#endif /* defined(HAVE_ENDIAN_H) */ #ifdef HAVE_SYS_ENDIAN_H # include -#endif /* HAVE_SYS_ENDIAN_H */ +#endif /* defined(HAVE_SYS_ENDIAN_H) */ -#if defined(__APPLE__) +#ifdef __APPLE__ # include -#endif // __APPLE__ +#endif /* defined(__APPLE__) */ #include -#if defined(HAVE_BE64TOH) || \ - (defined(HAVE_DECL_BE64TOH) && HAVE_DECL_BE64TOH > 0) +#if HAVE_DECL_BE64TOH # define nghttp3_ntohl64(N) be64toh(N) # define nghttp3_htonl64(N) htobe64(N) -#else /* !HAVE_BE64TOH */ -# if defined(WORDS_BIGENDIAN) +#else /* !HAVE_DECL_BE64TOH */ +# ifdef WORDS_BIGENDIAN # define nghttp3_ntohl64(N) (N) # define nghttp3_htonl64(N) (N) -# else /* !WORDS_BIGENDIAN */ -# if defined(HAVE_BSWAP_64) || \ - (defined(HAVE_DECL_BSWAP_64) && HAVE_DECL_BSWAP_64 > 0) +# else /* !defined(WORDS_BIGENDIAN) */ +# if HAVE_DECL_BSWAP_64 # define nghttp3_bswap64 bswap_64 # elif defined(WIN32) # define nghttp3_bswap64 _byteswap_uint64 # elif defined(__APPLE__) # define nghttp3_bswap64 OSSwapInt64 -# else /* !HAVE_BSWAP_64 && !WIN32 && !__APPLE__ */ +# else /* !(HAVE_DECL_BSWAP_64 || defined(WIN32) || defined(__APPLE__)) */ # define nghttp3_bswap64(N) \ ((uint64_t)(ntohl((uint32_t)(N))) << 32 | ntohl((uint32_t)((N) >> 32))) -# endif /* !HAVE_BSWAP_64 && !WIN32 && !__APPLE__ */ +# endif /* !(HAVE_DECL_BSWAP_64 || defined(WIN32) || defined(__APPLE__)) */ # define nghttp3_ntohl64(N) nghttp3_bswap64(N) # define nghttp3_htonl64(N) nghttp3_bswap64(N) -# endif /* !WORDS_BIGENDIAN */ -#endif /* !HAVE_BE64TOH */ +# endif /* !defined(WORDS_BIGENDIAN) */ +#endif /* !HAVE_DECL_BE64TOH */ -#if defined(WIN32) +#ifdef WIN32 /* Windows requires ws2_32 library for ntonl family of functions. We define inline functions for those functions so that we don't have dependency on that lib. */ # ifdef _MSC_VER # define STIN static __inline -# else +# else /* !defined(_MSC_VER) */ # define STIN static inline -# endif +# endif /* !defined(_MSC_VER) */ STIN uint32_t htonl(uint32_t hostlong) { uint32_t res; @@ -128,14 +126,14 @@ STIN uint16_t ntohs(uint16_t netshort) { return res; } -#endif /* WIN32 */ +#endif /* defined(WIN32) */ /* - * nghttp3_get_varint reads variable-length integer from |p|, and - * returns it in host byte order. The number of bytes read is stored - * in |*plen|. + * nghttp3_get_varint reads variable-length unsigned integer from |p|, + * and stores it in the buffer pointed by |dest| in host byte order. + * It returns |p| plus the number of bytes read from |p|. */ -int64_t nghttp3_get_varint(size_t *plen, const uint8_t *p); +const uint8_t *nghttp3_get_varint(int64_t *dest, const uint8_t *p); /* * nghttp3_get_varint_fb reads first byte of encoded variable-length @@ -193,4 +191,4 @@ uint64_t nghttp3_ord_stream_id(int64_t stream_id); */ #define NGHTTP3_PRI_INC_MASK (1 << 7) -#endif /* NGHTTP3_CONV_H */ +#endif /* !defined(NGHTTP3_CONV_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_debug.c b/deps/ngtcp2/nghttp3/lib/nghttp3_debug.c index 4021b0dc469b66..0235217e9627ce 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_debug.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_debug.c @@ -35,7 +35,7 @@ static void nghttp3_default_debug_vfprintf_callback(const char *fmt, } static nghttp3_debug_vprintf_callback static_debug_vprintf_callback = - nghttp3_default_debug_vfprintf_callback; + nghttp3_default_debug_vfprintf_callback; void nghttp3_debug_vprintf(const char *format, ...) { if (static_debug_vprintf_callback) { @@ -47,15 +47,15 @@ void nghttp3_debug_vprintf(const char *format, ...) { } void nghttp3_set_debug_vprintf_callback( - nghttp3_debug_vprintf_callback debug_vprintf_callback) { + nghttp3_debug_vprintf_callback debug_vprintf_callback) { static_debug_vprintf_callback = debug_vprintf_callback; } -#else /* !DEBUGBUILD */ +#else /* !defined(DEBUGBUILD) */ void nghttp3_set_debug_vprintf_callback( - nghttp3_debug_vprintf_callback debug_vprintf_callback) { + nghttp3_debug_vprintf_callback debug_vprintf_callback) { (void)debug_vprintf_callback; } -#endif /* !DEBUGBUILD */ +#endif /* !defined(DEBUGBUILD) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_debug.h b/deps/ngtcp2/nghttp3/lib/nghttp3_debug.h index 01ed918414cfe5..d73bf8ecf312cd 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_debug.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_debug.h @@ -28,17 +28,17 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include #ifdef DEBUGBUILD # define DEBUGF(...) nghttp3_debug_vprintf(__VA_ARGS__) void nghttp3_debug_vprintf(const char *format, ...); -#else +#else /* !defined(DEBUGBUILD) */ # define DEBUGF(...) \ do { \ } while (0) -#endif +#endif /* !defined(DEBUGBUILD) */ -#endif /* NGHTTP3_DEBUG_H */ +#endif /* !defined(NGHTTP3_DEBUG_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_err.h b/deps/ngtcp2/nghttp3/lib/nghttp3_err.h index 2fa914f86b189e..6f8205cc17ce7c 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_err.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_err.h @@ -27,8 +27,8 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include -#endif /* NGHTTP3_ERR_H */ +#endif /* !defined(NGHTTP3_ERR_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_frame.c b/deps/ngtcp2/nghttp3/lib/nghttp3_frame.c index 923a78f90f826f..1d87e448d887cf 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_frame.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_frame.c @@ -102,7 +102,7 @@ nghttp3_frame_write_priority_update(uint8_t *p, } size_t nghttp3_frame_write_priority_update_len( - int64_t *ppayloadlen, const nghttp3_frame_priority_update *fr) { + int64_t *ppayloadlen, const nghttp3_frame_priority_update *fr) { size_t payloadlen = nghttp3_put_varintlen(fr->pri_elem_id) + fr->datalen; *ppayloadlen = (int64_t)payloadlen; diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_frame.h b/deps/ngtcp2/nghttp3/lib/nghttp3_frame.h index 1079673d150ce3..e216967d740b86 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_frame.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_frame.h @@ -28,7 +28,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -188,7 +188,7 @@ nghttp3_frame_write_priority_update(uint8_t *dest, * stores payload length in |*ppayloadlen|. */ size_t nghttp3_frame_write_priority_update_len( - int64_t *ppayloadlen, const nghttp3_frame_priority_update *fr); + int64_t *ppayloadlen, const nghttp3_frame_priority_update *fr); /* * nghttp3_nva_copy copies name/value pairs from |nva|, which contains @@ -227,4 +227,4 @@ void nghttp3_frame_headers_free(nghttp3_frame_headers *fr, void nghttp3_frame_priority_update_free(nghttp3_frame_priority_update *fr, const nghttp3_mem *mem); -#endif /* NGHTTP3_FRAME_H */ +#endif /* !defined(NGHTTP3_FRAME_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_gaptr.c b/deps/ngtcp2/nghttp3/lib/nghttp3_gaptr.c index 88cb49a02f892f..20eed5faa2bcba 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_gaptr.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_gaptr.c @@ -37,14 +37,8 @@ void nghttp3_gaptr_init(nghttp3_gaptr *gaptr, const nghttp3_mem *mem) { static int gaptr_gap_init(nghttp3_gaptr *gaptr) { nghttp3_range range = {0, UINT64_MAX}; - int rv; - - rv = nghttp3_ksl_insert(&gaptr->gap, NULL, &range, NULL); - if (rv != 0) { - return rv; - } - return 0; + return nghttp3_ksl_insert(&gaptr->gap, NULL, &range, NULL); } void nghttp3_gaptr_free(nghttp3_gaptr *gaptr) { @@ -82,7 +76,9 @@ int nghttp3_gaptr_push(nghttp3_gaptr *gaptr, uint64_t offset, nghttp3_ksl_remove_hint(&gaptr->gap, &it, &it, &k); continue; } + nghttp3_range_cut(&l, &r, &k, &m); + if (nghttp3_range_len(&l)) { nghttp3_ksl_update_key(&gaptr->gap, &k, &l); @@ -95,23 +91,23 @@ int nghttp3_gaptr_push(nghttp3_gaptr *gaptr, uint64_t offset, } else if (nghttp3_range_len(&r)) { nghttp3_ksl_update_key(&gaptr->gap, &k, &r); } + nghttp3_ksl_it_next(&it); } + return 0; } uint64_t nghttp3_gaptr_first_gap_offset(nghttp3_gaptr *gaptr) { nghttp3_ksl_it it; - nghttp3_range r; if (nghttp3_ksl_len(&gaptr->gap) == 0) { return 0; } it = nghttp3_ksl_begin(&gaptr->gap); - r = *(nghttp3_range *)nghttp3_ksl_it_key(&it); - return r.begin; + return ((nghttp3_range *)nghttp3_ksl_it_key(&it))->begin; } nghttp3_range nghttp3_gaptr_get_first_gap_after(nghttp3_gaptr *gaptr, @@ -136,7 +132,6 @@ int nghttp3_gaptr_is_pushed(nghttp3_gaptr *gaptr, uint64_t offset, uint64_t datalen) { nghttp3_range q = {offset, offset + datalen}; nghttp3_ksl_it it; - nghttp3_range k; nghttp3_range m; if (nghttp3_ksl_len(&gaptr->gap) == 0) { @@ -145,8 +140,7 @@ int nghttp3_gaptr_is_pushed(nghttp3_gaptr *gaptr, uint64_t offset, it = nghttp3_ksl_lower_bound_compar(&gaptr->gap, &q, nghttp3_ksl_range_exclusive_compar); - k = *(nghttp3_range *)nghttp3_ksl_it_key(&it); - m = nghttp3_range_intersect(&q, &k); + m = nghttp3_range_intersect(&q, (nghttp3_range *)nghttp3_ksl_it_key(&it)); return nghttp3_range_len(&m) == 0; } diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_gaptr.h b/deps/ngtcp2/nghttp3/lib/nghttp3_gaptr.h index 7c83c847c9fe29..7578fdc14f6010 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_gaptr.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_gaptr.h @@ -28,7 +28,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -40,8 +40,9 @@ * nghttp3_gaptr maintains the gap in the range [0, UINT64_MAX). */ typedef struct nghttp3_gaptr { - /* gap maintains the range of offset which is not received - yet. Initially, its range is [0, UINT64_MAX). */ + /* gap maintains the range of offset which is not pushed + yet. Initially, its range is [0, UINT64_MAX). "gap" is the range + that is not pushed yet. */ nghttp3_ksl gap; /* mem is custom memory allocator */ const nghttp3_mem *mem; @@ -58,8 +59,7 @@ void nghttp3_gaptr_init(nghttp3_gaptr *gaptr, const nghttp3_mem *mem); void nghttp3_gaptr_free(nghttp3_gaptr *gaptr); /* - * nghttp3_gaptr_push adds new data of length |datalen| at the stream - * offset |offset|. + * nghttp3_gaptr_push pushes the range [offset, offset + datalen). * * This function returns 0 if it succeeds, or one of the following * negative error codes: @@ -77,7 +77,7 @@ uint64_t nghttp3_gaptr_first_gap_offset(nghttp3_gaptr *gaptr); /* * nghttp3_gaptr_get_first_gap_after returns the first gap which - * overlaps or comes after |offset|. + * includes or comes after |offset|. */ nghttp3_range nghttp3_gaptr_get_first_gap_after(nghttp3_gaptr *gaptr, uint64_t offset); @@ -96,4 +96,4 @@ int nghttp3_gaptr_is_pushed(nghttp3_gaptr *gaptr, uint64_t offset, */ void nghttp3_gaptr_drop_first_gap(nghttp3_gaptr *gaptr); -#endif /* NGHTTP3_GAPTR_H */ +#endif /* !defined(NGHTTP3_GAPTR_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_http.c b/deps/ngtcp2/nghttp3/lib/nghttp3_http.c index 963134f13df946..38092cfb7c322c 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_http.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_http.c @@ -28,11 +28,15 @@ #include #include +#ifdef __AVX2__ +# include +#endif /* __AVX2__ */ + #include "nghttp3_stream.h" #include "nghttp3_macro.h" #include "nghttp3_conv.h" #include "nghttp3_unreachable.h" -#include "sfparse.h" +#include "sfparse/sfparse.h" static uint8_t downcase(uint8_t c) { return 'A' <= c && c <= 'Z' ? (uint8_t)(c - 'A' + 'a') : c; @@ -175,26 +179,282 @@ int nghttp3_pri_parse_priority_versioned(int pri_version, nghttp3_pri *dest, return nghttp3_http_parse_priority(dest, value, valuelen); } +/* Generated by genauthroitychartbl.py */ +static char VALID_AUTHORITY_CHARS[] = { + 0 /* NUL */, 0 /* SOH */, 0 /* STX */, 0 /* ETX */, + 0 /* EOT */, 0 /* ENQ */, 0 /* ACK */, 0 /* BEL */, + 0 /* BS */, 0 /* HT */, 0 /* LF */, 0 /* VT */, + 0 /* FF */, 0 /* CR */, 0 /* SO */, 0 /* SI */, + 0 /* DLE */, 0 /* DC1 */, 0 /* DC2 */, 0 /* DC3 */, + 0 /* DC4 */, 0 /* NAK */, 0 /* SYN */, 0 /* ETB */, + 0 /* CAN */, 0 /* EM */, 0 /* SUB */, 0 /* ESC */, + 0 /* FS */, 0 /* GS */, 0 /* RS */, 0 /* US */, + 0 /* SPC */, 1 /* ! */, 0 /* " */, 0 /* # */, + 1 /* $ */, 1 /* % */, 1 /* & */, 1 /* ' */, + 1 /* ( */, 1 /* ) */, 1 /* * */, 1 /* + */, + 1 /* , */, 1 /* - */, 1 /* . */, 0 /* / */, + 1 /* 0 */, 1 /* 1 */, 1 /* 2 */, 1 /* 3 */, + 1 /* 4 */, 1 /* 5 */, 1 /* 6 */, 1 /* 7 */, + 1 /* 8 */, 1 /* 9 */, 1 /* : */, 1 /* ; */, + 0 /* < */, 1 /* = */, 0 /* > */, 0 /* ? */, + 1 /* @ */, 1 /* A */, 1 /* B */, 1 /* C */, + 1 /* D */, 1 /* E */, 1 /* F */, 1 /* G */, + 1 /* H */, 1 /* I */, 1 /* J */, 1 /* K */, + 1 /* L */, 1 /* M */, 1 /* N */, 1 /* O */, + 1 /* P */, 1 /* Q */, 1 /* R */, 1 /* S */, + 1 /* T */, 1 /* U */, 1 /* V */, 1 /* W */, + 1 /* X */, 1 /* Y */, 1 /* Z */, 1 /* [ */, + 0 /* \ */, 1 /* ] */, 0 /* ^ */, 1 /* _ */, + 0 /* ` */, 1 /* a */, 1 /* b */, 1 /* c */, + 1 /* d */, 1 /* e */, 1 /* f */, 1 /* g */, + 1 /* h */, 1 /* i */, 1 /* j */, 1 /* k */, + 1 /* l */, 1 /* m */, 1 /* n */, 1 /* o */, + 1 /* p */, 1 /* q */, 1 /* r */, 1 /* s */, + 1 /* t */, 1 /* u */, 1 /* v */, 1 /* w */, + 1 /* x */, 1 /* y */, 1 /* z */, 0 /* { */, + 0 /* | */, 0 /* } */, 1 /* ~ */, 0 /* DEL */, + 0 /* 0x80 */, 0 /* 0x81 */, 0 /* 0x82 */, 0 /* 0x83 */, + 0 /* 0x84 */, 0 /* 0x85 */, 0 /* 0x86 */, 0 /* 0x87 */, + 0 /* 0x88 */, 0 /* 0x89 */, 0 /* 0x8a */, 0 /* 0x8b */, + 0 /* 0x8c */, 0 /* 0x8d */, 0 /* 0x8e */, 0 /* 0x8f */, + 0 /* 0x90 */, 0 /* 0x91 */, 0 /* 0x92 */, 0 /* 0x93 */, + 0 /* 0x94 */, 0 /* 0x95 */, 0 /* 0x96 */, 0 /* 0x97 */, + 0 /* 0x98 */, 0 /* 0x99 */, 0 /* 0x9a */, 0 /* 0x9b */, + 0 /* 0x9c */, 0 /* 0x9d */, 0 /* 0x9e */, 0 /* 0x9f */, + 0 /* 0xa0 */, 0 /* 0xa1 */, 0 /* 0xa2 */, 0 /* 0xa3 */, + 0 /* 0xa4 */, 0 /* 0xa5 */, 0 /* 0xa6 */, 0 /* 0xa7 */, + 0 /* 0xa8 */, 0 /* 0xa9 */, 0 /* 0xaa */, 0 /* 0xab */, + 0 /* 0xac */, 0 /* 0xad */, 0 /* 0xae */, 0 /* 0xaf */, + 0 /* 0xb0 */, 0 /* 0xb1 */, 0 /* 0xb2 */, 0 /* 0xb3 */, + 0 /* 0xb4 */, 0 /* 0xb5 */, 0 /* 0xb6 */, 0 /* 0xb7 */, + 0 /* 0xb8 */, 0 /* 0xb9 */, 0 /* 0xba */, 0 /* 0xbb */, + 0 /* 0xbc */, 0 /* 0xbd */, 0 /* 0xbe */, 0 /* 0xbf */, + 0 /* 0xc0 */, 0 /* 0xc1 */, 0 /* 0xc2 */, 0 /* 0xc3 */, + 0 /* 0xc4 */, 0 /* 0xc5 */, 0 /* 0xc6 */, 0 /* 0xc7 */, + 0 /* 0xc8 */, 0 /* 0xc9 */, 0 /* 0xca */, 0 /* 0xcb */, + 0 /* 0xcc */, 0 /* 0xcd */, 0 /* 0xce */, 0 /* 0xcf */, + 0 /* 0xd0 */, 0 /* 0xd1 */, 0 /* 0xd2 */, 0 /* 0xd3 */, + 0 /* 0xd4 */, 0 /* 0xd5 */, 0 /* 0xd6 */, 0 /* 0xd7 */, + 0 /* 0xd8 */, 0 /* 0xd9 */, 0 /* 0xda */, 0 /* 0xdb */, + 0 /* 0xdc */, 0 /* 0xdd */, 0 /* 0xde */, 0 /* 0xdf */, + 0 /* 0xe0 */, 0 /* 0xe1 */, 0 /* 0xe2 */, 0 /* 0xe3 */, + 0 /* 0xe4 */, 0 /* 0xe5 */, 0 /* 0xe6 */, 0 /* 0xe7 */, + 0 /* 0xe8 */, 0 /* 0xe9 */, 0 /* 0xea */, 0 /* 0xeb */, + 0 /* 0xec */, 0 /* 0xed */, 0 /* 0xee */, 0 /* 0xef */, + 0 /* 0xf0 */, 0 /* 0xf1 */, 0 /* 0xf2 */, 0 /* 0xf3 */, + 0 /* 0xf4 */, 0 /* 0xf5 */, 0 /* 0xf6 */, 0 /* 0xf7 */, + 0 /* 0xf8 */, 0 /* 0xf9 */, 0 /* 0xfa */, 0 /* 0xfb */, + 0 /* 0xfc */, 0 /* 0xfd */, 0 /* 0xfe */, 0 /* 0xff */ +}; + +static int check_authority(const uint8_t *value, size_t len) { + const uint8_t *last; + for (last = value + len; value != last; ++value) { + if (!VALID_AUTHORITY_CHARS[*value]) { + return 0; + } + } + return 1; +} + +static int check_scheme(const uint8_t *value, size_t len) { + const uint8_t *last; + if (len == 0) { + return 0; + } + + if (!(('A' <= *value && *value <= 'Z') || ('a' <= *value && *value <= 'z'))) { + return 0; + } + + last = value + len; + ++value; + + for (; value != last; ++value) { + if (!(('A' <= *value && *value <= 'Z') || + ('a' <= *value && *value <= 'z') || + ('0' <= *value && *value <= '9') || *value == '+' || *value == '-' || + *value == '.')) { + return 0; + } + } + return 1; +} + +/* Generated by genmethodchartbl.py */ +static char VALID_METHOD_CHARS[] = { + 0 /* NUL */, 0 /* SOH */, 0 /* STX */, 0 /* ETX */, + 0 /* EOT */, 0 /* ENQ */, 0 /* ACK */, 0 /* BEL */, + 0 /* BS */, 0 /* HT */, 0 /* LF */, 0 /* VT */, + 0 /* FF */, 0 /* CR */, 0 /* SO */, 0 /* SI */, + 0 /* DLE */, 0 /* DC1 */, 0 /* DC2 */, 0 /* DC3 */, + 0 /* DC4 */, 0 /* NAK */, 0 /* SYN */, 0 /* ETB */, + 0 /* CAN */, 0 /* EM */, 0 /* SUB */, 0 /* ESC */, + 0 /* FS */, 0 /* GS */, 0 /* RS */, 0 /* US */, + 0 /* SPC */, 1 /* ! */, 0 /* " */, 1 /* # */, + 1 /* $ */, 1 /* % */, 1 /* & */, 1 /* ' */, + 0 /* ( */, 0 /* ) */, 1 /* * */, 1 /* + */, + 0 /* , */, 1 /* - */, 1 /* . */, 0 /* / */, + 1 /* 0 */, 1 /* 1 */, 1 /* 2 */, 1 /* 3 */, + 1 /* 4 */, 1 /* 5 */, 1 /* 6 */, 1 /* 7 */, + 1 /* 8 */, 1 /* 9 */, 0 /* : */, 0 /* ; */, + 0 /* < */, 0 /* = */, 0 /* > */, 0 /* ? */, + 0 /* @ */, 1 /* A */, 1 /* B */, 1 /* C */, + 1 /* D */, 1 /* E */, 1 /* F */, 1 /* G */, + 1 /* H */, 1 /* I */, 1 /* J */, 1 /* K */, + 1 /* L */, 1 /* M */, 1 /* N */, 1 /* O */, + 1 /* P */, 1 /* Q */, 1 /* R */, 1 /* S */, + 1 /* T */, 1 /* U */, 1 /* V */, 1 /* W */, + 1 /* X */, 1 /* Y */, 1 /* Z */, 0 /* [ */, + 0 /* \ */, 0 /* ] */, 1 /* ^ */, 1 /* _ */, + 1 /* ` */, 1 /* a */, 1 /* b */, 1 /* c */, + 1 /* d */, 1 /* e */, 1 /* f */, 1 /* g */, + 1 /* h */, 1 /* i */, 1 /* j */, 1 /* k */, + 1 /* l */, 1 /* m */, 1 /* n */, 1 /* o */, + 1 /* p */, 1 /* q */, 1 /* r */, 1 /* s */, + 1 /* t */, 1 /* u */, 1 /* v */, 1 /* w */, + 1 /* x */, 1 /* y */, 1 /* z */, 0 /* { */, + 1 /* | */, 0 /* } */, 1 /* ~ */, 0 /* DEL */, + 0 /* 0x80 */, 0 /* 0x81 */, 0 /* 0x82 */, 0 /* 0x83 */, + 0 /* 0x84 */, 0 /* 0x85 */, 0 /* 0x86 */, 0 /* 0x87 */, + 0 /* 0x88 */, 0 /* 0x89 */, 0 /* 0x8a */, 0 /* 0x8b */, + 0 /* 0x8c */, 0 /* 0x8d */, 0 /* 0x8e */, 0 /* 0x8f */, + 0 /* 0x90 */, 0 /* 0x91 */, 0 /* 0x92 */, 0 /* 0x93 */, + 0 /* 0x94 */, 0 /* 0x95 */, 0 /* 0x96 */, 0 /* 0x97 */, + 0 /* 0x98 */, 0 /* 0x99 */, 0 /* 0x9a */, 0 /* 0x9b */, + 0 /* 0x9c */, 0 /* 0x9d */, 0 /* 0x9e */, 0 /* 0x9f */, + 0 /* 0xa0 */, 0 /* 0xa1 */, 0 /* 0xa2 */, 0 /* 0xa3 */, + 0 /* 0xa4 */, 0 /* 0xa5 */, 0 /* 0xa6 */, 0 /* 0xa7 */, + 0 /* 0xa8 */, 0 /* 0xa9 */, 0 /* 0xaa */, 0 /* 0xab */, + 0 /* 0xac */, 0 /* 0xad */, 0 /* 0xae */, 0 /* 0xaf */, + 0 /* 0xb0 */, 0 /* 0xb1 */, 0 /* 0xb2 */, 0 /* 0xb3 */, + 0 /* 0xb4 */, 0 /* 0xb5 */, 0 /* 0xb6 */, 0 /* 0xb7 */, + 0 /* 0xb8 */, 0 /* 0xb9 */, 0 /* 0xba */, 0 /* 0xbb */, + 0 /* 0xbc */, 0 /* 0xbd */, 0 /* 0xbe */, 0 /* 0xbf */, + 0 /* 0xc0 */, 0 /* 0xc1 */, 0 /* 0xc2 */, 0 /* 0xc3 */, + 0 /* 0xc4 */, 0 /* 0xc5 */, 0 /* 0xc6 */, 0 /* 0xc7 */, + 0 /* 0xc8 */, 0 /* 0xc9 */, 0 /* 0xca */, 0 /* 0xcb */, + 0 /* 0xcc */, 0 /* 0xcd */, 0 /* 0xce */, 0 /* 0xcf */, + 0 /* 0xd0 */, 0 /* 0xd1 */, 0 /* 0xd2 */, 0 /* 0xd3 */, + 0 /* 0xd4 */, 0 /* 0xd5 */, 0 /* 0xd6 */, 0 /* 0xd7 */, + 0 /* 0xd8 */, 0 /* 0xd9 */, 0 /* 0xda */, 0 /* 0xdb */, + 0 /* 0xdc */, 0 /* 0xdd */, 0 /* 0xde */, 0 /* 0xdf */, + 0 /* 0xe0 */, 0 /* 0xe1 */, 0 /* 0xe2 */, 0 /* 0xe3 */, + 0 /* 0xe4 */, 0 /* 0xe5 */, 0 /* 0xe6 */, 0 /* 0xe7 */, + 0 /* 0xe8 */, 0 /* 0xe9 */, 0 /* 0xea */, 0 /* 0xeb */, + 0 /* 0xec */, 0 /* 0xed */, 0 /* 0xee */, 0 /* 0xef */, + 0 /* 0xf0 */, 0 /* 0xf1 */, 0 /* 0xf2 */, 0 /* 0xf3 */, + 0 /* 0xf4 */, 0 /* 0xf5 */, 0 /* 0xf6 */, 0 /* 0xf7 */, + 0 /* 0xf8 */, 0 /* 0xf9 */, 0 /* 0xfa */, 0 /* 0xfb */, + 0 /* 0xfc */, 0 /* 0xfd */, 0 /* 0xfe */, 0 /* 0xff */ +}; + +static int check_method(const uint8_t *value, size_t len) { + const uint8_t *last; + if (len == 0) { + return 0; + } + for (last = value + len; value != last; ++value) { + if (!VALID_METHOD_CHARS[*value]) { + return 0; + } + } + return 1; +} + +/* Generated by genpathchartbl.py */ +static char VALID_PATH_CHARS[] = { + 0 /* NUL */, 0 /* SOH */, 0 /* STX */, 0 /* ETX */, + 0 /* EOT */, 0 /* ENQ */, 0 /* ACK */, 0 /* BEL */, + 0 /* BS */, 0 /* HT */, 0 /* LF */, 0 /* VT */, + 0 /* FF */, 0 /* CR */, 0 /* SO */, 0 /* SI */, + 0 /* DLE */, 0 /* DC1 */, 0 /* DC2 */, 0 /* DC3 */, + 0 /* DC4 */, 0 /* NAK */, 0 /* SYN */, 0 /* ETB */, + 0 /* CAN */, 0 /* EM */, 0 /* SUB */, 0 /* ESC */, + 0 /* FS */, 0 /* GS */, 0 /* RS */, 0 /* US */, + 0 /* SPC */, 1 /* ! */, 1 /* " */, 1 /* # */, + 1 /* $ */, 1 /* % */, 1 /* & */, 1 /* ' */, + 1 /* ( */, 1 /* ) */, 1 /* * */, 1 /* + */, + 1 /* , */, 1 /* - */, 1 /* . */, 1 /* / */, + 1 /* 0 */, 1 /* 1 */, 1 /* 2 */, 1 /* 3 */, + 1 /* 4 */, 1 /* 5 */, 1 /* 6 */, 1 /* 7 */, + 1 /* 8 */, 1 /* 9 */, 1 /* : */, 1 /* ; */, + 1 /* < */, 1 /* = */, 1 /* > */, 1 /* ? */, + 1 /* @ */, 1 /* A */, 1 /* B */, 1 /* C */, + 1 /* D */, 1 /* E */, 1 /* F */, 1 /* G */, + 1 /* H */, 1 /* I */, 1 /* J */, 1 /* K */, + 1 /* L */, 1 /* M */, 1 /* N */, 1 /* O */, + 1 /* P */, 1 /* Q */, 1 /* R */, 1 /* S */, + 1 /* T */, 1 /* U */, 1 /* V */, 1 /* W */, + 1 /* X */, 1 /* Y */, 1 /* Z */, 1 /* [ */, + 1 /* \ */, 1 /* ] */, 1 /* ^ */, 1 /* _ */, + 1 /* ` */, 1 /* a */, 1 /* b */, 1 /* c */, + 1 /* d */, 1 /* e */, 1 /* f */, 1 /* g */, + 1 /* h */, 1 /* i */, 1 /* j */, 1 /* k */, + 1 /* l */, 1 /* m */, 1 /* n */, 1 /* o */, + 1 /* p */, 1 /* q */, 1 /* r */, 1 /* s */, + 1 /* t */, 1 /* u */, 1 /* v */, 1 /* w */, + 1 /* x */, 1 /* y */, 1 /* z */, 1 /* { */, + 1 /* | */, 1 /* } */, 1 /* ~ */, 0 /* DEL */, + 1 /* 0x80 */, 1 /* 0x81 */, 1 /* 0x82 */, 1 /* 0x83 */, + 1 /* 0x84 */, 1 /* 0x85 */, 1 /* 0x86 */, 1 /* 0x87 */, + 1 /* 0x88 */, 1 /* 0x89 */, 1 /* 0x8a */, 1 /* 0x8b */, + 1 /* 0x8c */, 1 /* 0x8d */, 1 /* 0x8e */, 1 /* 0x8f */, + 1 /* 0x90 */, 1 /* 0x91 */, 1 /* 0x92 */, 1 /* 0x93 */, + 1 /* 0x94 */, 1 /* 0x95 */, 1 /* 0x96 */, 1 /* 0x97 */, + 1 /* 0x98 */, 1 /* 0x99 */, 1 /* 0x9a */, 1 /* 0x9b */, + 1 /* 0x9c */, 1 /* 0x9d */, 1 /* 0x9e */, 1 /* 0x9f */, + 1 /* 0xa0 */, 1 /* 0xa1 */, 1 /* 0xa2 */, 1 /* 0xa3 */, + 1 /* 0xa4 */, 1 /* 0xa5 */, 1 /* 0xa6 */, 1 /* 0xa7 */, + 1 /* 0xa8 */, 1 /* 0xa9 */, 1 /* 0xaa */, 1 /* 0xab */, + 1 /* 0xac */, 1 /* 0xad */, 1 /* 0xae */, 1 /* 0xaf */, + 1 /* 0xb0 */, 1 /* 0xb1 */, 1 /* 0xb2 */, 1 /* 0xb3 */, + 1 /* 0xb4 */, 1 /* 0xb5 */, 1 /* 0xb6 */, 1 /* 0xb7 */, + 1 /* 0xb8 */, 1 /* 0xb9 */, 1 /* 0xba */, 1 /* 0xbb */, + 1 /* 0xbc */, 1 /* 0xbd */, 1 /* 0xbe */, 1 /* 0xbf */, + 1 /* 0xc0 */, 1 /* 0xc1 */, 1 /* 0xc2 */, 1 /* 0xc3 */, + 1 /* 0xc4 */, 1 /* 0xc5 */, 1 /* 0xc6 */, 1 /* 0xc7 */, + 1 /* 0xc8 */, 1 /* 0xc9 */, 1 /* 0xca */, 1 /* 0xcb */, + 1 /* 0xcc */, 1 /* 0xcd */, 1 /* 0xce */, 1 /* 0xcf */, + 1 /* 0xd0 */, 1 /* 0xd1 */, 1 /* 0xd2 */, 1 /* 0xd3 */, + 1 /* 0xd4 */, 1 /* 0xd5 */, 1 /* 0xd6 */, 1 /* 0xd7 */, + 1 /* 0xd8 */, 1 /* 0xd9 */, 1 /* 0xda */, 1 /* 0xdb */, + 1 /* 0xdc */, 1 /* 0xdd */, 1 /* 0xde */, 1 /* 0xdf */, + 1 /* 0xe0 */, 1 /* 0xe1 */, 1 /* 0xe2 */, 1 /* 0xe3 */, + 1 /* 0xe4 */, 1 /* 0xe5 */, 1 /* 0xe6 */, 1 /* 0xe7 */, + 1 /* 0xe8 */, 1 /* 0xe9 */, 1 /* 0xea */, 1 /* 0xeb */, + 1 /* 0xec */, 1 /* 0xed */, 1 /* 0xee */, 1 /* 0xef */, + 1 /* 0xf0 */, 1 /* 0xf1 */, 1 /* 0xf2 */, 1 /* 0xf3 */, + 1 /* 0xf4 */, 1 /* 0xf5 */, 1 /* 0xf6 */, 1 /* 0xf7 */, + 1 /* 0xf8 */, 1 /* 0xf9 */, 1 /* 0xfa */, 1 /* 0xfb */, + 1 /* 0xfc */, 1 /* 0xfd */, 1 /* 0xfe */, 1 /* 0xff */ +}; + +static int check_path(const uint8_t *value, size_t len) { + const uint8_t *last; + for (last = value + len; value != last; ++value) { + if (!VALID_PATH_CHARS[*value]) { + return 0; + } + } + return 1; +} + static int http_request_on_header(nghttp3_http_state *http, nghttp3_qpack_nv *nv, int trailers, int connect_protocol) { nghttp3_pri pri; - if (nv->name->base[0] == ':') { - if (trailers || - (http->flags & NGHTTP3_HTTP_FLAG_PSEUDO_HEADER_DISALLOWED)) { - return NGHTTP3_ERR_MALFORMED_HTTP_HEADER; - } - } - switch (nv->token) { case NGHTTP3_QPACK_TOKEN__AUTHORITY: - if (!check_pseudo_header(http, nv, NGHTTP3_HTTP_FLAG__AUTHORITY)) { + if (!check_authority(nv->value->base, nv->value->len) || + !check_pseudo_header(http, nv, NGHTTP3_HTTP_FLAG__AUTHORITY)) { return NGHTTP3_ERR_MALFORMED_HTTP_HEADER; } break; case NGHTTP3_QPACK_TOKEN__METHOD: - if (!check_pseudo_header(http, nv, NGHTTP3_HTTP_FLAG__METHOD)) { + if (!check_method(nv->value->base, nv->value->len) || + !check_pseudo_header(http, nv, NGHTTP3_HTTP_FLAG__METHOD)) { return NGHTTP3_ERR_MALFORMED_HTTP_HEADER; } switch (nv->value->len) { @@ -220,7 +480,8 @@ static int http_request_on_header(nghttp3_http_state *http, } break; case NGHTTP3_QPACK_TOKEN__PATH: - if (!check_pseudo_header(http, nv, NGHTTP3_HTTP_FLAG__PATH)) { + if (!check_path(nv->value->base, nv->value->len) || + !check_pseudo_header(http, nv, NGHTTP3_HTTP_FLAG__PATH)) { return NGHTTP3_ERR_MALFORMED_HTTP_HEADER; } if (nv->value->base[0] == '/') { @@ -230,7 +491,8 @@ static int http_request_on_header(nghttp3_http_state *http, } break; case NGHTTP3_QPACK_TOKEN__SCHEME: - if (!check_pseudo_header(http, nv, NGHTTP3_HTTP_FLAG__SCHEME)) { + if (!check_scheme(nv->value->base, nv->value->len) || + !check_pseudo_header(http, nv, NGHTTP3_HTTP_FLAG__SCHEME)) { return NGHTTP3_ERR_MALFORMED_HTTP_HEADER; } /* scheme is case-insensitive: @@ -241,15 +503,16 @@ static int http_request_on_header(nghttp3_http_state *http, } break; case NGHTTP3_QPACK_TOKEN__PROTOCOL: - if (!connect_protocol) { - return NGHTTP3_ERR_MALFORMED_HTTP_HEADER; - } - - if (!check_pseudo_header(http, nv, NGHTTP3_HTTP_FLAG__PROTOCOL)) { + if (!connect_protocol || + !nghttp3_check_header_value(nv->value->base, nv->value->len) || + !check_pseudo_header(http, nv, NGHTTP3_HTTP_FLAG__PROTOCOL)) { return NGHTTP3_ERR_MALFORMED_HTTP_HEADER; } break; case NGHTTP3_QPACK_TOKEN_HOST: + if (!check_authority(nv->value->base, nv->value->len)) { + return NGHTTP3_ERR_REMOVE_HTTP_HEADER; + } if (!check_pseudo_header(http, nv, NGHTTP3_HTTP_FLAG_HOST)) { return NGHTTP3_ERR_MALFORMED_HTTP_HEADER; } @@ -284,22 +547,35 @@ static int http_request_on_header(nghttp3_http_state *http, } break; case NGHTTP3_QPACK_TOKEN_PRIORITY: - if (!trailers && !(http->flags & NGHTTP3_HTTP_FLAG_BAD_PRIORITY)) { - pri = http->pri; - if (nghttp3_http_parse_priority(&pri, nv->value->base, nv->value->len) == - 0) { - http->pri = pri; - http->flags |= NGHTTP3_HTTP_FLAG_PRIORITY; - } else { - http->flags &= ~NGHTTP3_HTTP_FLAG_PRIORITY; - http->flags |= NGHTTP3_HTTP_FLAG_BAD_PRIORITY; - } + if (!nghttp3_check_header_value(nv->value->base, nv->value->len)) { + return NGHTTP3_ERR_REMOVE_HTTP_HEADER; } + + if (trailers || (http->flags & NGHTTP3_HTTP_FLAG_BAD_PRIORITY)) { + break; + } + + pri = http->pri; + + if (nghttp3_http_parse_priority(&pri, nv->value->base, nv->value->len) == + 0) { + http->pri = pri; + http->flags |= NGHTTP3_HTTP_FLAG_PRIORITY; + break; + } + + http->flags &= ~NGHTTP3_HTTP_FLAG_PRIORITY; + http->flags |= NGHTTP3_HTTP_FLAG_BAD_PRIORITY; + break; default: if (nv->name->base[0] == ':') { return NGHTTP3_ERR_MALFORMED_HTTP_HEADER; } + + if (!nghttp3_check_header_value(nv->value->base, nv->value->len)) { + return NGHTTP3_ERR_REMOVE_HTTP_HEADER; + } } return 0; @@ -307,19 +583,10 @@ static int http_request_on_header(nghttp3_http_state *http, static int http_response_on_header(nghttp3_http_state *http, nghttp3_qpack_nv *nv, int trailers) { - if (nv->name->base[0] == ':') { - if (trailers || - (http->flags & NGHTTP3_HTTP_FLAG_PSEUDO_HEADER_DISALLOWED)) { - return NGHTTP3_ERR_MALFORMED_HTTP_HEADER; - } - } - switch (nv->token) { case NGHTTP3_QPACK_TOKEN__STATUS: { - if (!check_pseudo_header(http, nv, NGHTTP3_HTTP_FLAG__STATUS)) { - return NGHTTP3_ERR_MALFORMED_HTTP_HEADER; - } - if (nv->value->len != 3) { + if (!check_pseudo_header(http, nv, NGHTTP3_HTTP_FLAG__STATUS) || + nv->value->len != 3) { return NGHTTP3_ERR_MALFORMED_HTTP_HEADER; } http->status_code = (int16_t)parse_uint(nv->value->base, nv->value->len); @@ -340,22 +607,18 @@ static int http_response_on_header(nghttp3_http_state *http, /* content-length header field in 204 response is prohibited by RFC 7230. But some widely used servers send content-length: 0. Until they get fixed, we ignore it. */ - if (http->content_length != -1) { - /* Found multiple content-length field */ - return NGHTTP3_ERR_MALFORMED_HTTP_HEADER; - } - if (!lstrieq("0", nv->value->base, nv->value->len)) { + if (/* Found multiple content-length field */ + http->content_length != -1 || + !lstrieq("0", nv->value->base, nv->value->len)) { return NGHTTP3_ERR_MALFORMED_HTTP_HEADER; } http->content_length = 0; return NGHTTP3_ERR_REMOVE_HTTP_HEADER; } - if (http->status_code / 100 == 1) { - return NGHTTP3_ERR_REMOVE_HTTP_HEADER; - } - /* https://tools.ietf.org/html/rfc7230#section-3.3.3 */ - if (http->status_code / 100 == 2 && - (http->flags & NGHTTP3_HTTP_FLAG_METH_CONNECT)) { + if (http->status_code / 100 == 1 || + /* https://tools.ietf.org/html/rfc7230#section-3.3.3 */ + (http->status_code / 100 == 2 && + (http->flags & NGHTTP3_HTTP_FLAG_METH_CONNECT))) { return NGHTTP3_ERR_REMOVE_HTTP_HEADER; } if (http->content_length != -1) { @@ -383,349 +646,50 @@ static int http_response_on_header(nghttp3_http_state *http, if (nv->name->base[0] == ':') { return NGHTTP3_ERR_MALFORMED_HTTP_HEADER; } - } - - return 0; -} - -/* Generated by genauthroitychartbl.py */ -static char VALID_AUTHORITY_CHARS[] = { - 0 /* NUL */, 0 /* SOH */, 0 /* STX */, 0 /* ETX */, - 0 /* EOT */, 0 /* ENQ */, 0 /* ACK */, 0 /* BEL */, - 0 /* BS */, 0 /* HT */, 0 /* LF */, 0 /* VT */, - 0 /* FF */, 0 /* CR */, 0 /* SO */, 0 /* SI */, - 0 /* DLE */, 0 /* DC1 */, 0 /* DC2 */, 0 /* DC3 */, - 0 /* DC4 */, 0 /* NAK */, 0 /* SYN */, 0 /* ETB */, - 0 /* CAN */, 0 /* EM */, 0 /* SUB */, 0 /* ESC */, - 0 /* FS */, 0 /* GS */, 0 /* RS */, 0 /* US */, - 0 /* SPC */, 1 /* ! */, 0 /* " */, 0 /* # */, - 1 /* $ */, 1 /* % */, 1 /* & */, 1 /* ' */, - 1 /* ( */, 1 /* ) */, 1 /* * */, 1 /* + */, - 1 /* , */, 1 /* - */, 1 /* . */, 0 /* / */, - 1 /* 0 */, 1 /* 1 */, 1 /* 2 */, 1 /* 3 */, - 1 /* 4 */, 1 /* 5 */, 1 /* 6 */, 1 /* 7 */, - 1 /* 8 */, 1 /* 9 */, 1 /* : */, 1 /* ; */, - 0 /* < */, 1 /* = */, 0 /* > */, 0 /* ? */, - 1 /* @ */, 1 /* A */, 1 /* B */, 1 /* C */, - 1 /* D */, 1 /* E */, 1 /* F */, 1 /* G */, - 1 /* H */, 1 /* I */, 1 /* J */, 1 /* K */, - 1 /* L */, 1 /* M */, 1 /* N */, 1 /* O */, - 1 /* P */, 1 /* Q */, 1 /* R */, 1 /* S */, - 1 /* T */, 1 /* U */, 1 /* V */, 1 /* W */, - 1 /* X */, 1 /* Y */, 1 /* Z */, 1 /* [ */, - 0 /* \ */, 1 /* ] */, 0 /* ^ */, 1 /* _ */, - 0 /* ` */, 1 /* a */, 1 /* b */, 1 /* c */, - 1 /* d */, 1 /* e */, 1 /* f */, 1 /* g */, - 1 /* h */, 1 /* i */, 1 /* j */, 1 /* k */, - 1 /* l */, 1 /* m */, 1 /* n */, 1 /* o */, - 1 /* p */, 1 /* q */, 1 /* r */, 1 /* s */, - 1 /* t */, 1 /* u */, 1 /* v */, 1 /* w */, - 1 /* x */, 1 /* y */, 1 /* z */, 0 /* { */, - 0 /* | */, 0 /* } */, 1 /* ~ */, 0 /* DEL */, - 0 /* 0x80 */, 0 /* 0x81 */, 0 /* 0x82 */, 0 /* 0x83 */, - 0 /* 0x84 */, 0 /* 0x85 */, 0 /* 0x86 */, 0 /* 0x87 */, - 0 /* 0x88 */, 0 /* 0x89 */, 0 /* 0x8a */, 0 /* 0x8b */, - 0 /* 0x8c */, 0 /* 0x8d */, 0 /* 0x8e */, 0 /* 0x8f */, - 0 /* 0x90 */, 0 /* 0x91 */, 0 /* 0x92 */, 0 /* 0x93 */, - 0 /* 0x94 */, 0 /* 0x95 */, 0 /* 0x96 */, 0 /* 0x97 */, - 0 /* 0x98 */, 0 /* 0x99 */, 0 /* 0x9a */, 0 /* 0x9b */, - 0 /* 0x9c */, 0 /* 0x9d */, 0 /* 0x9e */, 0 /* 0x9f */, - 0 /* 0xa0 */, 0 /* 0xa1 */, 0 /* 0xa2 */, 0 /* 0xa3 */, - 0 /* 0xa4 */, 0 /* 0xa5 */, 0 /* 0xa6 */, 0 /* 0xa7 */, - 0 /* 0xa8 */, 0 /* 0xa9 */, 0 /* 0xaa */, 0 /* 0xab */, - 0 /* 0xac */, 0 /* 0xad */, 0 /* 0xae */, 0 /* 0xaf */, - 0 /* 0xb0 */, 0 /* 0xb1 */, 0 /* 0xb2 */, 0 /* 0xb3 */, - 0 /* 0xb4 */, 0 /* 0xb5 */, 0 /* 0xb6 */, 0 /* 0xb7 */, - 0 /* 0xb8 */, 0 /* 0xb9 */, 0 /* 0xba */, 0 /* 0xbb */, - 0 /* 0xbc */, 0 /* 0xbd */, 0 /* 0xbe */, 0 /* 0xbf */, - 0 /* 0xc0 */, 0 /* 0xc1 */, 0 /* 0xc2 */, 0 /* 0xc3 */, - 0 /* 0xc4 */, 0 /* 0xc5 */, 0 /* 0xc6 */, 0 /* 0xc7 */, - 0 /* 0xc8 */, 0 /* 0xc9 */, 0 /* 0xca */, 0 /* 0xcb */, - 0 /* 0xcc */, 0 /* 0xcd */, 0 /* 0xce */, 0 /* 0xcf */, - 0 /* 0xd0 */, 0 /* 0xd1 */, 0 /* 0xd2 */, 0 /* 0xd3 */, - 0 /* 0xd4 */, 0 /* 0xd5 */, 0 /* 0xd6 */, 0 /* 0xd7 */, - 0 /* 0xd8 */, 0 /* 0xd9 */, 0 /* 0xda */, 0 /* 0xdb */, - 0 /* 0xdc */, 0 /* 0xdd */, 0 /* 0xde */, 0 /* 0xdf */, - 0 /* 0xe0 */, 0 /* 0xe1 */, 0 /* 0xe2 */, 0 /* 0xe3 */, - 0 /* 0xe4 */, 0 /* 0xe5 */, 0 /* 0xe6 */, 0 /* 0xe7 */, - 0 /* 0xe8 */, 0 /* 0xe9 */, 0 /* 0xea */, 0 /* 0xeb */, - 0 /* 0xec */, 0 /* 0xed */, 0 /* 0xee */, 0 /* 0xef */, - 0 /* 0xf0 */, 0 /* 0xf1 */, 0 /* 0xf2 */, 0 /* 0xf3 */, - 0 /* 0xf4 */, 0 /* 0xf5 */, 0 /* 0xf6 */, 0 /* 0xf7 */, - 0 /* 0xf8 */, 0 /* 0xf9 */, 0 /* 0xfa */, 0 /* 0xfb */, - 0 /* 0xfc */, 0 /* 0xfd */, 0 /* 0xfe */, 0 /* 0xff */ -}; - -static int check_authority(const uint8_t *value, size_t len) { - const uint8_t *last; - for (last = value + len; value != last; ++value) { - if (!VALID_AUTHORITY_CHARS[*value]) { - return 0; - } - } - return 1; -} - -static int check_scheme(const uint8_t *value, size_t len) { - const uint8_t *last; - if (len == 0) { - return 0; - } - - if (!(('A' <= *value && *value <= 'Z') || ('a' <= *value && *value <= 'z'))) { - return 0; - } - last = value + len; - ++value; - - for (; value != last; ++value) { - if (!(('A' <= *value && *value <= 'Z') || - ('a' <= *value && *value <= 'z') || - ('0' <= *value && *value <= '9') || *value == '+' || *value == '-' || - *value == '.')) { - return 0; + if (!nghttp3_check_header_value(nv->value->base, nv->value->len)) { + return NGHTTP3_ERR_REMOVE_HTTP_HEADER; } } - return 1; -} -/* Generated by genmethodchartbl.py */ -static char VALID_METHOD_CHARS[] = { - 0 /* NUL */, 0 /* SOH */, 0 /* STX */, 0 /* ETX */, - 0 /* EOT */, 0 /* ENQ */, 0 /* ACK */, 0 /* BEL */, - 0 /* BS */, 0 /* HT */, 0 /* LF */, 0 /* VT */, - 0 /* FF */, 0 /* CR */, 0 /* SO */, 0 /* SI */, - 0 /* DLE */, 0 /* DC1 */, 0 /* DC2 */, 0 /* DC3 */, - 0 /* DC4 */, 0 /* NAK */, 0 /* SYN */, 0 /* ETB */, - 0 /* CAN */, 0 /* EM */, 0 /* SUB */, 0 /* ESC */, - 0 /* FS */, 0 /* GS */, 0 /* RS */, 0 /* US */, - 0 /* SPC */, 1 /* ! */, 0 /* " */, 1 /* # */, - 1 /* $ */, 1 /* % */, 1 /* & */, 1 /* ' */, - 0 /* ( */, 0 /* ) */, 1 /* * */, 1 /* + */, - 0 /* , */, 1 /* - */, 1 /* . */, 0 /* / */, - 1 /* 0 */, 1 /* 1 */, 1 /* 2 */, 1 /* 3 */, - 1 /* 4 */, 1 /* 5 */, 1 /* 6 */, 1 /* 7 */, - 1 /* 8 */, 1 /* 9 */, 0 /* : */, 0 /* ; */, - 0 /* < */, 0 /* = */, 0 /* > */, 0 /* ? */, - 0 /* @ */, 1 /* A */, 1 /* B */, 1 /* C */, - 1 /* D */, 1 /* E */, 1 /* F */, 1 /* G */, - 1 /* H */, 1 /* I */, 1 /* J */, 1 /* K */, - 1 /* L */, 1 /* M */, 1 /* N */, 1 /* O */, - 1 /* P */, 1 /* Q */, 1 /* R */, 1 /* S */, - 1 /* T */, 1 /* U */, 1 /* V */, 1 /* W */, - 1 /* X */, 1 /* Y */, 1 /* Z */, 0 /* [ */, - 0 /* \ */, 0 /* ] */, 1 /* ^ */, 1 /* _ */, - 1 /* ` */, 1 /* a */, 1 /* b */, 1 /* c */, - 1 /* d */, 1 /* e */, 1 /* f */, 1 /* g */, - 1 /* h */, 1 /* i */, 1 /* j */, 1 /* k */, - 1 /* l */, 1 /* m */, 1 /* n */, 1 /* o */, - 1 /* p */, 1 /* q */, 1 /* r */, 1 /* s */, - 1 /* t */, 1 /* u */, 1 /* v */, 1 /* w */, - 1 /* x */, 1 /* y */, 1 /* z */, 0 /* { */, - 1 /* | */, 0 /* } */, 1 /* ~ */, 0 /* DEL */, - 0 /* 0x80 */, 0 /* 0x81 */, 0 /* 0x82 */, 0 /* 0x83 */, - 0 /* 0x84 */, 0 /* 0x85 */, 0 /* 0x86 */, 0 /* 0x87 */, - 0 /* 0x88 */, 0 /* 0x89 */, 0 /* 0x8a */, 0 /* 0x8b */, - 0 /* 0x8c */, 0 /* 0x8d */, 0 /* 0x8e */, 0 /* 0x8f */, - 0 /* 0x90 */, 0 /* 0x91 */, 0 /* 0x92 */, 0 /* 0x93 */, - 0 /* 0x94 */, 0 /* 0x95 */, 0 /* 0x96 */, 0 /* 0x97 */, - 0 /* 0x98 */, 0 /* 0x99 */, 0 /* 0x9a */, 0 /* 0x9b */, - 0 /* 0x9c */, 0 /* 0x9d */, 0 /* 0x9e */, 0 /* 0x9f */, - 0 /* 0xa0 */, 0 /* 0xa1 */, 0 /* 0xa2 */, 0 /* 0xa3 */, - 0 /* 0xa4 */, 0 /* 0xa5 */, 0 /* 0xa6 */, 0 /* 0xa7 */, - 0 /* 0xa8 */, 0 /* 0xa9 */, 0 /* 0xaa */, 0 /* 0xab */, - 0 /* 0xac */, 0 /* 0xad */, 0 /* 0xae */, 0 /* 0xaf */, - 0 /* 0xb0 */, 0 /* 0xb1 */, 0 /* 0xb2 */, 0 /* 0xb3 */, - 0 /* 0xb4 */, 0 /* 0xb5 */, 0 /* 0xb6 */, 0 /* 0xb7 */, - 0 /* 0xb8 */, 0 /* 0xb9 */, 0 /* 0xba */, 0 /* 0xbb */, - 0 /* 0xbc */, 0 /* 0xbd */, 0 /* 0xbe */, 0 /* 0xbf */, - 0 /* 0xc0 */, 0 /* 0xc1 */, 0 /* 0xc2 */, 0 /* 0xc3 */, - 0 /* 0xc4 */, 0 /* 0xc5 */, 0 /* 0xc6 */, 0 /* 0xc7 */, - 0 /* 0xc8 */, 0 /* 0xc9 */, 0 /* 0xca */, 0 /* 0xcb */, - 0 /* 0xcc */, 0 /* 0xcd */, 0 /* 0xce */, 0 /* 0xcf */, - 0 /* 0xd0 */, 0 /* 0xd1 */, 0 /* 0xd2 */, 0 /* 0xd3 */, - 0 /* 0xd4 */, 0 /* 0xd5 */, 0 /* 0xd6 */, 0 /* 0xd7 */, - 0 /* 0xd8 */, 0 /* 0xd9 */, 0 /* 0xda */, 0 /* 0xdb */, - 0 /* 0xdc */, 0 /* 0xdd */, 0 /* 0xde */, 0 /* 0xdf */, - 0 /* 0xe0 */, 0 /* 0xe1 */, 0 /* 0xe2 */, 0 /* 0xe3 */, - 0 /* 0xe4 */, 0 /* 0xe5 */, 0 /* 0xe6 */, 0 /* 0xe7 */, - 0 /* 0xe8 */, 0 /* 0xe9 */, 0 /* 0xea */, 0 /* 0xeb */, - 0 /* 0xec */, 0 /* 0xed */, 0 /* 0xee */, 0 /* 0xef */, - 0 /* 0xf0 */, 0 /* 0xf1 */, 0 /* 0xf2 */, 0 /* 0xf3 */, - 0 /* 0xf4 */, 0 /* 0xf5 */, 0 /* 0xf6 */, 0 /* 0xf7 */, - 0 /* 0xf8 */, 0 /* 0xf9 */, 0 /* 0xfa */, 0 /* 0xfb */, - 0 /* 0xfc */, 0 /* 0xfd */, 0 /* 0xfe */, 0 /* 0xff */ -}; - -static int check_method(const uint8_t *value, size_t len) { - const uint8_t *last; - if (len == 0) { - return 0; - } - for (last = value + len; value != last; ++value) { - if (!VALID_METHOD_CHARS[*value]) { - return 0; - } - } - return 1; + return 0; } -/* Generated by genpathchartbl.py */ -static char VALID_PATH_CHARS[] = { - 0 /* NUL */, 0 /* SOH */, 0 /* STX */, 0 /* ETX */, - 0 /* EOT */, 0 /* ENQ */, 0 /* ACK */, 0 /* BEL */, - 0 /* BS */, 0 /* HT */, 0 /* LF */, 0 /* VT */, - 0 /* FF */, 0 /* CR */, 0 /* SO */, 0 /* SI */, - 0 /* DLE */, 0 /* DC1 */, 0 /* DC2 */, 0 /* DC3 */, - 0 /* DC4 */, 0 /* NAK */, 0 /* SYN */, 0 /* ETB */, - 0 /* CAN */, 0 /* EM */, 0 /* SUB */, 0 /* ESC */, - 0 /* FS */, 0 /* GS */, 0 /* RS */, 0 /* US */, - 0 /* SPC */, 1 /* ! */, 1 /* " */, 1 /* # */, - 1 /* $ */, 1 /* % */, 1 /* & */, 1 /* ' */, - 1 /* ( */, 1 /* ) */, 1 /* * */, 1 /* + */, - 1 /* , */, 1 /* - */, 1 /* . */, 1 /* / */, - 1 /* 0 */, 1 /* 1 */, 1 /* 2 */, 1 /* 3 */, - 1 /* 4 */, 1 /* 5 */, 1 /* 6 */, 1 /* 7 */, - 1 /* 8 */, 1 /* 9 */, 1 /* : */, 1 /* ; */, - 1 /* < */, 1 /* = */, 1 /* > */, 1 /* ? */, - 1 /* @ */, 1 /* A */, 1 /* B */, 1 /* C */, - 1 /* D */, 1 /* E */, 1 /* F */, 1 /* G */, - 1 /* H */, 1 /* I */, 1 /* J */, 1 /* K */, - 1 /* L */, 1 /* M */, 1 /* N */, 1 /* O */, - 1 /* P */, 1 /* Q */, 1 /* R */, 1 /* S */, - 1 /* T */, 1 /* U */, 1 /* V */, 1 /* W */, - 1 /* X */, 1 /* Y */, 1 /* Z */, 1 /* [ */, - 1 /* \ */, 1 /* ] */, 1 /* ^ */, 1 /* _ */, - 1 /* ` */, 1 /* a */, 1 /* b */, 1 /* c */, - 1 /* d */, 1 /* e */, 1 /* f */, 1 /* g */, - 1 /* h */, 1 /* i */, 1 /* j */, 1 /* k */, - 1 /* l */, 1 /* m */, 1 /* n */, 1 /* o */, - 1 /* p */, 1 /* q */, 1 /* r */, 1 /* s */, - 1 /* t */, 1 /* u */, 1 /* v */, 1 /* w */, - 1 /* x */, 1 /* y */, 1 /* z */, 1 /* { */, - 1 /* | */, 1 /* } */, 1 /* ~ */, 0 /* DEL */, - 1 /* 0x80 */, 1 /* 0x81 */, 1 /* 0x82 */, 1 /* 0x83 */, - 1 /* 0x84 */, 1 /* 0x85 */, 1 /* 0x86 */, 1 /* 0x87 */, - 1 /* 0x88 */, 1 /* 0x89 */, 1 /* 0x8a */, 1 /* 0x8b */, - 1 /* 0x8c */, 1 /* 0x8d */, 1 /* 0x8e */, 1 /* 0x8f */, - 1 /* 0x90 */, 1 /* 0x91 */, 1 /* 0x92 */, 1 /* 0x93 */, - 1 /* 0x94 */, 1 /* 0x95 */, 1 /* 0x96 */, 1 /* 0x97 */, - 1 /* 0x98 */, 1 /* 0x99 */, 1 /* 0x9a */, 1 /* 0x9b */, - 1 /* 0x9c */, 1 /* 0x9d */, 1 /* 0x9e */, 1 /* 0x9f */, - 1 /* 0xa0 */, 1 /* 0xa1 */, 1 /* 0xa2 */, 1 /* 0xa3 */, - 1 /* 0xa4 */, 1 /* 0xa5 */, 1 /* 0xa6 */, 1 /* 0xa7 */, - 1 /* 0xa8 */, 1 /* 0xa9 */, 1 /* 0xaa */, 1 /* 0xab */, - 1 /* 0xac */, 1 /* 0xad */, 1 /* 0xae */, 1 /* 0xaf */, - 1 /* 0xb0 */, 1 /* 0xb1 */, 1 /* 0xb2 */, 1 /* 0xb3 */, - 1 /* 0xb4 */, 1 /* 0xb5 */, 1 /* 0xb6 */, 1 /* 0xb7 */, - 1 /* 0xb8 */, 1 /* 0xb9 */, 1 /* 0xba */, 1 /* 0xbb */, - 1 /* 0xbc */, 1 /* 0xbd */, 1 /* 0xbe */, 1 /* 0xbf */, - 1 /* 0xc0 */, 1 /* 0xc1 */, 1 /* 0xc2 */, 1 /* 0xc3 */, - 1 /* 0xc4 */, 1 /* 0xc5 */, 1 /* 0xc6 */, 1 /* 0xc7 */, - 1 /* 0xc8 */, 1 /* 0xc9 */, 1 /* 0xca */, 1 /* 0xcb */, - 1 /* 0xcc */, 1 /* 0xcd */, 1 /* 0xce */, 1 /* 0xcf */, - 1 /* 0xd0 */, 1 /* 0xd1 */, 1 /* 0xd2 */, 1 /* 0xd3 */, - 1 /* 0xd4 */, 1 /* 0xd5 */, 1 /* 0xd6 */, 1 /* 0xd7 */, - 1 /* 0xd8 */, 1 /* 0xd9 */, 1 /* 0xda */, 1 /* 0xdb */, - 1 /* 0xdc */, 1 /* 0xdd */, 1 /* 0xde */, 1 /* 0xdf */, - 1 /* 0xe0 */, 1 /* 0xe1 */, 1 /* 0xe2 */, 1 /* 0xe3 */, - 1 /* 0xe4 */, 1 /* 0xe5 */, 1 /* 0xe6 */, 1 /* 0xe7 */, - 1 /* 0xe8 */, 1 /* 0xe9 */, 1 /* 0xea */, 1 /* 0xeb */, - 1 /* 0xec */, 1 /* 0xed */, 1 /* 0xee */, 1 /* 0xef */, - 1 /* 0xf0 */, 1 /* 0xf1 */, 1 /* 0xf2 */, 1 /* 0xf3 */, - 1 /* 0xf4 */, 1 /* 0xf5 */, 1 /* 0xf6 */, 1 /* 0xf7 */, - 1 /* 0xf8 */, 1 /* 0xf9 */, 1 /* 0xfa */, 1 /* 0xfb */, - 1 /* 0xfc */, 1 /* 0xfd */, 1 /* 0xfe */, 1 /* 0xff */ -}; - -static int check_path(const uint8_t *value, size_t len) { - const uint8_t *last; - for (last = value + len; value != last; ++value) { - if (!VALID_PATH_CHARS[*value]) { - return 0; - } - } - return 1; -} +static int http_check_nonempty_header_name(const uint8_t *name, size_t len); int nghttp3_http_on_header(nghttp3_http_state *http, nghttp3_qpack_nv *nv, int request, int trailers, int connect_protocol) { - int rv; - size_t i; - uint8_t c; - - if (!nghttp3_check_header_name(nv->name->base, nv->name->len)) { - if (nv->name->len > 0 && nv->name->base[0] == ':') { - return NGHTTP3_ERR_MALFORMED_HTTP_HEADER; - } - /* header field name must be lower-cased without exception */ - for (i = 0; i < nv->name->len; ++i) { - c = nv->name->base[i]; - if ('A' <= c && c <= 'Z') { - return NGHTTP3_ERR_MALFORMED_HTTP_HEADER; - } - } - /* When ignoring regular header fields, we set this flag so that - we still enforce header field ordering rule for pseudo header - fields. */ + if (nv->name->len == 0) { http->flags |= NGHTTP3_HTTP_FLAG_PSEUDO_HEADER_DISALLOWED; + return NGHTTP3_ERR_REMOVE_HTTP_HEADER; } - assert(nv->name->len > 0); - - switch (nv->token) { - case NGHTTP3_QPACK_TOKEN__METHOD: - rv = check_method(nv->value->base, nv->value->len); - break; - case NGHTTP3_QPACK_TOKEN__SCHEME: - rv = check_scheme(nv->value->base, nv->value->len); - break; - case NGHTTP3_QPACK_TOKEN__AUTHORITY: - case NGHTTP3_QPACK_TOKEN_HOST: - if (request) { - rv = check_authority(nv->value->base, nv->value->len); - } else { - /* The use of host field in response field section is - undefined. */ - rv = nghttp3_check_header_value(nv->value->base, nv->value->len); + if (nv->name->base[0] == ':') { + /* pseudo header must have a valid token. */ + if (nv->token == -1 || trailers || + (http->flags & NGHTTP3_HTTP_FLAG_PSEUDO_HEADER_DISALLOWED)) { + return NGHTTP3_ERR_MALFORMED_HTTP_HEADER; } - break; - case NGHTTP3_QPACK_TOKEN__PATH: - rv = check_path(nv->value->base, nv->value->len); - break; - default: - rv = nghttp3_check_header_value(nv->value->base, nv->value->len); - } + } else { + http->flags |= NGHTTP3_HTTP_FLAG_PSEUDO_HEADER_DISALLOWED; - if (rv == 0) { - if (nv->name->base[0] == ':') { + switch (http_check_nonempty_header_name(nv->name->base, nv->name->len)) { + case 0: + return NGHTTP3_ERR_REMOVE_HTTP_HEADER; + case -1: + /* header field name must be lower-cased without exception */ return NGHTTP3_ERR_MALFORMED_HTTP_HEADER; } - /* When ignoring regular header fields, we set this flag so that - we still enforce header field ordering rule for pseudo header - fields. */ - http->flags |= NGHTTP3_HTTP_FLAG_PSEUDO_HEADER_DISALLOWED; - return NGHTTP3_ERR_REMOVE_HTTP_HEADER; } - if (request) { - rv = http_request_on_header(http, nv, trailers, connect_protocol); - } else { - rv = http_response_on_header(http, nv, trailers); - } + assert(nv->name->len > 0); - if (nv->name->base[0] != ':') { - switch (rv) { - case 0: - case NGHTTP3_ERR_REMOVE_HTTP_HEADER: - http->flags |= NGHTTP3_HTTP_FLAG_PSEUDO_HEADER_DISALLOWED; - break; - } + if (request) { + return http_request_on_header(http, nv, trailers, connect_protocol); } - return rv; + return http_response_on_header(http, nv, trailers); } int nghttp3_http_on_request_headers(nghttp3_http_state *http) { @@ -738,7 +702,7 @@ int nghttp3_http_on_request_headers(nghttp3_http_state *http) { http->content_length = -1; } else { if ((http->flags & NGHTTP3_HTTP_FLAG_REQ_HEADERS) != - NGHTTP3_HTTP_FLAG_REQ_HEADERS || + NGHTTP3_HTTP_FLAG_REQ_HEADERS || (http->flags & (NGHTTP3_HTTP_FLAG__AUTHORITY | NGHTTP3_HTTP_FLAG_HOST)) == 0) { return NGHTTP3_ERR_MALFORMED_HTTP_HEADER; @@ -829,70 +793,58 @@ void nghttp3_http_record_request_method(nghttp3_stream *stream, /* Generated by gennmchartbl.py */ static const int VALID_HD_NAME_CHARS[] = { - 0 /* NUL */, 0 /* SOH */, 0 /* STX */, 0 /* ETX */, - 0 /* EOT */, 0 /* ENQ */, 0 /* ACK */, 0 /* BEL */, - 0 /* BS */, 0 /* HT */, 0 /* LF */, 0 /* VT */, - 0 /* FF */, 0 /* CR */, 0 /* SO */, 0 /* SI */, - 0 /* DLE */, 0 /* DC1 */, 0 /* DC2 */, 0 /* DC3 */, - 0 /* DC4 */, 0 /* NAK */, 0 /* SYN */, 0 /* ETB */, - 0 /* CAN */, 0 /* EM */, 0 /* SUB */, 0 /* ESC */, - 0 /* FS */, 0 /* GS */, 0 /* RS */, 0 /* US */, - 0 /* SPC */, 1 /* ! */, 0 /* " */, 1 /* # */, - 1 /* $ */, 1 /* % */, 1 /* & */, 1 /* ' */, - 0 /* ( */, 0 /* ) */, 1 /* * */, 1 /* + */, - 0 /* , */, 1 /* - */, 1 /* . */, 0 /* / */, - 1 /* 0 */, 1 /* 1 */, 1 /* 2 */, 1 /* 3 */, - 1 /* 4 */, 1 /* 5 */, 1 /* 6 */, 1 /* 7 */, - 1 /* 8 */, 1 /* 9 */, 0 /* : */, 0 /* ; */, - 0 /* < */, 0 /* = */, 0 /* > */, 0 /* ? */, - 0 /* @ */, 0 /* A */, 0 /* B */, 0 /* C */, - 0 /* D */, 0 /* E */, 0 /* F */, 0 /* G */, - 0 /* H */, 0 /* I */, 0 /* J */, 0 /* K */, - 0 /* L */, 0 /* M */, 0 /* N */, 0 /* O */, - 0 /* P */, 0 /* Q */, 0 /* R */, 0 /* S */, - 0 /* T */, 0 /* U */, 0 /* V */, 0 /* W */, - 0 /* X */, 0 /* Y */, 0 /* Z */, 0 /* [ */, - 0 /* \ */, 0 /* ] */, 1 /* ^ */, 1 /* _ */, - 1 /* ` */, 1 /* a */, 1 /* b */, 1 /* c */, - 1 /* d */, 1 /* e */, 1 /* f */, 1 /* g */, - 1 /* h */, 1 /* i */, 1 /* j */, 1 /* k */, - 1 /* l */, 1 /* m */, 1 /* n */, 1 /* o */, - 1 /* p */, 1 /* q */, 1 /* r */, 1 /* s */, - 1 /* t */, 1 /* u */, 1 /* v */, 1 /* w */, - 1 /* x */, 1 /* y */, 1 /* z */, 0 /* { */, - 1 /* | */, 0 /* } */, 1 /* ~ */, 0 /* DEL */, - 0 /* 0x80 */, 0 /* 0x81 */, 0 /* 0x82 */, 0 /* 0x83 */, - 0 /* 0x84 */, 0 /* 0x85 */, 0 /* 0x86 */, 0 /* 0x87 */, - 0 /* 0x88 */, 0 /* 0x89 */, 0 /* 0x8a */, 0 /* 0x8b */, - 0 /* 0x8c */, 0 /* 0x8d */, 0 /* 0x8e */, 0 /* 0x8f */, - 0 /* 0x90 */, 0 /* 0x91 */, 0 /* 0x92 */, 0 /* 0x93 */, - 0 /* 0x94 */, 0 /* 0x95 */, 0 /* 0x96 */, 0 /* 0x97 */, - 0 /* 0x98 */, 0 /* 0x99 */, 0 /* 0x9a */, 0 /* 0x9b */, - 0 /* 0x9c */, 0 /* 0x9d */, 0 /* 0x9e */, 0 /* 0x9f */, - 0 /* 0xa0 */, 0 /* 0xa1 */, 0 /* 0xa2 */, 0 /* 0xa3 */, - 0 /* 0xa4 */, 0 /* 0xa5 */, 0 /* 0xa6 */, 0 /* 0xa7 */, - 0 /* 0xa8 */, 0 /* 0xa9 */, 0 /* 0xaa */, 0 /* 0xab */, - 0 /* 0xac */, 0 /* 0xad */, 0 /* 0xae */, 0 /* 0xaf */, - 0 /* 0xb0 */, 0 /* 0xb1 */, 0 /* 0xb2 */, 0 /* 0xb3 */, - 0 /* 0xb4 */, 0 /* 0xb5 */, 0 /* 0xb6 */, 0 /* 0xb7 */, - 0 /* 0xb8 */, 0 /* 0xb9 */, 0 /* 0xba */, 0 /* 0xbb */, - 0 /* 0xbc */, 0 /* 0xbd */, 0 /* 0xbe */, 0 /* 0xbf */, - 0 /* 0xc0 */, 0 /* 0xc1 */, 0 /* 0xc2 */, 0 /* 0xc3 */, - 0 /* 0xc4 */, 0 /* 0xc5 */, 0 /* 0xc6 */, 0 /* 0xc7 */, - 0 /* 0xc8 */, 0 /* 0xc9 */, 0 /* 0xca */, 0 /* 0xcb */, - 0 /* 0xcc */, 0 /* 0xcd */, 0 /* 0xce */, 0 /* 0xcf */, - 0 /* 0xd0 */, 0 /* 0xd1 */, 0 /* 0xd2 */, 0 /* 0xd3 */, - 0 /* 0xd4 */, 0 /* 0xd5 */, 0 /* 0xd6 */, 0 /* 0xd7 */, - 0 /* 0xd8 */, 0 /* 0xd9 */, 0 /* 0xda */, 0 /* 0xdb */, - 0 /* 0xdc */, 0 /* 0xdd */, 0 /* 0xde */, 0 /* 0xdf */, - 0 /* 0xe0 */, 0 /* 0xe1 */, 0 /* 0xe2 */, 0 /* 0xe3 */, - 0 /* 0xe4 */, 0 /* 0xe5 */, 0 /* 0xe6 */, 0 /* 0xe7 */, - 0 /* 0xe8 */, 0 /* 0xe9 */, 0 /* 0xea */, 0 /* 0xeb */, - 0 /* 0xec */, 0 /* 0xed */, 0 /* 0xee */, 0 /* 0xef */, - 0 /* 0xf0 */, 0 /* 0xf1 */, 0 /* 0xf2 */, 0 /* 0xf3 */, - 0 /* 0xf4 */, 0 /* 0xf5 */, 0 /* 0xf6 */, 0 /* 0xf7 */, - 0 /* 0xf8 */, 0 /* 0xf9 */, 0 /* 0xfa */, 0 /* 0xfb */, - 0 /* 0xfc */, 0 /* 0xfd */, 0 /* 0xfe */, 0 /* 0xff */ + 0 /* NUL */, 0 /* SOH */, 0 /* STX */, 0 /* ETX */, 0 /* EOT */, + 0 /* ENQ */, 0 /* ACK */, 0 /* BEL */, 0 /* BS */, 0 /* HT */, + 0 /* LF */, 0 /* VT */, 0 /* FF */, 0 /* CR */, 0 /* SO */, + 0 /* SI */, 0 /* DLE */, 0 /* DC1 */, 0 /* DC2 */, 0 /* DC3 */, + 0 /* DC4 */, 0 /* NAK */, 0 /* SYN */, 0 /* ETB */, 0 /* CAN */, + 0 /* EM */, 0 /* SUB */, 0 /* ESC */, 0 /* FS */, 0 /* GS */, + 0 /* RS */, 0 /* US */, 0 /* SPC */, 1 /* ! */, 0 /* " */, + 1 /* # */, 1 /* $ */, 1 /* % */, 1 /* & */, 1 /* ' */, + 0 /* ( */, 0 /* ) */, 1 /* * */, 1 /* + */, 0 /* , */, + 1 /* - */, 1 /* . */, 0 /* / */, 1 /* 0 */, 1 /* 1 */, + 1 /* 2 */, 1 /* 3 */, 1 /* 4 */, 1 /* 5 */, 1 /* 6 */, + 1 /* 7 */, 1 /* 8 */, 1 /* 9 */, 0 /* : */, 0 /* ; */, + 0 /* < */, 0 /* = */, 0 /* > */, 0 /* ? */, 0 /* @ */, + -1 /* A */, -1 /* B */, -1 /* C */, -1 /* D */, -1 /* E */, + -1 /* F */, -1 /* G */, -1 /* H */, -1 /* I */, -1 /* J */, + -1 /* K */, -1 /* L */, -1 /* M */, -1 /* N */, -1 /* O */, + -1 /* P */, -1 /* Q */, -1 /* R */, -1 /* S */, -1 /* T */, + -1 /* U */, -1 /* V */, -1 /* W */, -1 /* X */, -1 /* Y */, + -1 /* Z */, 0 /* [ */, 0 /* \ */, 0 /* ] */, 1 /* ^ */, + 1 /* _ */, 1 /* ` */, 1 /* a */, 1 /* b */, 1 /* c */, + 1 /* d */, 1 /* e */, 1 /* f */, 1 /* g */, 1 /* h */, + 1 /* i */, 1 /* j */, 1 /* k */, 1 /* l */, 1 /* m */, + 1 /* n */, 1 /* o */, 1 /* p */, 1 /* q */, 1 /* r */, + 1 /* s */, 1 /* t */, 1 /* u */, 1 /* v */, 1 /* w */, + 1 /* x */, 1 /* y */, 1 /* z */, 0 /* { */, 1 /* | */, + 0 /* } */, 1 /* ~ */, 0 /* DEL */, 0 /* 0x80 */, 0 /* 0x81 */, + 0 /* 0x82 */, 0 /* 0x83 */, 0 /* 0x84 */, 0 /* 0x85 */, 0 /* 0x86 */, + 0 /* 0x87 */, 0 /* 0x88 */, 0 /* 0x89 */, 0 /* 0x8a */, 0 /* 0x8b */, + 0 /* 0x8c */, 0 /* 0x8d */, 0 /* 0x8e */, 0 /* 0x8f */, 0 /* 0x90 */, + 0 /* 0x91 */, 0 /* 0x92 */, 0 /* 0x93 */, 0 /* 0x94 */, 0 /* 0x95 */, + 0 /* 0x96 */, 0 /* 0x97 */, 0 /* 0x98 */, 0 /* 0x99 */, 0 /* 0x9a */, + 0 /* 0x9b */, 0 /* 0x9c */, 0 /* 0x9d */, 0 /* 0x9e */, 0 /* 0x9f */, + 0 /* 0xa0 */, 0 /* 0xa1 */, 0 /* 0xa2 */, 0 /* 0xa3 */, 0 /* 0xa4 */, + 0 /* 0xa5 */, 0 /* 0xa6 */, 0 /* 0xa7 */, 0 /* 0xa8 */, 0 /* 0xa9 */, + 0 /* 0xaa */, 0 /* 0xab */, 0 /* 0xac */, 0 /* 0xad */, 0 /* 0xae */, + 0 /* 0xaf */, 0 /* 0xb0 */, 0 /* 0xb1 */, 0 /* 0xb2 */, 0 /* 0xb3 */, + 0 /* 0xb4 */, 0 /* 0xb5 */, 0 /* 0xb6 */, 0 /* 0xb7 */, 0 /* 0xb8 */, + 0 /* 0xb9 */, 0 /* 0xba */, 0 /* 0xbb */, 0 /* 0xbc */, 0 /* 0xbd */, + 0 /* 0xbe */, 0 /* 0xbf */, 0 /* 0xc0 */, 0 /* 0xc1 */, 0 /* 0xc2 */, + 0 /* 0xc3 */, 0 /* 0xc4 */, 0 /* 0xc5 */, 0 /* 0xc6 */, 0 /* 0xc7 */, + 0 /* 0xc8 */, 0 /* 0xc9 */, 0 /* 0xca */, 0 /* 0xcb */, 0 /* 0xcc */, + 0 /* 0xcd */, 0 /* 0xce */, 0 /* 0xcf */, 0 /* 0xd0 */, 0 /* 0xd1 */, + 0 /* 0xd2 */, 0 /* 0xd3 */, 0 /* 0xd4 */, 0 /* 0xd5 */, 0 /* 0xd6 */, + 0 /* 0xd7 */, 0 /* 0xd8 */, 0 /* 0xd9 */, 0 /* 0xda */, 0 /* 0xdb */, + 0 /* 0xdc */, 0 /* 0xdd */, 0 /* 0xde */, 0 /* 0xdf */, 0 /* 0xe0 */, + 0 /* 0xe1 */, 0 /* 0xe2 */, 0 /* 0xe3 */, 0 /* 0xe4 */, 0 /* 0xe5 */, + 0 /* 0xe6 */, 0 /* 0xe7 */, 0 /* 0xe8 */, 0 /* 0xe9 */, 0 /* 0xea */, + 0 /* 0xeb */, 0 /* 0xec */, 0 /* 0xed */, 0 /* 0xee */, 0 /* 0xef */, + 0 /* 0xf0 */, 0 /* 0xf1 */, 0 /* 0xf2 */, 0 /* 0xf3 */, 0 /* 0xf4 */, + 0 /* 0xf5 */, 0 /* 0xf6 */, 0 /* 0xf7 */, 0 /* 0xf8 */, 0 /* 0xf9 */, + 0 /* 0xfa */, 0 /* 0xfb */, 0 /* 0xfc */, 0 /* 0xfd */, 0 /* 0xfe */, + 0 /* 0xff */, }; int nghttp3_check_header_name(const uint8_t *name, size_t len) { @@ -915,76 +867,125 @@ int nghttp3_check_header_name(const uint8_t *name, size_t len) { return 1; } +/* http_check_nonempty_header_name validates regular header name + pointed by |name| of length |len|. |len| must be greater than + zero. This function returns 1 if it succeeds, or -1 if the name + contains a character in [A-Z], otherwise 0. */ +static int http_check_nonempty_header_name(const uint8_t *name, size_t len) { + const uint8_t *last; + int rv; + + for (last = name + len; name != last; ++name) { + rv = VALID_HD_NAME_CHARS[*name]; + if (rv != 1) { + return rv; + } + } + + return 1; +} + /* Generated by genvchartbl.py */ static const int VALID_HD_VALUE_CHARS[] = { - 0 /* NUL */, 0 /* SOH */, 0 /* STX */, 0 /* ETX */, - 0 /* EOT */, 0 /* ENQ */, 0 /* ACK */, 0 /* BEL */, - 0 /* BS */, 1 /* HT */, 0 /* LF */, 0 /* VT */, - 0 /* FF */, 0 /* CR */, 0 /* SO */, 0 /* SI */, - 0 /* DLE */, 0 /* DC1 */, 0 /* DC2 */, 0 /* DC3 */, - 0 /* DC4 */, 0 /* NAK */, 0 /* SYN */, 0 /* ETB */, - 0 /* CAN */, 0 /* EM */, 0 /* SUB */, 0 /* ESC */, - 0 /* FS */, 0 /* GS */, 0 /* RS */, 0 /* US */, - 1 /* SPC */, 1 /* ! */, 1 /* " */, 1 /* # */, - 1 /* $ */, 1 /* % */, 1 /* & */, 1 /* ' */, - 1 /* ( */, 1 /* ) */, 1 /* * */, 1 /* + */, - 1 /* , */, 1 /* - */, 1 /* . */, 1 /* / */, - 1 /* 0 */, 1 /* 1 */, 1 /* 2 */, 1 /* 3 */, - 1 /* 4 */, 1 /* 5 */, 1 /* 6 */, 1 /* 7 */, - 1 /* 8 */, 1 /* 9 */, 1 /* : */, 1 /* ; */, - 1 /* < */, 1 /* = */, 1 /* > */, 1 /* ? */, - 1 /* @ */, 1 /* A */, 1 /* B */, 1 /* C */, - 1 /* D */, 1 /* E */, 1 /* F */, 1 /* G */, - 1 /* H */, 1 /* I */, 1 /* J */, 1 /* K */, - 1 /* L */, 1 /* M */, 1 /* N */, 1 /* O */, - 1 /* P */, 1 /* Q */, 1 /* R */, 1 /* S */, - 1 /* T */, 1 /* U */, 1 /* V */, 1 /* W */, - 1 /* X */, 1 /* Y */, 1 /* Z */, 1 /* [ */, - 1 /* \ */, 1 /* ] */, 1 /* ^ */, 1 /* _ */, - 1 /* ` */, 1 /* a */, 1 /* b */, 1 /* c */, - 1 /* d */, 1 /* e */, 1 /* f */, 1 /* g */, - 1 /* h */, 1 /* i */, 1 /* j */, 1 /* k */, - 1 /* l */, 1 /* m */, 1 /* n */, 1 /* o */, - 1 /* p */, 1 /* q */, 1 /* r */, 1 /* s */, - 1 /* t */, 1 /* u */, 1 /* v */, 1 /* w */, - 1 /* x */, 1 /* y */, 1 /* z */, 1 /* { */, - 1 /* | */, 1 /* } */, 1 /* ~ */, 0 /* DEL */, - 1 /* 0x80 */, 1 /* 0x81 */, 1 /* 0x82 */, 1 /* 0x83 */, - 1 /* 0x84 */, 1 /* 0x85 */, 1 /* 0x86 */, 1 /* 0x87 */, - 1 /* 0x88 */, 1 /* 0x89 */, 1 /* 0x8a */, 1 /* 0x8b */, - 1 /* 0x8c */, 1 /* 0x8d */, 1 /* 0x8e */, 1 /* 0x8f */, - 1 /* 0x90 */, 1 /* 0x91 */, 1 /* 0x92 */, 1 /* 0x93 */, - 1 /* 0x94 */, 1 /* 0x95 */, 1 /* 0x96 */, 1 /* 0x97 */, - 1 /* 0x98 */, 1 /* 0x99 */, 1 /* 0x9a */, 1 /* 0x9b */, - 1 /* 0x9c */, 1 /* 0x9d */, 1 /* 0x9e */, 1 /* 0x9f */, - 1 /* 0xa0 */, 1 /* 0xa1 */, 1 /* 0xa2 */, 1 /* 0xa3 */, - 1 /* 0xa4 */, 1 /* 0xa5 */, 1 /* 0xa6 */, 1 /* 0xa7 */, - 1 /* 0xa8 */, 1 /* 0xa9 */, 1 /* 0xaa */, 1 /* 0xab */, - 1 /* 0xac */, 1 /* 0xad */, 1 /* 0xae */, 1 /* 0xaf */, - 1 /* 0xb0 */, 1 /* 0xb1 */, 1 /* 0xb2 */, 1 /* 0xb3 */, - 1 /* 0xb4 */, 1 /* 0xb5 */, 1 /* 0xb6 */, 1 /* 0xb7 */, - 1 /* 0xb8 */, 1 /* 0xb9 */, 1 /* 0xba */, 1 /* 0xbb */, - 1 /* 0xbc */, 1 /* 0xbd */, 1 /* 0xbe */, 1 /* 0xbf */, - 1 /* 0xc0 */, 1 /* 0xc1 */, 1 /* 0xc2 */, 1 /* 0xc3 */, - 1 /* 0xc4 */, 1 /* 0xc5 */, 1 /* 0xc6 */, 1 /* 0xc7 */, - 1 /* 0xc8 */, 1 /* 0xc9 */, 1 /* 0xca */, 1 /* 0xcb */, - 1 /* 0xcc */, 1 /* 0xcd */, 1 /* 0xce */, 1 /* 0xcf */, - 1 /* 0xd0 */, 1 /* 0xd1 */, 1 /* 0xd2 */, 1 /* 0xd3 */, - 1 /* 0xd4 */, 1 /* 0xd5 */, 1 /* 0xd6 */, 1 /* 0xd7 */, - 1 /* 0xd8 */, 1 /* 0xd9 */, 1 /* 0xda */, 1 /* 0xdb */, - 1 /* 0xdc */, 1 /* 0xdd */, 1 /* 0xde */, 1 /* 0xdf */, - 1 /* 0xe0 */, 1 /* 0xe1 */, 1 /* 0xe2 */, 1 /* 0xe3 */, - 1 /* 0xe4 */, 1 /* 0xe5 */, 1 /* 0xe6 */, 1 /* 0xe7 */, - 1 /* 0xe8 */, 1 /* 0xe9 */, 1 /* 0xea */, 1 /* 0xeb */, - 1 /* 0xec */, 1 /* 0xed */, 1 /* 0xee */, 1 /* 0xef */, - 1 /* 0xf0 */, 1 /* 0xf1 */, 1 /* 0xf2 */, 1 /* 0xf3 */, - 1 /* 0xf4 */, 1 /* 0xf5 */, 1 /* 0xf6 */, 1 /* 0xf7 */, - 1 /* 0xf8 */, 1 /* 0xf9 */, 1 /* 0xfa */, 1 /* 0xfb */, - 1 /* 0xfc */, 1 /* 0xfd */, 1 /* 0xfe */, 1 /* 0xff */ + 0 /* NUL */, 0 /* SOH */, 0 /* STX */, 0 /* ETX */, + 0 /* EOT */, 0 /* ENQ */, 0 /* ACK */, 0 /* BEL */, + 0 /* BS */, 1 /* HT */, 0 /* LF */, 0 /* VT */, + 0 /* FF */, 0 /* CR */, 0 /* SO */, 0 /* SI */, + 0 /* DLE */, 0 /* DC1 */, 0 /* DC2 */, 0 /* DC3 */, + 0 /* DC4 */, 0 /* NAK */, 0 /* SYN */, 0 /* ETB */, + 0 /* CAN */, 0 /* EM */, 0 /* SUB */, 0 /* ESC */, + 0 /* FS */, 0 /* GS */, 0 /* RS */, 0 /* US */, + 1 /* SPC */, 1 /* ! */, 1 /* " */, 1 /* # */, + 1 /* $ */, 1 /* % */, 1 /* & */, 1 /* ' */, + 1 /* ( */, 1 /* ) */, 1 /* * */, 1 /* + */, + 1 /* , */, 1 /* - */, 1 /* . */, 1 /* / */, + 1 /* 0 */, 1 /* 1 */, 1 /* 2 */, 1 /* 3 */, + 1 /* 4 */, 1 /* 5 */, 1 /* 6 */, 1 /* 7 */, + 1 /* 8 */, 1 /* 9 */, 1 /* : */, 1 /* ; */, + 1 /* < */, 1 /* = */, 1 /* > */, 1 /* ? */, + 1 /* @ */, 1 /* A */, 1 /* B */, 1 /* C */, + 1 /* D */, 1 /* E */, 1 /* F */, 1 /* G */, + 1 /* H */, 1 /* I */, 1 /* J */, 1 /* K */, + 1 /* L */, 1 /* M */, 1 /* N */, 1 /* O */, + 1 /* P */, 1 /* Q */, 1 /* R */, 1 /* S */, + 1 /* T */, 1 /* U */, 1 /* V */, 1 /* W */, + 1 /* X */, 1 /* Y */, 1 /* Z */, 1 /* [ */, + 1 /* \ */, 1 /* ] */, 1 /* ^ */, 1 /* _ */, + 1 /* ` */, 1 /* a */, 1 /* b */, 1 /* c */, + 1 /* d */, 1 /* e */, 1 /* f */, 1 /* g */, + 1 /* h */, 1 /* i */, 1 /* j */, 1 /* k */, + 1 /* l */, 1 /* m */, 1 /* n */, 1 /* o */, + 1 /* p */, 1 /* q */, 1 /* r */, 1 /* s */, + 1 /* t */, 1 /* u */, 1 /* v */, 1 /* w */, + 1 /* x */, 1 /* y */, 1 /* z */, 1 /* { */, + 1 /* | */, 1 /* } */, 1 /* ~ */, 0 /* DEL */, + 1 /* 0x80 */, 1 /* 0x81 */, 1 /* 0x82 */, 1 /* 0x83 */, + 1 /* 0x84 */, 1 /* 0x85 */, 1 /* 0x86 */, 1 /* 0x87 */, + 1 /* 0x88 */, 1 /* 0x89 */, 1 /* 0x8a */, 1 /* 0x8b */, + 1 /* 0x8c */, 1 /* 0x8d */, 1 /* 0x8e */, 1 /* 0x8f */, + 1 /* 0x90 */, 1 /* 0x91 */, 1 /* 0x92 */, 1 /* 0x93 */, + 1 /* 0x94 */, 1 /* 0x95 */, 1 /* 0x96 */, 1 /* 0x97 */, + 1 /* 0x98 */, 1 /* 0x99 */, 1 /* 0x9a */, 1 /* 0x9b */, + 1 /* 0x9c */, 1 /* 0x9d */, 1 /* 0x9e */, 1 /* 0x9f */, + 1 /* 0xa0 */, 1 /* 0xa1 */, 1 /* 0xa2 */, 1 /* 0xa3 */, + 1 /* 0xa4 */, 1 /* 0xa5 */, 1 /* 0xa6 */, 1 /* 0xa7 */, + 1 /* 0xa8 */, 1 /* 0xa9 */, 1 /* 0xaa */, 1 /* 0xab */, + 1 /* 0xac */, 1 /* 0xad */, 1 /* 0xae */, 1 /* 0xaf */, + 1 /* 0xb0 */, 1 /* 0xb1 */, 1 /* 0xb2 */, 1 /* 0xb3 */, + 1 /* 0xb4 */, 1 /* 0xb5 */, 1 /* 0xb6 */, 1 /* 0xb7 */, + 1 /* 0xb8 */, 1 /* 0xb9 */, 1 /* 0xba */, 1 /* 0xbb */, + 1 /* 0xbc */, 1 /* 0xbd */, 1 /* 0xbe */, 1 /* 0xbf */, + 1 /* 0xc0 */, 1 /* 0xc1 */, 1 /* 0xc2 */, 1 /* 0xc3 */, + 1 /* 0xc4 */, 1 /* 0xc5 */, 1 /* 0xc6 */, 1 /* 0xc7 */, + 1 /* 0xc8 */, 1 /* 0xc9 */, 1 /* 0xca */, 1 /* 0xcb */, + 1 /* 0xcc */, 1 /* 0xcd */, 1 /* 0xce */, 1 /* 0xcf */, + 1 /* 0xd0 */, 1 /* 0xd1 */, 1 /* 0xd2 */, 1 /* 0xd3 */, + 1 /* 0xd4 */, 1 /* 0xd5 */, 1 /* 0xd6 */, 1 /* 0xd7 */, + 1 /* 0xd8 */, 1 /* 0xd9 */, 1 /* 0xda */, 1 /* 0xdb */, + 1 /* 0xdc */, 1 /* 0xdd */, 1 /* 0xde */, 1 /* 0xdf */, + 1 /* 0xe0 */, 1 /* 0xe1 */, 1 /* 0xe2 */, 1 /* 0xe3 */, + 1 /* 0xe4 */, 1 /* 0xe5 */, 1 /* 0xe6 */, 1 /* 0xe7 */, + 1 /* 0xe8 */, 1 /* 0xe9 */, 1 /* 0xea */, 1 /* 0xeb */, + 1 /* 0xec */, 1 /* 0xed */, 1 /* 0xee */, 1 /* 0xef */, + 1 /* 0xf0 */, 1 /* 0xf1 */, 1 /* 0xf2 */, 1 /* 0xf3 */, + 1 /* 0xf4 */, 1 /* 0xf5 */, 1 /* 0xf6 */, 1 /* 0xf7 */, + 1 /* 0xf8 */, 1 /* 0xf9 */, 1 /* 0xfa */, 1 /* 0xfb */, + 1 /* 0xfc */, 1 /* 0xfd */, 1 /* 0xfe */, 1 /* 0xff */ }; +#ifdef __AVX2__ +static int contains_bad_header_value_char_avx2(const uint8_t *first, + const uint8_t *last) { + const __m256i ctll = _mm256_set1_epi8(0x00 - 1); + const __m256i ctlr = _mm256_set1_epi8(0x1f + 1); + const __m256i ht = _mm256_set1_epi8('\t'); + const __m256i del = _mm256_set1_epi8(0x7f); + __m256i s, x; + uint32_t m; + + for (; first != last; first += 32) { + s = _mm256_loadu_si256((void *)first); + + x = _mm256_andnot_si256( + _mm256_cmpeq_epi8(s, ht), + _mm256_and_si256(_mm256_cmpgt_epi8(s, ctll), _mm256_cmpgt_epi8(ctlr, s))); + x = _mm256_or_si256(_mm256_cmpeq_epi8(s, del), x); + + m = (uint32_t)_mm256_movemask_epi8(x); + if (m) { + return 1; + } + } + + return 0; +} +#endif /* __AVX2__ */ + int nghttp3_check_header_value(const uint8_t *value, size_t len) { const uint8_t *last; +#ifdef __AVX2__ + const uint8_t *last32; +#endif /* __AVX2__ */ switch (len) { case 0: @@ -997,7 +998,20 @@ int nghttp3_check_header_value(const uint8_t *value, size_t len) { } } - for (last = value + len; value != last; ++value) { + last = value + len; + +#ifdef __AVX2__ + if (len >= 32) { + last32 = value + (len & ~0x1fu); + if (contains_bad_header_value_char_avx2(value, last32)) { + return 0; + } + + value = last32; + } +#endif /* __AVX2__ */ + + for (; value != last; ++value) { if (!VALID_HD_VALUE_CHARS[*value]) { return 0; } diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_http.h b/deps/ngtcp2/nghttp3/lib/nghttp3_http.h index 575d9c267e1b68..f0bfc69fbade75 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_http.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_http.h @@ -28,7 +28,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -170,4 +170,4 @@ int nghttp3_http_parse_priority(nghttp3_pri *dest, const uint8_t *value, int nghttp3_pri_eq(const nghttp3_pri *a, const nghttp3_pri *b); -#endif /* NGHTTP3_HTTP_H */ +#endif /* !defined(NGHTTP3_HTTP_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_idtr.c b/deps/ngtcp2/nghttp3/lib/nghttp3_idtr.c index dc34841fe0f8ef..ffed3064d2b791 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_idtr.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_idtr.c @@ -27,10 +27,8 @@ #include -void nghttp3_idtr_init(nghttp3_idtr *idtr, int server, const nghttp3_mem *mem) { +void nghttp3_idtr_init(nghttp3_idtr *idtr, const nghttp3_mem *mem) { nghttp3_gaptr_init(&idtr->gap, mem); - - idtr->server = server; } void nghttp3_idtr_free(nghttp3_idtr *idtr) { @@ -42,8 +40,7 @@ void nghttp3_idtr_free(nghttp3_idtr *idtr) { } /* - * id_from_stream_id translates |stream_id| to id space used by - * nghttp3_idtr. + * id_from_stream_id translates |stream_id| to an internal ID. */ static uint64_t id_from_stream_id(int64_t stream_id) { return (uint64_t)(stream_id >> 2); @@ -52,9 +49,6 @@ static uint64_t id_from_stream_id(int64_t stream_id) { int nghttp3_idtr_open(nghttp3_idtr *idtr, int64_t stream_id) { uint64_t q; - assert((idtr->server && (stream_id % 2)) || - (!idtr->server && (stream_id % 2)) == 0); - q = id_from_stream_id(stream_id); if (nghttp3_gaptr_is_pushed(&idtr->gap, q, 1)) { @@ -67,14 +61,7 @@ int nghttp3_idtr_open(nghttp3_idtr *idtr, int64_t stream_id) { int nghttp3_idtr_is_open(nghttp3_idtr *idtr, int64_t stream_id) { uint64_t q; - assert((idtr->server && (stream_id % 2)) || - (!idtr->server && (stream_id % 2)) == 0); - q = id_from_stream_id(stream_id); return nghttp3_gaptr_is_pushed(&idtr->gap, q, 1); } - -uint64_t nghttp3_idtr_first_gap(nghttp3_idtr *idtr) { - return nghttp3_gaptr_first_gap_offset(&idtr->gap); -} diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_idtr.h b/deps/ngtcp2/nghttp3/lib/nghttp3_idtr.h index ea3346c9a964c4..8ba15fc810cdcb 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_idtr.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_idtr.h @@ -28,7 +28,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -39,21 +39,17 @@ * nghttp3_idtr tracks the usage of stream ID. */ typedef struct nghttp3_idtr { - /* gap maintains the range of ID which is not used yet. Initially, - its range is [0, UINT64_MAX). */ + /* gap maintains the range of an internal ID which is not used yet. + Initially, its range is [0, UINT64_MAX). The internal ID and + stream ID are in the different number spaces. See + id_from_stream_id to convert a stream ID to an internal ID. */ nghttp3_gaptr gap; - /* server is nonzero if this object records server initiated stream - ID. */ - int server; } nghttp3_idtr; /* * nghttp3_idtr_init initializes |idtr|. - * - * If this object records server initiated ID (even number), set - * |server| to nonzero. */ -void nghttp3_idtr_init(nghttp3_idtr *idtr, int server, const nghttp3_mem *mem); +void nghttp3_idtr_init(nghttp3_idtr *idtr, const nghttp3_mem *mem); /* * nghttp3_idtr_free frees resources allocated for |idtr|. @@ -61,30 +57,21 @@ void nghttp3_idtr_init(nghttp3_idtr *idtr, int server, const nghttp3_mem *mem); void nghttp3_idtr_free(nghttp3_idtr *idtr); /* - * nghttp3_idtr_open claims that |stream_id| is in used. + * nghttp3_idtr_open claims that |stream_id| is in use. * * It returns 0 if it succeeds, or one of the following negative error * codes: * * NGHTTP3_ERR_STREAM_IN_USE - * ID has already been used. + * |stream_id| has already been used. * NGHTTP3_ERR_NOMEM * Out of memory. */ int nghttp3_idtr_open(nghttp3_idtr *idtr, int64_t stream_id); /* - * nghttp3_idtr_open tells whether ID |stream_id| is in used or not. - * - * It returns nonzero if |stream_id| is used. + * nghttp3_idtr_open returns nonzero if |stream_id| is in use. */ int nghttp3_idtr_is_open(nghttp3_idtr *idtr, int64_t stream_id); -/* - * nghttp3_idtr_first_gap returns the first id of first gap. If there - * is no gap, it returns UINT64_MAX. The returned id is an id space - * used in this object internally, and not stream ID. - */ -uint64_t nghttp3_idtr_first_gap(nghttp3_idtr *idtr); - -#endif /* NGHTTP3_IDTR_H */ +#endif /* !defined(NGHTTP3_IDTR_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_ksl.c b/deps/ngtcp2/nghttp3/lib/nghttp3_ksl.c index d7420a5d8a1e5d..a3b5fbcb05f4f3 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_ksl.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_ksl.c @@ -39,8 +39,10 @@ static nghttp3_ksl_blk null_blk = {{{NULL, NULL, 0, 0, {0}}}}; nghttp3_objalloc_def(ksl_blk, nghttp3_ksl_blk, oplent); static size_t ksl_nodelen(size_t keylen) { - return (sizeof(nghttp3_ksl_node) + keylen - sizeof(uint64_t) + 0xfu) & - ~(uintptr_t)0xfu; + assert(keylen >= sizeof(uint64_t)); + + return (sizeof(nghttp3_ksl_node) + keylen - sizeof(uint64_t) + 0x7u) & + ~(uintptr_t)0x7u; } static size_t ksl_blklen(size_t nodelen) { @@ -61,15 +63,14 @@ void nghttp3_ksl_init(nghttp3_ksl *ksl, nghttp3_ksl_compar compar, size_t nodelen = ksl_nodelen(keylen); nghttp3_objalloc_init(&ksl->blkalloc, - ((ksl_blklen(nodelen) + 0xfu) & ~(uintptr_t)0xfu) * 8, - mem); + (ksl_blklen(nodelen) + 0xfu) & ~(uintptr_t)0xfu, mem); ksl->head = NULL; ksl->front = ksl->back = NULL; ksl->compar = compar; + ksl->n = 0; ksl->keylen = keylen; ksl->nodelen = nodelen; - ksl->n = 0; } static nghttp3_ksl_blk *ksl_blk_objalloc_new(nghttp3_ksl *ksl) { @@ -83,6 +84,7 @@ static void ksl_blk_objalloc_del(nghttp3_ksl *ksl, nghttp3_ksl_blk *blk) { static int ksl_head_init(nghttp3_ksl *ksl) { nghttp3_ksl_blk *head = ksl_blk_objalloc_new(ksl); + if (!head) { return NGHTTP3_ERR_NOMEM; } @@ -112,7 +114,7 @@ static void ksl_free_blk(nghttp3_ksl *ksl, nghttp3_ksl_blk *blk) { ksl_blk_objalloc_del(ksl, blk); } -#endif /* NOMEMPOOL */ +#endif /* defined(NOMEMPOOL) */ void nghttp3_ksl_free(nghttp3_ksl *ksl) { if (!ksl || !ksl->head) { @@ -121,7 +123,7 @@ void nghttp3_ksl_free(nghttp3_ksl *ksl) { #ifdef NOMEMPOOL ksl_free_blk(ksl, ksl->head); -#endif /* NOMEMPOOL */ +#endif /* defined(NOMEMPOOL) */ nghttp3_objalloc_free(&ksl->blkalloc); } @@ -144,21 +146,22 @@ static nghttp3_ksl_blk *ksl_split_blk(nghttp3_ksl *ksl, nghttp3_ksl_blk *blk) { rblk->next = blk->next; blk->next = rblk; + if (rblk->next) { rblk->next->prev = rblk; } else if (ksl->back == blk) { ksl->back = rblk; } + rblk->prev = blk; rblk->leaf = blk->leaf; rblk->n = blk->n / 2; + blk->n -= rblk->n; - memcpy(rblk->nodes, blk->nodes + ksl->nodelen * (blk->n - rblk->n), + memcpy(rblk->nodes, blk->nodes + ksl->nodelen * blk->n, ksl->nodelen * rblk->n); - blk->n -= rblk->n; - assert(blk->n >= NGHTTP3_KSL_MIN_NBLK); assert(rblk->n >= NGHTTP3_KSL_MIN_NBLK); @@ -174,7 +177,7 @@ static nghttp3_ksl_blk *ksl_split_blk(nghttp3_ksl *ksl, nghttp3_ksl_blk *blk) { * codes: * * NGHTTP3_ERR_NOMEM - * Out of memory. + * Out of memory. */ static int ksl_split_node(nghttp3_ksl *ksl, nghttp3_ksl_blk *blk, size_t i) { nghttp3_ksl_node *node; @@ -210,7 +213,7 @@ static int ksl_split_node(nghttp3_ksl *ksl, nghttp3_ksl_blk *blk, size_t i) { * codes: * * NGHTTP3_ERR_NOMEM - * Out of memory. + * Out of memory. */ static int ksl_split_head(nghttp3_ksl *ksl) { nghttp3_ksl_blk *rblk = NULL, *lblk, *nhead = NULL; @@ -224,10 +227,12 @@ static int ksl_split_head(nghttp3_ksl *ksl) { lblk = ksl->head; nhead = ksl_blk_objalloc_new(ksl); + if (nhead == NULL) { ksl_blk_objalloc_del(ksl, rblk); return NGHTTP3_ERR_NOMEM; } + nhead->next = nhead->prev = NULL; nhead->n = 2; nhead->leaf = 0; @@ -248,9 +253,9 @@ static int ksl_split_head(nghttp3_ksl *ksl) { } /* - * insert_node inserts a node whose key is |key| with the associated - * |data| at the index of |i|. This function assumes that the number - * of nodes contained by |blk| is strictly less than + * ksl_insert_node inserts a node whose key is |key| with the + * associated |data| at the index of |i|. This function assumes that + * the number of nodes contained by |blk| is strictly less than * NGHTTP3_KSL_MAX_NBLK. */ static void ksl_insert_node(nghttp3_ksl *ksl, nghttp3_ksl_blk *blk, size_t i, @@ -269,9 +274,9 @@ static void ksl_insert_node(nghttp3_ksl *ksl, nghttp3_ksl_blk *blk, size_t i, ++blk->n; } -static size_t ksl_bsearch(nghttp3_ksl *ksl, nghttp3_ksl_blk *blk, - const nghttp3_ksl_key *key, - nghttp3_ksl_compar compar) { +static size_t ksl_search(const nghttp3_ksl *ksl, nghttp3_ksl_blk *blk, + const nghttp3_ksl_key *key, + nghttp3_ksl_compar compar) { size_t i; nghttp3_ksl_node *node; @@ -297,18 +302,17 @@ int nghttp3_ksl_insert(nghttp3_ksl *ksl, nghttp3_ksl_it *it, } } - blk = ksl->head; - - if (blk->n == NGHTTP3_KSL_MAX_NBLK) { + if (ksl->head->n == NGHTTP3_KSL_MAX_NBLK) { rv = ksl_split_head(ksl); if (rv != 0) { return rv; } - blk = ksl->head; } + blk = ksl->head; + for (;;) { - i = ksl_bsearch(ksl, blk, key, ksl->compar); + i = ksl_search(ksl, blk, key, ksl->compar); if (blk->leaf) { if (i < blk->n && @@ -316,13 +320,17 @@ int nghttp3_ksl_insert(nghttp3_ksl *ksl, nghttp3_ksl_it *it, if (it) { *it = nghttp3_ksl_end(ksl); } + return NGHTTP3_ERR_INVALID_ARGUMENT; } + ksl_insert_node(ksl, blk, i, key, data); ++ksl->n; + if (it) { nghttp3_ksl_it_init(it, ksl, blk, i); } + return 0; } @@ -335,16 +343,21 @@ int nghttp3_ksl_insert(nghttp3_ksl *ksl, nghttp3_ksl_it *it, if (rv != 0) { return rv; } + node = nghttp3_ksl_nth_node(ksl, blk, blk->n - 1); } + ksl_node_set_key(ksl, node, key); blk = node->blk; } + ksl_insert_node(ksl, blk, blk->n, key, data); ++ksl->n; + if (it) { nghttp3_ksl_it_init(it, ksl, blk, blk->n - 1); } + return 0; } @@ -355,8 +368,10 @@ int nghttp3_ksl_insert(nghttp3_ksl *ksl, nghttp3_ksl_it *it, if (rv != 0) { return rv; } + if (ksl->compar((nghttp3_ksl_key *)node->key, key)) { node = nghttp3_ksl_nth_node(ksl, blk, i + 1); + if (ksl->compar((nghttp3_ksl_key *)node->key, key)) { ksl_node_set_key(ksl, node, key); } @@ -382,19 +397,22 @@ static void ksl_remove_node(nghttp3_ksl *ksl, nghttp3_ksl_blk *blk, size_t i) { * ksl_merge_node merges 2 nodes which are the nodes at the index of * |i| and |i + 1|. * - * If |blk| is the direct descendant of head (root) block and the head - * block contains just 2 nodes, the merged block becomes head block, - * which decreases the height of |ksl| by 1. + * If |blk| is the head (root) block and it contains just 2 nodes + * before merging nodes, the merged block becomes head block, which + * decreases the height of |ksl| by 1. * * This function returns the pointer to the merged block. */ static nghttp3_ksl_blk *ksl_merge_node(nghttp3_ksl *ksl, nghttp3_ksl_blk *blk, size_t i) { + nghttp3_ksl_node *lnode; nghttp3_ksl_blk *lblk, *rblk; assert(i + 1 < blk->n); - lblk = nghttp3_ksl_nth_node(ksl, blk, i)->blk; + lnode = nghttp3_ksl_nth_node(ksl, blk, i); + + lblk = lnode->blk; rblk = nghttp3_ksl_nth_node(ksl, blk, i + 1)->blk; assert(lblk->n + rblk->n < NGHTTP3_KSL_MAX_NBLK); @@ -404,6 +422,7 @@ static nghttp3_ksl_blk *ksl_merge_node(nghttp3_ksl *ksl, nghttp3_ksl_blk *blk, lblk->n += rblk->n; lblk->next = rblk->next; + if (lblk->next) { lblk->next->prev = lblk; } else if (ksl->back == rblk) { @@ -417,7 +436,7 @@ static nghttp3_ksl_blk *ksl_merge_node(nghttp3_ksl *ksl, nghttp3_ksl_blk *blk, ksl->head = lblk; } else { ksl_remove_node(ksl, blk, i + 1); - ksl_node_set_key(ksl, nghttp3_ksl_nth_node(ksl, blk, i), + ksl_node_set_key(ksl, lnode, nghttp3_ksl_nth_node(ksl, lblk, lblk->n - 1)->key); } @@ -431,6 +450,7 @@ static nghttp3_ksl_blk *ksl_merge_node(nghttp3_ksl *ksl, nghttp3_ksl_blk *blk, */ static void ksl_shift_left(nghttp3_ksl *ksl, nghttp3_ksl_blk *blk, size_t i) { nghttp3_ksl_node *lnode, *rnode; + nghttp3_ksl_blk *lblk, *rblk; size_t n; assert(i > 0); @@ -438,36 +458,37 @@ static void ksl_shift_left(nghttp3_ksl *ksl, nghttp3_ksl_blk *blk, size_t i) { lnode = nghttp3_ksl_nth_node(ksl, blk, i - 1); rnode = nghttp3_ksl_nth_node(ksl, blk, i); - assert(lnode->blk->n < NGHTTP3_KSL_MAX_NBLK); - assert(rnode->blk->n > NGHTTP3_KSL_MIN_NBLK); + lblk = lnode->blk; + rblk = rnode->blk; + + assert(lblk->n < NGHTTP3_KSL_MAX_NBLK); + assert(rblk->n > NGHTTP3_KSL_MIN_NBLK); - n = (lnode->blk->n + rnode->blk->n + 1) / 2 - lnode->blk->n; + n = (lblk->n + rblk->n + 1) / 2 - lblk->n; assert(n > 0); - assert(lnode->blk->n <= NGHTTP3_KSL_MAX_NBLK - n); - assert(rnode->blk->n >= NGHTTP3_KSL_MIN_NBLK + n); + assert(lblk->n <= NGHTTP3_KSL_MAX_NBLK - n); + assert(rblk->n >= NGHTTP3_KSL_MIN_NBLK + n); - memcpy(lnode->blk->nodes + ksl->nodelen * lnode->blk->n, rnode->blk->nodes, - ksl->nodelen * n); + memcpy(lblk->nodes + ksl->nodelen * lblk->n, rblk->nodes, ksl->nodelen * n); - lnode->blk->n += (uint32_t)n; - rnode->blk->n -= (uint32_t)n; + lblk->n += (uint32_t)n; + rblk->n -= (uint32_t)n; - ksl_node_set_key( - ksl, lnode, - nghttp3_ksl_nth_node(ksl, lnode->blk, lnode->blk->n - 1)->key); + ksl_node_set_key(ksl, lnode, + nghttp3_ksl_nth_node(ksl, lblk, lblk->n - 1)->key); - memmove(rnode->blk->nodes, rnode->blk->nodes + ksl->nodelen * n, - ksl->nodelen * rnode->blk->n); + memmove(rblk->nodes, rblk->nodes + ksl->nodelen * n, ksl->nodelen * rblk->n); } /* * ksl_shift_right moves the last nodes in blk->nodes[i]->blk->nodes * to blk->nodes[i + 1]->blk->nodes in a manner that they have the - * same amount of nodes as much as possible.. + * same amount of nodes as much as possible. */ static void ksl_shift_right(nghttp3_ksl *ksl, nghttp3_ksl_blk *blk, size_t i) { nghttp3_ksl_node *lnode, *rnode; + nghttp3_ksl_blk *lblk, *rblk; size_t n; assert(i < blk->n - 1); @@ -475,27 +496,27 @@ static void ksl_shift_right(nghttp3_ksl *ksl, nghttp3_ksl_blk *blk, size_t i) { lnode = nghttp3_ksl_nth_node(ksl, blk, i); rnode = nghttp3_ksl_nth_node(ksl, blk, i + 1); - assert(lnode->blk->n > NGHTTP3_KSL_MIN_NBLK); - assert(rnode->blk->n < NGHTTP3_KSL_MAX_NBLK); + lblk = lnode->blk; + rblk = rnode->blk; - n = (lnode->blk->n + rnode->blk->n + 1) / 2 - rnode->blk->n; + assert(lblk->n > NGHTTP3_KSL_MIN_NBLK); + assert(rblk->n < NGHTTP3_KSL_MAX_NBLK); + + n = (lblk->n + rblk->n + 1) / 2 - rblk->n; assert(n > 0); - assert(lnode->blk->n >= NGHTTP3_KSL_MIN_NBLK + n); - assert(rnode->blk->n <= NGHTTP3_KSL_MAX_NBLK - n); + assert(lblk->n >= NGHTTP3_KSL_MIN_NBLK + n); + assert(rblk->n <= NGHTTP3_KSL_MAX_NBLK - n); - memmove(rnode->blk->nodes + ksl->nodelen * n, rnode->blk->nodes, - ksl->nodelen * rnode->blk->n); + memmove(rblk->nodes + ksl->nodelen * n, rblk->nodes, ksl->nodelen * rblk->n); - rnode->blk->n += (uint32_t)n; - lnode->blk->n -= (uint32_t)n; + rblk->n += (uint32_t)n; + lblk->n -= (uint32_t)n; - memcpy(rnode->blk->nodes, lnode->blk->nodes + ksl->nodelen * lnode->blk->n, - ksl->nodelen * n); + memcpy(rblk->nodes, lblk->nodes + ksl->nodelen * lblk->n, ksl->nodelen * n); - ksl_node_set_key( - ksl, lnode, - nghttp3_ksl_nth_node(ksl, lnode->blk, lnode->blk->n - 1)->key); + ksl_node_set_key(ksl, lnode, + nghttp3_ksl_nth_node(ksl, lblk, lblk->n - 1)->key); } /* @@ -539,23 +560,24 @@ int nghttp3_ksl_remove(nghttp3_ksl *ksl, nghttp3_ksl_it *it, nghttp3_ksl_node *node; size_t i; - if (!ksl->head) { + if (!blk) { return NGHTTP3_ERR_INVALID_ARGUMENT; } if (!blk->leaf && blk->n == 2 && nghttp3_ksl_nth_node(ksl, blk, 0)->blk->n == NGHTTP3_KSL_MIN_NBLK && nghttp3_ksl_nth_node(ksl, blk, 1)->blk->n == NGHTTP3_KSL_MIN_NBLK) { - blk = ksl_merge_node(ksl, ksl->head, 0); + blk = ksl_merge_node(ksl, blk, 0); } for (;;) { - i = ksl_bsearch(ksl, blk, key, ksl->compar); + i = ksl_search(ksl, blk, key, ksl->compar); if (i == blk->n) { if (it) { *it = nghttp3_ksl_end(ksl); } + return NGHTTP3_ERR_INVALID_ARGUMENT; } @@ -564,10 +586,13 @@ int nghttp3_ksl_remove(nghttp3_ksl *ksl, nghttp3_ksl_it *it, if (it) { *it = nghttp3_ksl_end(ksl); } + return NGHTTP3_ERR_INVALID_ARGUMENT; } + ksl_remove_node(ksl, blk, i); --ksl->n; + if (it) { if (blk->n == i && blk->next) { nghttp3_ksl_it_init(it, ksl, blk->next, 0); @@ -575,6 +600,7 @@ int nghttp3_ksl_remove(nghttp3_ksl *ksl, nghttp3_ksl_it *it, nghttp3_ksl_it_init(it, ksl, blk, i); } } + return 0; } @@ -591,6 +617,7 @@ int nghttp3_ksl_remove(nghttp3_ksl *ksl, nghttp3_ksl_it *it, nghttp3_ksl_nth_node(ksl, blk, i + 1)->blk->n > NGHTTP3_KSL_MIN_NBLK) { ksl_shift_left(ksl, blk, i + 1); blk = node->blk; + continue; } @@ -598,6 +625,7 @@ int nghttp3_ksl_remove(nghttp3_ksl *ksl, nghttp3_ksl_it *it, nghttp3_ksl_nth_node(ksl, blk, i - 1)->blk->n > NGHTTP3_KSL_MIN_NBLK) { ksl_shift_right(ksl, blk, i - 1); blk = node->blk; + continue; } @@ -612,48 +640,12 @@ int nghttp3_ksl_remove(nghttp3_ksl *ksl, nghttp3_ksl_it *it, } } -nghttp3_ksl_it nghttp3_ksl_lower_bound(nghttp3_ksl *ksl, +nghttp3_ksl_it nghttp3_ksl_lower_bound(const nghttp3_ksl *ksl, const nghttp3_ksl_key *key) { - nghttp3_ksl_blk *blk = ksl->head; - nghttp3_ksl_it it; - size_t i; - - if (!blk) { - nghttp3_ksl_it_init(&it, ksl, &null_blk, 0); - return it; - } - - for (;;) { - i = ksl_bsearch(ksl, blk, key, ksl->compar); - - if (blk->leaf) { - if (i == blk->n && blk->next) { - blk = blk->next; - i = 0; - } - nghttp3_ksl_it_init(&it, ksl, blk, i); - return it; - } - - if (i == blk->n) { - /* This happens if descendant has smaller key. Fast forward to - find last node in this subtree. */ - for (; !blk->leaf; blk = nghttp3_ksl_nth_node(ksl, blk, blk->n - 1)->blk) - ; - if (blk->next) { - blk = blk->next; - i = 0; - } else { - i = blk->n; - } - nghttp3_ksl_it_init(&it, ksl, blk, i); - return it; - } - blk = nghttp3_ksl_nth_node(ksl, blk, i)->blk; - } + return nghttp3_ksl_lower_bound_compar(ksl, key, ksl->compar); } -nghttp3_ksl_it nghttp3_ksl_lower_bound_compar(nghttp3_ksl *ksl, +nghttp3_ksl_it nghttp3_ksl_lower_bound_compar(const nghttp3_ksl *ksl, const nghttp3_ksl_key *key, nghttp3_ksl_compar compar) { nghttp3_ksl_blk *blk = ksl->head; @@ -666,14 +658,16 @@ nghttp3_ksl_it nghttp3_ksl_lower_bound_compar(nghttp3_ksl *ksl, } for (;;) { - i = ksl_bsearch(ksl, blk, key, compar); + i = ksl_search(ksl, blk, key, compar); if (blk->leaf) { if (i == blk->n && blk->next) { blk = blk->next; i = 0; } + nghttp3_ksl_it_init(&it, ksl, blk, i); + return it; } @@ -682,15 +676,19 @@ nghttp3_ksl_it nghttp3_ksl_lower_bound_compar(nghttp3_ksl *ksl, find last node in this subtree. */ for (; !blk->leaf; blk = nghttp3_ksl_nth_node(ksl, blk, blk->n - 1)->blk) ; + if (blk->next) { blk = blk->next; i = 0; } else { i = blk->n; } + nghttp3_ksl_it_init(&it, ksl, blk, i); + return it; } + blk = nghttp3_ksl_nth_node(ksl, blk, i)->blk; } } @@ -704,7 +702,7 @@ void nghttp3_ksl_update_key(nghttp3_ksl *ksl, const nghttp3_ksl_key *old_key, assert(ksl->head); for (;;) { - i = ksl_bsearch(ksl, blk, old_key, ksl->compar); + i = ksl_search(ksl, blk, old_key, ksl->compar); assert(i < blk->n); node = nghttp3_ksl_nth_node(ksl, blk, i); @@ -712,6 +710,7 @@ void nghttp3_ksl_update_key(nghttp3_ksl *ksl, const nghttp3_ksl_key *old_key, if (blk->leaf) { assert(key_equal(ksl->compar, (nghttp3_ksl_key *)node->key, old_key)); ksl_node_set_key(ksl, node, new_key); + return; } @@ -724,7 +723,7 @@ void nghttp3_ksl_update_key(nghttp3_ksl *ksl, const nghttp3_ksl_key *old_key, } } -size_t nghttp3_ksl_len(nghttp3_ksl *ksl) { return ksl->n; } +size_t nghttp3_ksl_len(const nghttp3_ksl *ksl) { return ksl->n; } void nghttp3_ksl_clear(nghttp3_ksl *ksl) { if (!ksl->head) { @@ -733,7 +732,7 @@ void nghttp3_ksl_clear(nghttp3_ksl *ksl) { #ifdef NOMEMPOOL ksl_free_blk(ksl, ksl->head); -#endif /* NOMEMPOOL */ +#endif /* defined(NOMEMPOOL) */ ksl->front = ksl->back = ksl->head = NULL; ksl->n = 0; @@ -742,7 +741,8 @@ void nghttp3_ksl_clear(nghttp3_ksl *ksl) { } #ifndef WIN32 -static void ksl_print(nghttp3_ksl *ksl, nghttp3_ksl_blk *blk, size_t level) { +static void ksl_print(const nghttp3_ksl *ksl, nghttp3_ksl_blk *blk, + size_t level) { size_t i; nghttp3_ksl_node *node; @@ -753,7 +753,9 @@ static void ksl_print(nghttp3_ksl *ksl, nghttp3_ksl_blk *blk, size_t level) { node = nghttp3_ksl_nth_node(ksl, blk, i); fprintf(stderr, " %" PRId64, *(int64_t *)(void *)node->key); } + fprintf(stderr, "\n"); + return; } @@ -762,14 +764,14 @@ static void ksl_print(nghttp3_ksl *ksl, nghttp3_ksl_blk *blk, size_t level) { } } -void nghttp3_ksl_print(nghttp3_ksl *ksl) { +void nghttp3_ksl_print(const nghttp3_ksl *ksl) { if (!ksl->head) { return; } ksl_print(ksl, ksl->head, 0); } -#endif /* !WIN32 */ +#endif /* !defined(WIN32) */ nghttp3_ksl_it nghttp3_ksl_begin(const nghttp3_ksl *ksl) { nghttp3_ksl_it it; @@ -826,6 +828,6 @@ int nghttp3_ksl_range_compar(const nghttp3_ksl_key *lhs, int nghttp3_ksl_range_exclusive_compar(const nghttp3_ksl_key *lhs, const nghttp3_ksl_key *rhs) { const nghttp3_range *a = lhs, *b = rhs; - return a->begin < b->begin && - !(nghttp3_max(a->begin, b->begin) < nghttp3_min(a->end, b->end)); + return a->begin < b->begin && !(nghttp3_max_uint64(a->begin, b->begin) < + nghttp3_min_uint64(a->end, b->end)); } diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_ksl.h b/deps/ngtcp2/nghttp3/lib/nghttp3_ksl.h index d513bdd672c750..e15e227ce50fb0 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_ksl.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_ksl.h @@ -28,7 +28,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -36,16 +36,12 @@ #include "nghttp3_objalloc.h" -/* - * Skip List using single key instead of range. - */ - #define NGHTTP3_KSL_DEGR 16 /* NGHTTP3_KSL_MAX_NBLK is the maximum number of nodes which a single block can contain. */ #define NGHTTP3_KSL_MAX_NBLK (2 * NGHTTP3_KSL_DEGR - 1) /* NGHTTP3_KSL_MIN_NBLK is the minimum number of nodes which a single - block other than root must contains. */ + block other than root must contain. */ #define NGHTTP3_KSL_MIN_NBLK (NGHTTP3_KSL_DEGR - 1) /* @@ -122,7 +118,7 @@ typedef struct nghttp3_ksl nghttp3_ksl; typedef struct nghttp3_ksl_it nghttp3_ksl_it; /* - * nghttp3_ksl_it is a forward iterator to iterate nodes. + * nghttp3_ksl_it is a bidirectional iterator to iterate nodes. */ struct nghttp3_ksl_it { const nghttp3_ksl *ksl; @@ -142,6 +138,7 @@ struct nghttp3_ksl { /* back points to the last leaf block. */ nghttp3_ksl_blk *back; nghttp3_ksl_compar compar; + /* n is the number of elements stored. */ size_t n; /* keylen is the size of key */ size_t keylen; @@ -152,7 +149,8 @@ struct nghttp3_ksl { /* * nghttp3_ksl_init initializes |ksl|. |compar| specifies compare - * function. |keylen| is the length of key. + * function. |keylen| is the length of key and must be at least + * sizeof(uint64_t). */ void nghttp3_ksl_init(nghttp3_ksl *ksl, nghttp3_ksl_compar compar, size_t keylen, const nghttp3_mem *mem); @@ -167,15 +165,15 @@ void nghttp3_ksl_free(nghttp3_ksl *ksl); /* * nghttp3_ksl_insert inserts |key| with its associated |data|. On * successful insertion, the iterator points to the inserted node is - * stored in |*it|. + * stored in |*it| if |it| is not NULL. * * This function returns 0 if it succeeds, or one of the following * negative error codes: * * NGHTTP3_ERR_NOMEM - * Out of memory. + * Out of memory. * NGHTTP3_ERR_INVALID_ARGUMENT - * |key| already exists. + * |key| already exists. */ int nghttp3_ksl_insert(nghttp3_ksl *ksl, nghttp3_ksl_it *it, const nghttp3_ksl_key *key, void *data); @@ -186,13 +184,14 @@ int nghttp3_ksl_insert(nghttp3_ksl *ksl, nghttp3_ksl_it *it, * This function assigns the iterator to |*it|, which points to the * node which is located at the right next of the removed node if |it| * is not NULL. If |key| is not found, no deletion takes place and - * the return value of nghttp3_ksl_end(ksl) is assigned to |*it|. + * the return value of nghttp3_ksl_end(ksl) is assigned to |*it| if + * |it| is not NULL. * * This function returns 0 if it succeeds, or one of the following * negative error codes: * * NGHTTP3_ERR_INVALID_ARGUMENT - * |key| does not exist. + * |key| does not exist. */ int nghttp3_ksl_remove(nghttp3_ksl *ksl, nghttp3_ksl_it *it, const nghttp3_ksl_key *key); @@ -212,24 +211,24 @@ int nghttp3_ksl_remove_hint(nghttp3_ksl *ksl, nghttp3_ksl_it *it, * nghttp3_ksl_lower_bound returns the iterator which points to the * first node which has the key which is equal to |key| or the last * node which satisfies !compar(&node->key, key). If there is no such - * node, it returns the iterator which satisfies nghttp3_ksl_it_end(it) - * != 0. + * node, it returns the iterator which satisfies + * nghttp3_ksl_it_end(it) != 0. */ -nghttp3_ksl_it nghttp3_ksl_lower_bound(nghttp3_ksl *ksl, +nghttp3_ksl_it nghttp3_ksl_lower_bound(const nghttp3_ksl *ksl, const nghttp3_ksl_key *key); /* * nghttp3_ksl_lower_bound_compar works like nghttp3_ksl_lower_bound, * but it takes custom function |compar| to do lower bound search. */ -nghttp3_ksl_it nghttp3_ksl_lower_bound_compar(nghttp3_ksl *ksl, +nghttp3_ksl_it nghttp3_ksl_lower_bound_compar(const nghttp3_ksl *ksl, const nghttp3_ksl_key *key, nghttp3_ksl_compar compar); /* - * nghttp3_ksl_update_key replaces the key of nodes which has |old_key| - * with |new_key|. |new_key| must be strictly greater than the - * previous node and strictly smaller than the next node. + * nghttp3_ksl_update_key replaces the key of nodes which has + * |old_key| with |new_key|. |new_key| must be strictly greater than + * the previous node and strictly smaller than the next node. */ void nghttp3_ksl_update_key(nghttp3_ksl *ksl, const nghttp3_ksl_key *old_key, const nghttp3_ksl_key *new_key); @@ -237,7 +236,8 @@ void nghttp3_ksl_update_key(nghttp3_ksl *ksl, const nghttp3_ksl_key *old_key, /* * nghttp3_ksl_begin returns the iterator which points to the first * node. If there is no node in |ksl|, it returns the iterator which - * satisfies nghttp3_ksl_it_end(it) != 0. + * satisfies both nghttp3_ksl_it_begin(it) != 0 and + * nghttp3_ksl_it_end(it) != 0. */ nghttp3_ksl_it nghttp3_ksl_begin(const nghttp3_ksl *ksl); @@ -245,14 +245,15 @@ nghttp3_ksl_it nghttp3_ksl_begin(const nghttp3_ksl *ksl); * nghttp3_ksl_end returns the iterator which points to the node * following the last node. The returned object satisfies * nghttp3_ksl_it_end(). If there is no node in |ksl|, it returns the - * iterator which satisfies nghttp3_ksl_it_begin(it) != 0. + * iterator which satisfies nghttp3_ksl_it_begin(it) != 0 and + * nghttp3_ksl_it_end(it) != 0. */ nghttp3_ksl_it nghttp3_ksl_end(const nghttp3_ksl *ksl); /* * nghttp3_ksl_len returns the number of elements stored in |ksl|. */ -size_t nghttp3_ksl_len(nghttp3_ksl *ksl); +size_t nghttp3_ksl_len(const nghttp3_ksl *ksl); /* * nghttp3_ksl_clear removes all elements stored in |ksl|. @@ -271,8 +272,8 @@ void nghttp3_ksl_clear(nghttp3_ksl *ksl); * that the key is of type int64_t. This function should be used for * the debugging purpose only. */ -void nghttp3_ksl_print(nghttp3_ksl *ksl); -#endif /* !WIN32 */ +void nghttp3_ksl_print(const nghttp3_ksl *ksl); +#endif /* !defined(WIN32) */ /* * nghttp3_ksl_it_init initializes |it|. @@ -295,8 +296,8 @@ void nghttp3_ksl_it_init(nghttp3_ksl_it *it, const nghttp3_ksl *ksl, */ #define nghttp3_ksl_it_next(IT) \ (++(IT)->i == (IT)->blk->n && (IT)->blk->next \ - ? ((IT)->blk = (IT)->blk->next, (IT)->i = 0) \ - : 0) + ? ((IT)->blk = (IT)->blk->next, (IT)->i = 0) \ + : 0) /* * nghttp3_ksl_it_prev moves backward the iterator by one. It is @@ -306,16 +307,16 @@ void nghttp3_ksl_it_init(nghttp3_ksl_it *it, const nghttp3_ksl *ksl, void nghttp3_ksl_it_prev(nghttp3_ksl_it *it); /* - * nghttp3_ksl_it_end returns nonzero if |it| points to the beyond the - * last node. + * nghttp3_ksl_it_end returns nonzero if |it| points to the one beyond + * the last node. */ #define nghttp3_ksl_it_end(IT) \ ((IT)->blk->n == (IT)->i && (IT)->blk->next == NULL) /* * nghttp3_ksl_it_begin returns nonzero if |it| points to the first - * node. |it| might satisfy both nghttp3_ksl_it_begin(&it) and - * nghttp3_ksl_it_end(&it) if the skip list has no node. + * node. |it| might satisfy both nghttp3_ksl_it_begin(it) != 0 and + * nghttp3_ksl_it_end(it) != 0 if the skip list has no node. */ int nghttp3_ksl_it_begin(const nghttp3_ksl_it *it); @@ -347,4 +348,4 @@ int nghttp3_ksl_range_compar(const nghttp3_ksl_key *lhs, int nghttp3_ksl_range_exclusive_compar(const nghttp3_ksl_key *lhs, const nghttp3_ksl_key *rhs); -#endif /* NGHTTP3_KSL_H */ +#endif /* !defined(NGHTTP3_KSL_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_macro.h b/deps/ngtcp2/nghttp3/lib/nghttp3_macro.h index a44e907661abbf..a4e1dfea3cda00 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_macro.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_macro.h @@ -28,17 +28,14 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include #include -#define nghttp3_min(A, B) ((A) < (B) ? (A) : (B)) -#define nghttp3_max(A, B) ((A) > (B) ? (A) : (B)) - #define nghttp3_struct_of(ptr, type, member) \ - ((type *)(void *)((char *)(ptr)-offsetof(type, member))) + ((type *)(void *)((char *)(ptr) - offsetof(type, member))) #define nghttp3_arraylen(A) (sizeof(A) / sizeof(*(A))) @@ -48,4 +45,30 @@ variable-length integer encoding. */ #define NGHTTP3_MAX_VARINT ((1ULL << 62) - 1) -#endif /* NGHTTP3_MACRO_H */ +#define nghttp3_max_def(SUFFIX, T) \ + static inline T nghttp3_max_##SUFFIX(T a, T b) { return a < b ? b : a; } + +nghttp3_max_def(int8, int8_t); +nghttp3_max_def(int16, int16_t); +nghttp3_max_def(int32, int32_t); +nghttp3_max_def(int64, int64_t); +nghttp3_max_def(uint8, uint8_t); +nghttp3_max_def(uint16, uint16_t); +nghttp3_max_def(uint32, uint32_t); +nghttp3_max_def(uint64, uint64_t); +nghttp3_max_def(size, size_t); + +#define nghttp3_min_def(SUFFIX, T) \ + static inline T nghttp3_min_##SUFFIX(T a, T b) { return a < b ? a : b; } + +nghttp3_min_def(int8, int8_t); +nghttp3_min_def(int16, int16_t); +nghttp3_min_def(int32, int32_t); +nghttp3_min_def(int64, int64_t); +nghttp3_min_def(uint8, uint8_t); +nghttp3_min_def(uint16, uint16_t); +nghttp3_min_def(uint32, uint32_t); +nghttp3_min_def(uint64, uint64_t); +nghttp3_min_def(size, size_t); + +#endif /* !defined(NGHTTP3_MACRO_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_map.c b/deps/ngtcp2/nghttp3/lib/nghttp3_map.c index b93fdfd3d488f5..cc5e42a2caf63f 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_map.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_map.c @@ -36,8 +36,7 @@ void nghttp3_map_init(nghttp3_map *map, const nghttp3_mem *mem) { map->mem = mem; - map->tablelen = 0; - map->tablelenbits = 0; + map->hashbits = 0; map->table = NULL; map->size = 0; } @@ -50,33 +49,20 @@ void nghttp3_map_free(nghttp3_map *map) { nghttp3_mem_free(map->mem, map->table); } -void nghttp3_map_each_free(nghttp3_map *map, int (*func)(void *data, void *ptr), - void *ptr) { - uint32_t i; - nghttp3_map_bucket *bkt; - - for (i = 0; i < map->tablelen; ++i) { - bkt = &map->table[i]; - - if (bkt->data == NULL) { - continue; - } - - func(bkt->data, ptr); - } -} - -int nghttp3_map_each(nghttp3_map *map, int (*func)(void *data, void *ptr), +int nghttp3_map_each(const nghttp3_map *map, int (*func)(void *data, void *ptr), void *ptr) { int rv; - uint32_t i; + size_t i; nghttp3_map_bucket *bkt; + size_t tablelen; if (map->size == 0) { return 0; } - for (i = 0; i < map->tablelen; ++i) { + tablelen = 1u << map->hashbits; + + for (i = 0; i < tablelen; ++i) { bkt = &map->table[i]; if (bkt->data == NULL) { @@ -92,82 +78,61 @@ int nghttp3_map_each(nghttp3_map *map, int (*func)(void *data, void *ptr), return 0; } -static uint32_t hash(nghttp3_map_key_type key) { - return (uint32_t)((key * 11400714819323198485llu) >> 32); -} - -static size_t h2idx(uint32_t hash, uint32_t bits) { - return hash >> (32 - bits); -} - -static size_t distance(uint32_t tablelen, uint32_t tablelenbits, - nghttp3_map_bucket *bkt, size_t idx) { - return (idx - h2idx(bkt->hash, tablelenbits)) & (tablelen - 1); +static size_t hash(nghttp3_map_key_type key, size_t bits) { + return (size_t)((key * 11400714819323198485llu) >> (64 - bits)); } -static void map_bucket_swap(nghttp3_map_bucket *bkt, uint32_t *phash, - nghttp3_map_key_type *pkey, void **pdata) { - uint32_t h = bkt->hash; - nghttp3_map_key_type key = bkt->key; - void *data = bkt->data; - - bkt->hash = *phash; - bkt->key = *pkey; - bkt->data = *pdata; +static void map_bucket_swap(nghttp3_map_bucket *a, nghttp3_map_bucket *b) { + nghttp3_map_bucket c = *a; - *phash = h; - *pkey = key; - *pdata = data; -} - -static void map_bucket_set_data(nghttp3_map_bucket *bkt, uint32_t hash, - nghttp3_map_key_type key, void *data) { - bkt->hash = hash; - bkt->key = key; - bkt->data = data; + *a = *b; + *b = c; } #ifndef WIN32 -void nghttp3_map_print_distance(nghttp3_map *map) { - uint32_t i; +void nghttp3_map_print_distance(const nghttp3_map *map) { + size_t i; size_t idx; nghttp3_map_bucket *bkt; + size_t tablelen; - for (i = 0; i < map->tablelen; ++i) { + if (map->size == 0) { + return; + } + + tablelen = 1u << map->hashbits; + + for (i = 0; i < tablelen; ++i) { bkt = &map->table[i]; if (bkt->data == NULL) { - fprintf(stderr, "@%u \n", i); + fprintf(stderr, "@%zu \n", i); continue; } - idx = h2idx(bkt->hash, map->tablelenbits); - fprintf(stderr, "@%u hash=%08x key=%" PRIu64 " base=%zu distance=%zu\n", i, - bkt->hash, bkt->key, idx, - distance(map->tablelen, map->tablelenbits, bkt, idx)); + idx = hash(bkt->key, map->hashbits); + fprintf(stderr, "@%zu hash=%zu key=%" PRIu64 " base=%zu distance=%u\n", i, + hash(bkt->key, map->hashbits), bkt->key, idx, bkt->psl); } } -#endif /* !WIN32 */ +#endif /* !defined(WIN32) */ -static int insert(nghttp3_map_bucket *table, uint32_t tablelen, - uint32_t tablelenbits, uint32_t hash, +static int insert(nghttp3_map_bucket *table, size_t hashbits, nghttp3_map_key_type key, void *data) { - size_t idx = h2idx(hash, tablelenbits); - size_t d = 0, dd; - nghttp3_map_bucket *bkt; + size_t idx = hash(key, hashbits); + nghttp3_map_bucket b = {0, key, data}, *bkt; + size_t mask = (1u << hashbits) - 1; for (;;) { bkt = &table[idx]; if (bkt->data == NULL) { - map_bucket_set_data(bkt, hash, key, data); + *bkt = b; return 0; } - dd = distance(tablelen, tablelenbits, bkt, idx); - if (d > dd) { - map_bucket_swap(bkt, &hash, &key, &data); - d = dd; + if (b.psl > bkt->psl) { + map_bucket_swap(bkt, &b); } else if (bkt->key == key) { /* TODO This check is just a waste after first swap or if this function is called from map_resize. That said, there is no @@ -176,41 +141,42 @@ static int insert(nghttp3_map_bucket *table, uint32_t tablelen, return NGHTTP3_ERR_INVALID_ARGUMENT; } - ++d; - idx = (idx + 1) & (tablelen - 1); + ++b.psl; + idx = (idx + 1) & mask; } } -/* new_tablelen must be power of 2 and new_tablelen == (1 << - new_tablelenbits) must hold. */ -static int map_resize(nghttp3_map *map, uint32_t new_tablelen, - uint32_t new_tablelenbits) { - uint32_t i; +static int map_resize(nghttp3_map *map, size_t new_hashbits) { + size_t i; nghttp3_map_bucket *new_table; nghttp3_map_bucket *bkt; + size_t tablelen; int rv; (void)rv; - new_table = - nghttp3_mem_calloc(map->mem, new_tablelen, sizeof(nghttp3_map_bucket)); + new_table = nghttp3_mem_calloc(map->mem, 1u << new_hashbits, + sizeof(nghttp3_map_bucket)); if (new_table == NULL) { return NGHTTP3_ERR_NOMEM; } - for (i = 0; i < map->tablelen; ++i) { - bkt = &map->table[i]; - if (bkt->data == NULL) { - continue; - } - rv = insert(new_table, new_tablelen, new_tablelenbits, bkt->hash, bkt->key, - bkt->data); + if (map->size) { + tablelen = 1u << map->hashbits; - assert(0 == rv); + for (i = 0; i < tablelen; ++i) { + bkt = &map->table[i]; + if (bkt->data == NULL) { + continue; + } + + rv = insert(new_table, new_hashbits, bkt->key, bkt->data); + + assert(0 == rv); + } } nghttp3_mem_free(map->mem, map->table); - map->tablelen = new_tablelen; - map->tablelenbits = new_tablelenbits; + map->hashbits = new_hashbits; map->table = new_table; return 0; @@ -222,48 +188,49 @@ int nghttp3_map_insert(nghttp3_map *map, nghttp3_map_key_type key, void *data) { assert(data); /* Load factor is 0.75 */ - if ((map->size + 1) * 4 > map->tablelen * 3) { - if (map->tablelen) { - rv = map_resize(map, map->tablelen * 2, map->tablelenbits + 1); + /* Under the very initial condition, that is map->size == 0 and + map->hashbits == 0, 4 > 3 still holds nicely. */ + if ((map->size + 1) * 4 > (1u << map->hashbits) * 3) { + if (map->hashbits) { + rv = map_resize(map, map->hashbits + 1); if (rv != 0) { return rv; } } else { - rv = map_resize(map, 1 << NGHTTP3_INITIAL_TABLE_LENBITS, - NGHTTP3_INITIAL_TABLE_LENBITS); + rv = map_resize(map, NGHTTP3_INITIAL_TABLE_LENBITS); if (rv != 0) { return rv; } } } - rv = insert(map->table, map->tablelen, map->tablelenbits, hash(key), key, - data); + rv = insert(map->table, map->hashbits, key, data); if (rv != 0) { return rv; } + ++map->size; + return 0; } -void *nghttp3_map_find(nghttp3_map *map, nghttp3_map_key_type key) { - uint32_t h; +void *nghttp3_map_find(const nghttp3_map *map, nghttp3_map_key_type key) { size_t idx; nghttp3_map_bucket *bkt; - size_t d = 0; + size_t psl = 0; + size_t mask; if (map->size == 0) { return NULL; } - h = hash(key); - idx = h2idx(h, map->tablelenbits); + idx = hash(key, map->hashbits); + mask = (1u << map->hashbits) - 1; for (;;) { bkt = &map->table[idx]; - if (bkt->data == NULL || - d > distance(map->tablelen, map->tablelenbits, bkt, idx)) { + if (bkt->data == NULL || psl > bkt->psl) { return NULL; } @@ -271,50 +238,47 @@ void *nghttp3_map_find(nghttp3_map *map, nghttp3_map_key_type key) { return bkt->data; } - ++d; - idx = (idx + 1) & (map->tablelen - 1); + ++psl; + idx = (idx + 1) & mask; } } int nghttp3_map_remove(nghttp3_map *map, nghttp3_map_key_type key) { - uint32_t h; - size_t idx, didx; - nghttp3_map_bucket *bkt; - size_t d = 0; + size_t idx; + nghttp3_map_bucket *b, *bkt; + size_t psl = 0; + size_t mask; if (map->size == 0) { return NGHTTP3_ERR_INVALID_ARGUMENT; } - h = hash(key); - idx = h2idx(h, map->tablelenbits); + idx = hash(key, map->hashbits); + mask = (1u << map->hashbits) - 1; for (;;) { bkt = &map->table[idx]; - if (bkt->data == NULL || - d > distance(map->tablelen, map->tablelenbits, bkt, idx)) { + if (bkt->data == NULL || psl > bkt->psl) { return NGHTTP3_ERR_INVALID_ARGUMENT; } if (bkt->key == key) { - map_bucket_set_data(bkt, 0, 0, NULL); - - didx = idx; - idx = (idx + 1) & (map->tablelen - 1); + b = bkt; + idx = (idx + 1) & mask; for (;;) { bkt = &map->table[idx]; - if (bkt->data == NULL || - distance(map->tablelen, map->tablelenbits, bkt, idx) == 0) { + if (bkt->data == NULL || bkt->psl == 0) { + b->data = NULL; break; } - map->table[didx] = *bkt; - map_bucket_set_data(bkt, 0, 0, NULL); - didx = idx; + --bkt->psl; + *b = *bkt; + b = bkt; - idx = (idx + 1) & (map->tablelen - 1); + idx = (idx + 1) & mask; } --map->size; @@ -322,18 +286,18 @@ int nghttp3_map_remove(nghttp3_map *map, nghttp3_map_key_type key) { return 0; } - ++d; - idx = (idx + 1) & (map->tablelen - 1); + ++psl; + idx = (idx + 1) & mask; } } void nghttp3_map_clear(nghttp3_map *map) { - if (map->tablelen == 0) { + if (map->size == 0) { return; } - memset(map->table, 0, sizeof(*map->table) * map->tablelen); + memset(map->table, 0, sizeof(*map->table) * (1u << map->hashbits)); map->size = 0; } -size_t nghttp3_map_size(nghttp3_map *map) { return map->size; } +size_t nghttp3_map_size(const nghttp3_map *map) { return map->size; } diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_map.h b/deps/ngtcp2/nghttp3/lib/nghttp3_map.h index 7683cfeef3f33e..2b1a6ecab5cf4c 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_map.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_map.h @@ -29,7 +29,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -40,7 +40,7 @@ typedef uint64_t nghttp3_map_key_type; typedef struct nghttp3_map_bucket { - uint32_t hash; + uint32_t psl; nghttp3_map_key_type key; void *data; } nghttp3_map_bucket; @@ -49,33 +49,24 @@ typedef struct nghttp3_map { nghttp3_map_bucket *table; const nghttp3_mem *mem; size_t size; - uint32_t tablelen; - uint32_t tablelenbits; + size_t hashbits; } nghttp3_map; /* - * Initializes the map |map|. + * nghttp3_map_init initializes the map |map|. */ void nghttp3_map_init(nghttp3_map *map, const nghttp3_mem *mem); /* - * Deallocates any resources allocated for |map|. The stored entries - * are not freed by this function. Use nghttp3_map_each_free() to free - * each entries. + * nghttp3_map_free deallocates any resources allocated for |map|. + * The stored entries are not freed by this function. Use + * nghttp3_map_each() to free each entry. */ void nghttp3_map_free(nghttp3_map *map); /* - * Deallocates each entries using |func| function and any resources - * allocated for |map|. The |func| function is responsible for freeing - * given the |data| object. The |ptr| will be passed to the |func| as - * send argument. The return value of the |func| will be ignored. - */ -void nghttp3_map_each_free(nghttp3_map *map, int (*func)(void *data, void *ptr), - void *ptr); - -/* - * Inserts the new |data| with the |key| to the map |map|. + * nghttp3_map_insert inserts the new |data| with the |key| to the map + * |map|. * * This function returns 0 if it succeeds, or one of the following * negative error codes: @@ -83,57 +74,56 @@ void nghttp3_map_each_free(nghttp3_map *map, int (*func)(void *data, void *ptr), * NGHTTP3_ERR_INVALID_ARGUMENT * The item associated by |key| already exists. * NGHTTP3_ERR_NOMEM - * Out of memory + * Out of memory */ int nghttp3_map_insert(nghttp3_map *map, nghttp3_map_key_type key, void *data); /* - * Returns the data associated by the key |key|. If there is no such - * data, this function returns NULL. + * nghttp3_map_find returns the entry associated by the key |key|. If + * there is no such entry, this function returns NULL. */ -void *nghttp3_map_find(nghttp3_map *map, nghttp3_map_key_type key); +void *nghttp3_map_find(const nghttp3_map *map, nghttp3_map_key_type key); /* - * Removes the data associated by the key |key| from the |map|. The - * removed data is not freed by this function. + * nghttp3_map_remove removes the entry associated by the key |key| + * from the |map|. The removed entry is not freed by this function. * * This function returns 0 if it succeeds, or one of the following * negative error codes: * * NGHTTP3_ERR_INVALID_ARGUMENT - * The data associated by |key| does not exist. + * The entry associated by |key| does not exist. */ int nghttp3_map_remove(nghttp3_map *map, nghttp3_map_key_type key); /* - * Removes all entries from |map|. + * nghttp3_map_clear removes all entries from |map|. The removed + * entry is not freed by this function. */ void nghttp3_map_clear(nghttp3_map *map); /* - * Returns the number of items stored in the map |map|. + * nghttp3_map_size returns the number of items stored in the map + * |map|. */ -size_t nghttp3_map_size(nghttp3_map *map); +size_t nghttp3_map_size(const nghttp3_map *map); /* - * Applies the function |func| to each data in the |map| with the - * optional user supplied pointer |ptr|. + * nghttp3_map_each applies the function |func| to each entry in the + * |map| with the optional user supplied pointer |ptr|. * * If the |func| returns 0, this function calls the |func| with the - * next data. If the |func| returns nonzero, it will not call the + * next entry. If the |func| returns nonzero, it will not call the * |func| for further entries and return the return value of the * |func| immediately. Thus, this function returns 0 if all the * invocations of the |func| return 0, or nonzero value which the last * invocation of |func| returns. - * - * Don't use this function to free each data. Use - * nghttp3_map_each_free() instead. */ -int nghttp3_map_each(nghttp3_map *map, int (*func)(void *data, void *ptr), +int nghttp3_map_each(const nghttp3_map *map, int (*func)(void *data, void *ptr), void *ptr); #ifndef WIN32 -void nghttp3_map_print_distance(nghttp3_map *map); -#endif /* !WIN32 */ +void nghttp3_map_print_distance(const nghttp3_map *map); +#endif /* !defined(WIN32) */ -#endif /* NGHTTP3_MAP_H */ +#endif /* !defined(NGHTTP3_MAP_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_mem.c b/deps/ngtcp2/nghttp3/lib/nghttp3_mem.c index 0379e99b59cb74..687872f9020e78 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_mem.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_mem.c @@ -73,7 +73,7 @@ void *nghttp3_mem_calloc(const nghttp3_mem *mem, size_t nmemb, size_t size) { void *nghttp3_mem_realloc(const nghttp3_mem *mem, void *ptr, size_t size) { return mem->realloc(ptr, size, mem->user_data); } -#else /* MEMDEBUG */ +#else /* defined(MEMDEBUG) */ void *nghttp3_mem_malloc_debug(const nghttp3_mem *mem, size_t size, const char *func, const char *file, size_t line) { @@ -121,4 +121,4 @@ void *nghttp3_mem_realloc_debug(const nghttp3_mem *mem, void *ptr, size_t size, return nptr; } -#endif /* MEMDEBUG */ +#endif /* defined(MEMDEBUG) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_mem.h b/deps/ngtcp2/nghttp3/lib/nghttp3_mem.h index d6c3ada6f7e894..1ae53c91575f9f 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_mem.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_mem.h @@ -29,7 +29,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -40,7 +40,7 @@ void *nghttp3_mem_malloc(const nghttp3_mem *mem, size_t size); void nghttp3_mem_free(const nghttp3_mem *mem, void *ptr); void *nghttp3_mem_calloc(const nghttp3_mem *mem, size_t nmemb, size_t size); void *nghttp3_mem_realloc(const nghttp3_mem *mem, void *ptr, size_t size); -#else /* MEMDEBUG */ +#else /* defined(MEMDEBUG) */ void *nghttp3_mem_malloc_debug(const nghttp3_mem *mem, size_t size, const char *func, const char *file, size_t line); @@ -75,6 +75,6 @@ void *nghttp3_mem_realloc_debug(const nghttp3_mem *mem, void *ptr, size_t size, # define nghttp3_mem_realloc(MEM, PTR, SIZE) \ nghttp3_mem_realloc_debug((MEM), (PTR), (SIZE), __func__, __FILE__, \ __LINE__) -#endif /* MEMDEBUG */ +#endif /* defined(MEMDEBUG) */ -#endif /* NGHTTP3_MEM_H */ +#endif /* !defined(NGHTTP3_MEM_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_objalloc.h b/deps/ngtcp2/nghttp3/lib/nghttp3_objalloc.h index 02dff285f24060..4f8ffa093751a7 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_objalloc.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_objalloc.h @@ -28,7 +28,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -68,9 +68,9 @@ void nghttp3_objalloc_clear(nghttp3_objalloc *objalloc); #ifndef NOMEMPOOL # define nghttp3_objalloc_decl(NAME, TYPE, OPLENTFIELD) \ inline static void nghttp3_objalloc_##NAME##_init( \ - nghttp3_objalloc *objalloc, size_t nmemb, const nghttp3_mem *mem) { \ + nghttp3_objalloc *objalloc, size_t nmemb, const nghttp3_mem *mem) { \ nghttp3_objalloc_init( \ - objalloc, ((sizeof(TYPE) + 0xfu) & ~(uintptr_t)0xfu) * nmemb, mem); \ + objalloc, ((sizeof(TYPE) + 0xfu) & ~(uintptr_t)0xfu) * nmemb, mem); \ } \ \ TYPE *nghttp3_objalloc_##NAME##_get(nghttp3_objalloc *objalloc); \ @@ -79,7 +79,7 @@ void nghttp3_objalloc_clear(nghttp3_objalloc *objalloc); size_t len); \ \ inline static void nghttp3_objalloc_##NAME##_release( \ - nghttp3_objalloc *objalloc, TYPE *obj) { \ + nghttp3_objalloc *objalloc, TYPE *obj) { \ nghttp3_opl_push(&objalloc->opl, &obj->OPLENTFIELD); \ } @@ -90,8 +90,8 @@ void nghttp3_objalloc_clear(nghttp3_objalloc *objalloc); int rv; \ \ if (!oplent) { \ - rv = nghttp3_balloc_get(&objalloc->balloc, (void **)&obj, \ - sizeof(TYPE)); \ + rv = \ + nghttp3_balloc_get(&objalloc->balloc, (void **)&obj, sizeof(TYPE)); \ if (rv != 0) { \ return NULL; \ } \ @@ -119,30 +119,30 @@ void nghttp3_objalloc_clear(nghttp3_objalloc *objalloc); \ return nghttp3_struct_of(oplent, TYPE, OPLENTFIELD); \ } -#else /* NOMEMPOOL */ +#else /* defined(NOMEMPOOL) */ # define nghttp3_objalloc_decl(NAME, TYPE, OPLENTFIELD) \ inline static void nghttp3_objalloc_##NAME##_init( \ - nghttp3_objalloc *objalloc, size_t nmemb, const nghttp3_mem *mem) { \ + nghttp3_objalloc *objalloc, size_t nmemb, const nghttp3_mem *mem) { \ nghttp3_objalloc_init( \ - objalloc, ((sizeof(TYPE) + 0xfu) & ~(uintptr_t)0xfu) * nmemb, mem); \ + objalloc, ((sizeof(TYPE) + 0xfu) & ~(uintptr_t)0xfu) * nmemb, mem); \ } \ \ inline static TYPE *nghttp3_objalloc_##NAME##_get( \ - nghttp3_objalloc *objalloc) { \ + nghttp3_objalloc *objalloc) { \ return nghttp3_mem_malloc(objalloc->balloc.mem, sizeof(TYPE)); \ } \ \ inline static TYPE *nghttp3_objalloc_##NAME##_len_get( \ - nghttp3_objalloc *objalloc, size_t len) { \ + nghttp3_objalloc *objalloc, size_t len) { \ return nghttp3_mem_malloc(objalloc->balloc.mem, len); \ } \ \ inline static void nghttp3_objalloc_##NAME##_release( \ - nghttp3_objalloc *objalloc, TYPE *obj) { \ + nghttp3_objalloc *objalloc, TYPE *obj) { \ nghttp3_mem_free(objalloc->balloc.mem, obj); \ } # define nghttp3_objalloc_def(NAME, TYPE, OPLENTFIELD) -#endif /* NOMEMPOOL */ +#endif /* defined(NOMEMPOOL) */ -#endif /* NGHTTP3_OBJALLOC_H */ +#endif /* !defined(NGHTTP3_OBJALLOC_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_opl.h b/deps/ngtcp2/nghttp3/lib/nghttp3_opl.h index 8c8a4f2051b25a..6609371dbfb89f 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_opl.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_opl.h @@ -28,7 +28,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -63,4 +63,4 @@ nghttp3_opl_entry *nghttp3_opl_pop(nghttp3_opl *opl); void nghttp3_opl_clear(nghttp3_opl *opl); -#endif /* NGHTTP3_OPL_H */ +#endif /* !defined(NGHTTP3_OPL_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_pq.c b/deps/ngtcp2/nghttp3/lib/nghttp3_pq.c index 5d09050ae63798..feefcd6fc717c3 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_pq.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_pq.c @@ -30,18 +30,21 @@ #include "nghttp3_macro.h" -void nghttp3_pq_init(nghttp3_pq *pq, nghttp3_less less, +void nghttp3_pq_init(nghttp3_pq *pq, nghttp3_pq_less less, const nghttp3_mem *mem) { - pq->mem = mem; - pq->capacity = 0; pq->q = NULL; + pq->mem = mem; pq->length = 0; + pq->capacity = 0; pq->less = less; } void nghttp3_pq_free(nghttp3_pq *pq) { + if (!pq) { + return; + } + nghttp3_mem_free(pq->mem, pq->q); - pq->q = NULL; } static void swap(nghttp3_pq *pq, size_t i, size_t j) { @@ -56,11 +59,13 @@ static void swap(nghttp3_pq *pq, size_t i, size_t j) { static void bubble_up(nghttp3_pq *pq, size_t index) { size_t parent; - while (index != 0) { + + while (index) { parent = (index - 1) / 2; if (!pq->less(pq->q[index], pq->q[parent])) { return; } + swap(pq, parent, index); index = parent; } @@ -71,20 +76,23 @@ int nghttp3_pq_push(nghttp3_pq *pq, nghttp3_pq_entry *item) { void *nq; size_t ncapacity; - ncapacity = nghttp3_max(4, (pq->capacity * 2)); + ncapacity = nghttp3_max_size(4, pq->capacity * 2); nq = nghttp3_mem_realloc(pq->mem, pq->q, ncapacity * sizeof(nghttp3_pq_entry *)); if (nq == NULL) { return NGHTTP3_ERR_NOMEM; } + pq->capacity = ncapacity; pq->q = nq; } + pq->q[pq->length] = item; item->index = pq->length; ++pq->length; - bubble_up(pq, pq->length - 1); + bubble_up(pq, item->index); + return 0; } @@ -95,32 +103,37 @@ nghttp3_pq_entry *nghttp3_pq_top(const nghttp3_pq *pq) { static void bubble_down(nghttp3_pq *pq, size_t index) { size_t i, j, minindex; + for (;;) { j = index * 2 + 1; minindex = index; + for (i = 0; i < 2; ++i, ++j) { if (j >= pq->length) { break; } + if (pq->less(pq->q[j], pq->q[minindex])) { minindex = j; } } + if (minindex == index) { return; } + swap(pq, index, minindex); index = minindex; } } void nghttp3_pq_pop(nghttp3_pq *pq) { - if (pq->length > 0) { - pq->q[0] = pq->q[pq->length - 1]; - pq->q[0]->index = 0; - --pq->length; - bubble_down(pq, 0); - } + assert(pq->length); + + pq->q[0] = pq->q[pq->length - 1]; + pq->q[0]->index = 0; + --pq->length; + bubble_down(pq, 0); } void nghttp3_pq_remove(nghttp3_pq *pq, nghttp3_pq_entry *item) { @@ -157,11 +170,13 @@ int nghttp3_pq_each(const nghttp3_pq *pq, nghttp3_pq_item_cb fun, void *arg) { if (pq->length == 0) { return 0; } + for (i = 0; i < pq->length; ++i) { if ((*fun)(pq->q[i], arg)) { return 1; } } + return 0; } diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_pq.h b/deps/ngtcp2/nghttp3/lib/nghttp3_pq.h index c1a54f505bbd03..3813b529473075 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_pq.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_pq.h @@ -29,7 +29,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -39,44 +39,48 @@ /* NGHTTP3_PQ_BAD_INDEX is the priority queue index which indicates that an entry is not queued. Assigning this value to - nghttp3_pq_entry.index can check that the entry is queued or not. */ + nghttp3_pq_entry.index can check that the entry is queued or + not. */ #define NGHTTP3_PQ_BAD_INDEX SIZE_MAX typedef struct nghttp3_pq_entry { size_t index; } nghttp3_pq_entry; -/* "less" function, return nonzero if |lhs| is less than |rhs|. */ -typedef int (*nghttp3_less)(const nghttp3_pq_entry *lhs, - const nghttp3_pq_entry *rhs); +/* nghttp3_pq_less is a "less" function, that returns nonzero if |lhs| + is considered to be less than |rhs|. */ +typedef int (*nghttp3_pq_less)(const nghttp3_pq_entry *lhs, + const nghttp3_pq_entry *rhs); typedef struct nghttp3_pq { - /* The pointer to the pointer to the item stored */ + /* q is a pointer to an array that stores the items. */ nghttp3_pq_entry **q; - /* Memory allocator */ + /* mem is a memory allocator. */ const nghttp3_mem *mem; - /* The number of items stored */ + /* length is the number of items stored. */ size_t length; - /* The maximum number of items this pq can store. This is - automatically extended when length is reached to this value. */ + /* capacity is the maximum number of items this queue can store. + This is automatically extended when length is reached to this + limit. */ size_t capacity; - /* The less function between items */ - nghttp3_less less; + /* less is the less function to compare items. */ + nghttp3_pq_less less; } nghttp3_pq; /* - * Initializes priority queue |pq| with compare function |cmp|. + * nghttp3_pq_init initializes |pq| with compare function |cmp|. */ -void nghttp3_pq_init(nghttp3_pq *pq, nghttp3_less less, const nghttp3_mem *mem); +void nghttp3_pq_init(nghttp3_pq *pq, nghttp3_pq_less less, + const nghttp3_mem *mem); /* - * Deallocates any resources allocated for |pq|. The stored items are - * not freed by this function. + * nghttp3_pq_free deallocates any resources allocated for |pq|. The + * stored items are not freed by this function. */ void nghttp3_pq_free(nghttp3_pq *pq); /* - * Adds |item| to the priority queue |pq|. + * nghttp3_pq_push adds |item| to |pq|. * * This function returns 0 if it succeeds, or one of the following * negative error codes: @@ -87,43 +91,47 @@ void nghttp3_pq_free(nghttp3_pq *pq); int nghttp3_pq_push(nghttp3_pq *pq, nghttp3_pq_entry *item); /* - * Returns item at the top of the queue |pq|. It is undefined if the - * queue is empty. + * nghttp3_pq_top returns item at the top of |pq|. It is undefined if + * |pq| is empty. */ nghttp3_pq_entry *nghttp3_pq_top(const nghttp3_pq *pq); /* - * Pops item at the top of the queue |pq|. The popped item is not - * freed by this function. + * nghttp3_pq_pop pops item at the top of |pq|. The popped item is + * not freed by this function. It is undefined if |pq| is empty. */ void nghttp3_pq_pop(nghttp3_pq *pq); /* - * Returns nonzero if the queue |pq| is empty. + * nghttp3_pq_empty returns nonzero if |pq| is empty. */ int nghttp3_pq_empty(const nghttp3_pq *pq); /* - * Returns the number of items in the queue |pq|. + * nghttp3_pq_size returns the number of items |pq| contains. */ size_t nghttp3_pq_size(const nghttp3_pq *pq); typedef int (*nghttp3_pq_item_cb)(nghttp3_pq_entry *item, void *arg); /* - * Applies |fun| to each item in |pq|. The |arg| is passed as arg - * parameter to callback function. This function must not change the - * ordering key. If the return value from callback is nonzero, this - * function returns 1 immediately without iterating remaining items. - * Otherwise this function returns 0. + * nghttp3_pq_each applies |fun| to each item in |pq|. The |arg| is + * passed as arg parameter to callback function. This function must + * not change the ordering key. If the return value from callback is + * nonzero, this function returns 1 immediately without iterating + * remaining items. Otherwise this function returns 0. */ int nghttp3_pq_each(const nghttp3_pq *pq, nghttp3_pq_item_cb fun, void *arg); /* - * Removes |item| from priority queue. + * nghttp3_pq_remove removes |item| from |pq|. |pq| must contain + * |item| otherwise the behavior is undefined. */ void nghttp3_pq_remove(nghttp3_pq *pq, nghttp3_pq_entry *item); +/* + * nghttp3_pq_clear removes all items from |pq|. + */ void nghttp3_pq_clear(nghttp3_pq *pq); -#endif /* NGHTTP3_PQ_H */ +#endif /* !defined(NGHTTP3_PQ_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_qpack.c b/deps/ngtcp2/nghttp3/lib/nghttp3_qpack.c index 428c06a82c6bfb..a1c7e7487c1a34 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_qpack.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_qpack.c @@ -41,287 +41,282 @@ #define NGHTTP3_QPACK_MAX_QPACK_STREAMS 2000 /* Make scalar initialization form of nghttp3_qpack_static_entry */ -#define MAKE_STATIC_ENT(I, T, H) \ - { I, T, H } +#define MAKE_STATIC_ENT(I, T, H) {I, T, H} /* Generated by mkstatichdtbl.py */ static nghttp3_qpack_static_entry token_stable[] = { - MAKE_STATIC_ENT(0, NGHTTP3_QPACK_TOKEN__AUTHORITY, 3153725150u), - MAKE_STATIC_ENT(15, NGHTTP3_QPACK_TOKEN__METHOD, 695666056u), - MAKE_STATIC_ENT(16, NGHTTP3_QPACK_TOKEN__METHOD, 695666056u), - MAKE_STATIC_ENT(17, NGHTTP3_QPACK_TOKEN__METHOD, 695666056u), - MAKE_STATIC_ENT(18, NGHTTP3_QPACK_TOKEN__METHOD, 695666056u), - MAKE_STATIC_ENT(19, NGHTTP3_QPACK_TOKEN__METHOD, 695666056u), - MAKE_STATIC_ENT(20, NGHTTP3_QPACK_TOKEN__METHOD, 695666056u), - MAKE_STATIC_ENT(21, NGHTTP3_QPACK_TOKEN__METHOD, 695666056u), - MAKE_STATIC_ENT(1, NGHTTP3_QPACK_TOKEN__PATH, 3292848686u), - MAKE_STATIC_ENT(22, NGHTTP3_QPACK_TOKEN__SCHEME, 2510477674u), - MAKE_STATIC_ENT(23, NGHTTP3_QPACK_TOKEN__SCHEME, 2510477674u), - MAKE_STATIC_ENT(24, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), - MAKE_STATIC_ENT(25, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), - MAKE_STATIC_ENT(26, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), - MAKE_STATIC_ENT(27, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), - MAKE_STATIC_ENT(28, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), - MAKE_STATIC_ENT(63, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), - MAKE_STATIC_ENT(64, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), - MAKE_STATIC_ENT(65, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), - MAKE_STATIC_ENT(66, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), - MAKE_STATIC_ENT(67, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), - MAKE_STATIC_ENT(68, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), - MAKE_STATIC_ENT(69, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), - MAKE_STATIC_ENT(70, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), - MAKE_STATIC_ENT(71, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), - MAKE_STATIC_ENT(29, NGHTTP3_QPACK_TOKEN_ACCEPT, 136609321u), - MAKE_STATIC_ENT(30, NGHTTP3_QPACK_TOKEN_ACCEPT, 136609321u), - MAKE_STATIC_ENT(31, NGHTTP3_QPACK_TOKEN_ACCEPT_ENCODING, 3379649177u), - MAKE_STATIC_ENT(72, NGHTTP3_QPACK_TOKEN_ACCEPT_LANGUAGE, 1979086614u), - MAKE_STATIC_ENT(32, NGHTTP3_QPACK_TOKEN_ACCEPT_RANGES, 1713753958u), - MAKE_STATIC_ENT(73, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_CREDENTIALS, - 901040780u), - MAKE_STATIC_ENT(74, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_CREDENTIALS, - 901040780u), - MAKE_STATIC_ENT(33, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_HEADERS, - 1524311232u), - MAKE_STATIC_ENT(34, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_HEADERS, - 1524311232u), - MAKE_STATIC_ENT(75, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_HEADERS, - 1524311232u), - MAKE_STATIC_ENT(76, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_METHODS, - 2175229868u), - MAKE_STATIC_ENT(77, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_METHODS, - 2175229868u), - MAKE_STATIC_ENT(78, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_METHODS, - 2175229868u), - MAKE_STATIC_ENT(35, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_ORIGIN, - 2710797292u), - MAKE_STATIC_ENT(79, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_EXPOSE_HEADERS, - 2449824425u), - MAKE_STATIC_ENT(80, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_REQUEST_HEADERS, - 3599549072u), - MAKE_STATIC_ENT(81, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_REQUEST_METHOD, - 2417078055u), - MAKE_STATIC_ENT(82, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_REQUEST_METHOD, - 2417078055u), - MAKE_STATIC_ENT(2, NGHTTP3_QPACK_TOKEN_AGE, 742476188u), - MAKE_STATIC_ENT(83, NGHTTP3_QPACK_TOKEN_ALT_SVC, 2148877059u), - MAKE_STATIC_ENT(84, NGHTTP3_QPACK_TOKEN_AUTHORIZATION, 2436257726u), - MAKE_STATIC_ENT(36, NGHTTP3_QPACK_TOKEN_CACHE_CONTROL, 1355326669u), - MAKE_STATIC_ENT(37, NGHTTP3_QPACK_TOKEN_CACHE_CONTROL, 1355326669u), - MAKE_STATIC_ENT(38, NGHTTP3_QPACK_TOKEN_CACHE_CONTROL, 1355326669u), - MAKE_STATIC_ENT(39, NGHTTP3_QPACK_TOKEN_CACHE_CONTROL, 1355326669u), - MAKE_STATIC_ENT(40, NGHTTP3_QPACK_TOKEN_CACHE_CONTROL, 1355326669u), - MAKE_STATIC_ENT(41, NGHTTP3_QPACK_TOKEN_CACHE_CONTROL, 1355326669u), - MAKE_STATIC_ENT(3, NGHTTP3_QPACK_TOKEN_CONTENT_DISPOSITION, 3889184348u), - MAKE_STATIC_ENT(42, NGHTTP3_QPACK_TOKEN_CONTENT_ENCODING, 65203592u), - MAKE_STATIC_ENT(43, NGHTTP3_QPACK_TOKEN_CONTENT_ENCODING, 65203592u), - MAKE_STATIC_ENT(4, NGHTTP3_QPACK_TOKEN_CONTENT_LENGTH, 1308181789u), - MAKE_STATIC_ENT(85, NGHTTP3_QPACK_TOKEN_CONTENT_SECURITY_POLICY, - 1569039836u), - MAKE_STATIC_ENT(44, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), - MAKE_STATIC_ENT(45, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), - MAKE_STATIC_ENT(46, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), - MAKE_STATIC_ENT(47, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), - MAKE_STATIC_ENT(48, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), - MAKE_STATIC_ENT(49, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), - MAKE_STATIC_ENT(50, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), - MAKE_STATIC_ENT(51, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), - MAKE_STATIC_ENT(52, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), - MAKE_STATIC_ENT(53, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), - MAKE_STATIC_ENT(54, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), - MAKE_STATIC_ENT(5, NGHTTP3_QPACK_TOKEN_COOKIE, 2007449791u), - MAKE_STATIC_ENT(6, NGHTTP3_QPACK_TOKEN_DATE, 3564297305u), - MAKE_STATIC_ENT(86, NGHTTP3_QPACK_TOKEN_EARLY_DATA, 4080895051u), - MAKE_STATIC_ENT(7, NGHTTP3_QPACK_TOKEN_ETAG, 113792960u), - MAKE_STATIC_ENT(87, NGHTTP3_QPACK_TOKEN_EXPECT_CT, 1183214960u), - MAKE_STATIC_ENT(88, NGHTTP3_QPACK_TOKEN_FORWARDED, 1485178027u), - MAKE_STATIC_ENT(8, NGHTTP3_QPACK_TOKEN_IF_MODIFIED_SINCE, 2213050793u), - MAKE_STATIC_ENT(9, NGHTTP3_QPACK_TOKEN_IF_NONE_MATCH, 2536202615u), - MAKE_STATIC_ENT(89, NGHTTP3_QPACK_TOKEN_IF_RANGE, 2340978238u), - MAKE_STATIC_ENT(10, NGHTTP3_QPACK_TOKEN_LAST_MODIFIED, 3226950251u), - MAKE_STATIC_ENT(11, NGHTTP3_QPACK_TOKEN_LINK, 232457833u), - MAKE_STATIC_ENT(12, NGHTTP3_QPACK_TOKEN_LOCATION, 200649126u), - MAKE_STATIC_ENT(90, NGHTTP3_QPACK_TOKEN_ORIGIN, 3649018447u), - MAKE_STATIC_ENT(91, NGHTTP3_QPACK_TOKEN_PURPOSE, 4212263681u), - MAKE_STATIC_ENT(55, NGHTTP3_QPACK_TOKEN_RANGE, 4208725202u), - MAKE_STATIC_ENT(13, NGHTTP3_QPACK_TOKEN_REFERER, 3969579366u), - MAKE_STATIC_ENT(92, NGHTTP3_QPACK_TOKEN_SERVER, 1085029842u), - MAKE_STATIC_ENT(14, NGHTTP3_QPACK_TOKEN_SET_COOKIE, 1848371000u), - MAKE_STATIC_ENT(56, NGHTTP3_QPACK_TOKEN_STRICT_TRANSPORT_SECURITY, - 4138147361u), - MAKE_STATIC_ENT(57, NGHTTP3_QPACK_TOKEN_STRICT_TRANSPORT_SECURITY, - 4138147361u), - MAKE_STATIC_ENT(58, NGHTTP3_QPACK_TOKEN_STRICT_TRANSPORT_SECURITY, - 4138147361u), - MAKE_STATIC_ENT(93, NGHTTP3_QPACK_TOKEN_TIMING_ALLOW_ORIGIN, 2432297564u), - MAKE_STATIC_ENT(94, NGHTTP3_QPACK_TOKEN_UPGRADE_INSECURE_REQUESTS, - 2479169413u), - MAKE_STATIC_ENT(95, NGHTTP3_QPACK_TOKEN_USER_AGENT, 606444526u), - MAKE_STATIC_ENT(59, NGHTTP3_QPACK_TOKEN_VARY, 1085005381u), - MAKE_STATIC_ENT(60, NGHTTP3_QPACK_TOKEN_VARY, 1085005381u), - MAKE_STATIC_ENT(61, NGHTTP3_QPACK_TOKEN_X_CONTENT_TYPE_OPTIONS, - 3644557769u), - MAKE_STATIC_ENT(96, NGHTTP3_QPACK_TOKEN_X_FORWARDED_FOR, 2914187656u), - MAKE_STATIC_ENT(97, NGHTTP3_QPACK_TOKEN_X_FRAME_OPTIONS, 3993834824u), - MAKE_STATIC_ENT(98, NGHTTP3_QPACK_TOKEN_X_FRAME_OPTIONS, 3993834824u), - MAKE_STATIC_ENT(62, NGHTTP3_QPACK_TOKEN_X_XSS_PROTECTION, 2501058888u), + MAKE_STATIC_ENT(0, NGHTTP3_QPACK_TOKEN__AUTHORITY, 3153725150u), + MAKE_STATIC_ENT(15, NGHTTP3_QPACK_TOKEN__METHOD, 695666056u), + MAKE_STATIC_ENT(16, NGHTTP3_QPACK_TOKEN__METHOD, 695666056u), + MAKE_STATIC_ENT(17, NGHTTP3_QPACK_TOKEN__METHOD, 695666056u), + MAKE_STATIC_ENT(18, NGHTTP3_QPACK_TOKEN__METHOD, 695666056u), + MAKE_STATIC_ENT(19, NGHTTP3_QPACK_TOKEN__METHOD, 695666056u), + MAKE_STATIC_ENT(20, NGHTTP3_QPACK_TOKEN__METHOD, 695666056u), + MAKE_STATIC_ENT(21, NGHTTP3_QPACK_TOKEN__METHOD, 695666056u), + MAKE_STATIC_ENT(1, NGHTTP3_QPACK_TOKEN__PATH, 3292848686u), + MAKE_STATIC_ENT(22, NGHTTP3_QPACK_TOKEN__SCHEME, 2510477674u), + MAKE_STATIC_ENT(23, NGHTTP3_QPACK_TOKEN__SCHEME, 2510477674u), + MAKE_STATIC_ENT(24, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), + MAKE_STATIC_ENT(25, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), + MAKE_STATIC_ENT(26, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), + MAKE_STATIC_ENT(27, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), + MAKE_STATIC_ENT(28, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), + MAKE_STATIC_ENT(63, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), + MAKE_STATIC_ENT(64, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), + MAKE_STATIC_ENT(65, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), + MAKE_STATIC_ENT(66, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), + MAKE_STATIC_ENT(67, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), + MAKE_STATIC_ENT(68, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), + MAKE_STATIC_ENT(69, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), + MAKE_STATIC_ENT(70, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), + MAKE_STATIC_ENT(71, NGHTTP3_QPACK_TOKEN__STATUS, 4000288983u), + MAKE_STATIC_ENT(29, NGHTTP3_QPACK_TOKEN_ACCEPT, 136609321u), + MAKE_STATIC_ENT(30, NGHTTP3_QPACK_TOKEN_ACCEPT, 136609321u), + MAKE_STATIC_ENT(31, NGHTTP3_QPACK_TOKEN_ACCEPT_ENCODING, 3379649177u), + MAKE_STATIC_ENT(72, NGHTTP3_QPACK_TOKEN_ACCEPT_LANGUAGE, 1979086614u), + MAKE_STATIC_ENT(32, NGHTTP3_QPACK_TOKEN_ACCEPT_RANGES, 1713753958u), + MAKE_STATIC_ENT(73, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_CREDENTIALS, + 901040780u), + MAKE_STATIC_ENT(74, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_CREDENTIALS, + 901040780u), + MAKE_STATIC_ENT(33, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_HEADERS, + 1524311232u), + MAKE_STATIC_ENT(34, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_HEADERS, + 1524311232u), + MAKE_STATIC_ENT(75, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_HEADERS, + 1524311232u), + MAKE_STATIC_ENT(76, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_METHODS, + 2175229868u), + MAKE_STATIC_ENT(77, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_METHODS, + 2175229868u), + MAKE_STATIC_ENT(78, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_METHODS, + 2175229868u), + MAKE_STATIC_ENT(35, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_ORIGIN, + 2710797292u), + MAKE_STATIC_ENT(79, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_EXPOSE_HEADERS, + 2449824425u), + MAKE_STATIC_ENT(80, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_REQUEST_HEADERS, + 3599549072u), + MAKE_STATIC_ENT(81, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_REQUEST_METHOD, + 2417078055u), + MAKE_STATIC_ENT(82, NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_REQUEST_METHOD, + 2417078055u), + MAKE_STATIC_ENT(2, NGHTTP3_QPACK_TOKEN_AGE, 742476188u), + MAKE_STATIC_ENT(83, NGHTTP3_QPACK_TOKEN_ALT_SVC, 2148877059u), + MAKE_STATIC_ENT(84, NGHTTP3_QPACK_TOKEN_AUTHORIZATION, 2436257726u), + MAKE_STATIC_ENT(36, NGHTTP3_QPACK_TOKEN_CACHE_CONTROL, 1355326669u), + MAKE_STATIC_ENT(37, NGHTTP3_QPACK_TOKEN_CACHE_CONTROL, 1355326669u), + MAKE_STATIC_ENT(38, NGHTTP3_QPACK_TOKEN_CACHE_CONTROL, 1355326669u), + MAKE_STATIC_ENT(39, NGHTTP3_QPACK_TOKEN_CACHE_CONTROL, 1355326669u), + MAKE_STATIC_ENT(40, NGHTTP3_QPACK_TOKEN_CACHE_CONTROL, 1355326669u), + MAKE_STATIC_ENT(41, NGHTTP3_QPACK_TOKEN_CACHE_CONTROL, 1355326669u), + MAKE_STATIC_ENT(3, NGHTTP3_QPACK_TOKEN_CONTENT_DISPOSITION, 3889184348u), + MAKE_STATIC_ENT(42, NGHTTP3_QPACK_TOKEN_CONTENT_ENCODING, 65203592u), + MAKE_STATIC_ENT(43, NGHTTP3_QPACK_TOKEN_CONTENT_ENCODING, 65203592u), + MAKE_STATIC_ENT(4, NGHTTP3_QPACK_TOKEN_CONTENT_LENGTH, 1308181789u), + MAKE_STATIC_ENT(85, NGHTTP3_QPACK_TOKEN_CONTENT_SECURITY_POLICY, 1569039836u), + MAKE_STATIC_ENT(44, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), + MAKE_STATIC_ENT(45, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), + MAKE_STATIC_ENT(46, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), + MAKE_STATIC_ENT(47, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), + MAKE_STATIC_ENT(48, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), + MAKE_STATIC_ENT(49, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), + MAKE_STATIC_ENT(50, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), + MAKE_STATIC_ENT(51, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), + MAKE_STATIC_ENT(52, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), + MAKE_STATIC_ENT(53, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), + MAKE_STATIC_ENT(54, NGHTTP3_QPACK_TOKEN_CONTENT_TYPE, 4244048277u), + MAKE_STATIC_ENT(5, NGHTTP3_QPACK_TOKEN_COOKIE, 2007449791u), + MAKE_STATIC_ENT(6, NGHTTP3_QPACK_TOKEN_DATE, 3564297305u), + MAKE_STATIC_ENT(86, NGHTTP3_QPACK_TOKEN_EARLY_DATA, 4080895051u), + MAKE_STATIC_ENT(7, NGHTTP3_QPACK_TOKEN_ETAG, 113792960u), + MAKE_STATIC_ENT(87, NGHTTP3_QPACK_TOKEN_EXPECT_CT, 1183214960u), + MAKE_STATIC_ENT(88, NGHTTP3_QPACK_TOKEN_FORWARDED, 1485178027u), + MAKE_STATIC_ENT(8, NGHTTP3_QPACK_TOKEN_IF_MODIFIED_SINCE, 2213050793u), + MAKE_STATIC_ENT(9, NGHTTP3_QPACK_TOKEN_IF_NONE_MATCH, 2536202615u), + MAKE_STATIC_ENT(89, NGHTTP3_QPACK_TOKEN_IF_RANGE, 2340978238u), + MAKE_STATIC_ENT(10, NGHTTP3_QPACK_TOKEN_LAST_MODIFIED, 3226950251u), + MAKE_STATIC_ENT(11, NGHTTP3_QPACK_TOKEN_LINK, 232457833u), + MAKE_STATIC_ENT(12, NGHTTP3_QPACK_TOKEN_LOCATION, 200649126u), + MAKE_STATIC_ENT(90, NGHTTP3_QPACK_TOKEN_ORIGIN, 3649018447u), + MAKE_STATIC_ENT(91, NGHTTP3_QPACK_TOKEN_PURPOSE, 4212263681u), + MAKE_STATIC_ENT(55, NGHTTP3_QPACK_TOKEN_RANGE, 4208725202u), + MAKE_STATIC_ENT(13, NGHTTP3_QPACK_TOKEN_REFERER, 3969579366u), + MAKE_STATIC_ENT(92, NGHTTP3_QPACK_TOKEN_SERVER, 1085029842u), + MAKE_STATIC_ENT(14, NGHTTP3_QPACK_TOKEN_SET_COOKIE, 1848371000u), + MAKE_STATIC_ENT(56, NGHTTP3_QPACK_TOKEN_STRICT_TRANSPORT_SECURITY, + 4138147361u), + MAKE_STATIC_ENT(57, NGHTTP3_QPACK_TOKEN_STRICT_TRANSPORT_SECURITY, + 4138147361u), + MAKE_STATIC_ENT(58, NGHTTP3_QPACK_TOKEN_STRICT_TRANSPORT_SECURITY, + 4138147361u), + MAKE_STATIC_ENT(93, NGHTTP3_QPACK_TOKEN_TIMING_ALLOW_ORIGIN, 2432297564u), + MAKE_STATIC_ENT(94, NGHTTP3_QPACK_TOKEN_UPGRADE_INSECURE_REQUESTS, + 2479169413u), + MAKE_STATIC_ENT(95, NGHTTP3_QPACK_TOKEN_USER_AGENT, 606444526u), + MAKE_STATIC_ENT(59, NGHTTP3_QPACK_TOKEN_VARY, 1085005381u), + MAKE_STATIC_ENT(60, NGHTTP3_QPACK_TOKEN_VARY, 1085005381u), + MAKE_STATIC_ENT(61, NGHTTP3_QPACK_TOKEN_X_CONTENT_TYPE_OPTIONS, 3644557769u), + MAKE_STATIC_ENT(96, NGHTTP3_QPACK_TOKEN_X_FORWARDED_FOR, 2914187656u), + MAKE_STATIC_ENT(97, NGHTTP3_QPACK_TOKEN_X_FRAME_OPTIONS, 3993834824u), + MAKE_STATIC_ENT(98, NGHTTP3_QPACK_TOKEN_X_FRAME_OPTIONS, 3993834824u), + MAKE_STATIC_ENT(62, NGHTTP3_QPACK_TOKEN_X_XSS_PROTECTION, 2501058888u), }; /* Make scalar initialization form of nghttp3_qpack_static_entry */ #define MAKE_STATIC_HD(N, V, T) \ { \ {NULL, (uint8_t *)(N), sizeof((N)) - 1, -1}, \ - {NULL, (uint8_t *)(V), sizeof((V)) - 1, -1}, T \ + {NULL, (uint8_t *)(V), sizeof((V)) - 1, -1}, \ + T, \ } static nghttp3_qpack_static_header stable[] = { - MAKE_STATIC_HD(":authority", "", NGHTTP3_QPACK_TOKEN__AUTHORITY), - MAKE_STATIC_HD(":path", "/", NGHTTP3_QPACK_TOKEN__PATH), - MAKE_STATIC_HD("age", "0", NGHTTP3_QPACK_TOKEN_AGE), - MAKE_STATIC_HD("content-disposition", "", - NGHTTP3_QPACK_TOKEN_CONTENT_DISPOSITION), - MAKE_STATIC_HD("content-length", "0", NGHTTP3_QPACK_TOKEN_CONTENT_LENGTH), - MAKE_STATIC_HD("cookie", "", NGHTTP3_QPACK_TOKEN_COOKIE), - MAKE_STATIC_HD("date", "", NGHTTP3_QPACK_TOKEN_DATE), - MAKE_STATIC_HD("etag", "", NGHTTP3_QPACK_TOKEN_ETAG), - MAKE_STATIC_HD("if-modified-since", "", - NGHTTP3_QPACK_TOKEN_IF_MODIFIED_SINCE), - MAKE_STATIC_HD("if-none-match", "", NGHTTP3_QPACK_TOKEN_IF_NONE_MATCH), - MAKE_STATIC_HD("last-modified", "", NGHTTP3_QPACK_TOKEN_LAST_MODIFIED), - MAKE_STATIC_HD("link", "", NGHTTP3_QPACK_TOKEN_LINK), - MAKE_STATIC_HD("location", "", NGHTTP3_QPACK_TOKEN_LOCATION), - MAKE_STATIC_HD("referer", "", NGHTTP3_QPACK_TOKEN_REFERER), - MAKE_STATIC_HD("set-cookie", "", NGHTTP3_QPACK_TOKEN_SET_COOKIE), - MAKE_STATIC_HD(":method", "CONNECT", NGHTTP3_QPACK_TOKEN__METHOD), - MAKE_STATIC_HD(":method", "DELETE", NGHTTP3_QPACK_TOKEN__METHOD), - MAKE_STATIC_HD(":method", "GET", NGHTTP3_QPACK_TOKEN__METHOD), - MAKE_STATIC_HD(":method", "HEAD", NGHTTP3_QPACK_TOKEN__METHOD), - MAKE_STATIC_HD(":method", "OPTIONS", NGHTTP3_QPACK_TOKEN__METHOD), - MAKE_STATIC_HD(":method", "POST", NGHTTP3_QPACK_TOKEN__METHOD), - MAKE_STATIC_HD(":method", "PUT", NGHTTP3_QPACK_TOKEN__METHOD), - MAKE_STATIC_HD(":scheme", "http", NGHTTP3_QPACK_TOKEN__SCHEME), - MAKE_STATIC_HD(":scheme", "https", NGHTTP3_QPACK_TOKEN__SCHEME), - MAKE_STATIC_HD(":status", "103", NGHTTP3_QPACK_TOKEN__STATUS), - MAKE_STATIC_HD(":status", "200", NGHTTP3_QPACK_TOKEN__STATUS), - MAKE_STATIC_HD(":status", "304", NGHTTP3_QPACK_TOKEN__STATUS), - MAKE_STATIC_HD(":status", "404", NGHTTP3_QPACK_TOKEN__STATUS), - MAKE_STATIC_HD(":status", "503", NGHTTP3_QPACK_TOKEN__STATUS), - MAKE_STATIC_HD("accept", "*/*", NGHTTP3_QPACK_TOKEN_ACCEPT), - MAKE_STATIC_HD("accept", "application/dns-message", - NGHTTP3_QPACK_TOKEN_ACCEPT), - MAKE_STATIC_HD("accept-encoding", "gzip, deflate, br", - NGHTTP3_QPACK_TOKEN_ACCEPT_ENCODING), - MAKE_STATIC_HD("accept-ranges", "bytes", NGHTTP3_QPACK_TOKEN_ACCEPT_RANGES), - MAKE_STATIC_HD("access-control-allow-headers", "cache-control", - NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_HEADERS), - MAKE_STATIC_HD("access-control-allow-headers", "content-type", - NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_HEADERS), - MAKE_STATIC_HD("access-control-allow-origin", "*", - NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_ORIGIN), - MAKE_STATIC_HD("cache-control", "max-age=0", - NGHTTP3_QPACK_TOKEN_CACHE_CONTROL), - MAKE_STATIC_HD("cache-control", "max-age=2592000", - NGHTTP3_QPACK_TOKEN_CACHE_CONTROL), - MAKE_STATIC_HD("cache-control", "max-age=604800", - NGHTTP3_QPACK_TOKEN_CACHE_CONTROL), - MAKE_STATIC_HD("cache-control", "no-cache", - NGHTTP3_QPACK_TOKEN_CACHE_CONTROL), - MAKE_STATIC_HD("cache-control", "no-store", - NGHTTP3_QPACK_TOKEN_CACHE_CONTROL), - MAKE_STATIC_HD("cache-control", "public, max-age=31536000", - NGHTTP3_QPACK_TOKEN_CACHE_CONTROL), - MAKE_STATIC_HD("content-encoding", "br", - NGHTTP3_QPACK_TOKEN_CONTENT_ENCODING), - MAKE_STATIC_HD("content-encoding", "gzip", - NGHTTP3_QPACK_TOKEN_CONTENT_ENCODING), - MAKE_STATIC_HD("content-type", "application/dns-message", - NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), - MAKE_STATIC_HD("content-type", "application/javascript", - NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), - MAKE_STATIC_HD("content-type", "application/json", - NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), - MAKE_STATIC_HD("content-type", "application/x-www-form-urlencoded", - NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), - MAKE_STATIC_HD("content-type", "image/gif", - NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), - MAKE_STATIC_HD("content-type", "image/jpeg", - NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), - MAKE_STATIC_HD("content-type", "image/png", - NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), - MAKE_STATIC_HD("content-type", "text/css", - NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), - MAKE_STATIC_HD("content-type", "text/html; charset=utf-8", - NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), - MAKE_STATIC_HD("content-type", "text/plain", - NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), - MAKE_STATIC_HD("content-type", "text/plain;charset=utf-8", - NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), - MAKE_STATIC_HD("range", "bytes=0-", NGHTTP3_QPACK_TOKEN_RANGE), - MAKE_STATIC_HD("strict-transport-security", "max-age=31536000", - NGHTTP3_QPACK_TOKEN_STRICT_TRANSPORT_SECURITY), - MAKE_STATIC_HD("strict-transport-security", - "max-age=31536000; includesubdomains", - NGHTTP3_QPACK_TOKEN_STRICT_TRANSPORT_SECURITY), - MAKE_STATIC_HD("strict-transport-security", - "max-age=31536000; includesubdomains; preload", - NGHTTP3_QPACK_TOKEN_STRICT_TRANSPORT_SECURITY), - MAKE_STATIC_HD("vary", "accept-encoding", NGHTTP3_QPACK_TOKEN_VARY), - MAKE_STATIC_HD("vary", "origin", NGHTTP3_QPACK_TOKEN_VARY), - MAKE_STATIC_HD("x-content-type-options", "nosniff", - NGHTTP3_QPACK_TOKEN_X_CONTENT_TYPE_OPTIONS), - MAKE_STATIC_HD("x-xss-protection", "1; mode=block", - NGHTTP3_QPACK_TOKEN_X_XSS_PROTECTION), - MAKE_STATIC_HD(":status", "100", NGHTTP3_QPACK_TOKEN__STATUS), - MAKE_STATIC_HD(":status", "204", NGHTTP3_QPACK_TOKEN__STATUS), - MAKE_STATIC_HD(":status", "206", NGHTTP3_QPACK_TOKEN__STATUS), - MAKE_STATIC_HD(":status", "302", NGHTTP3_QPACK_TOKEN__STATUS), - MAKE_STATIC_HD(":status", "400", NGHTTP3_QPACK_TOKEN__STATUS), - MAKE_STATIC_HD(":status", "403", NGHTTP3_QPACK_TOKEN__STATUS), - MAKE_STATIC_HD(":status", "421", NGHTTP3_QPACK_TOKEN__STATUS), - MAKE_STATIC_HD(":status", "425", NGHTTP3_QPACK_TOKEN__STATUS), - MAKE_STATIC_HD(":status", "500", NGHTTP3_QPACK_TOKEN__STATUS), - MAKE_STATIC_HD("accept-language", "", NGHTTP3_QPACK_TOKEN_ACCEPT_LANGUAGE), - MAKE_STATIC_HD("access-control-allow-credentials", "FALSE", - NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_CREDENTIALS), - MAKE_STATIC_HD("access-control-allow-credentials", "TRUE", - NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_CREDENTIALS), - MAKE_STATIC_HD("access-control-allow-headers", "*", - NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_HEADERS), - MAKE_STATIC_HD("access-control-allow-methods", "get", - NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_METHODS), - MAKE_STATIC_HD("access-control-allow-methods", "get, post, options", - NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_METHODS), - MAKE_STATIC_HD("access-control-allow-methods", "options", - NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_METHODS), - MAKE_STATIC_HD("access-control-expose-headers", "content-length", - NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_EXPOSE_HEADERS), - MAKE_STATIC_HD("access-control-request-headers", "content-type", - NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_REQUEST_HEADERS), - MAKE_STATIC_HD("access-control-request-method", "get", - NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_REQUEST_METHOD), - MAKE_STATIC_HD("access-control-request-method", "post", - NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_REQUEST_METHOD), - MAKE_STATIC_HD("alt-svc", "clear", NGHTTP3_QPACK_TOKEN_ALT_SVC), - MAKE_STATIC_HD("authorization", "", NGHTTP3_QPACK_TOKEN_AUTHORIZATION), - MAKE_STATIC_HD("content-security-policy", - "script-src 'none'; object-src 'none'; base-uri 'none'", - NGHTTP3_QPACK_TOKEN_CONTENT_SECURITY_POLICY), - MAKE_STATIC_HD("early-data", "1", NGHTTP3_QPACK_TOKEN_EARLY_DATA), - MAKE_STATIC_HD("expect-ct", "", NGHTTP3_QPACK_TOKEN_EXPECT_CT), - MAKE_STATIC_HD("forwarded", "", NGHTTP3_QPACK_TOKEN_FORWARDED), - MAKE_STATIC_HD("if-range", "", NGHTTP3_QPACK_TOKEN_IF_RANGE), - MAKE_STATIC_HD("origin", "", NGHTTP3_QPACK_TOKEN_ORIGIN), - MAKE_STATIC_HD("purpose", "prefetch", NGHTTP3_QPACK_TOKEN_PURPOSE), - MAKE_STATIC_HD("server", "", NGHTTP3_QPACK_TOKEN_SERVER), - MAKE_STATIC_HD("timing-allow-origin", "*", - NGHTTP3_QPACK_TOKEN_TIMING_ALLOW_ORIGIN), - MAKE_STATIC_HD("upgrade-insecure-requests", "1", - NGHTTP3_QPACK_TOKEN_UPGRADE_INSECURE_REQUESTS), - MAKE_STATIC_HD("user-agent", "", NGHTTP3_QPACK_TOKEN_USER_AGENT), - MAKE_STATIC_HD("x-forwarded-for", "", NGHTTP3_QPACK_TOKEN_X_FORWARDED_FOR), - MAKE_STATIC_HD("x-frame-options", "deny", - NGHTTP3_QPACK_TOKEN_X_FRAME_OPTIONS), - MAKE_STATIC_HD("x-frame-options", "sameorigin", - NGHTTP3_QPACK_TOKEN_X_FRAME_OPTIONS), + MAKE_STATIC_HD(":authority", "", NGHTTP3_QPACK_TOKEN__AUTHORITY), + MAKE_STATIC_HD(":path", "/", NGHTTP3_QPACK_TOKEN__PATH), + MAKE_STATIC_HD("age", "0", NGHTTP3_QPACK_TOKEN_AGE), + MAKE_STATIC_HD("content-disposition", "", + NGHTTP3_QPACK_TOKEN_CONTENT_DISPOSITION), + MAKE_STATIC_HD("content-length", "0", NGHTTP3_QPACK_TOKEN_CONTENT_LENGTH), + MAKE_STATIC_HD("cookie", "", NGHTTP3_QPACK_TOKEN_COOKIE), + MAKE_STATIC_HD("date", "", NGHTTP3_QPACK_TOKEN_DATE), + MAKE_STATIC_HD("etag", "", NGHTTP3_QPACK_TOKEN_ETAG), + MAKE_STATIC_HD("if-modified-since", "", + NGHTTP3_QPACK_TOKEN_IF_MODIFIED_SINCE), + MAKE_STATIC_HD("if-none-match", "", NGHTTP3_QPACK_TOKEN_IF_NONE_MATCH), + MAKE_STATIC_HD("last-modified", "", NGHTTP3_QPACK_TOKEN_LAST_MODIFIED), + MAKE_STATIC_HD("link", "", NGHTTP3_QPACK_TOKEN_LINK), + MAKE_STATIC_HD("location", "", NGHTTP3_QPACK_TOKEN_LOCATION), + MAKE_STATIC_HD("referer", "", NGHTTP3_QPACK_TOKEN_REFERER), + MAKE_STATIC_HD("set-cookie", "", NGHTTP3_QPACK_TOKEN_SET_COOKIE), + MAKE_STATIC_HD(":method", "CONNECT", NGHTTP3_QPACK_TOKEN__METHOD), + MAKE_STATIC_HD(":method", "DELETE", NGHTTP3_QPACK_TOKEN__METHOD), + MAKE_STATIC_HD(":method", "GET", NGHTTP3_QPACK_TOKEN__METHOD), + MAKE_STATIC_HD(":method", "HEAD", NGHTTP3_QPACK_TOKEN__METHOD), + MAKE_STATIC_HD(":method", "OPTIONS", NGHTTP3_QPACK_TOKEN__METHOD), + MAKE_STATIC_HD(":method", "POST", NGHTTP3_QPACK_TOKEN__METHOD), + MAKE_STATIC_HD(":method", "PUT", NGHTTP3_QPACK_TOKEN__METHOD), + MAKE_STATIC_HD(":scheme", "http", NGHTTP3_QPACK_TOKEN__SCHEME), + MAKE_STATIC_HD(":scheme", "https", NGHTTP3_QPACK_TOKEN__SCHEME), + MAKE_STATIC_HD(":status", "103", NGHTTP3_QPACK_TOKEN__STATUS), + MAKE_STATIC_HD(":status", "200", NGHTTP3_QPACK_TOKEN__STATUS), + MAKE_STATIC_HD(":status", "304", NGHTTP3_QPACK_TOKEN__STATUS), + MAKE_STATIC_HD(":status", "404", NGHTTP3_QPACK_TOKEN__STATUS), + MAKE_STATIC_HD(":status", "503", NGHTTP3_QPACK_TOKEN__STATUS), + MAKE_STATIC_HD("accept", "*/*", NGHTTP3_QPACK_TOKEN_ACCEPT), + MAKE_STATIC_HD("accept", "application/dns-message", + NGHTTP3_QPACK_TOKEN_ACCEPT), + MAKE_STATIC_HD("accept-encoding", "gzip, deflate, br", + NGHTTP3_QPACK_TOKEN_ACCEPT_ENCODING), + MAKE_STATIC_HD("accept-ranges", "bytes", NGHTTP3_QPACK_TOKEN_ACCEPT_RANGES), + MAKE_STATIC_HD("access-control-allow-headers", "cache-control", + NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_HEADERS), + MAKE_STATIC_HD("access-control-allow-headers", "content-type", + NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_HEADERS), + MAKE_STATIC_HD("access-control-allow-origin", "*", + NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_ORIGIN), + MAKE_STATIC_HD("cache-control", "max-age=0", + NGHTTP3_QPACK_TOKEN_CACHE_CONTROL), + MAKE_STATIC_HD("cache-control", "max-age=2592000", + NGHTTP3_QPACK_TOKEN_CACHE_CONTROL), + MAKE_STATIC_HD("cache-control", "max-age=604800", + NGHTTP3_QPACK_TOKEN_CACHE_CONTROL), + MAKE_STATIC_HD("cache-control", "no-cache", + NGHTTP3_QPACK_TOKEN_CACHE_CONTROL), + MAKE_STATIC_HD("cache-control", "no-store", + NGHTTP3_QPACK_TOKEN_CACHE_CONTROL), + MAKE_STATIC_HD("cache-control", "public, max-age=31536000", + NGHTTP3_QPACK_TOKEN_CACHE_CONTROL), + MAKE_STATIC_HD("content-encoding", "br", + NGHTTP3_QPACK_TOKEN_CONTENT_ENCODING), + MAKE_STATIC_HD("content-encoding", "gzip", + NGHTTP3_QPACK_TOKEN_CONTENT_ENCODING), + MAKE_STATIC_HD("content-type", "application/dns-message", + NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), + MAKE_STATIC_HD("content-type", "application/javascript", + NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), + MAKE_STATIC_HD("content-type", "application/json", + NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), + MAKE_STATIC_HD("content-type", "application/x-www-form-urlencoded", + NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), + MAKE_STATIC_HD("content-type", "image/gif", NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), + MAKE_STATIC_HD("content-type", "image/jpeg", + NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), + MAKE_STATIC_HD("content-type", "image/png", NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), + MAKE_STATIC_HD("content-type", "text/css", NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), + MAKE_STATIC_HD("content-type", "text/html; charset=utf-8", + NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), + MAKE_STATIC_HD("content-type", "text/plain", + NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), + MAKE_STATIC_HD("content-type", "text/plain;charset=utf-8", + NGHTTP3_QPACK_TOKEN_CONTENT_TYPE), + MAKE_STATIC_HD("range", "bytes=0-", NGHTTP3_QPACK_TOKEN_RANGE), + MAKE_STATIC_HD("strict-transport-security", "max-age=31536000", + NGHTTP3_QPACK_TOKEN_STRICT_TRANSPORT_SECURITY), + MAKE_STATIC_HD("strict-transport-security", + "max-age=31536000; includesubdomains", + NGHTTP3_QPACK_TOKEN_STRICT_TRANSPORT_SECURITY), + MAKE_STATIC_HD("strict-transport-security", + "max-age=31536000; includesubdomains; preload", + NGHTTP3_QPACK_TOKEN_STRICT_TRANSPORT_SECURITY), + MAKE_STATIC_HD("vary", "accept-encoding", NGHTTP3_QPACK_TOKEN_VARY), + MAKE_STATIC_HD("vary", "origin", NGHTTP3_QPACK_TOKEN_VARY), + MAKE_STATIC_HD("x-content-type-options", "nosniff", + NGHTTP3_QPACK_TOKEN_X_CONTENT_TYPE_OPTIONS), + MAKE_STATIC_HD("x-xss-protection", "1; mode=block", + NGHTTP3_QPACK_TOKEN_X_XSS_PROTECTION), + MAKE_STATIC_HD(":status", "100", NGHTTP3_QPACK_TOKEN__STATUS), + MAKE_STATIC_HD(":status", "204", NGHTTP3_QPACK_TOKEN__STATUS), + MAKE_STATIC_HD(":status", "206", NGHTTP3_QPACK_TOKEN__STATUS), + MAKE_STATIC_HD(":status", "302", NGHTTP3_QPACK_TOKEN__STATUS), + MAKE_STATIC_HD(":status", "400", NGHTTP3_QPACK_TOKEN__STATUS), + MAKE_STATIC_HD(":status", "403", NGHTTP3_QPACK_TOKEN__STATUS), + MAKE_STATIC_HD(":status", "421", NGHTTP3_QPACK_TOKEN__STATUS), + MAKE_STATIC_HD(":status", "425", NGHTTP3_QPACK_TOKEN__STATUS), + MAKE_STATIC_HD(":status", "500", NGHTTP3_QPACK_TOKEN__STATUS), + MAKE_STATIC_HD("accept-language", "", NGHTTP3_QPACK_TOKEN_ACCEPT_LANGUAGE), + MAKE_STATIC_HD("access-control-allow-credentials", "FALSE", + NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_CREDENTIALS), + MAKE_STATIC_HD("access-control-allow-credentials", "TRUE", + NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_CREDENTIALS), + MAKE_STATIC_HD("access-control-allow-headers", "*", + NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_HEADERS), + MAKE_STATIC_HD("access-control-allow-methods", "get", + NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_METHODS), + MAKE_STATIC_HD("access-control-allow-methods", "get, post, options", + NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_METHODS), + MAKE_STATIC_HD("access-control-allow-methods", "options", + NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_ALLOW_METHODS), + MAKE_STATIC_HD("access-control-expose-headers", "content-length", + NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_EXPOSE_HEADERS), + MAKE_STATIC_HD("access-control-request-headers", "content-type", + NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_REQUEST_HEADERS), + MAKE_STATIC_HD("access-control-request-method", "get", + NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_REQUEST_METHOD), + MAKE_STATIC_HD("access-control-request-method", "post", + NGHTTP3_QPACK_TOKEN_ACCESS_CONTROL_REQUEST_METHOD), + MAKE_STATIC_HD("alt-svc", "clear", NGHTTP3_QPACK_TOKEN_ALT_SVC), + MAKE_STATIC_HD("authorization", "", NGHTTP3_QPACK_TOKEN_AUTHORIZATION), + MAKE_STATIC_HD("content-security-policy", + "script-src 'none'; object-src 'none'; base-uri 'none'", + NGHTTP3_QPACK_TOKEN_CONTENT_SECURITY_POLICY), + MAKE_STATIC_HD("early-data", "1", NGHTTP3_QPACK_TOKEN_EARLY_DATA), + MAKE_STATIC_HD("expect-ct", "", NGHTTP3_QPACK_TOKEN_EXPECT_CT), + MAKE_STATIC_HD("forwarded", "", NGHTTP3_QPACK_TOKEN_FORWARDED), + MAKE_STATIC_HD("if-range", "", NGHTTP3_QPACK_TOKEN_IF_RANGE), + MAKE_STATIC_HD("origin", "", NGHTTP3_QPACK_TOKEN_ORIGIN), + MAKE_STATIC_HD("purpose", "prefetch", NGHTTP3_QPACK_TOKEN_PURPOSE), + MAKE_STATIC_HD("server", "", NGHTTP3_QPACK_TOKEN_SERVER), + MAKE_STATIC_HD("timing-allow-origin", "*", + NGHTTP3_QPACK_TOKEN_TIMING_ALLOW_ORIGIN), + MAKE_STATIC_HD("upgrade-insecure-requests", "1", + NGHTTP3_QPACK_TOKEN_UPGRADE_INSECURE_REQUESTS), + MAKE_STATIC_HD("user-agent", "", NGHTTP3_QPACK_TOKEN_USER_AGENT), + MAKE_STATIC_HD("x-forwarded-for", "", NGHTTP3_QPACK_TOKEN_X_FORWARDED_FOR), + MAKE_STATIC_HD("x-frame-options", "deny", + NGHTTP3_QPACK_TOKEN_X_FRAME_OPTIONS), + MAKE_STATIC_HD("x-frame-options", "sameorigin", + NGHTTP3_QPACK_TOKEN_X_FRAME_OPTIONS), }; static int memeq(const void *s1, const void *s2, size_t n) { @@ -884,9 +879,9 @@ static void qpack_context_free(nghttp3_qpack_context *ctx) { static int ref_min_cnt_less(const nghttp3_pq_entry *lhsx, const nghttp3_pq_entry *rhsx) { nghttp3_qpack_header_block_ref *lhs = - nghttp3_struct_of(lhsx, nghttp3_qpack_header_block_ref, min_cnts_pe); + nghttp3_struct_of(lhsx, nghttp3_qpack_header_block_ref, min_cnts_pe); nghttp3_qpack_header_block_ref *rhs = - nghttp3_struct_of(rhsx, nghttp3_qpack_header_block_ref, min_cnts_pe); + nghttp3_struct_of(rhsx, nghttp3_qpack_header_block_ref, min_cnts_pe); return lhs->min_cnt < rhs->min_cnt; } @@ -926,6 +921,7 @@ int nghttp3_qpack_encoder_init(nghttp3_qpack_encoder *encoder, encoder->opcode = 0; encoder->min_dtable_update = SIZE_MAX; encoder->last_max_dtable_update = 0; + encoder->uninterrupted_decoderlen = 0; encoder->flags = NGHTTP3_QPACK_ENCODER_FLAG_NONE; nghttp3_qpack_read_state_reset(&encoder->rstate); @@ -943,16 +939,16 @@ static int map_stream_free(void *data, void *ptr) { void nghttp3_qpack_encoder_free(nghttp3_qpack_encoder *encoder) { nghttp3_pq_free(&encoder->min_cnts); nghttp3_ksl_free(&encoder->blocked_streams); - nghttp3_map_each_free(&encoder->streams, map_stream_free, - (void *)encoder->ctx.mem); + nghttp3_map_each(&encoder->streams, map_stream_free, + (void *)encoder->ctx.mem); nghttp3_map_free(&encoder->streams); qpack_context_free(&encoder->ctx); } void nghttp3_qpack_encoder_set_max_dtable_capacity( - nghttp3_qpack_encoder *encoder, size_t max_dtable_capacity) { - max_dtable_capacity = - nghttp3_min(max_dtable_capacity, encoder->ctx.hard_max_dtable_capacity); + nghttp3_qpack_encoder *encoder, size_t max_dtable_capacity) { + max_dtable_capacity = nghttp3_min_size(max_dtable_capacity, + encoder->ctx.hard_max_dtable_capacity); if (encoder->ctx.max_dtable_capacity == max_dtable_capacity) { return; @@ -968,7 +964,7 @@ void nghttp3_qpack_encoder_set_max_dtable_capacity( } void nghttp3_qpack_encoder_set_max_blocked_streams( - nghttp3_qpack_encoder *encoder, size_t max_blocked_streams) { + nghttp3_qpack_encoder *encoder, size_t max_blocked_streams) { encoder->ctx.max_blocked_streams = max_blocked_streams; } @@ -977,7 +973,7 @@ uint64_t nghttp3_qpack_encoder_get_min_cnt(nghttp3_qpack_encoder *encoder) { return nghttp3_struct_of(nghttp3_pq_top(&encoder->min_cnts), nghttp3_qpack_header_block_ref, min_cnts_pe) - ->min_cnt; + ->min_cnt; } void nghttp3_qpack_encoder_shrink_dtable(nghttp3_qpack_encoder *encoder) { @@ -1003,7 +999,7 @@ void nghttp3_qpack_encoder_shrink_dtable(nghttp3_qpack_encoder *encoder) { } encoder->ctx.dtable_size -= - table_space(ent->nv.name->len, ent->nv.value->len); + table_space(ent->nv.name->len, ent->nv.value->len); nghttp3_ringbuf_pop_back(dtable); qpack_map_remove(&encoder->dtable_map, ent); @@ -1088,8 +1084,8 @@ static void qpack_encoder_remove_stream(nghttp3_qpack_encoder *encoder, len = nghttp3_ringbuf_len(&stream->refs); for (i = 0; i < len; ++i) { - ref = *(nghttp3_qpack_header_block_ref **)nghttp3_ringbuf_get(&stream->refs, - i); + ref = + *(nghttp3_qpack_header_block_ref **)nghttp3_ringbuf_get(&stream->refs, i); assert(ref->min_cnts_pe.index != NGHTTP3_PQ_BAD_INDEX); @@ -1098,11 +1094,9 @@ static void qpack_encoder_remove_stream(nghttp3_qpack_encoder *encoder, } /* - * reserve_buf_internal ensures that |buf| contains at least - * |extra_size| of free space. In other words, if this function - * succeeds, nghttp3_buf_left(buf) >= extra_size holds. |min_size| is - * the minimum size of buffer. The allocated buffer has at least - * |min_size| bytes. + * reserve_buf ensures that |buf| contains at least |extra_size| of + * free space. In other words, if this function succeeds, + * nghttp3_buf_left(buf) >= extra_size holds. * * This function returns 0 if it succeeds, or one of the following * negative error codes: @@ -1110,31 +1104,37 @@ static void qpack_encoder_remove_stream(nghttp3_qpack_encoder *encoder, * NGHTTP3_ERR_NOMEM * Out of memory. */ -static int reserve_buf_internal(nghttp3_buf *buf, size_t extra_size, - size_t min_size, const nghttp3_mem *mem) { +static int reserve_buf(nghttp3_buf *buf, size_t extra_size, + const nghttp3_mem *mem) { size_t left = nghttp3_buf_left(buf); - size_t n = min_size, need; + size_t n = 32; if (left >= extra_size) { return 0; } - need = nghttp3_buf_cap(buf) + extra_size - left; - - for (; n < need; n *= 2) - ; + n = nghttp3_max_size(n, nghttp3_buf_cap(buf) + extra_size - left); - return nghttp3_buf_reserve(buf, n, mem); -} + /* Check whether we are requesting too much memory */ + if (n > (1u << 31)) { + return NGHTTP3_ERR_NOMEM; + } -static int reserve_buf_small(nghttp3_buf *buf, size_t extra_size, - const nghttp3_mem *mem) { - return reserve_buf_internal(buf, extra_size, 32, mem); -} +#ifndef WIN32 + n = 1u << (32 - __builtin_clz((uint32_t)n - 1)); +#else /* defined(WIN32) */ + /* Round up to the next highest power of 2 from Bit Twiddling + Hacks */ + --n; + n |= n >> 1; + n |= n >> 2; + n |= n >> 4; + n |= n >> 8; + n |= n >> 16; + ++n; +#endif /* defined(WIN32) */ -static int reserve_buf(nghttp3_buf *buf, size_t extra_size, - const nghttp3_mem *mem) { - return reserve_buf_internal(buf, extra_size, 32, mem); + return nghttp3_buf_reserve(buf, n, mem); } int nghttp3_qpack_encoder_encode(nghttp3_qpack_encoder *encoder, @@ -1162,10 +1162,10 @@ int nghttp3_qpack_encoder_encode(nghttp3_qpack_encoder *encoder, stream = nghttp3_qpack_encoder_find_stream(encoder, stream_id); blocked_stream = - stream && nghttp3_qpack_encoder_stream_is_blocked(encoder, stream); + stream && nghttp3_qpack_encoder_stream_is_blocked(encoder, stream); allow_blocking = - blocked_stream || encoder->ctx.max_blocked_streams > - nghttp3_ksl_len(&encoder->blocked_streams); + blocked_stream || encoder->ctx.max_blocked_streams > + nghttp3_ksl_len(&encoder->blocked_streams); DEBUGF("qpack::encode: stream %ld blocked=%d allow_blocking=%d\n", stream_id, blocked_stream, allow_blocking); @@ -1181,13 +1181,15 @@ int nghttp3_qpack_encoder_encode(nghttp3_qpack_encoder *encoder, nghttp3_qpack_encoder_write_field_section_prefix(encoder, pbuf, max_cnt, base); + encoder->uninterrupted_decoderlen = 0; + /* TODO If max_cnt == 0, no reference is made to dtable. */ if (!max_cnt) { return 0; } - rv = qpack_encoder_add_stream_ref(encoder, stream_id, stream, max_cnt, - min_cnt); + rv = + qpack_encoder_add_stream_ref(encoder, stream_id, stream, max_cnt, min_cnt); if (rv != 0) { goto fail; } @@ -1253,7 +1255,7 @@ int nghttp3_qpack_encoder_process_dtable_update(nghttp3_qpack_encoder *encoder, } rv = nghttp3_qpack_encoder_write_set_dtable_cap( - encoder, ebuf, encoder->last_max_dtable_update); + encoder, ebuf, encoder->last_max_dtable_update); if (rv != 0) { return rv; } @@ -1374,7 +1376,7 @@ static int qpack_encoder_can_index(nghttp3_qpack_encoder *encoder, size_t need, if (!nghttp3_pq_empty(&encoder->min_cnts)) { gmin_cnt = nghttp3_qpack_encoder_get_min_cnt(encoder); - min_cnt = nghttp3_min(min_cnt, gmin_cnt); + min_cnt = nghttp3_min_uint64(min_cnt, gmin_cnt); } if (min_cnt == UINT64_MAX) { @@ -1402,7 +1404,7 @@ static int qpack_encoder_can_index(nghttp3_qpack_encoder *encoder, size_t need, static int qpack_encoder_can_index_nv(nghttp3_qpack_encoder *encoder, const nghttp3_nv *nv, uint64_t min_cnt) { return qpack_encoder_can_index( - encoder, table_space(nv->namelen, nv->valuelen), min_cnt); + encoder, table_space(nv->namelen, nv->valuelen), min_cnt); } /* @@ -1415,10 +1417,10 @@ static int qpack_encoder_can_index_duplicate(nghttp3_qpack_encoder *encoder, uint64_t absidx, uint64_t min_cnt) { nghttp3_qpack_entry *ent = - nghttp3_qpack_context_dtable_get(&encoder->ctx, absidx); + nghttp3_qpack_context_dtable_get(&encoder->ctx, absidx); return qpack_encoder_can_index( - encoder, table_space(ent->nv.name->len, ent->nv.value->len), min_cnt); + encoder, table_space(ent->nv.name->len, ent->nv.value->len), min_cnt); } /* @@ -1428,7 +1430,7 @@ static int qpack_encoder_can_index_duplicate(nghttp3_qpack_encoder *encoder, static int qpack_context_check_draining(nghttp3_qpack_context *ctx, uint64_t absidx) { const size_t safe = ctx->max_dtable_capacity - - nghttp3_min(512, ctx->max_dtable_capacity * 1 / 8); + nghttp3_min_size(512, ctx->max_dtable_capacity * 1 / 8); nghttp3_qpack_entry *ent = nghttp3_qpack_context_dtable_get(ctx, absidx); return ctx->dtable_sum - ent->sum > safe; @@ -1483,11 +1485,10 @@ int nghttp3_qpack_encoder_encode_nv(nghttp3_qpack_encoder *encoder, } if (nghttp3_map_size(&encoder->streams) < NGHTTP3_QPACK_MAX_QPACK_STREAMS) { - dres = nghttp3_qpack_encoder_lookup_dtable(encoder, nv, token, hash, - indexing_mode, encoder->krcnt, - allow_blocking); - just_index = indexing_mode == NGHTTP3_QPACK_INDEXING_MODE_STORE && - dres.pb_index == -1; + dres = nghttp3_qpack_encoder_lookup_dtable( + encoder, nv, token, hash, indexing_mode, encoder->krcnt, allow_blocking); + just_index = + indexing_mode == NGHTTP3_QPACK_INDEXING_MODE_STORE && dres.pb_index == -1; } if (dres.index != -1 && dres.name_value_match) { @@ -1500,8 +1501,8 @@ int nghttp3_qpack_encoder_encode_nv(nghttp3_qpack_encoder *encoder, if (rv != 0) { return rv; } - rv = nghttp3_qpack_encoder_dtable_duplicate_add(encoder, - (size_t)dres.index); + rv = + nghttp3_qpack_encoder_dtable_duplicate_add(encoder, (size_t)dres.index); if (rv != 0) { return rv; } @@ -1509,11 +1510,11 @@ int nghttp3_qpack_encoder_encode_nv(nghttp3_qpack_encoder *encoder, new_ent = nghttp3_qpack_context_dtable_top(&encoder->ctx); dres.index = (nghttp3_ssize)new_ent->absidx; } - *pmax_cnt = nghttp3_max(*pmax_cnt, (size_t)(dres.index + 1)); - *pmin_cnt = nghttp3_min(*pmin_cnt, (size_t)(dres.index + 1)); + *pmax_cnt = nghttp3_max_uint64(*pmax_cnt, (uint64_t)(dres.index + 1)); + *pmin_cnt = nghttp3_min_uint64(*pmin_cnt, (uint64_t)(dres.index + 1)); return nghttp3_qpack_encoder_write_dynamic_indexed( - encoder, rbuf, (size_t)dres.index, base); + encoder, rbuf, (size_t)dres.index, base); } if (sres.index != -1) { @@ -1530,24 +1531,25 @@ int nghttp3_qpack_encoder_encode_nv(nghttp3_qpack_encoder *encoder, } if (allow_blocking) { new_ent = nghttp3_qpack_context_dtable_top(&encoder->ctx); - *pmax_cnt = nghttp3_max(*pmax_cnt, new_ent->absidx + 1); - *pmin_cnt = nghttp3_min(*pmin_cnt, new_ent->absidx + 1); + *pmax_cnt = nghttp3_max_uint64(*pmax_cnt, new_ent->absidx + 1); + *pmin_cnt = nghttp3_min_uint64(*pmin_cnt, new_ent->absidx + 1); return nghttp3_qpack_encoder_write_dynamic_indexed( - encoder, rbuf, new_ent->absidx, base); + encoder, rbuf, new_ent->absidx, base); } } return nghttp3_qpack_encoder_write_static_indexed_name( - encoder, rbuf, (size_t)sres.index, nv); + encoder, rbuf, (size_t)sres.index, nv); } if (dres.index != -1) { if (just_index && qpack_encoder_can_index_nv( - encoder, nv, - allow_blocking ? *pmin_cnt - : nghttp3_min((size_t)dres.index + 1, *pmin_cnt))) { + encoder, nv, + allow_blocking + ? *pmin_cnt + : nghttp3_min_uint64((uint64_t)dres.index + 1, *pmin_cnt))) { rv = nghttp3_qpack_encoder_write_dynamic_insert(encoder, ebuf, (size_t)dres.index, nv); if (rv != 0) { @@ -1555,7 +1557,7 @@ int nghttp3_qpack_encoder_encode_nv(nghttp3_qpack_encoder *encoder, } if (!allow_blocking) { - *pmin_cnt = nghttp3_min(*pmin_cnt, (size_t)dres.index + 1); + *pmin_cnt = nghttp3_min_uint64(*pmin_cnt, (uint64_t)dres.index + 1); } rv = nghttp3_qpack_encoder_dtable_dynamic_add(encoder, (size_t)dres.index, @@ -1566,19 +1568,19 @@ int nghttp3_qpack_encoder_encode_nv(nghttp3_qpack_encoder *encoder, if (allow_blocking) { new_ent = nghttp3_qpack_context_dtable_top(&encoder->ctx); - *pmax_cnt = nghttp3_max(*pmax_cnt, new_ent->absidx + 1); - *pmin_cnt = nghttp3_min(*pmin_cnt, new_ent->absidx + 1); + *pmax_cnt = nghttp3_max_uint64(*pmax_cnt, new_ent->absidx + 1); + *pmin_cnt = nghttp3_min_uint64(*pmin_cnt, new_ent->absidx + 1); return nghttp3_qpack_encoder_write_dynamic_indexed( - encoder, rbuf, new_ent->absidx, base); + encoder, rbuf, new_ent->absidx, base); } } - *pmax_cnt = nghttp3_max(*pmax_cnt, (size_t)(dres.index + 1)); - *pmin_cnt = nghttp3_min(*pmin_cnt, (size_t)(dres.index + 1)); + *pmax_cnt = nghttp3_max_uint64(*pmax_cnt, (uint64_t)(dres.index + 1)); + *pmin_cnt = nghttp3_min_uint64(*pmin_cnt, (uint64_t)(dres.index + 1)); return nghttp3_qpack_encoder_write_dynamic_indexed_name( - encoder, rbuf, (size_t)dres.index, base, nv); + encoder, rbuf, (size_t)dres.index, base, nv); } if (just_index && qpack_encoder_can_index_nv(encoder, nv, *pmin_cnt)) { @@ -1592,8 +1594,8 @@ int nghttp3_qpack_encoder_encode_nv(nghttp3_qpack_encoder *encoder, } if (allow_blocking) { new_ent = nghttp3_qpack_context_dtable_top(&encoder->ctx); - *pmax_cnt = nghttp3_max(*pmax_cnt, new_ent->absidx + 1); - *pmin_cnt = nghttp3_min(*pmin_cnt, new_ent->absidx + 1); + *pmax_cnt = nghttp3_max_uint64(*pmax_cnt, new_ent->absidx + 1); + *pmin_cnt = nghttp3_min_uint64(*pmin_cnt, new_ent->absidx + 1); return nghttp3_qpack_encoder_write_dynamic_indexed(encoder, rbuf, new_ent->absidx, base); @@ -1634,9 +1636,9 @@ nghttp3_qpack_lookup_stable(const nghttp3_nv *nv, int32_t token, } nghttp3_qpack_lookup_result nghttp3_qpack_encoder_lookup_dtable( - nghttp3_qpack_encoder *encoder, const nghttp3_nv *nv, int32_t token, - uint32_t hash, nghttp3_qpack_indexing_mode indexing_mode, uint64_t krcnt, - int allow_blocking) { + nghttp3_qpack_encoder *encoder, const nghttp3_nv *nv, int32_t token, + uint32_t hash, nghttp3_qpack_indexing_mode indexing_mode, uint64_t krcnt, + int allow_blocking) { nghttp3_qpack_lookup_result res = {-1, 0, -1}; int exact_match = 0; nghttp3_qpack_entry *match, *pb_match; @@ -1659,7 +1661,7 @@ int nghttp3_qpack_header_block_ref_new(nghttp3_qpack_header_block_ref **pref, uint64_t max_cnt, uint64_t min_cnt, const nghttp3_mem *mem) { nghttp3_qpack_header_block_ref *ref = - nghttp3_mem_malloc(mem, sizeof(nghttp3_qpack_header_block_ref)); + nghttp3_mem_malloc(mem, sizeof(nghttp3_qpack_header_block_ref)); if (ref == NULL) { return NGHTTP3_ERR_NOMEM; @@ -1683,9 +1685,9 @@ void nghttp3_qpack_header_block_ref_del(nghttp3_qpack_header_block_ref *ref, static int ref_max_cnt_greater(const nghttp3_pq_entry *lhsx, const nghttp3_pq_entry *rhsx) { const nghttp3_qpack_header_block_ref *lhs = - nghttp3_struct_of(lhsx, nghttp3_qpack_header_block_ref, max_cnts_pe); + nghttp3_struct_of(lhsx, nghttp3_qpack_header_block_ref, max_cnts_pe); const nghttp3_qpack_header_block_ref *rhs = - nghttp3_struct_of(rhsx, nghttp3_qpack_header_block_ref, max_cnts_pe); + nghttp3_struct_of(rhsx, nghttp3_qpack_header_block_ref, max_cnts_pe); return lhs->max_cnt > rhs->max_cnt; } @@ -1729,8 +1731,8 @@ void nghttp3_qpack_stream_del(nghttp3_qpack_stream *stream, len = nghttp3_ringbuf_len(&stream->refs); for (i = 0; i < len; ++i) { - ref = *(nghttp3_qpack_header_block_ref **)nghttp3_ringbuf_get(&stream->refs, - i); + ref = + *(nghttp3_qpack_header_block_ref **)nghttp3_ringbuf_get(&stream->refs, i); nghttp3_qpack_header_block_ref_del(ref, mem); } @@ -1776,7 +1778,7 @@ void nghttp3_qpack_stream_pop_ref(nghttp3_qpack_stream *stream) { assert(nghttp3_ringbuf_len(&stream->refs)); ref = - *(nghttp3_qpack_header_block_ref **)nghttp3_ringbuf_get(&stream->refs, 0); + *(nghttp3_qpack_header_block_ref **)nghttp3_ringbuf_get(&stream->refs, 0); assert(ref->max_cnts_pe.index != NGHTTP3_PQ_BAD_INDEX); @@ -1832,7 +1834,7 @@ static int qpack_encoder_write_indexed_name(nghttp3_qpack_encoder *encoder, int h = 0; hlen = nghttp3_qpack_huffman_encode_count(nv->value, nv->valuelen); - if (hlen < nv->valuelen) { + if (hlen * 4 < nv->valuelen * 3) { h = 1; len += nghttp3_qpack_put_varint_len(hlen, 7) + hlen; } else { @@ -1869,10 +1871,10 @@ static int qpack_encoder_write_indexed_name(nghttp3_qpack_encoder *encoder, } int nghttp3_qpack_encoder_write_static_indexed_name( - nghttp3_qpack_encoder *encoder, nghttp3_buf *rbuf, uint64_t absidx, - const nghttp3_nv *nv) { + nghttp3_qpack_encoder *encoder, nghttp3_buf *rbuf, uint64_t absidx, + const nghttp3_nv *nv) { uint8_t fb = - (uint8_t)(0x50 | ((nv->flags & NGHTTP3_NV_FLAG_NEVER_INDEX) ? 0x20 : 0)); + (uint8_t)(0x50 | ((nv->flags & NGHTTP3_NV_FLAG_NEVER_INDEX) ? 0x20 : 0)); DEBUGF("qpack::encode: Literal Field Line With Name Reference (static) " "absidx=%" PRIu64 " never=%d\n", @@ -1881,8 +1883,8 @@ int nghttp3_qpack_encoder_write_static_indexed_name( } int nghttp3_qpack_encoder_write_dynamic_indexed_name( - nghttp3_qpack_encoder *encoder, nghttp3_buf *rbuf, uint64_t absidx, - uint64_t base, const nghttp3_nv *nv) { + nghttp3_qpack_encoder *encoder, nghttp3_buf *rbuf, uint64_t absidx, + uint64_t base, const nghttp3_nv *nv) { uint8_t fb; DEBUGF("qpack::encode: Literal Field Line With Name Reference (dynamic) " @@ -1890,8 +1892,8 @@ int nghttp3_qpack_encoder_write_dynamic_indexed_name( absidx, base, (nv->flags & NGHTTP3_NV_FLAG_NEVER_INDEX) != 0); if (absidx < base) { - fb = (uint8_t)(0x40 | - ((nv->flags & NGHTTP3_NV_FLAG_NEVER_INDEX) ? 0x20 : 0)); + fb = + (uint8_t)(0x40 | ((nv->flags & NGHTTP3_NV_FLAG_NEVER_INDEX) ? 0x20 : 0)); return qpack_encoder_write_indexed_name(encoder, rbuf, fb, base - absidx - 1, 4, nv); } @@ -1923,7 +1925,7 @@ static int qpack_encoder_write_literal(nghttp3_qpack_encoder *encoder, int nh = 0, vh = 0; nhlen = nghttp3_qpack_huffman_encode_count(nv->name, nv->namelen); - if (nhlen < nv->namelen) { + if (nhlen * 4 < nv->namelen * 3) { nh = 1; len = nghttp3_qpack_put_varint_len(nhlen, prefix) + nhlen; } else { @@ -1931,7 +1933,7 @@ static int qpack_encoder_write_literal(nghttp3_qpack_encoder *encoder, } vhlen = nghttp3_qpack_huffman_encode_count(nv->value, nv->valuelen); - if (vhlen < nv->valuelen) { + if (vhlen * 4 < nv->valuelen * 3) { vh = 1; len += nghttp3_qpack_put_varint_len(vhlen, 7) + vhlen; } else { @@ -1981,7 +1983,7 @@ int nghttp3_qpack_encoder_write_literal(nghttp3_qpack_encoder *encoder, nghttp3_buf *rbuf, const nghttp3_nv *nv) { uint8_t fb = - (uint8_t)(0x20 | ((nv->flags & NGHTTP3_NV_FLAG_NEVER_INDEX) ? 0x10 : 0)); + (uint8_t)(0x20 | ((nv->flags & NGHTTP3_NV_FLAG_NEVER_INDEX) ? 0x10 : 0)); DEBUGF("qpack::encode: Literal Field Line With Literal Name\n"); return qpack_encoder_write_literal(encoder, rbuf, fb, 3, nv); @@ -2005,7 +2007,7 @@ int nghttp3_qpack_encoder_write_dynamic_insert(nghttp3_qpack_encoder *encoder, "\n", absidx); return qpack_encoder_write_indexed_name( - encoder, ebuf, 0x80, encoder->ctx.next_absidx - absidx - 1, 6, nv); + encoder, ebuf, 0x80, encoder->ctx.next_absidx - absidx - 1, 6, nv); } int nghttp3_qpack_encoder_write_duplicate_insert(nghttp3_qpack_encoder *encoder, @@ -2257,10 +2259,10 @@ void nghttp3_qpack_entry_free(nghttp3_qpack_entry *ent) { int nghttp3_qpack_encoder_block_stream(nghttp3_qpack_encoder *encoder, nghttp3_qpack_stream *stream) { nghttp3_blocked_streams_key bsk = { - nghttp3_struct_of(nghttp3_pq_top(&stream->max_cnts), - nghttp3_qpack_header_block_ref, max_cnts_pe) - ->max_cnt, - (uint64_t)stream->stream_id}; + nghttp3_struct_of(nghttp3_pq_top(&stream->max_cnts), + nghttp3_qpack_header_block_ref, max_cnts_pe) + ->max_cnt, + (uint64_t)stream->stream_id}; return nghttp3_ksl_insert(&encoder->blocked_streams, NULL, &bsk, stream); } @@ -2268,10 +2270,10 @@ int nghttp3_qpack_encoder_block_stream(nghttp3_qpack_encoder *encoder, void nghttp3_qpack_encoder_unblock_stream(nghttp3_qpack_encoder *encoder, nghttp3_qpack_stream *stream) { nghttp3_blocked_streams_key bsk = { - nghttp3_struct_of(nghttp3_pq_top(&stream->max_cnts), - nghttp3_qpack_header_block_ref, max_cnts_pe) - ->max_cnt, - (uint64_t)stream->stream_id}; + nghttp3_struct_of(nghttp3_pq_top(&stream->max_cnts), + nghttp3_qpack_header_block_ref, max_cnts_pe) + ->max_cnt, + (uint64_t)stream->stream_id}; nghttp3_ksl_it it; /* This is purely debugging purpose only */ @@ -2299,7 +2301,7 @@ void nghttp3_qpack_encoder_unblock(nghttp3_qpack_encoder *encoder, int nghttp3_qpack_encoder_ack_header(nghttp3_qpack_encoder *encoder, int64_t stream_id) { nghttp3_qpack_stream *stream = - nghttp3_qpack_encoder_find_stream(encoder, stream_id); + nghttp3_qpack_encoder_find_stream(encoder, stream_id); const nghttp3_mem *mem = encoder->ctx.mem; nghttp3_qpack_header_block_ref *ref; @@ -2310,7 +2312,7 @@ int nghttp3_qpack_encoder_ack_header(nghttp3_qpack_encoder *encoder, assert(nghttp3_ringbuf_len(&stream->refs)); ref = - *(nghttp3_qpack_header_block_ref **)nghttp3_ringbuf_get(&stream->refs, 0); + *(nghttp3_qpack_header_block_ref **)nghttp3_ringbuf_get(&stream->refs, 0); DEBUGF("qpack::encoder: Header acknowledgement stream=%ld ricnt=%" PRIu64 " krcnt=%" PRIu64 "\n", @@ -2357,15 +2359,15 @@ void nghttp3_qpack_encoder_ack_everything(nghttp3_qpack_encoder *encoder) { nghttp3_ksl_clear(&encoder->blocked_streams); nghttp3_pq_clear(&encoder->min_cnts); - nghttp3_map_each_free(&encoder->streams, map_stream_free, - (void *)encoder->ctx.mem); + nghttp3_map_each(&encoder->streams, map_stream_free, + (void *)encoder->ctx.mem); nghttp3_map_clear(&encoder->streams); } void nghttp3_qpack_encoder_cancel_stream(nghttp3_qpack_encoder *encoder, int64_t stream_id) { nghttp3_qpack_stream *stream = - nghttp3_qpack_encoder_find_stream(encoder, stream_id); + nghttp3_qpack_encoder_find_stream(encoder, stream_id); const nghttp3_mem *mem = encoder->ctx.mem; if (stream == NULL) { @@ -2387,10 +2389,10 @@ nghttp3_qpack_encoder_get_num_blocked_streams(nghttp3_qpack_encoder *encoder) { } int nghttp3_qpack_encoder_write_field_section_prefix( - nghttp3_qpack_encoder *encoder, nghttp3_buf *pbuf, uint64_t ricnt, - uint64_t base) { + nghttp3_qpack_encoder *encoder, nghttp3_buf *pbuf, uint64_t ricnt, + uint64_t base) { size_t max_ents = - encoder->ctx.hard_max_dtable_capacity / NGHTTP3_QPACK_ENTRY_OVERHEAD; + encoder->ctx.hard_max_dtable_capacity / NGHTTP3_QPACK_ENTRY_OVERHEAD; uint64_t encricnt = ricnt == 0 ? 0 : (ricnt % (2 * max_ents)) + 1; int sign = base < ricnt; uint64_t delta_base = sign ? ricnt - base - 1 : base - ricnt; @@ -2523,20 +2525,31 @@ nghttp3_ssize nghttp3_qpack_encoder_read_decoder(nghttp3_qpack_encoder *encoder, return 0; } + encoder->uninterrupted_decoderlen += srclen; + if (encoder->uninterrupted_decoderlen > NGHTTP3_QPACK_MAX_DECODERLEN) { + return NGHTTP3_ERR_QPACK_DECODER_STREAM_ERROR; + } + end = src + srclen; for (; p != end;) { switch (encoder->state) { case NGHTTP3_QPACK_DS_STATE_OPCODE: - if ((*p) & 0x80) { + switch ((*p) & 0xc0) { + case 0x80: + case 0xc0: DEBUGF("qpack::encode: OPCODE_SECTION_ACK\n"); encoder->opcode = NGHTTP3_QPACK_DS_OPCODE_SECTION_ACK; encoder->rstate.prefix = 7; - } else if ((*p) & 0x40) { + + break; + case 0x40: DEBUGF("qpack::encode: OPCODE_STREAM_CANCEL\n"); encoder->opcode = NGHTTP3_QPACK_DS_OPCODE_STREAM_CANCEL; encoder->rstate.prefix = 6; - } else { + + break; + default: DEBUGF("qpack::encode: OPCODE_ICNT_INCREMENT\n"); encoder->opcode = NGHTTP3_QPACK_DS_OPCODE_ICNT_INCREMENT; encoder->rstate.prefix = 6; @@ -2670,6 +2683,7 @@ int nghttp3_qpack_decoder_init(nghttp3_qpack_decoder *decoder, decoder->opcode = 0; decoder->written_icnt = 0; decoder->max_concurrent_streams = 0; + decoder->uninterrupted_encoderlen = 0; nghttp3_qpack_read_state_reset(&decoder->rstate); nghttp3_buf_init(&decoder->dbuf); @@ -2726,7 +2740,7 @@ static nghttp3_ssize qpack_read_string(nghttp3_qpack_read_state *rstate, nghttp3_buf *dest, const uint8_t *begin, const uint8_t *end) { size_t len = (size_t)(end - begin); - size_t n = (size_t)nghttp3_min((uint64_t)len, rstate->left); + size_t n = (size_t)nghttp3_min_uint64((uint64_t)len, rstate->left); dest->last = nghttp3_cpymem(dest->last, begin, n); @@ -2747,10 +2761,10 @@ static int qpack_decoder_validate_index(nghttp3_qpack_decoder *decoder, nghttp3_qpack_read_state *rstate) { if (rstate->dynamic) { return rstate->absidx < decoder->ctx.next_absidx && - decoder->ctx.next_absidx - rstate->absidx - 1 < - nghttp3_ringbuf_len(&decoder->ctx.dtable) - ? 0 - : NGHTTP3_ERR_QPACK_FATAL; + decoder->ctx.next_absidx - rstate->absidx - 1 < + nghttp3_ringbuf_len(&decoder->ctx.dtable) + ? 0 + : NGHTTP3_ERR_QPACK_FATAL; } return rstate->absidx < nghttp3_arraylen(stable) ? 0 : NGHTTP3_ERR_QPACK_FATAL; @@ -2789,39 +2803,51 @@ nghttp3_ssize nghttp3_qpack_decoder_read_encoder(nghttp3_qpack_decoder *decoder, return 0; } + decoder->uninterrupted_encoderlen += srclen; + if (decoder->uninterrupted_encoderlen > NGHTTP3_QPACK_MAX_ENCODERLEN) { + return NGHTTP3_ERR_QPACK_ENCODER_STREAM_ERROR; + } + end = src + srclen; for (; p != end || busy;) { busy = 0; switch (decoder->state) { case NGHTTP3_QPACK_ES_STATE_OPCODE: - if ((*p) & 0x80) { + switch ((*p) & 0xe0) { + case 0x80: + case 0xa0: + case 0xc0: + case 0xe0: DEBUGF("qpack::decode: OPCODE_INSERT_INDEXED\n"); decoder->opcode = NGHTTP3_QPACK_ES_OPCODE_INSERT_INDEXED; decoder->rstate.dynamic = !((*p) & 0x40); decoder->rstate.prefix = 6; decoder->state = NGHTTP3_QPACK_ES_STATE_READ_INDEX; - } else if ((*p) & 0x40) { + + break; + case 0x40: + case 0x60: DEBUGF("qpack::decode: OPCODE_INSERT\n"); decoder->opcode = NGHTTP3_QPACK_ES_OPCODE_INSERT; decoder->rstate.dynamic = 0; decoder->rstate.prefix = 5; decoder->state = NGHTTP3_QPACK_ES_STATE_CHECK_NAME_HUFFMAN; - } else if ((*p) & 0x20) { + + break; + case 0x20: DEBUGF("qpack::decode: OPCODE_SET_DTABLE_TABLE_CAP\n"); decoder->opcode = NGHTTP3_QPACK_ES_OPCODE_SET_DTABLE_CAP; decoder->rstate.prefix = 5; decoder->state = NGHTTP3_QPACK_ES_STATE_READ_INDEX; - } else if (!((*p) & 0x20)) { + + break; + default: DEBUGF("qpack::decode: OPCODE_DUPLICATE\n"); decoder->opcode = NGHTTP3_QPACK_ES_OPCODE_DUPLICATE; decoder->rstate.dynamic = 1; decoder->rstate.prefix = 5; decoder->state = NGHTTP3_QPACK_ES_STATE_READ_INDEX; - } else { - DEBUGF("qpack::decode: unknown opcode %02x\n", *p); - rv = NGHTTP3_ERR_QPACK_ENCODER_STREAM_ERROR; - goto fail; } break; case NGHTTP3_QPACK_ES_STATE_READ_INDEX: @@ -2842,7 +2868,7 @@ nghttp3_ssize nghttp3_qpack_decoder_read_encoder(nghttp3_qpack_decoder *decoder, DEBUGF("qpack::decode: Set dtable capacity to %" PRIu64 "\n", decoder->rstate.left); rv = nghttp3_qpack_decoder_set_max_dtable_capacity( - decoder, (size_t)decoder->rstate.left); + decoder, (size_t)decoder->rstate.left); if (rv != 0) { rv = NGHTTP3_ERR_QPACK_ENCODER_STREAM_ERROR; goto fail; @@ -2944,7 +2970,7 @@ nghttp3_ssize nghttp3_qpack_decoder_read_encoder(nghttp3_qpack_decoder *decoder, break; case NGHTTP3_QPACK_ES_STATE_READ_NAME: nread = - qpack_read_string(&decoder->rstate, &decoder->rstate.namebuf, p, end); + qpack_read_string(&decoder->rstate, &decoder->rstate.namebuf, p, end); if (nread < 0) { rv = (int)nread; goto fail; @@ -3042,8 +3068,8 @@ nghttp3_ssize nghttp3_qpack_decoder_read_encoder(nghttp3_qpack_decoder *decoder, nghttp3_qpack_read_state_reset(&decoder->rstate); break; case NGHTTP3_QPACK_ES_STATE_READ_VALUE: - nread = qpack_read_string(&decoder->rstate, &decoder->rstate.valuebuf, p, - end); + nread = + qpack_read_string(&decoder->rstate, &decoder->rstate.valuebuf, p, end); if (nread < 0) { rv = (int)nread; goto fail; @@ -3085,7 +3111,7 @@ nghttp3_ssize nghttp3_qpack_decoder_read_encoder(nghttp3_qpack_decoder *decoder, } int nghttp3_qpack_decoder_set_max_dtable_capacity( - nghttp3_qpack_decoder *decoder, size_t max_dtable_capacity) { + nghttp3_qpack_decoder *decoder, size_t max_dtable_capacity) { nghttp3_qpack_entry *ent; size_t i; nghttp3_qpack_context *ctx = &decoder->ctx; @@ -3230,9 +3256,9 @@ int nghttp3_qpack_decoder_dtable_literal_add(nghttp3_qpack_decoder *decoder) { } void nghttp3_qpack_decoder_set_max_concurrent_streams( - nghttp3_qpack_decoder *decoder, size_t max_concurrent_streams) { + nghttp3_qpack_decoder *decoder, size_t max_concurrent_streams) { decoder->max_concurrent_streams = - nghttp3_max(decoder->max_concurrent_streams, max_concurrent_streams); + nghttp3_max_size(decoder->max_concurrent_streams, max_concurrent_streams); } void nghttp3_qpack_stream_context_init(nghttp3_qpack_stream_context *sctx, @@ -3357,33 +3383,53 @@ nghttp3_qpack_decoder_read_request(nghttp3_qpack_decoder *decoder, case NGHTTP3_QPACK_RS_STATE_OPCODE: assert(sctx->rstate.left == 0); assert(sctx->rstate.shift == 0); - if ((*p) & 0x80) { + switch ((*p) & 0xf0) { + case 0x80: + case 0x90: + case 0xa0: + case 0xb0: + case 0xc0: + case 0xd0: + case 0xe0: + case 0xf0: DEBUGF("qpack::decode: OPCODE_INDEXED\n"); sctx->opcode = NGHTTP3_QPACK_RS_OPCODE_INDEXED; sctx->rstate.dynamic = !((*p) & 0x40); sctx->rstate.prefix = 6; sctx->state = NGHTTP3_QPACK_RS_STATE_READ_INDEX; - } else if ((*p) & 0x40) { + + break; + case 0x40: + case 0x50: + case 0x60: + case 0x70: DEBUGF("qpack::decode: OPCODE_INDEXED_NAME\n"); sctx->opcode = NGHTTP3_QPACK_RS_OPCODE_INDEXED_NAME; sctx->rstate.never = (*p) & 0x20; sctx->rstate.dynamic = !((*p) & 0x10); sctx->rstate.prefix = 4; sctx->state = NGHTTP3_QPACK_RS_STATE_READ_INDEX; - } else if ((*p) & 0x20) { + + break; + case 0x20: + case 0x30: DEBUGF("qpack::decode: OPCODE_LITERAL\n"); sctx->opcode = NGHTTP3_QPACK_RS_OPCODE_LITERAL; sctx->rstate.never = (*p) & 0x10; sctx->rstate.dynamic = 0; sctx->rstate.prefix = 3; sctx->state = NGHTTP3_QPACK_RS_STATE_CHECK_NAME_HUFFMAN; - } else if ((*p) & 0x10) { + + break; + case 0x10: DEBUGF("qpack::decode: OPCODE_INDEXED_PB\n"); sctx->opcode = NGHTTP3_QPACK_RS_OPCODE_INDEXED_PB; sctx->rstate.dynamic = 1; sctx->rstate.prefix = 4; sctx->state = NGHTTP3_QPACK_RS_STATE_READ_INDEX; - } else { + + break; + default: DEBUGF("qpack::decode: OPCODE_INDEXED_NAME_PB\n"); sctx->opcode = NGHTTP3_QPACK_RS_OPCODE_INDEXED_NAME_PB; sctx->rstate.never = (*p) & 0x08; @@ -3494,8 +3540,8 @@ nghttp3_qpack_decoder_read_request(nghttp3_qpack_decoder *decoder, sctx->rstate.name->len); break; case NGHTTP3_QPACK_RS_STATE_READ_NAME_HUFFMAN: - nread = qpack_read_huffman_string(&sctx->rstate, &sctx->rstate.namebuf, p, - end); + nread = + qpack_read_huffman_string(&sctx->rstate, &sctx->rstate.namebuf, p, end); if (nread < 0) { assert(NGHTTP3_ERR_QPACK_FATAL == nread); rv = NGHTTP3_ERR_QPACK_DECOMPRESSION_FAILED; @@ -3679,6 +3725,8 @@ nghttp3_qpack_decoder_read_request(nghttp3_qpack_decoder *decoder, goto fail; } } + + decoder->uninterrupted_encoderlen = 0; } return p - src; @@ -3689,13 +3737,13 @@ nghttp3_qpack_decoder_read_request(nghttp3_qpack_decoder *decoder, } static int qpack_decoder_dbuf_overflow(nghttp3_qpack_decoder *decoder) { - size_t limit = nghttp3_max(decoder->max_concurrent_streams, 100); + size_t limit = nghttp3_max_size(decoder->max_concurrent_streams, 100); /* 10 = nghttp3_qpack_put_varint_len((1ULL << 62) - 1, 2)) */ return nghttp3_buf_len(&decoder->dbuf) > limit * 2 * 10; } int nghttp3_qpack_decoder_write_section_ack( - nghttp3_qpack_decoder *decoder, const nghttp3_qpack_stream_context *sctx) { + nghttp3_qpack_decoder *decoder, const nghttp3_qpack_stream_context *sctx) { nghttp3_buf *dbuf = &decoder->dbuf; uint8_t *p; int rv; @@ -3704,9 +3752,9 @@ int nghttp3_qpack_decoder_write_section_ack( return NGHTTP3_ERR_QPACK_FATAL; } - rv = reserve_buf_small( - dbuf, nghttp3_qpack_put_varint_len((uint64_t)sctx->stream_id, 7), - decoder->ctx.mem); + rv = reserve_buf(dbuf, + nghttp3_qpack_put_varint_len((uint64_t)sctx->stream_id, 7), + decoder->ctx.mem); if (rv != 0) { return rv; } @@ -3798,7 +3846,7 @@ int nghttp3_qpack_decoder_reconstruct_ricnt(nghttp3_qpack_decoder *decoder, } max_ents = - decoder->ctx.hard_max_dtable_capacity / NGHTTP3_QPACK_ENTRY_OVERHEAD; + decoder->ctx.hard_max_dtable_capacity / NGHTTP3_QPACK_ENTRY_OVERHEAD; full = 2 * max_ents; if (encricnt > full) { @@ -3910,7 +3958,7 @@ qpack_decoder_emit_dynamic_indexed(nghttp3_qpack_decoder *decoder, nghttp3_qpack_stream_context *sctx, nghttp3_qpack_nv *nv) { nghttp3_qpack_entry *ent = - nghttp3_qpack_context_dtable_get(&decoder->ctx, sctx->rstate.absidx); + nghttp3_qpack_context_dtable_get(&decoder->ctx, sctx->rstate.absidx); *nv = ent->nv; @@ -3942,7 +3990,7 @@ qpack_decoder_emit_static_indexed_name(nghttp3_qpack_decoder *decoder, nv->value = sctx->rstate.value; nv->token = shd->token; nv->flags = - sctx->rstate.never ? NGHTTP3_NV_FLAG_NEVER_INDEX : NGHTTP3_NV_FLAG_NONE; + sctx->rstate.never ? NGHTTP3_NV_FLAG_NEVER_INDEX : NGHTTP3_NV_FLAG_NONE; sctx->rstate.value = NULL; } @@ -3965,7 +4013,7 @@ qpack_decoder_emit_dynamic_indexed_name(nghttp3_qpack_decoder *decoder, nv->value = sctx->rstate.value; nv->token = ent->nv.token; nv->flags = - sctx->rstate.never ? NGHTTP3_NV_FLAG_NEVER_INDEX : NGHTTP3_NV_FLAG_NONE; + sctx->rstate.never ? NGHTTP3_NV_FLAG_NEVER_INDEX : NGHTTP3_NV_FLAG_NONE; nghttp3_rcbuf_incref(nv->name); @@ -4005,7 +4053,7 @@ void nghttp3_qpack_decoder_emit_literal(nghttp3_qpack_decoder *decoder, nv->value = sctx->rstate.value; nv->token = qpack_lookup_token(nv->name->base, nv->name->len); nv->flags = - sctx->rstate.never ? NGHTTP3_NV_FLAG_NEVER_INDEX : NGHTTP3_NV_FLAG_NONE; + sctx->rstate.never ? NGHTTP3_NV_FLAG_NEVER_INDEX : NGHTTP3_NV_FLAG_NONE; sctx->rstate.name = NULL; sctx->rstate.value = NULL; diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_qpack.h b/deps/ngtcp2/nghttp3/lib/nghttp3_qpack.h index 804969e14d6091..d2bb8a3581135b 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_qpack.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_qpack.h @@ -28,7 +28,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -48,6 +48,14 @@ /* NGHTTP3_QPACK_MAX_VALUELEN is the maximum (compressed) length of header value this library can decode. */ #define NGHTTP3_QPACK_MAX_VALUELEN 65536 +/* NGHTTP3_QPACK_MAX_ENCODERLEN is the maximum encoder stream length + that a decoder accepts without completely processing a single field + section. */ +#define NGHTTP3_QPACK_MAX_ENCODERLEN (128 * 1024) +/* NGHTTP3_QPACK_MAX_DECODERLEN is the maximum decoder stream length + that an encoder accepts without completely encoding a single field + section. */ +#define NGHTTP3_QPACK_MAX_DECODERLEN (4 * 1024) /* nghttp3_qpack_indexing_mode is a indexing strategy. */ typedef enum nghttp3_qpack_indexing_mode { @@ -250,6 +258,9 @@ struct nghttp3_qpack_encoder { /* last_max_dtable_update is the dynamic table size last requested. */ size_t last_max_dtable_update; + /* uninterrupted_decoderlen is the number of bytes read from decoder + stream without encoding a single field section. */ + size_t uninterrupted_decoderlen; /* flags is bitwise OR of zero or more of NGHTTP3_QPACK_ENCODER_FLAG_*. */ uint8_t flags; @@ -325,9 +336,9 @@ nghttp3_qpack_lookup_stable(const nghttp3_nv *nv, int32_t token, * blocked (or it has been blocked already). */ nghttp3_qpack_lookup_result nghttp3_qpack_encoder_lookup_dtable( - nghttp3_qpack_encoder *encoder, const nghttp3_nv *nv, int32_t token, - uint32_t hash, nghttp3_qpack_indexing_mode indexing_mode, uint64_t krcnt, - int allow_blocking); + nghttp3_qpack_encoder *encoder, const nghttp3_nv *nv, int32_t token, + uint32_t hash, nghttp3_qpack_indexing_mode indexing_mode, uint64_t krcnt, + int allow_blocking); /* * nghttp3_qpack_encoder_write_field_section_prefix writes Encoded @@ -341,8 +352,8 @@ nghttp3_qpack_lookup_result nghttp3_qpack_encoder_lookup_dtable( * Out of memory. */ int nghttp3_qpack_encoder_write_field_section_prefix( - nghttp3_qpack_encoder *encoder, nghttp3_buf *pbuf, uint64_t ricnt, - uint64_t base); + nghttp3_qpack_encoder *encoder, nghttp3_buf *pbuf, uint64_t ricnt, + uint64_t base); /* * nghttp3_qpack_encoder_write_static_indexed writes Indexed Header @@ -386,8 +397,8 @@ int nghttp3_qpack_encoder_write_dynamic_indexed(nghttp3_qpack_encoder *encoder, * Out of memory. */ int nghttp3_qpack_encoder_write_static_indexed_name( - nghttp3_qpack_encoder *encoder, nghttp3_buf *rbuf, uint64_t absidx, - const nghttp3_nv *nv); + nghttp3_qpack_encoder *encoder, nghttp3_buf *rbuf, uint64_t absidx, + const nghttp3_nv *nv); /* * nghttp3_qpack_encoder_write_dynamic_indexed writes Literal Header @@ -402,8 +413,8 @@ int nghttp3_qpack_encoder_write_static_indexed_name( * Out of memory. */ int nghttp3_qpack_encoder_write_dynamic_indexed_name( - nghttp3_qpack_encoder *encoder, nghttp3_buf *rbuf, uint64_t absidx, - uint64_t base, const nghttp3_nv *nv); + nghttp3_qpack_encoder *encoder, nghttp3_buf *rbuf, uint64_t absidx, + uint64_t base, const nghttp3_nv *nv); /* * nghttp3_qpack_encoder_write_literal writes Literal Header Field @@ -779,6 +790,9 @@ struct nghttp3_qpack_decoder { unidirectional streams which potentially receives QPACK encoded HEADER frame. */ size_t max_concurrent_streams; + /* uninterrupted_encoderlen is the number of bytes read from encoder + stream without completing a single field section. */ + size_t uninterrupted_encoderlen; }; /* @@ -880,19 +894,19 @@ int nghttp3_qpack_decoder_dtable_duplicate_add(nghttp3_qpack_decoder *decoder); int nghttp3_qpack_decoder_dtable_literal_add(nghttp3_qpack_decoder *decoder); struct nghttp3_qpack_stream_context { - /* state is a current state of reading request stream. */ - nghttp3_qpack_request_stream_state state; /* rstate is a set of intermediate state which are used to process request stream. */ nghttp3_qpack_read_state rstate; const nghttp3_mem *mem; - /* opcode is a request stream opcode being processed. */ - nghttp3_qpack_request_stream_opcode opcode; int64_t stream_id; /* ricnt is Required Insert Count to decode this header block. */ uint64_t ricnt; /* base is Base in Header Block Prefix. */ uint64_t base; + /* state is a current state of reading request stream. */ + nghttp3_qpack_request_stream_state state; + /* opcode is a request stream opcode being processed. */ + nghttp3_qpack_request_stream_opcode opcode; /* dbase_sign is the delta base sign in Header Block Prefix. */ int dbase_sign; }; @@ -991,6 +1005,6 @@ void nghttp3_qpack_decoder_emit_literal(nghttp3_qpack_decoder *decoder, * Decoder stream overflow. */ int nghttp3_qpack_decoder_write_section_ack( - nghttp3_qpack_decoder *decoder, const nghttp3_qpack_stream_context *sctx); + nghttp3_qpack_decoder *decoder, const nghttp3_qpack_stream_context *sctx); -#endif /* NGHTTP3_QPACK_H */ +#endif /* !defined(NGHTTP3_QPACK_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_qpack_huffman.c b/deps/ngtcp2/nghttp3/lib/nghttp3_qpack_huffman.c index c36a68ededd1af..3398f3f5080e60 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_qpack_huffman.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_qpack_huffman.c @@ -78,7 +78,7 @@ uint8_t *nghttp3_qpack_huffman_encode(uint8_t *dest, const uint8_t *src, } void nghttp3_qpack_huffman_decode_context_init( - nghttp3_qpack_huffman_decode_context *ctx) { + nghttp3_qpack_huffman_decode_context *ctx) { ctx->fstate = NGHTTP3_QPACK_HUFFMAN_ACCEPTED; } @@ -93,7 +93,9 @@ nghttp3_qpack_huffman_decode(nghttp3_qpack_huffman_decode_context *ctx, uint8_t c; /* We use the decoding algorithm described in - http://graphics.ics.uci.edu/pub/Prefix.pdf */ + - http://graphics.ics.uci.edu/pub/Prefix.pdf [!!! NO LONGER VALID !!!] + - https://ics.uci.edu/~dan/pubs/Prefix.pdf + - https://github.com/nghttp2/nghttp2/files/15141264/Prefix.pdf */ for (; src != end;) { c = *src++; t = &qpack_huffman_decode_table[t->fstate & 0x1ff][c >> 4]; @@ -117,6 +119,6 @@ nghttp3_qpack_huffman_decode(nghttp3_qpack_huffman_decode_context *ctx, } int nghttp3_qpack_huffman_decode_failure_state( - nghttp3_qpack_huffman_decode_context *ctx) { + nghttp3_qpack_huffman_decode_context *ctx) { return ctx->fstate == 0x100; } diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_qpack_huffman.h b/deps/ngtcp2/nghttp3/lib/nghttp3_qpack_huffman.h index fc3bc7b264a900..ab6d82a16a9134 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_qpack_huffman.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_qpack_huffman.h @@ -28,7 +28,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -75,7 +75,7 @@ typedef struct nghttp3_qpack_huffman_decode_context { extern const nghttp3_qpack_huffman_decode_node qpack_huffman_decode_table[][16]; void nghttp3_qpack_huffman_decode_context_init( - nghttp3_qpack_huffman_decode_context *ctx); + nghttp3_qpack_huffman_decode_context *ctx); /* * nghttp3_qpack_huffman_decode decodes huffman encoded byte string @@ -103,6 +103,6 @@ nghttp3_qpack_huffman_decode(nghttp3_qpack_huffman_decode_context *ctx, * indicates that huffman decoding context is in failure state. */ int nghttp3_qpack_huffman_decode_failure_state( - nghttp3_qpack_huffman_decode_context *ctx); + nghttp3_qpack_huffman_decode_context *ctx); -#endif /* NGHTTP3_QPACK_HUFFMAN_H */ +#endif /* !defined(NGHTTP3_QPACK_HUFFMAN_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_qpack_huffman_data.c b/deps/ngtcp2/nghttp3/lib/nghttp3_qpack_huffman_data.c index 0c104dbc0a0bd8..7c3c230f041211 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_qpack_huffman_data.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_qpack_huffman_data.c @@ -28,4954 +28,4954 @@ /* Generated by mkhufftbl.py */ const nghttp3_qpack_huffman_sym huffman_sym_table[] = { - {13, 0xffc00000u}, {23, 0xffffb000u}, {28, 0xfffffe20u}, {28, 0xfffffe30u}, - {28, 0xfffffe40u}, {28, 0xfffffe50u}, {28, 0xfffffe60u}, {28, 0xfffffe70u}, - {28, 0xfffffe80u}, {24, 0xffffea00u}, {30, 0xfffffff0u}, {28, 0xfffffe90u}, - {28, 0xfffffea0u}, {30, 0xfffffff4u}, {28, 0xfffffeb0u}, {28, 0xfffffec0u}, - {28, 0xfffffed0u}, {28, 0xfffffee0u}, {28, 0xfffffef0u}, {28, 0xffffff00u}, - {28, 0xffffff10u}, {28, 0xffffff20u}, {30, 0xfffffff8u}, {28, 0xffffff30u}, - {28, 0xffffff40u}, {28, 0xffffff50u}, {28, 0xffffff60u}, {28, 0xffffff70u}, - {28, 0xffffff80u}, {28, 0xffffff90u}, {28, 0xffffffa0u}, {28, 0xffffffb0u}, - {6, 0x50000000u}, {10, 0xfe000000u}, {10, 0xfe400000u}, {12, 0xffa00000u}, - {13, 0xffc80000u}, {6, 0x54000000u}, {8, 0xf8000000u}, {11, 0xff400000u}, - {10, 0xfe800000u}, {10, 0xfec00000u}, {8, 0xf9000000u}, {11, 0xff600000u}, - {8, 0xfa000000u}, {6, 0x58000000u}, {6, 0x5c000000u}, {6, 0x60000000u}, - {5, 0x0u}, {5, 0x8000000u}, {5, 0x10000000u}, {6, 0x64000000u}, - {6, 0x68000000u}, {6, 0x6c000000u}, {6, 0x70000000u}, {6, 0x74000000u}, - {6, 0x78000000u}, {6, 0x7c000000u}, {7, 0xb8000000u}, {8, 0xfb000000u}, - {15, 0xfff80000u}, {6, 0x80000000u}, {12, 0xffb00000u}, {10, 0xff000000u}, - {13, 0xffd00000u}, {6, 0x84000000u}, {7, 0xba000000u}, {7, 0xbc000000u}, - {7, 0xbe000000u}, {7, 0xc0000000u}, {7, 0xc2000000u}, {7, 0xc4000000u}, - {7, 0xc6000000u}, {7, 0xc8000000u}, {7, 0xca000000u}, {7, 0xcc000000u}, - {7, 0xce000000u}, {7, 0xd0000000u}, {7, 0xd2000000u}, {7, 0xd4000000u}, - {7, 0xd6000000u}, {7, 0xd8000000u}, {7, 0xda000000u}, {7, 0xdc000000u}, - {7, 0xde000000u}, {7, 0xe0000000u}, {7, 0xe2000000u}, {7, 0xe4000000u}, - {8, 0xfc000000u}, {7, 0xe6000000u}, {8, 0xfd000000u}, {13, 0xffd80000u}, - {19, 0xfffe0000u}, {13, 0xffe00000u}, {14, 0xfff00000u}, {6, 0x88000000u}, - {15, 0xfffa0000u}, {5, 0x18000000u}, {6, 0x8c000000u}, {5, 0x20000000u}, - {6, 0x90000000u}, {5, 0x28000000u}, {6, 0x94000000u}, {6, 0x98000000u}, - {6, 0x9c000000u}, {5, 0x30000000u}, {7, 0xe8000000u}, {7, 0xea000000u}, - {6, 0xa0000000u}, {6, 0xa4000000u}, {6, 0xa8000000u}, {5, 0x38000000u}, - {6, 0xac000000u}, {7, 0xec000000u}, {6, 0xb0000000u}, {5, 0x40000000u}, - {5, 0x48000000u}, {6, 0xb4000000u}, {7, 0xee000000u}, {7, 0xf0000000u}, - {7, 0xf2000000u}, {7, 0xf4000000u}, {7, 0xf6000000u}, {15, 0xfffc0000u}, - {11, 0xff800000u}, {14, 0xfff40000u}, {13, 0xffe80000u}, {28, 0xffffffc0u}, - {20, 0xfffe6000u}, {22, 0xffff4800u}, {20, 0xfffe7000u}, {20, 0xfffe8000u}, - {22, 0xffff4c00u}, {22, 0xffff5000u}, {22, 0xffff5400u}, {23, 0xffffb200u}, - {22, 0xffff5800u}, {23, 0xffffb400u}, {23, 0xffffb600u}, {23, 0xffffb800u}, - {23, 0xffffba00u}, {23, 0xffffbc00u}, {24, 0xffffeb00u}, {23, 0xffffbe00u}, - {24, 0xffffec00u}, {24, 0xffffed00u}, {22, 0xffff5c00u}, {23, 0xffffc000u}, - {24, 0xffffee00u}, {23, 0xffffc200u}, {23, 0xffffc400u}, {23, 0xffffc600u}, - {23, 0xffffc800u}, {21, 0xfffee000u}, {22, 0xffff6000u}, {23, 0xffffca00u}, - {22, 0xffff6400u}, {23, 0xffffcc00u}, {23, 0xffffce00u}, {24, 0xffffef00u}, - {22, 0xffff6800u}, {21, 0xfffee800u}, {20, 0xfffe9000u}, {22, 0xffff6c00u}, - {22, 0xffff7000u}, {23, 0xffffd000u}, {23, 0xffffd200u}, {21, 0xfffef000u}, - {23, 0xffffd400u}, {22, 0xffff7400u}, {22, 0xffff7800u}, {24, 0xfffff000u}, - {21, 0xfffef800u}, {22, 0xffff7c00u}, {23, 0xffffd600u}, {23, 0xffffd800u}, - {21, 0xffff0000u}, {21, 0xffff0800u}, {22, 0xffff8000u}, {21, 0xffff1000u}, - {23, 0xffffda00u}, {22, 0xffff8400u}, {23, 0xffffdc00u}, {23, 0xffffde00u}, - {20, 0xfffea000u}, {22, 0xffff8800u}, {22, 0xffff8c00u}, {22, 0xffff9000u}, - {23, 0xffffe000u}, {22, 0xffff9400u}, {22, 0xffff9800u}, {23, 0xffffe200u}, - {26, 0xfffff800u}, {26, 0xfffff840u}, {20, 0xfffeb000u}, {19, 0xfffe2000u}, - {22, 0xffff9c00u}, {23, 0xffffe400u}, {22, 0xffffa000u}, {25, 0xfffff600u}, - {26, 0xfffff880u}, {26, 0xfffff8c0u}, {26, 0xfffff900u}, {27, 0xfffffbc0u}, - {27, 0xfffffbe0u}, {26, 0xfffff940u}, {24, 0xfffff100u}, {25, 0xfffff680u}, - {19, 0xfffe4000u}, {21, 0xffff1800u}, {26, 0xfffff980u}, {27, 0xfffffc00u}, - {27, 0xfffffc20u}, {26, 0xfffff9c0u}, {27, 0xfffffc40u}, {24, 0xfffff200u}, - {21, 0xffff2000u}, {21, 0xffff2800u}, {26, 0xfffffa00u}, {26, 0xfffffa40u}, - {28, 0xffffffd0u}, {27, 0xfffffc60u}, {27, 0xfffffc80u}, {27, 0xfffffca0u}, - {20, 0xfffec000u}, {24, 0xfffff300u}, {20, 0xfffed000u}, {21, 0xffff3000u}, - {22, 0xffffa400u}, {21, 0xffff3800u}, {21, 0xffff4000u}, {23, 0xffffe600u}, - {22, 0xffffa800u}, {22, 0xffffac00u}, {25, 0xfffff700u}, {25, 0xfffff780u}, - {24, 0xfffff400u}, {24, 0xfffff500u}, {26, 0xfffffa80u}, {23, 0xffffe800u}, - {26, 0xfffffac0u}, {27, 0xfffffcc0u}, {26, 0xfffffb00u}, {26, 0xfffffb40u}, - {27, 0xfffffce0u}, {27, 0xfffffd00u}, {27, 0xfffffd20u}, {27, 0xfffffd40u}, - {27, 0xfffffd60u}, {28, 0xffffffe0u}, {27, 0xfffffd80u}, {27, 0xfffffda0u}, - {27, 0xfffffdc0u}, {27, 0xfffffde0u}, {27, 0xfffffe00u}, {26, 0xfffffb80u}, - {30, 0xfffffffcu}}; + {13, 0xffc00000u}, {23, 0xffffb000u}, {28, 0xfffffe20u}, {28, 0xfffffe30u}, + {28, 0xfffffe40u}, {28, 0xfffffe50u}, {28, 0xfffffe60u}, {28, 0xfffffe70u}, + {28, 0xfffffe80u}, {24, 0xffffea00u}, {30, 0xfffffff0u}, {28, 0xfffffe90u}, + {28, 0xfffffea0u}, {30, 0xfffffff4u}, {28, 0xfffffeb0u}, {28, 0xfffffec0u}, + {28, 0xfffffed0u}, {28, 0xfffffee0u}, {28, 0xfffffef0u}, {28, 0xffffff00u}, + {28, 0xffffff10u}, {28, 0xffffff20u}, {30, 0xfffffff8u}, {28, 0xffffff30u}, + {28, 0xffffff40u}, {28, 0xffffff50u}, {28, 0xffffff60u}, {28, 0xffffff70u}, + {28, 0xffffff80u}, {28, 0xffffff90u}, {28, 0xffffffa0u}, {28, 0xffffffb0u}, + {6, 0x50000000u}, {10, 0xfe000000u}, {10, 0xfe400000u}, {12, 0xffa00000u}, + {13, 0xffc80000u}, {6, 0x54000000u}, {8, 0xf8000000u}, {11, 0xff400000u}, + {10, 0xfe800000u}, {10, 0xfec00000u}, {8, 0xf9000000u}, {11, 0xff600000u}, + {8, 0xfa000000u}, {6, 0x58000000u}, {6, 0x5c000000u}, {6, 0x60000000u}, + {5, 0x0u}, {5, 0x8000000u}, {5, 0x10000000u}, {6, 0x64000000u}, + {6, 0x68000000u}, {6, 0x6c000000u}, {6, 0x70000000u}, {6, 0x74000000u}, + {6, 0x78000000u}, {6, 0x7c000000u}, {7, 0xb8000000u}, {8, 0xfb000000u}, + {15, 0xfff80000u}, {6, 0x80000000u}, {12, 0xffb00000u}, {10, 0xff000000u}, + {13, 0xffd00000u}, {6, 0x84000000u}, {7, 0xba000000u}, {7, 0xbc000000u}, + {7, 0xbe000000u}, {7, 0xc0000000u}, {7, 0xc2000000u}, {7, 0xc4000000u}, + {7, 0xc6000000u}, {7, 0xc8000000u}, {7, 0xca000000u}, {7, 0xcc000000u}, + {7, 0xce000000u}, {7, 0xd0000000u}, {7, 0xd2000000u}, {7, 0xd4000000u}, + {7, 0xd6000000u}, {7, 0xd8000000u}, {7, 0xda000000u}, {7, 0xdc000000u}, + {7, 0xde000000u}, {7, 0xe0000000u}, {7, 0xe2000000u}, {7, 0xe4000000u}, + {8, 0xfc000000u}, {7, 0xe6000000u}, {8, 0xfd000000u}, {13, 0xffd80000u}, + {19, 0xfffe0000u}, {13, 0xffe00000u}, {14, 0xfff00000u}, {6, 0x88000000u}, + {15, 0xfffa0000u}, {5, 0x18000000u}, {6, 0x8c000000u}, {5, 0x20000000u}, + {6, 0x90000000u}, {5, 0x28000000u}, {6, 0x94000000u}, {6, 0x98000000u}, + {6, 0x9c000000u}, {5, 0x30000000u}, {7, 0xe8000000u}, {7, 0xea000000u}, + {6, 0xa0000000u}, {6, 0xa4000000u}, {6, 0xa8000000u}, {5, 0x38000000u}, + {6, 0xac000000u}, {7, 0xec000000u}, {6, 0xb0000000u}, {5, 0x40000000u}, + {5, 0x48000000u}, {6, 0xb4000000u}, {7, 0xee000000u}, {7, 0xf0000000u}, + {7, 0xf2000000u}, {7, 0xf4000000u}, {7, 0xf6000000u}, {15, 0xfffc0000u}, + {11, 0xff800000u}, {14, 0xfff40000u}, {13, 0xffe80000u}, {28, 0xffffffc0u}, + {20, 0xfffe6000u}, {22, 0xffff4800u}, {20, 0xfffe7000u}, {20, 0xfffe8000u}, + {22, 0xffff4c00u}, {22, 0xffff5000u}, {22, 0xffff5400u}, {23, 0xffffb200u}, + {22, 0xffff5800u}, {23, 0xffffb400u}, {23, 0xffffb600u}, {23, 0xffffb800u}, + {23, 0xffffba00u}, {23, 0xffffbc00u}, {24, 0xffffeb00u}, {23, 0xffffbe00u}, + {24, 0xffffec00u}, {24, 0xffffed00u}, {22, 0xffff5c00u}, {23, 0xffffc000u}, + {24, 0xffffee00u}, {23, 0xffffc200u}, {23, 0xffffc400u}, {23, 0xffffc600u}, + {23, 0xffffc800u}, {21, 0xfffee000u}, {22, 0xffff6000u}, {23, 0xffffca00u}, + {22, 0xffff6400u}, {23, 0xffffcc00u}, {23, 0xffffce00u}, {24, 0xffffef00u}, + {22, 0xffff6800u}, {21, 0xfffee800u}, {20, 0xfffe9000u}, {22, 0xffff6c00u}, + {22, 0xffff7000u}, {23, 0xffffd000u}, {23, 0xffffd200u}, {21, 0xfffef000u}, + {23, 0xffffd400u}, {22, 0xffff7400u}, {22, 0xffff7800u}, {24, 0xfffff000u}, + {21, 0xfffef800u}, {22, 0xffff7c00u}, {23, 0xffffd600u}, {23, 0xffffd800u}, + {21, 0xffff0000u}, {21, 0xffff0800u}, {22, 0xffff8000u}, {21, 0xffff1000u}, + {23, 0xffffda00u}, {22, 0xffff8400u}, {23, 0xffffdc00u}, {23, 0xffffde00u}, + {20, 0xfffea000u}, {22, 0xffff8800u}, {22, 0xffff8c00u}, {22, 0xffff9000u}, + {23, 0xffffe000u}, {22, 0xffff9400u}, {22, 0xffff9800u}, {23, 0xffffe200u}, + {26, 0xfffff800u}, {26, 0xfffff840u}, {20, 0xfffeb000u}, {19, 0xfffe2000u}, + {22, 0xffff9c00u}, {23, 0xffffe400u}, {22, 0xffffa000u}, {25, 0xfffff600u}, + {26, 0xfffff880u}, {26, 0xfffff8c0u}, {26, 0xfffff900u}, {27, 0xfffffbc0u}, + {27, 0xfffffbe0u}, {26, 0xfffff940u}, {24, 0xfffff100u}, {25, 0xfffff680u}, + {19, 0xfffe4000u}, {21, 0xffff1800u}, {26, 0xfffff980u}, {27, 0xfffffc00u}, + {27, 0xfffffc20u}, {26, 0xfffff9c0u}, {27, 0xfffffc40u}, {24, 0xfffff200u}, + {21, 0xffff2000u}, {21, 0xffff2800u}, {26, 0xfffffa00u}, {26, 0xfffffa40u}, + {28, 0xffffffd0u}, {27, 0xfffffc60u}, {27, 0xfffffc80u}, {27, 0xfffffca0u}, + {20, 0xfffec000u}, {24, 0xfffff300u}, {20, 0xfffed000u}, {21, 0xffff3000u}, + {22, 0xffffa400u}, {21, 0xffff3800u}, {21, 0xffff4000u}, {23, 0xffffe600u}, + {22, 0xffffa800u}, {22, 0xffffac00u}, {25, 0xfffff700u}, {25, 0xfffff780u}, + {24, 0xfffff400u}, {24, 0xfffff500u}, {26, 0xfffffa80u}, {23, 0xffffe800u}, + {26, 0xfffffac0u}, {27, 0xfffffcc0u}, {26, 0xfffffb00u}, {26, 0xfffffb40u}, + {27, 0xfffffce0u}, {27, 0xfffffd00u}, {27, 0xfffffd20u}, {27, 0xfffffd40u}, + {27, 0xfffffd60u}, {28, 0xffffffe0u}, {27, 0xfffffd80u}, {27, 0xfffffda0u}, + {27, 0xfffffdc0u}, {27, 0xfffffde0u}, {27, 0xfffffe00u}, {26, 0xfffffb80u}, + {30, 0xfffffffcu}}; const nghttp3_qpack_huffman_decode_node qpack_huffman_decode_table[][16] = { - /* 0 */ - { - {0x04, 0}, - {0x05, 0}, - {0x07, 0}, - {0x08, 0}, - {0x0b, 0}, - {0x0c, 0}, - {0x10, 0}, - {0x13, 0}, - {0x19, 0}, - {0x1c, 0}, - {0x20, 0}, - {0x23, 0}, - {0x2a, 0}, - {0x31, 0}, - {0x39, 0}, - {0x4040, 0}, - }, - /* 1 */ - { - {0xc000, 48}, - {0xc000, 49}, - {0xc000, 50}, - {0xc000, 97}, - {0xc000, 99}, - {0xc000, 101}, - {0xc000, 105}, - {0xc000, 111}, - {0xc000, 115}, - {0xc000, 116}, - {0x0d, 0}, - {0x0e, 0}, - {0x11, 0}, - {0x12, 0}, - {0x14, 0}, - {0x15, 0}, - }, - /* 2 */ - { - {0x8001, 48}, - {0xc016, 48}, - {0x8001, 49}, - {0xc016, 49}, - {0x8001, 50}, - {0xc016, 50}, - {0x8001, 97}, - {0xc016, 97}, - {0x8001, 99}, - {0xc016, 99}, - {0x8001, 101}, - {0xc016, 101}, - {0x8001, 105}, - {0xc016, 105}, - {0x8001, 111}, - {0xc016, 111}, - }, - /* 3 */ - { - {0x8002, 48}, - {0x8009, 48}, - {0x8017, 48}, - {0xc028, 48}, - {0x8002, 49}, - {0x8009, 49}, - {0x8017, 49}, - {0xc028, 49}, - {0x8002, 50}, - {0x8009, 50}, - {0x8017, 50}, - {0xc028, 50}, - {0x8002, 97}, - {0x8009, 97}, - {0x8017, 97}, - {0xc028, 97}, - }, - /* 4 */ - { - {0x8003, 48}, - {0x8006, 48}, - {0x800a, 48}, - {0x800f, 48}, - {0x8018, 48}, - {0x801f, 48}, - {0x8029, 48}, - {0xc038, 48}, - {0x8003, 49}, - {0x8006, 49}, - {0x800a, 49}, - {0x800f, 49}, - {0x8018, 49}, - {0x801f, 49}, - {0x8029, 49}, - {0xc038, 49}, - }, - /* 5 */ - { - {0x8003, 50}, - {0x8006, 50}, - {0x800a, 50}, - {0x800f, 50}, - {0x8018, 50}, - {0x801f, 50}, - {0x8029, 50}, - {0xc038, 50}, - {0x8003, 97}, - {0x8006, 97}, - {0x800a, 97}, - {0x800f, 97}, - {0x8018, 97}, - {0x801f, 97}, - {0x8029, 97}, - {0xc038, 97}, - }, - /* 6 */ - { - {0x8002, 99}, - {0x8009, 99}, - {0x8017, 99}, - {0xc028, 99}, - {0x8002, 101}, - {0x8009, 101}, - {0x8017, 101}, - {0xc028, 101}, - {0x8002, 105}, - {0x8009, 105}, - {0x8017, 105}, - {0xc028, 105}, - {0x8002, 111}, - {0x8009, 111}, - {0x8017, 111}, - {0xc028, 111}, - }, - /* 7 */ - { - {0x8003, 99}, - {0x8006, 99}, - {0x800a, 99}, - {0x800f, 99}, - {0x8018, 99}, - {0x801f, 99}, - {0x8029, 99}, - {0xc038, 99}, - {0x8003, 101}, - {0x8006, 101}, - {0x800a, 101}, - {0x800f, 101}, - {0x8018, 101}, - {0x801f, 101}, - {0x8029, 101}, - {0xc038, 101}, - }, - /* 8 */ - { - {0x8003, 105}, - {0x8006, 105}, - {0x800a, 105}, - {0x800f, 105}, - {0x8018, 105}, - {0x801f, 105}, - {0x8029, 105}, - {0xc038, 105}, - {0x8003, 111}, - {0x8006, 111}, - {0x800a, 111}, - {0x800f, 111}, - {0x8018, 111}, - {0x801f, 111}, - {0x8029, 111}, - {0xc038, 111}, - }, - /* 9 */ - { - {0x8001, 115}, - {0xc016, 115}, - {0x8001, 116}, - {0xc016, 116}, - {0xc000, 32}, - {0xc000, 37}, - {0xc000, 45}, - {0xc000, 46}, - {0xc000, 47}, - {0xc000, 51}, - {0xc000, 52}, - {0xc000, 53}, - {0xc000, 54}, - {0xc000, 55}, - {0xc000, 56}, - {0xc000, 57}, - }, - /* 10 */ - { - {0x8002, 115}, - {0x8009, 115}, - {0x8017, 115}, - {0xc028, 115}, - {0x8002, 116}, - {0x8009, 116}, - {0x8017, 116}, - {0xc028, 116}, - {0x8001, 32}, - {0xc016, 32}, - {0x8001, 37}, - {0xc016, 37}, - {0x8001, 45}, - {0xc016, 45}, - {0x8001, 46}, - {0xc016, 46}, - }, - /* 11 */ - { - {0x8003, 115}, - {0x8006, 115}, - {0x800a, 115}, - {0x800f, 115}, - {0x8018, 115}, - {0x801f, 115}, - {0x8029, 115}, - {0xc038, 115}, - {0x8003, 116}, - {0x8006, 116}, - {0x800a, 116}, - {0x800f, 116}, - {0x8018, 116}, - {0x801f, 116}, - {0x8029, 116}, - {0xc038, 116}, - }, - /* 12 */ - { - {0x8002, 32}, - {0x8009, 32}, - {0x8017, 32}, - {0xc028, 32}, - {0x8002, 37}, - {0x8009, 37}, - {0x8017, 37}, - {0xc028, 37}, - {0x8002, 45}, - {0x8009, 45}, - {0x8017, 45}, - {0xc028, 45}, - {0x8002, 46}, - {0x8009, 46}, - {0x8017, 46}, - {0xc028, 46}, - }, - /* 13 */ - { - {0x8003, 32}, - {0x8006, 32}, - {0x800a, 32}, - {0x800f, 32}, - {0x8018, 32}, - {0x801f, 32}, - {0x8029, 32}, - {0xc038, 32}, - {0x8003, 37}, - {0x8006, 37}, - {0x800a, 37}, - {0x800f, 37}, - {0x8018, 37}, - {0x801f, 37}, - {0x8029, 37}, - {0xc038, 37}, - }, - /* 14 */ - { - {0x8003, 45}, - {0x8006, 45}, - {0x800a, 45}, - {0x800f, 45}, - {0x8018, 45}, - {0x801f, 45}, - {0x8029, 45}, - {0xc038, 45}, - {0x8003, 46}, - {0x8006, 46}, - {0x800a, 46}, - {0x800f, 46}, - {0x8018, 46}, - {0x801f, 46}, - {0x8029, 46}, - {0xc038, 46}, - }, - /* 15 */ - { - {0x8001, 47}, - {0xc016, 47}, - {0x8001, 51}, - {0xc016, 51}, - {0x8001, 52}, - {0xc016, 52}, - {0x8001, 53}, - {0xc016, 53}, - {0x8001, 54}, - {0xc016, 54}, - {0x8001, 55}, - {0xc016, 55}, - {0x8001, 56}, - {0xc016, 56}, - {0x8001, 57}, - {0xc016, 57}, - }, - /* 16 */ - { - {0x8002, 47}, - {0x8009, 47}, - {0x8017, 47}, - {0xc028, 47}, - {0x8002, 51}, - {0x8009, 51}, - {0x8017, 51}, - {0xc028, 51}, - {0x8002, 52}, - {0x8009, 52}, - {0x8017, 52}, - {0xc028, 52}, - {0x8002, 53}, - {0x8009, 53}, - {0x8017, 53}, - {0xc028, 53}, - }, - /* 17 */ - { - {0x8003, 47}, - {0x8006, 47}, - {0x800a, 47}, - {0x800f, 47}, - {0x8018, 47}, - {0x801f, 47}, - {0x8029, 47}, - {0xc038, 47}, - {0x8003, 51}, - {0x8006, 51}, - {0x800a, 51}, - {0x800f, 51}, - {0x8018, 51}, - {0x801f, 51}, - {0x8029, 51}, - {0xc038, 51}, - }, - /* 18 */ - { - {0x8003, 52}, - {0x8006, 52}, - {0x800a, 52}, - {0x800f, 52}, - {0x8018, 52}, - {0x801f, 52}, - {0x8029, 52}, - {0xc038, 52}, - {0x8003, 53}, - {0x8006, 53}, - {0x800a, 53}, - {0x800f, 53}, - {0x8018, 53}, - {0x801f, 53}, - {0x8029, 53}, - {0xc038, 53}, - }, - /* 19 */ - { - {0x8002, 54}, - {0x8009, 54}, - {0x8017, 54}, - {0xc028, 54}, - {0x8002, 55}, - {0x8009, 55}, - {0x8017, 55}, - {0xc028, 55}, - {0x8002, 56}, - {0x8009, 56}, - {0x8017, 56}, - {0xc028, 56}, - {0x8002, 57}, - {0x8009, 57}, - {0x8017, 57}, - {0xc028, 57}, - }, - /* 20 */ - { - {0x8003, 54}, - {0x8006, 54}, - {0x800a, 54}, - {0x800f, 54}, - {0x8018, 54}, - {0x801f, 54}, - {0x8029, 54}, - {0xc038, 54}, - {0x8003, 55}, - {0x8006, 55}, - {0x800a, 55}, - {0x800f, 55}, - {0x8018, 55}, - {0x801f, 55}, - {0x8029, 55}, - {0xc038, 55}, - }, - /* 21 */ - { - {0x8003, 56}, - {0x8006, 56}, - {0x800a, 56}, - {0x800f, 56}, - {0x8018, 56}, - {0x801f, 56}, - {0x8029, 56}, - {0xc038, 56}, - {0x8003, 57}, - {0x8006, 57}, - {0x800a, 57}, - {0x800f, 57}, - {0x8018, 57}, - {0x801f, 57}, - {0x8029, 57}, - {0xc038, 57}, - }, - /* 22 */ - { - {0x1a, 0}, - {0x1b, 0}, - {0x1d, 0}, - {0x1e, 0}, - {0x21, 0}, - {0x22, 0}, - {0x24, 0}, - {0x25, 0}, - {0x2b, 0}, - {0x2e, 0}, - {0x32, 0}, - {0x35, 0}, - {0x3a, 0}, - {0x3d, 0}, - {0x41, 0}, - {0x4044, 0}, - }, - /* 23 */ - { - {0xc000, 61}, - {0xc000, 65}, - {0xc000, 95}, - {0xc000, 98}, - {0xc000, 100}, - {0xc000, 102}, - {0xc000, 103}, - {0xc000, 104}, - {0xc000, 108}, - {0xc000, 109}, - {0xc000, 110}, - {0xc000, 112}, - {0xc000, 114}, - {0xc000, 117}, - {0x26, 0}, - {0x27, 0}, - }, - /* 24 */ - { - {0x8001, 61}, - {0xc016, 61}, - {0x8001, 65}, - {0xc016, 65}, - {0x8001, 95}, - {0xc016, 95}, - {0x8001, 98}, - {0xc016, 98}, - {0x8001, 100}, - {0xc016, 100}, - {0x8001, 102}, - {0xc016, 102}, - {0x8001, 103}, - {0xc016, 103}, - {0x8001, 104}, - {0xc016, 104}, - }, - /* 25 */ - { - {0x8002, 61}, - {0x8009, 61}, - {0x8017, 61}, - {0xc028, 61}, - {0x8002, 65}, - {0x8009, 65}, - {0x8017, 65}, - {0xc028, 65}, - {0x8002, 95}, - {0x8009, 95}, - {0x8017, 95}, - {0xc028, 95}, - {0x8002, 98}, - {0x8009, 98}, - {0x8017, 98}, - {0xc028, 98}, - }, - /* 26 */ - { - {0x8003, 61}, - {0x8006, 61}, - {0x800a, 61}, - {0x800f, 61}, - {0x8018, 61}, - {0x801f, 61}, - {0x8029, 61}, - {0xc038, 61}, - {0x8003, 65}, - {0x8006, 65}, - {0x800a, 65}, - {0x800f, 65}, - {0x8018, 65}, - {0x801f, 65}, - {0x8029, 65}, - {0xc038, 65}, - }, - /* 27 */ - { - {0x8003, 95}, - {0x8006, 95}, - {0x800a, 95}, - {0x800f, 95}, - {0x8018, 95}, - {0x801f, 95}, - {0x8029, 95}, - {0xc038, 95}, - {0x8003, 98}, - {0x8006, 98}, - {0x800a, 98}, - {0x800f, 98}, - {0x8018, 98}, - {0x801f, 98}, - {0x8029, 98}, - {0xc038, 98}, - }, - /* 28 */ - { - {0x8002, 100}, - {0x8009, 100}, - {0x8017, 100}, - {0xc028, 100}, - {0x8002, 102}, - {0x8009, 102}, - {0x8017, 102}, - {0xc028, 102}, - {0x8002, 103}, - {0x8009, 103}, - {0x8017, 103}, - {0xc028, 103}, - {0x8002, 104}, - {0x8009, 104}, - {0x8017, 104}, - {0xc028, 104}, - }, - /* 29 */ - { - {0x8003, 100}, - {0x8006, 100}, - {0x800a, 100}, - {0x800f, 100}, - {0x8018, 100}, - {0x801f, 100}, - {0x8029, 100}, - {0xc038, 100}, - {0x8003, 102}, - {0x8006, 102}, - {0x800a, 102}, - {0x800f, 102}, - {0x8018, 102}, - {0x801f, 102}, - {0x8029, 102}, - {0xc038, 102}, - }, - /* 30 */ - { - {0x8003, 103}, - {0x8006, 103}, - {0x800a, 103}, - {0x800f, 103}, - {0x8018, 103}, - {0x801f, 103}, - {0x8029, 103}, - {0xc038, 103}, - {0x8003, 104}, - {0x8006, 104}, - {0x800a, 104}, - {0x800f, 104}, - {0x8018, 104}, - {0x801f, 104}, - {0x8029, 104}, - {0xc038, 104}, - }, - /* 31 */ - { - {0x8001, 108}, - {0xc016, 108}, - {0x8001, 109}, - {0xc016, 109}, - {0x8001, 110}, - {0xc016, 110}, - {0x8001, 112}, - {0xc016, 112}, - {0x8001, 114}, - {0xc016, 114}, - {0x8001, 117}, - {0xc016, 117}, - {0xc000, 58}, - {0xc000, 66}, - {0xc000, 67}, - {0xc000, 68}, - }, - /* 32 */ - { - {0x8002, 108}, - {0x8009, 108}, - {0x8017, 108}, - {0xc028, 108}, - {0x8002, 109}, - {0x8009, 109}, - {0x8017, 109}, - {0xc028, 109}, - {0x8002, 110}, - {0x8009, 110}, - {0x8017, 110}, - {0xc028, 110}, - {0x8002, 112}, - {0x8009, 112}, - {0x8017, 112}, - {0xc028, 112}, - }, - /* 33 */ - { - {0x8003, 108}, - {0x8006, 108}, - {0x800a, 108}, - {0x800f, 108}, - {0x8018, 108}, - {0x801f, 108}, - {0x8029, 108}, - {0xc038, 108}, - {0x8003, 109}, - {0x8006, 109}, - {0x800a, 109}, - {0x800f, 109}, - {0x8018, 109}, - {0x801f, 109}, - {0x8029, 109}, - {0xc038, 109}, - }, - /* 34 */ - { - {0x8003, 110}, - {0x8006, 110}, - {0x800a, 110}, - {0x800f, 110}, - {0x8018, 110}, - {0x801f, 110}, - {0x8029, 110}, - {0xc038, 110}, - {0x8003, 112}, - {0x8006, 112}, - {0x800a, 112}, - {0x800f, 112}, - {0x8018, 112}, - {0x801f, 112}, - {0x8029, 112}, - {0xc038, 112}, - }, - /* 35 */ - { - {0x8002, 114}, - {0x8009, 114}, - {0x8017, 114}, - {0xc028, 114}, - {0x8002, 117}, - {0x8009, 117}, - {0x8017, 117}, - {0xc028, 117}, - {0x8001, 58}, - {0xc016, 58}, - {0x8001, 66}, - {0xc016, 66}, - {0x8001, 67}, - {0xc016, 67}, - {0x8001, 68}, - {0xc016, 68}, - }, - /* 36 */ - { - {0x8003, 114}, - {0x8006, 114}, - {0x800a, 114}, - {0x800f, 114}, - {0x8018, 114}, - {0x801f, 114}, - {0x8029, 114}, - {0xc038, 114}, - {0x8003, 117}, - {0x8006, 117}, - {0x800a, 117}, - {0x800f, 117}, - {0x8018, 117}, - {0x801f, 117}, - {0x8029, 117}, - {0xc038, 117}, - }, - /* 37 */ - { - {0x8002, 58}, - {0x8009, 58}, - {0x8017, 58}, - {0xc028, 58}, - {0x8002, 66}, - {0x8009, 66}, - {0x8017, 66}, - {0xc028, 66}, - {0x8002, 67}, - {0x8009, 67}, - {0x8017, 67}, - {0xc028, 67}, - {0x8002, 68}, - {0x8009, 68}, - {0x8017, 68}, - {0xc028, 68}, - }, - /* 38 */ - { - {0x8003, 58}, - {0x8006, 58}, - {0x800a, 58}, - {0x800f, 58}, - {0x8018, 58}, - {0x801f, 58}, - {0x8029, 58}, - {0xc038, 58}, - {0x8003, 66}, - {0x8006, 66}, - {0x800a, 66}, - {0x800f, 66}, - {0x8018, 66}, - {0x801f, 66}, - {0x8029, 66}, - {0xc038, 66}, - }, - /* 39 */ - { - {0x8003, 67}, - {0x8006, 67}, - {0x800a, 67}, - {0x800f, 67}, - {0x8018, 67}, - {0x801f, 67}, - {0x8029, 67}, - {0xc038, 67}, - {0x8003, 68}, - {0x8006, 68}, - {0x800a, 68}, - {0x800f, 68}, - {0x8018, 68}, - {0x801f, 68}, - {0x8029, 68}, - {0xc038, 68}, - }, - /* 40 */ - { - {0x2c, 0}, - {0x2d, 0}, - {0x2f, 0}, - {0x30, 0}, - {0x33, 0}, - {0x34, 0}, - {0x36, 0}, - {0x37, 0}, - {0x3b, 0}, - {0x3c, 0}, - {0x3e, 0}, - {0x3f, 0}, - {0x42, 0}, - {0x43, 0}, - {0x45, 0}, - {0x4048, 0}, - }, - /* 41 */ - { - {0xc000, 69}, - {0xc000, 70}, - {0xc000, 71}, - {0xc000, 72}, - {0xc000, 73}, - {0xc000, 74}, - {0xc000, 75}, - {0xc000, 76}, - {0xc000, 77}, - {0xc000, 78}, - {0xc000, 79}, - {0xc000, 80}, - {0xc000, 81}, - {0xc000, 82}, - {0xc000, 83}, - {0xc000, 84}, - }, - /* 42 */ - { - {0x8001, 69}, - {0xc016, 69}, - {0x8001, 70}, - {0xc016, 70}, - {0x8001, 71}, - {0xc016, 71}, - {0x8001, 72}, - {0xc016, 72}, - {0x8001, 73}, - {0xc016, 73}, - {0x8001, 74}, - {0xc016, 74}, - {0x8001, 75}, - {0xc016, 75}, - {0x8001, 76}, - {0xc016, 76}, - }, - /* 43 */ - { - {0x8002, 69}, - {0x8009, 69}, - {0x8017, 69}, - {0xc028, 69}, - {0x8002, 70}, - {0x8009, 70}, - {0x8017, 70}, - {0xc028, 70}, - {0x8002, 71}, - {0x8009, 71}, - {0x8017, 71}, - {0xc028, 71}, - {0x8002, 72}, - {0x8009, 72}, - {0x8017, 72}, - {0xc028, 72}, - }, - /* 44 */ - { - {0x8003, 69}, - {0x8006, 69}, - {0x800a, 69}, - {0x800f, 69}, - {0x8018, 69}, - {0x801f, 69}, - {0x8029, 69}, - {0xc038, 69}, - {0x8003, 70}, - {0x8006, 70}, - {0x800a, 70}, - {0x800f, 70}, - {0x8018, 70}, - {0x801f, 70}, - {0x8029, 70}, - {0xc038, 70}, - }, - /* 45 */ - { - {0x8003, 71}, - {0x8006, 71}, - {0x800a, 71}, - {0x800f, 71}, - {0x8018, 71}, - {0x801f, 71}, - {0x8029, 71}, - {0xc038, 71}, - {0x8003, 72}, - {0x8006, 72}, - {0x800a, 72}, - {0x800f, 72}, - {0x8018, 72}, - {0x801f, 72}, - {0x8029, 72}, - {0xc038, 72}, - }, - /* 46 */ - { - {0x8002, 73}, - {0x8009, 73}, - {0x8017, 73}, - {0xc028, 73}, - {0x8002, 74}, - {0x8009, 74}, - {0x8017, 74}, - {0xc028, 74}, - {0x8002, 75}, - {0x8009, 75}, - {0x8017, 75}, - {0xc028, 75}, - {0x8002, 76}, - {0x8009, 76}, - {0x8017, 76}, - {0xc028, 76}, - }, - /* 47 */ - { - {0x8003, 73}, - {0x8006, 73}, - {0x800a, 73}, - {0x800f, 73}, - {0x8018, 73}, - {0x801f, 73}, - {0x8029, 73}, - {0xc038, 73}, - {0x8003, 74}, - {0x8006, 74}, - {0x800a, 74}, - {0x800f, 74}, - {0x8018, 74}, - {0x801f, 74}, - {0x8029, 74}, - {0xc038, 74}, - }, - /* 48 */ - { - {0x8003, 75}, - {0x8006, 75}, - {0x800a, 75}, - {0x800f, 75}, - {0x8018, 75}, - {0x801f, 75}, - {0x8029, 75}, - {0xc038, 75}, - {0x8003, 76}, - {0x8006, 76}, - {0x800a, 76}, - {0x800f, 76}, - {0x8018, 76}, - {0x801f, 76}, - {0x8029, 76}, - {0xc038, 76}, - }, - /* 49 */ - { - {0x8001, 77}, - {0xc016, 77}, - {0x8001, 78}, - {0xc016, 78}, - {0x8001, 79}, - {0xc016, 79}, - {0x8001, 80}, - {0xc016, 80}, - {0x8001, 81}, - {0xc016, 81}, - {0x8001, 82}, - {0xc016, 82}, - {0x8001, 83}, - {0xc016, 83}, - {0x8001, 84}, - {0xc016, 84}, - }, - /* 50 */ - { - {0x8002, 77}, - {0x8009, 77}, - {0x8017, 77}, - {0xc028, 77}, - {0x8002, 78}, - {0x8009, 78}, - {0x8017, 78}, - {0xc028, 78}, - {0x8002, 79}, - {0x8009, 79}, - {0x8017, 79}, - {0xc028, 79}, - {0x8002, 80}, - {0x8009, 80}, - {0x8017, 80}, - {0xc028, 80}, - }, - /* 51 */ - { - {0x8003, 77}, - {0x8006, 77}, - {0x800a, 77}, - {0x800f, 77}, - {0x8018, 77}, - {0x801f, 77}, - {0x8029, 77}, - {0xc038, 77}, - {0x8003, 78}, - {0x8006, 78}, - {0x800a, 78}, - {0x800f, 78}, - {0x8018, 78}, - {0x801f, 78}, - {0x8029, 78}, - {0xc038, 78}, - }, - /* 52 */ - { - {0x8003, 79}, - {0x8006, 79}, - {0x800a, 79}, - {0x800f, 79}, - {0x8018, 79}, - {0x801f, 79}, - {0x8029, 79}, - {0xc038, 79}, - {0x8003, 80}, - {0x8006, 80}, - {0x800a, 80}, - {0x800f, 80}, - {0x8018, 80}, - {0x801f, 80}, - {0x8029, 80}, - {0xc038, 80}, - }, - /* 53 */ - { - {0x8002, 81}, - {0x8009, 81}, - {0x8017, 81}, - {0xc028, 81}, - {0x8002, 82}, - {0x8009, 82}, - {0x8017, 82}, - {0xc028, 82}, - {0x8002, 83}, - {0x8009, 83}, - {0x8017, 83}, - {0xc028, 83}, - {0x8002, 84}, - {0x8009, 84}, - {0x8017, 84}, - {0xc028, 84}, - }, - /* 54 */ - { - {0x8003, 81}, - {0x8006, 81}, - {0x800a, 81}, - {0x800f, 81}, - {0x8018, 81}, - {0x801f, 81}, - {0x8029, 81}, - {0xc038, 81}, - {0x8003, 82}, - {0x8006, 82}, - {0x800a, 82}, - {0x800f, 82}, - {0x8018, 82}, - {0x801f, 82}, - {0x8029, 82}, - {0xc038, 82}, - }, - /* 55 */ - { - {0x8003, 83}, - {0x8006, 83}, - {0x800a, 83}, - {0x800f, 83}, - {0x8018, 83}, - {0x801f, 83}, - {0x8029, 83}, - {0xc038, 83}, - {0x8003, 84}, - {0x8006, 84}, - {0x800a, 84}, - {0x800f, 84}, - {0x8018, 84}, - {0x801f, 84}, - {0x8029, 84}, - {0xc038, 84}, - }, - /* 56 */ - { - {0xc000, 85}, - {0xc000, 86}, - {0xc000, 87}, - {0xc000, 89}, - {0xc000, 106}, - {0xc000, 107}, - {0xc000, 113}, - {0xc000, 118}, - {0xc000, 119}, - {0xc000, 120}, - {0xc000, 121}, - {0xc000, 122}, - {0x46, 0}, - {0x47, 0}, - {0x49, 0}, - {0x404a, 0}, - }, - /* 57 */ - { - {0x8001, 85}, - {0xc016, 85}, - {0x8001, 86}, - {0xc016, 86}, - {0x8001, 87}, - {0xc016, 87}, - {0x8001, 89}, - {0xc016, 89}, - {0x8001, 106}, - {0xc016, 106}, - {0x8001, 107}, - {0xc016, 107}, - {0x8001, 113}, - {0xc016, 113}, - {0x8001, 118}, - {0xc016, 118}, - }, - /* 58 */ - { - {0x8002, 85}, - {0x8009, 85}, - {0x8017, 85}, - {0xc028, 85}, - {0x8002, 86}, - {0x8009, 86}, - {0x8017, 86}, - {0xc028, 86}, - {0x8002, 87}, - {0x8009, 87}, - {0x8017, 87}, - {0xc028, 87}, - {0x8002, 89}, - {0x8009, 89}, - {0x8017, 89}, - {0xc028, 89}, - }, - /* 59 */ - { - {0x8003, 85}, - {0x8006, 85}, - {0x800a, 85}, - {0x800f, 85}, - {0x8018, 85}, - {0x801f, 85}, - {0x8029, 85}, - {0xc038, 85}, - {0x8003, 86}, - {0x8006, 86}, - {0x800a, 86}, - {0x800f, 86}, - {0x8018, 86}, - {0x801f, 86}, - {0x8029, 86}, - {0xc038, 86}, - }, - /* 60 */ - { - {0x8003, 87}, - {0x8006, 87}, - {0x800a, 87}, - {0x800f, 87}, - {0x8018, 87}, - {0x801f, 87}, - {0x8029, 87}, - {0xc038, 87}, - {0x8003, 89}, - {0x8006, 89}, - {0x800a, 89}, - {0x800f, 89}, - {0x8018, 89}, - {0x801f, 89}, - {0x8029, 89}, - {0xc038, 89}, - }, - /* 61 */ - { - {0x8002, 106}, - {0x8009, 106}, - {0x8017, 106}, - {0xc028, 106}, - {0x8002, 107}, - {0x8009, 107}, - {0x8017, 107}, - {0xc028, 107}, - {0x8002, 113}, - {0x8009, 113}, - {0x8017, 113}, - {0xc028, 113}, - {0x8002, 118}, - {0x8009, 118}, - {0x8017, 118}, - {0xc028, 118}, - }, - /* 62 */ - { - {0x8003, 106}, - {0x8006, 106}, - {0x800a, 106}, - {0x800f, 106}, - {0x8018, 106}, - {0x801f, 106}, - {0x8029, 106}, - {0xc038, 106}, - {0x8003, 107}, - {0x8006, 107}, - {0x800a, 107}, - {0x800f, 107}, - {0x8018, 107}, - {0x801f, 107}, - {0x8029, 107}, - {0xc038, 107}, - }, - /* 63 */ - { - {0x8003, 113}, - {0x8006, 113}, - {0x800a, 113}, - {0x800f, 113}, - {0x8018, 113}, - {0x801f, 113}, - {0x8029, 113}, - {0xc038, 113}, - {0x8003, 118}, - {0x8006, 118}, - {0x800a, 118}, - {0x800f, 118}, - {0x8018, 118}, - {0x801f, 118}, - {0x8029, 118}, - {0xc038, 118}, - }, - /* 64 */ - { - {0x8001, 119}, - {0xc016, 119}, - {0x8001, 120}, - {0xc016, 120}, - {0x8001, 121}, - {0xc016, 121}, - {0x8001, 122}, - {0xc016, 122}, - {0xc000, 38}, - {0xc000, 42}, - {0xc000, 44}, - {0xc000, 59}, - {0xc000, 88}, - {0xc000, 90}, - {0x4b, 0}, - {0x4e, 0}, - }, - /* 65 */ - { - {0x8002, 119}, - {0x8009, 119}, - {0x8017, 119}, - {0xc028, 119}, - {0x8002, 120}, - {0x8009, 120}, - {0x8017, 120}, - {0xc028, 120}, - {0x8002, 121}, - {0x8009, 121}, - {0x8017, 121}, - {0xc028, 121}, - {0x8002, 122}, - {0x8009, 122}, - {0x8017, 122}, - {0xc028, 122}, - }, - /* 66 */ - { - {0x8003, 119}, - {0x8006, 119}, - {0x800a, 119}, - {0x800f, 119}, - {0x8018, 119}, - {0x801f, 119}, - {0x8029, 119}, - {0xc038, 119}, - {0x8003, 120}, - {0x8006, 120}, - {0x800a, 120}, - {0x800f, 120}, - {0x8018, 120}, - {0x801f, 120}, - {0x8029, 120}, - {0xc038, 120}, - }, - /* 67 */ - { - {0x8003, 121}, - {0x8006, 121}, - {0x800a, 121}, - {0x800f, 121}, - {0x8018, 121}, - {0x801f, 121}, - {0x8029, 121}, - {0xc038, 121}, - {0x8003, 122}, - {0x8006, 122}, - {0x800a, 122}, - {0x800f, 122}, - {0x8018, 122}, - {0x801f, 122}, - {0x8029, 122}, - {0xc038, 122}, - }, - /* 68 */ - { - {0x8001, 38}, - {0xc016, 38}, - {0x8001, 42}, - {0xc016, 42}, - {0x8001, 44}, - {0xc016, 44}, - {0x8001, 59}, - {0xc016, 59}, - {0x8001, 88}, - {0xc016, 88}, - {0x8001, 90}, - {0xc016, 90}, - {0x4c, 0}, - {0x4d, 0}, - {0x4f, 0}, - {0x51, 0}, - }, - /* 69 */ - { - {0x8002, 38}, - {0x8009, 38}, - {0x8017, 38}, - {0xc028, 38}, - {0x8002, 42}, - {0x8009, 42}, - {0x8017, 42}, - {0xc028, 42}, - {0x8002, 44}, - {0x8009, 44}, - {0x8017, 44}, - {0xc028, 44}, - {0x8002, 59}, - {0x8009, 59}, - {0x8017, 59}, - {0xc028, 59}, - }, - /* 70 */ - { - {0x8003, 38}, - {0x8006, 38}, - {0x800a, 38}, - {0x800f, 38}, - {0x8018, 38}, - {0x801f, 38}, - {0x8029, 38}, - {0xc038, 38}, - {0x8003, 42}, - {0x8006, 42}, - {0x800a, 42}, - {0x800f, 42}, - {0x8018, 42}, - {0x801f, 42}, - {0x8029, 42}, - {0xc038, 42}, - }, - /* 71 */ - { - {0x8003, 44}, - {0x8006, 44}, - {0x800a, 44}, - {0x800f, 44}, - {0x8018, 44}, - {0x801f, 44}, - {0x8029, 44}, - {0xc038, 44}, - {0x8003, 59}, - {0x8006, 59}, - {0x800a, 59}, - {0x800f, 59}, - {0x8018, 59}, - {0x801f, 59}, - {0x8029, 59}, - {0xc038, 59}, - }, - /* 72 */ - { - {0x8002, 88}, - {0x8009, 88}, - {0x8017, 88}, - {0xc028, 88}, - {0x8002, 90}, - {0x8009, 90}, - {0x8017, 90}, - {0xc028, 90}, - {0xc000, 33}, - {0xc000, 34}, - {0xc000, 40}, - {0xc000, 41}, - {0xc000, 63}, - {0x50, 0}, - {0x52, 0}, - {0x54, 0}, - }, - /* 73 */ - { - {0x8003, 88}, - {0x8006, 88}, - {0x800a, 88}, - {0x800f, 88}, - {0x8018, 88}, - {0x801f, 88}, - {0x8029, 88}, - {0xc038, 88}, - {0x8003, 90}, - {0x8006, 90}, - {0x800a, 90}, - {0x800f, 90}, - {0x8018, 90}, - {0x801f, 90}, - {0x8029, 90}, - {0xc038, 90}, - }, - /* 74 */ - { - {0x8001, 33}, - {0xc016, 33}, - {0x8001, 34}, - {0xc016, 34}, - {0x8001, 40}, - {0xc016, 40}, - {0x8001, 41}, - {0xc016, 41}, - {0x8001, 63}, - {0xc016, 63}, - {0xc000, 39}, - {0xc000, 43}, - {0xc000, 124}, - {0x53, 0}, - {0x55, 0}, - {0x58, 0}, - }, - /* 75 */ - { - {0x8002, 33}, - {0x8009, 33}, - {0x8017, 33}, - {0xc028, 33}, - {0x8002, 34}, - {0x8009, 34}, - {0x8017, 34}, - {0xc028, 34}, - {0x8002, 40}, - {0x8009, 40}, - {0x8017, 40}, - {0xc028, 40}, - {0x8002, 41}, - {0x8009, 41}, - {0x8017, 41}, - {0xc028, 41}, - }, - /* 76 */ - { - {0x8003, 33}, - {0x8006, 33}, - {0x800a, 33}, - {0x800f, 33}, - {0x8018, 33}, - {0x801f, 33}, - {0x8029, 33}, - {0xc038, 33}, - {0x8003, 34}, - {0x8006, 34}, - {0x800a, 34}, - {0x800f, 34}, - {0x8018, 34}, - {0x801f, 34}, - {0x8029, 34}, - {0xc038, 34}, - }, - /* 77 */ - { - {0x8003, 40}, - {0x8006, 40}, - {0x800a, 40}, - {0x800f, 40}, - {0x8018, 40}, - {0x801f, 40}, - {0x8029, 40}, - {0xc038, 40}, - {0x8003, 41}, - {0x8006, 41}, - {0x800a, 41}, - {0x800f, 41}, - {0x8018, 41}, - {0x801f, 41}, - {0x8029, 41}, - {0xc038, 41}, - }, - /* 78 */ - { - {0x8002, 63}, - {0x8009, 63}, - {0x8017, 63}, - {0xc028, 63}, - {0x8001, 39}, - {0xc016, 39}, - {0x8001, 43}, - {0xc016, 43}, - {0x8001, 124}, - {0xc016, 124}, - {0xc000, 35}, - {0xc000, 62}, - {0x56, 0}, - {0x57, 0}, - {0x59, 0}, - {0x5a, 0}, - }, - /* 79 */ - { - {0x8003, 63}, - {0x8006, 63}, - {0x800a, 63}, - {0x800f, 63}, - {0x8018, 63}, - {0x801f, 63}, - {0x8029, 63}, - {0xc038, 63}, - {0x8002, 39}, - {0x8009, 39}, - {0x8017, 39}, - {0xc028, 39}, - {0x8002, 43}, - {0x8009, 43}, - {0x8017, 43}, - {0xc028, 43}, - }, - /* 80 */ - { - {0x8003, 39}, - {0x8006, 39}, - {0x800a, 39}, - {0x800f, 39}, - {0x8018, 39}, - {0x801f, 39}, - {0x8029, 39}, - {0xc038, 39}, - {0x8003, 43}, - {0x8006, 43}, - {0x800a, 43}, - {0x800f, 43}, - {0x8018, 43}, - {0x801f, 43}, - {0x8029, 43}, - {0xc038, 43}, - }, - /* 81 */ - { - {0x8002, 124}, - {0x8009, 124}, - {0x8017, 124}, - {0xc028, 124}, - {0x8001, 35}, - {0xc016, 35}, - {0x8001, 62}, - {0xc016, 62}, - {0xc000, 0}, - {0xc000, 36}, - {0xc000, 64}, - {0xc000, 91}, - {0xc000, 93}, - {0xc000, 126}, - {0x5b, 0}, - {0x5c, 0}, - }, - /* 82 */ - { - {0x8003, 124}, - {0x8006, 124}, - {0x800a, 124}, - {0x800f, 124}, - {0x8018, 124}, - {0x801f, 124}, - {0x8029, 124}, - {0xc038, 124}, - {0x8002, 35}, - {0x8009, 35}, - {0x8017, 35}, - {0xc028, 35}, - {0x8002, 62}, - {0x8009, 62}, - {0x8017, 62}, - {0xc028, 62}, - }, - /* 83 */ - { - {0x8003, 35}, - {0x8006, 35}, - {0x800a, 35}, - {0x800f, 35}, - {0x8018, 35}, - {0x801f, 35}, - {0x8029, 35}, - {0xc038, 35}, - {0x8003, 62}, - {0x8006, 62}, - {0x800a, 62}, - {0x800f, 62}, - {0x8018, 62}, - {0x801f, 62}, - {0x8029, 62}, - {0xc038, 62}, - }, - /* 84 */ - { - {0x8001, 0}, - {0xc016, 0}, - {0x8001, 36}, - {0xc016, 36}, - {0x8001, 64}, - {0xc016, 64}, - {0x8001, 91}, - {0xc016, 91}, - {0x8001, 93}, - {0xc016, 93}, - {0x8001, 126}, - {0xc016, 126}, - {0xc000, 94}, - {0xc000, 125}, - {0x5d, 0}, - {0x5e, 0}, - }, - /* 85 */ - { - {0x8002, 0}, - {0x8009, 0}, - {0x8017, 0}, - {0xc028, 0}, - {0x8002, 36}, - {0x8009, 36}, - {0x8017, 36}, - {0xc028, 36}, - {0x8002, 64}, - {0x8009, 64}, - {0x8017, 64}, - {0xc028, 64}, - {0x8002, 91}, - {0x8009, 91}, - {0x8017, 91}, - {0xc028, 91}, - }, - /* 86 */ - { - {0x8003, 0}, - {0x8006, 0}, - {0x800a, 0}, - {0x800f, 0}, - {0x8018, 0}, - {0x801f, 0}, - {0x8029, 0}, - {0xc038, 0}, - {0x8003, 36}, - {0x8006, 36}, - {0x800a, 36}, - {0x800f, 36}, - {0x8018, 36}, - {0x801f, 36}, - {0x8029, 36}, - {0xc038, 36}, - }, - /* 87 */ - { - {0x8003, 64}, - {0x8006, 64}, - {0x800a, 64}, - {0x800f, 64}, - {0x8018, 64}, - {0x801f, 64}, - {0x8029, 64}, - {0xc038, 64}, - {0x8003, 91}, - {0x8006, 91}, - {0x800a, 91}, - {0x800f, 91}, - {0x8018, 91}, - {0x801f, 91}, - {0x8029, 91}, - {0xc038, 91}, - }, - /* 88 */ - { - {0x8002, 93}, - {0x8009, 93}, - {0x8017, 93}, - {0xc028, 93}, - {0x8002, 126}, - {0x8009, 126}, - {0x8017, 126}, - {0xc028, 126}, - {0x8001, 94}, - {0xc016, 94}, - {0x8001, 125}, - {0xc016, 125}, - {0xc000, 60}, - {0xc000, 96}, - {0xc000, 123}, - {0x5f, 0}, - }, - /* 89 */ - { - {0x8003, 93}, - {0x8006, 93}, - {0x800a, 93}, - {0x800f, 93}, - {0x8018, 93}, - {0x801f, 93}, - {0x8029, 93}, - {0xc038, 93}, - {0x8003, 126}, - {0x8006, 126}, - {0x800a, 126}, - {0x800f, 126}, - {0x8018, 126}, - {0x801f, 126}, - {0x8029, 126}, - {0xc038, 126}, - }, - /* 90 */ - { - {0x8002, 94}, - {0x8009, 94}, - {0x8017, 94}, - {0xc028, 94}, - {0x8002, 125}, - {0x8009, 125}, - {0x8017, 125}, - {0xc028, 125}, - {0x8001, 60}, - {0xc016, 60}, - {0x8001, 96}, - {0xc016, 96}, - {0x8001, 123}, - {0xc016, 123}, - {0x60, 0}, - {0x6e, 0}, - }, - /* 91 */ - { - {0x8003, 94}, - {0x8006, 94}, - {0x800a, 94}, - {0x800f, 94}, - {0x8018, 94}, - {0x801f, 94}, - {0x8029, 94}, - {0xc038, 94}, - {0x8003, 125}, - {0x8006, 125}, - {0x800a, 125}, - {0x800f, 125}, - {0x8018, 125}, - {0x801f, 125}, - {0x8029, 125}, - {0xc038, 125}, - }, - /* 92 */ - { - {0x8002, 60}, - {0x8009, 60}, - {0x8017, 60}, - {0xc028, 60}, - {0x8002, 96}, - {0x8009, 96}, - {0x8017, 96}, - {0xc028, 96}, - {0x8002, 123}, - {0x8009, 123}, - {0x8017, 123}, - {0xc028, 123}, - {0x61, 0}, - {0x65, 0}, - {0x6f, 0}, - {0x85, 0}, - }, - /* 93 */ - { - {0x8003, 60}, - {0x8006, 60}, - {0x800a, 60}, - {0x800f, 60}, - {0x8018, 60}, - {0x801f, 60}, - {0x8029, 60}, - {0xc038, 60}, - {0x8003, 96}, - {0x8006, 96}, - {0x800a, 96}, - {0x800f, 96}, - {0x8018, 96}, - {0x801f, 96}, - {0x8029, 96}, - {0xc038, 96}, - }, - /* 94 */ - { - {0x8003, 123}, - {0x8006, 123}, - {0x800a, 123}, - {0x800f, 123}, - {0x8018, 123}, - {0x801f, 123}, - {0x8029, 123}, - {0xc038, 123}, - {0x62, 0}, - {0x63, 0}, - {0x66, 0}, - {0x69, 0}, - {0x70, 0}, - {0x77, 0}, - {0x86, 0}, - {0x99, 0}, - }, - /* 95 */ - { - {0xc000, 92}, - {0xc000, 195}, - {0xc000, 208}, - {0x64, 0}, - {0x67, 0}, - {0x68, 0}, - {0x6a, 0}, - {0x6b, 0}, - {0x71, 0}, - {0x74, 0}, - {0x78, 0}, - {0x7e, 0}, - {0x87, 0}, - {0x8e, 0}, - {0x9a, 0}, - {0xa9, 0}, - }, - /* 96 */ - { - {0x8001, 92}, - {0xc016, 92}, - {0x8001, 195}, - {0xc016, 195}, - {0x8001, 208}, - {0xc016, 208}, - {0xc000, 128}, - {0xc000, 130}, - {0xc000, 131}, - {0xc000, 162}, - {0xc000, 184}, - {0xc000, 194}, - {0xc000, 224}, - {0xc000, 226}, - {0x6c, 0}, - {0x6d, 0}, - }, - /* 97 */ - { - {0x8002, 92}, - {0x8009, 92}, - {0x8017, 92}, - {0xc028, 92}, - {0x8002, 195}, - {0x8009, 195}, - {0x8017, 195}, - {0xc028, 195}, - {0x8002, 208}, - {0x8009, 208}, - {0x8017, 208}, - {0xc028, 208}, - {0x8001, 128}, - {0xc016, 128}, - {0x8001, 130}, - {0xc016, 130}, - }, - /* 98 */ - { - {0x8003, 92}, - {0x8006, 92}, - {0x800a, 92}, - {0x800f, 92}, - {0x8018, 92}, - {0x801f, 92}, - {0x8029, 92}, - {0xc038, 92}, - {0x8003, 195}, - {0x8006, 195}, - {0x800a, 195}, - {0x800f, 195}, - {0x8018, 195}, - {0x801f, 195}, - {0x8029, 195}, - {0xc038, 195}, - }, - /* 99 */ - { - {0x8003, 208}, - {0x8006, 208}, - {0x800a, 208}, - {0x800f, 208}, - {0x8018, 208}, - {0x801f, 208}, - {0x8029, 208}, - {0xc038, 208}, - {0x8002, 128}, - {0x8009, 128}, - {0x8017, 128}, - {0xc028, 128}, - {0x8002, 130}, - {0x8009, 130}, - {0x8017, 130}, - {0xc028, 130}, - }, - /* 100 */ - { - {0x8003, 128}, - {0x8006, 128}, - {0x800a, 128}, - {0x800f, 128}, - {0x8018, 128}, - {0x801f, 128}, - {0x8029, 128}, - {0xc038, 128}, - {0x8003, 130}, - {0x8006, 130}, - {0x800a, 130}, - {0x800f, 130}, - {0x8018, 130}, - {0x801f, 130}, - {0x8029, 130}, - {0xc038, 130}, - }, - /* 101 */ - { - {0x8001, 131}, - {0xc016, 131}, - {0x8001, 162}, - {0xc016, 162}, - {0x8001, 184}, - {0xc016, 184}, - {0x8001, 194}, - {0xc016, 194}, - {0x8001, 224}, - {0xc016, 224}, - {0x8001, 226}, - {0xc016, 226}, - {0xc000, 153}, - {0xc000, 161}, - {0xc000, 167}, - {0xc000, 172}, - }, - /* 102 */ - { - {0x8002, 131}, - {0x8009, 131}, - {0x8017, 131}, - {0xc028, 131}, - {0x8002, 162}, - {0x8009, 162}, - {0x8017, 162}, - {0xc028, 162}, - {0x8002, 184}, - {0x8009, 184}, - {0x8017, 184}, - {0xc028, 184}, - {0x8002, 194}, - {0x8009, 194}, - {0x8017, 194}, - {0xc028, 194}, - }, - /* 103 */ - { - {0x8003, 131}, - {0x8006, 131}, - {0x800a, 131}, - {0x800f, 131}, - {0x8018, 131}, - {0x801f, 131}, - {0x8029, 131}, - {0xc038, 131}, - {0x8003, 162}, - {0x8006, 162}, - {0x800a, 162}, - {0x800f, 162}, - {0x8018, 162}, - {0x801f, 162}, - {0x8029, 162}, - {0xc038, 162}, - }, - /* 104 */ - { - {0x8003, 184}, - {0x8006, 184}, - {0x800a, 184}, - {0x800f, 184}, - {0x8018, 184}, - {0x801f, 184}, - {0x8029, 184}, - {0xc038, 184}, - {0x8003, 194}, - {0x8006, 194}, - {0x800a, 194}, - {0x800f, 194}, - {0x8018, 194}, - {0x801f, 194}, - {0x8029, 194}, - {0xc038, 194}, - }, - /* 105 */ - { - {0x8002, 224}, - {0x8009, 224}, - {0x8017, 224}, - {0xc028, 224}, - {0x8002, 226}, - {0x8009, 226}, - {0x8017, 226}, - {0xc028, 226}, - {0x8001, 153}, - {0xc016, 153}, - {0x8001, 161}, - {0xc016, 161}, - {0x8001, 167}, - {0xc016, 167}, - {0x8001, 172}, - {0xc016, 172}, - }, - /* 106 */ - { - {0x8003, 224}, - {0x8006, 224}, - {0x800a, 224}, - {0x800f, 224}, - {0x8018, 224}, - {0x801f, 224}, - {0x8029, 224}, - {0xc038, 224}, - {0x8003, 226}, - {0x8006, 226}, - {0x800a, 226}, - {0x800f, 226}, - {0x8018, 226}, - {0x801f, 226}, - {0x8029, 226}, - {0xc038, 226}, - }, - /* 107 */ - { - {0x8002, 153}, - {0x8009, 153}, - {0x8017, 153}, - {0xc028, 153}, - {0x8002, 161}, - {0x8009, 161}, - {0x8017, 161}, - {0xc028, 161}, - {0x8002, 167}, - {0x8009, 167}, - {0x8017, 167}, - {0xc028, 167}, - {0x8002, 172}, - {0x8009, 172}, - {0x8017, 172}, - {0xc028, 172}, - }, - /* 108 */ - { - {0x8003, 153}, - {0x8006, 153}, - {0x800a, 153}, - {0x800f, 153}, - {0x8018, 153}, - {0x801f, 153}, - {0x8029, 153}, - {0xc038, 153}, - {0x8003, 161}, - {0x8006, 161}, - {0x800a, 161}, - {0x800f, 161}, - {0x8018, 161}, - {0x801f, 161}, - {0x8029, 161}, - {0xc038, 161}, - }, - /* 109 */ - { - {0x8003, 167}, - {0x8006, 167}, - {0x800a, 167}, - {0x800f, 167}, - {0x8018, 167}, - {0x801f, 167}, - {0x8029, 167}, - {0xc038, 167}, - {0x8003, 172}, - {0x8006, 172}, - {0x800a, 172}, - {0x800f, 172}, - {0x8018, 172}, - {0x801f, 172}, - {0x8029, 172}, - {0xc038, 172}, - }, - /* 110 */ - { - {0x72, 0}, - {0x73, 0}, - {0x75, 0}, - {0x76, 0}, - {0x79, 0}, - {0x7b, 0}, - {0x7f, 0}, - {0x82, 0}, - {0x88, 0}, - {0x8b, 0}, - {0x8f, 0}, - {0x92, 0}, - {0x9b, 0}, - {0xa2, 0}, - {0xaa, 0}, - {0xb4, 0}, - }, - /* 111 */ - { - {0xc000, 176}, - {0xc000, 177}, - {0xc000, 179}, - {0xc000, 209}, - {0xc000, 216}, - {0xc000, 217}, - {0xc000, 227}, - {0xc000, 229}, - {0xc000, 230}, - {0x7a, 0}, - {0x7c, 0}, - {0x7d, 0}, - {0x80, 0}, - {0x81, 0}, - {0x83, 0}, - {0x84, 0}, - }, - /* 112 */ - { - {0x8001, 176}, - {0xc016, 176}, - {0x8001, 177}, - {0xc016, 177}, - {0x8001, 179}, - {0xc016, 179}, - {0x8001, 209}, - {0xc016, 209}, - {0x8001, 216}, - {0xc016, 216}, - {0x8001, 217}, - {0xc016, 217}, - {0x8001, 227}, - {0xc016, 227}, - {0x8001, 229}, - {0xc016, 229}, - }, - /* 113 */ - { - {0x8002, 176}, - {0x8009, 176}, - {0x8017, 176}, - {0xc028, 176}, - {0x8002, 177}, - {0x8009, 177}, - {0x8017, 177}, - {0xc028, 177}, - {0x8002, 179}, - {0x8009, 179}, - {0x8017, 179}, - {0xc028, 179}, - {0x8002, 209}, - {0x8009, 209}, - {0x8017, 209}, - {0xc028, 209}, - }, - /* 114 */ - { - {0x8003, 176}, - {0x8006, 176}, - {0x800a, 176}, - {0x800f, 176}, - {0x8018, 176}, - {0x801f, 176}, - {0x8029, 176}, - {0xc038, 176}, - {0x8003, 177}, - {0x8006, 177}, - {0x800a, 177}, - {0x800f, 177}, - {0x8018, 177}, - {0x801f, 177}, - {0x8029, 177}, - {0xc038, 177}, - }, - /* 115 */ - { - {0x8003, 179}, - {0x8006, 179}, - {0x800a, 179}, - {0x800f, 179}, - {0x8018, 179}, - {0x801f, 179}, - {0x8029, 179}, - {0xc038, 179}, - {0x8003, 209}, - {0x8006, 209}, - {0x800a, 209}, - {0x800f, 209}, - {0x8018, 209}, - {0x801f, 209}, - {0x8029, 209}, - {0xc038, 209}, - }, - /* 116 */ - { - {0x8002, 216}, - {0x8009, 216}, - {0x8017, 216}, - {0xc028, 216}, - {0x8002, 217}, - {0x8009, 217}, - {0x8017, 217}, - {0xc028, 217}, - {0x8002, 227}, - {0x8009, 227}, - {0x8017, 227}, - {0xc028, 227}, - {0x8002, 229}, - {0x8009, 229}, - {0x8017, 229}, - {0xc028, 229}, - }, - /* 117 */ - { - {0x8003, 216}, - {0x8006, 216}, - {0x800a, 216}, - {0x800f, 216}, - {0x8018, 216}, - {0x801f, 216}, - {0x8029, 216}, - {0xc038, 216}, - {0x8003, 217}, - {0x8006, 217}, - {0x800a, 217}, - {0x800f, 217}, - {0x8018, 217}, - {0x801f, 217}, - {0x8029, 217}, - {0xc038, 217}, - }, - /* 118 */ - { - {0x8003, 227}, - {0x8006, 227}, - {0x800a, 227}, - {0x800f, 227}, - {0x8018, 227}, - {0x801f, 227}, - {0x8029, 227}, - {0xc038, 227}, - {0x8003, 229}, - {0x8006, 229}, - {0x800a, 229}, - {0x800f, 229}, - {0x8018, 229}, - {0x801f, 229}, - {0x8029, 229}, - {0xc038, 229}, - }, - /* 119 */ - { - {0x8001, 230}, - {0xc016, 230}, - {0xc000, 129}, - {0xc000, 132}, - {0xc000, 133}, - {0xc000, 134}, - {0xc000, 136}, - {0xc000, 146}, - {0xc000, 154}, - {0xc000, 156}, - {0xc000, 160}, - {0xc000, 163}, - {0xc000, 164}, - {0xc000, 169}, - {0xc000, 170}, - {0xc000, 173}, - }, - /* 120 */ - { - {0x8002, 230}, - {0x8009, 230}, - {0x8017, 230}, - {0xc028, 230}, - {0x8001, 129}, - {0xc016, 129}, - {0x8001, 132}, - {0xc016, 132}, - {0x8001, 133}, - {0xc016, 133}, - {0x8001, 134}, - {0xc016, 134}, - {0x8001, 136}, - {0xc016, 136}, - {0x8001, 146}, - {0xc016, 146}, - }, - /* 121 */ - { - {0x8003, 230}, - {0x8006, 230}, - {0x800a, 230}, - {0x800f, 230}, - {0x8018, 230}, - {0x801f, 230}, - {0x8029, 230}, - {0xc038, 230}, - {0x8002, 129}, - {0x8009, 129}, - {0x8017, 129}, - {0xc028, 129}, - {0x8002, 132}, - {0x8009, 132}, - {0x8017, 132}, - {0xc028, 132}, - }, - /* 122 */ - { - {0x8003, 129}, - {0x8006, 129}, - {0x800a, 129}, - {0x800f, 129}, - {0x8018, 129}, - {0x801f, 129}, - {0x8029, 129}, - {0xc038, 129}, - {0x8003, 132}, - {0x8006, 132}, - {0x800a, 132}, - {0x800f, 132}, - {0x8018, 132}, - {0x801f, 132}, - {0x8029, 132}, - {0xc038, 132}, - }, - /* 123 */ - { - {0x8002, 133}, - {0x8009, 133}, - {0x8017, 133}, - {0xc028, 133}, - {0x8002, 134}, - {0x8009, 134}, - {0x8017, 134}, - {0xc028, 134}, - {0x8002, 136}, - {0x8009, 136}, - {0x8017, 136}, - {0xc028, 136}, - {0x8002, 146}, - {0x8009, 146}, - {0x8017, 146}, - {0xc028, 146}, - }, - /* 124 */ - { - {0x8003, 133}, - {0x8006, 133}, - {0x800a, 133}, - {0x800f, 133}, - {0x8018, 133}, - {0x801f, 133}, - {0x8029, 133}, - {0xc038, 133}, - {0x8003, 134}, - {0x8006, 134}, - {0x800a, 134}, - {0x800f, 134}, - {0x8018, 134}, - {0x801f, 134}, - {0x8029, 134}, - {0xc038, 134}, - }, - /* 125 */ - { - {0x8003, 136}, - {0x8006, 136}, - {0x800a, 136}, - {0x800f, 136}, - {0x8018, 136}, - {0x801f, 136}, - {0x8029, 136}, - {0xc038, 136}, - {0x8003, 146}, - {0x8006, 146}, - {0x800a, 146}, - {0x800f, 146}, - {0x8018, 146}, - {0x801f, 146}, - {0x8029, 146}, - {0xc038, 146}, - }, - /* 126 */ - { - {0x8001, 154}, - {0xc016, 154}, - {0x8001, 156}, - {0xc016, 156}, - {0x8001, 160}, - {0xc016, 160}, - {0x8001, 163}, - {0xc016, 163}, - {0x8001, 164}, - {0xc016, 164}, - {0x8001, 169}, - {0xc016, 169}, - {0x8001, 170}, - {0xc016, 170}, - {0x8001, 173}, - {0xc016, 173}, - }, - /* 127 */ - { - {0x8002, 154}, - {0x8009, 154}, - {0x8017, 154}, - {0xc028, 154}, - {0x8002, 156}, - {0x8009, 156}, - {0x8017, 156}, - {0xc028, 156}, - {0x8002, 160}, - {0x8009, 160}, - {0x8017, 160}, - {0xc028, 160}, - {0x8002, 163}, - {0x8009, 163}, - {0x8017, 163}, - {0xc028, 163}, - }, - /* 128 */ - { - {0x8003, 154}, - {0x8006, 154}, - {0x800a, 154}, - {0x800f, 154}, - {0x8018, 154}, - {0x801f, 154}, - {0x8029, 154}, - {0xc038, 154}, - {0x8003, 156}, - {0x8006, 156}, - {0x800a, 156}, - {0x800f, 156}, - {0x8018, 156}, - {0x801f, 156}, - {0x8029, 156}, - {0xc038, 156}, - }, - /* 129 */ - { - {0x8003, 160}, - {0x8006, 160}, - {0x800a, 160}, - {0x800f, 160}, - {0x8018, 160}, - {0x801f, 160}, - {0x8029, 160}, - {0xc038, 160}, - {0x8003, 163}, - {0x8006, 163}, - {0x800a, 163}, - {0x800f, 163}, - {0x8018, 163}, - {0x801f, 163}, - {0x8029, 163}, - {0xc038, 163}, - }, - /* 130 */ - { - {0x8002, 164}, - {0x8009, 164}, - {0x8017, 164}, - {0xc028, 164}, - {0x8002, 169}, - {0x8009, 169}, - {0x8017, 169}, - {0xc028, 169}, - {0x8002, 170}, - {0x8009, 170}, - {0x8017, 170}, - {0xc028, 170}, - {0x8002, 173}, - {0x8009, 173}, - {0x8017, 173}, - {0xc028, 173}, - }, - /* 131 */ - { - {0x8003, 164}, - {0x8006, 164}, - {0x800a, 164}, - {0x800f, 164}, - {0x8018, 164}, - {0x801f, 164}, - {0x8029, 164}, - {0xc038, 164}, - {0x8003, 169}, - {0x8006, 169}, - {0x800a, 169}, - {0x800f, 169}, - {0x8018, 169}, - {0x801f, 169}, - {0x8029, 169}, - {0xc038, 169}, - }, - /* 132 */ - { - {0x8003, 170}, - {0x8006, 170}, - {0x800a, 170}, - {0x800f, 170}, - {0x8018, 170}, - {0x801f, 170}, - {0x8029, 170}, - {0xc038, 170}, - {0x8003, 173}, - {0x8006, 173}, - {0x800a, 173}, - {0x800f, 173}, - {0x8018, 173}, - {0x801f, 173}, - {0x8029, 173}, - {0xc038, 173}, - }, - /* 133 */ - { - {0x89, 0}, - {0x8a, 0}, - {0x8c, 0}, - {0x8d, 0}, - {0x90, 0}, - {0x91, 0}, - {0x93, 0}, - {0x96, 0}, - {0x9c, 0}, - {0x9f, 0}, - {0xa3, 0}, - {0xa6, 0}, - {0xab, 0}, - {0xae, 0}, - {0xb5, 0}, - {0xbe, 0}, - }, - /* 134 */ - { - {0xc000, 178}, - {0xc000, 181}, - {0xc000, 185}, - {0xc000, 186}, - {0xc000, 187}, - {0xc000, 189}, - {0xc000, 190}, - {0xc000, 196}, - {0xc000, 198}, - {0xc000, 228}, - {0xc000, 232}, - {0xc000, 233}, - {0x94, 0}, - {0x95, 0}, - {0x97, 0}, - {0x98, 0}, - }, - /* 135 */ - { - {0x8001, 178}, - {0xc016, 178}, - {0x8001, 181}, - {0xc016, 181}, - {0x8001, 185}, - {0xc016, 185}, - {0x8001, 186}, - {0xc016, 186}, - {0x8001, 187}, - {0xc016, 187}, - {0x8001, 189}, - {0xc016, 189}, - {0x8001, 190}, - {0xc016, 190}, - {0x8001, 196}, - {0xc016, 196}, - }, - /* 136 */ - { - {0x8002, 178}, - {0x8009, 178}, - {0x8017, 178}, - {0xc028, 178}, - {0x8002, 181}, - {0x8009, 181}, - {0x8017, 181}, - {0xc028, 181}, - {0x8002, 185}, - {0x8009, 185}, - {0x8017, 185}, - {0xc028, 185}, - {0x8002, 186}, - {0x8009, 186}, - {0x8017, 186}, - {0xc028, 186}, - }, - /* 137 */ - { - {0x8003, 178}, - {0x8006, 178}, - {0x800a, 178}, - {0x800f, 178}, - {0x8018, 178}, - {0x801f, 178}, - {0x8029, 178}, - {0xc038, 178}, - {0x8003, 181}, - {0x8006, 181}, - {0x800a, 181}, - {0x800f, 181}, - {0x8018, 181}, - {0x801f, 181}, - {0x8029, 181}, - {0xc038, 181}, - }, - /* 138 */ - { - {0x8003, 185}, - {0x8006, 185}, - {0x800a, 185}, - {0x800f, 185}, - {0x8018, 185}, - {0x801f, 185}, - {0x8029, 185}, - {0xc038, 185}, - {0x8003, 186}, - {0x8006, 186}, - {0x800a, 186}, - {0x800f, 186}, - {0x8018, 186}, - {0x801f, 186}, - {0x8029, 186}, - {0xc038, 186}, - }, - /* 139 */ - { - {0x8002, 187}, - {0x8009, 187}, - {0x8017, 187}, - {0xc028, 187}, - {0x8002, 189}, - {0x8009, 189}, - {0x8017, 189}, - {0xc028, 189}, - {0x8002, 190}, - {0x8009, 190}, - {0x8017, 190}, - {0xc028, 190}, - {0x8002, 196}, - {0x8009, 196}, - {0x8017, 196}, - {0xc028, 196}, - }, - /* 140 */ - { - {0x8003, 187}, - {0x8006, 187}, - {0x800a, 187}, - {0x800f, 187}, - {0x8018, 187}, - {0x801f, 187}, - {0x8029, 187}, - {0xc038, 187}, - {0x8003, 189}, - {0x8006, 189}, - {0x800a, 189}, - {0x800f, 189}, - {0x8018, 189}, - {0x801f, 189}, - {0x8029, 189}, - {0xc038, 189}, - }, - /* 141 */ - { - {0x8003, 190}, - {0x8006, 190}, - {0x800a, 190}, - {0x800f, 190}, - {0x8018, 190}, - {0x801f, 190}, - {0x8029, 190}, - {0xc038, 190}, - {0x8003, 196}, - {0x8006, 196}, - {0x800a, 196}, - {0x800f, 196}, - {0x8018, 196}, - {0x801f, 196}, - {0x8029, 196}, - {0xc038, 196}, - }, - /* 142 */ - { - {0x8001, 198}, - {0xc016, 198}, - {0x8001, 228}, - {0xc016, 228}, - {0x8001, 232}, - {0xc016, 232}, - {0x8001, 233}, - {0xc016, 233}, - {0xc000, 1}, - {0xc000, 135}, - {0xc000, 137}, - {0xc000, 138}, - {0xc000, 139}, - {0xc000, 140}, - {0xc000, 141}, - {0xc000, 143}, - }, - /* 143 */ - { - {0x8002, 198}, - {0x8009, 198}, - {0x8017, 198}, - {0xc028, 198}, - {0x8002, 228}, - {0x8009, 228}, - {0x8017, 228}, - {0xc028, 228}, - {0x8002, 232}, - {0x8009, 232}, - {0x8017, 232}, - {0xc028, 232}, - {0x8002, 233}, - {0x8009, 233}, - {0x8017, 233}, - {0xc028, 233}, - }, - /* 144 */ - { - {0x8003, 198}, - {0x8006, 198}, - {0x800a, 198}, - {0x800f, 198}, - {0x8018, 198}, - {0x801f, 198}, - {0x8029, 198}, - {0xc038, 198}, - {0x8003, 228}, - {0x8006, 228}, - {0x800a, 228}, - {0x800f, 228}, - {0x8018, 228}, - {0x801f, 228}, - {0x8029, 228}, - {0xc038, 228}, - }, - /* 145 */ - { - {0x8003, 232}, - {0x8006, 232}, - {0x800a, 232}, - {0x800f, 232}, - {0x8018, 232}, - {0x801f, 232}, - {0x8029, 232}, - {0xc038, 232}, - {0x8003, 233}, - {0x8006, 233}, - {0x800a, 233}, - {0x800f, 233}, - {0x8018, 233}, - {0x801f, 233}, - {0x8029, 233}, - {0xc038, 233}, - }, - /* 146 */ - { - {0x8001, 1}, - {0xc016, 1}, - {0x8001, 135}, - {0xc016, 135}, - {0x8001, 137}, - {0xc016, 137}, - {0x8001, 138}, - {0xc016, 138}, - {0x8001, 139}, - {0xc016, 139}, - {0x8001, 140}, - {0xc016, 140}, - {0x8001, 141}, - {0xc016, 141}, - {0x8001, 143}, - {0xc016, 143}, - }, - /* 147 */ - { - {0x8002, 1}, - {0x8009, 1}, - {0x8017, 1}, - {0xc028, 1}, - {0x8002, 135}, - {0x8009, 135}, - {0x8017, 135}, - {0xc028, 135}, - {0x8002, 137}, - {0x8009, 137}, - {0x8017, 137}, - {0xc028, 137}, - {0x8002, 138}, - {0x8009, 138}, - {0x8017, 138}, - {0xc028, 138}, - }, - /* 148 */ - { - {0x8003, 1}, - {0x8006, 1}, - {0x800a, 1}, - {0x800f, 1}, - {0x8018, 1}, - {0x801f, 1}, - {0x8029, 1}, - {0xc038, 1}, - {0x8003, 135}, - {0x8006, 135}, - {0x800a, 135}, - {0x800f, 135}, - {0x8018, 135}, - {0x801f, 135}, - {0x8029, 135}, - {0xc038, 135}, - }, - /* 149 */ - { - {0x8003, 137}, - {0x8006, 137}, - {0x800a, 137}, - {0x800f, 137}, - {0x8018, 137}, - {0x801f, 137}, - {0x8029, 137}, - {0xc038, 137}, - {0x8003, 138}, - {0x8006, 138}, - {0x800a, 138}, - {0x800f, 138}, - {0x8018, 138}, - {0x801f, 138}, - {0x8029, 138}, - {0xc038, 138}, - }, - /* 150 */ - { - {0x8002, 139}, - {0x8009, 139}, - {0x8017, 139}, - {0xc028, 139}, - {0x8002, 140}, - {0x8009, 140}, - {0x8017, 140}, - {0xc028, 140}, - {0x8002, 141}, - {0x8009, 141}, - {0x8017, 141}, - {0xc028, 141}, - {0x8002, 143}, - {0x8009, 143}, - {0x8017, 143}, - {0xc028, 143}, - }, - /* 151 */ - { - {0x8003, 139}, - {0x8006, 139}, - {0x800a, 139}, - {0x800f, 139}, - {0x8018, 139}, - {0x801f, 139}, - {0x8029, 139}, - {0xc038, 139}, - {0x8003, 140}, - {0x8006, 140}, - {0x800a, 140}, - {0x800f, 140}, - {0x8018, 140}, - {0x801f, 140}, - {0x8029, 140}, - {0xc038, 140}, - }, - /* 152 */ - { - {0x8003, 141}, - {0x8006, 141}, - {0x800a, 141}, - {0x800f, 141}, - {0x8018, 141}, - {0x801f, 141}, - {0x8029, 141}, - {0xc038, 141}, - {0x8003, 143}, - {0x8006, 143}, - {0x800a, 143}, - {0x800f, 143}, - {0x8018, 143}, - {0x801f, 143}, - {0x8029, 143}, - {0xc038, 143}, - }, - /* 153 */ - { - {0x9d, 0}, - {0x9e, 0}, - {0xa0, 0}, - {0xa1, 0}, - {0xa4, 0}, - {0xa5, 0}, - {0xa7, 0}, - {0xa8, 0}, - {0xac, 0}, - {0xad, 0}, - {0xaf, 0}, - {0xb1, 0}, - {0xb6, 0}, - {0xb9, 0}, - {0xbf, 0}, - {0xcf, 0}, - }, - /* 154 */ - { - {0xc000, 147}, - {0xc000, 149}, - {0xc000, 150}, - {0xc000, 151}, - {0xc000, 152}, - {0xc000, 155}, - {0xc000, 157}, - {0xc000, 158}, - {0xc000, 165}, - {0xc000, 166}, - {0xc000, 168}, - {0xc000, 174}, - {0xc000, 175}, - {0xc000, 180}, - {0xc000, 182}, - {0xc000, 183}, - }, - /* 155 */ - { - {0x8001, 147}, - {0xc016, 147}, - {0x8001, 149}, - {0xc016, 149}, - {0x8001, 150}, - {0xc016, 150}, - {0x8001, 151}, - {0xc016, 151}, - {0x8001, 152}, - {0xc016, 152}, - {0x8001, 155}, - {0xc016, 155}, - {0x8001, 157}, - {0xc016, 157}, - {0x8001, 158}, - {0xc016, 158}, - }, - /* 156 */ - { - {0x8002, 147}, - {0x8009, 147}, - {0x8017, 147}, - {0xc028, 147}, - {0x8002, 149}, - {0x8009, 149}, - {0x8017, 149}, - {0xc028, 149}, - {0x8002, 150}, - {0x8009, 150}, - {0x8017, 150}, - {0xc028, 150}, - {0x8002, 151}, - {0x8009, 151}, - {0x8017, 151}, - {0xc028, 151}, - }, - /* 157 */ - { - {0x8003, 147}, - {0x8006, 147}, - {0x800a, 147}, - {0x800f, 147}, - {0x8018, 147}, - {0x801f, 147}, - {0x8029, 147}, - {0xc038, 147}, - {0x8003, 149}, - {0x8006, 149}, - {0x800a, 149}, - {0x800f, 149}, - {0x8018, 149}, - {0x801f, 149}, - {0x8029, 149}, - {0xc038, 149}, - }, - /* 158 */ - { - {0x8003, 150}, - {0x8006, 150}, - {0x800a, 150}, - {0x800f, 150}, - {0x8018, 150}, - {0x801f, 150}, - {0x8029, 150}, - {0xc038, 150}, - {0x8003, 151}, - {0x8006, 151}, - {0x800a, 151}, - {0x800f, 151}, - {0x8018, 151}, - {0x801f, 151}, - {0x8029, 151}, - {0xc038, 151}, - }, - /* 159 */ - { - {0x8002, 152}, - {0x8009, 152}, - {0x8017, 152}, - {0xc028, 152}, - {0x8002, 155}, - {0x8009, 155}, - {0x8017, 155}, - {0xc028, 155}, - {0x8002, 157}, - {0x8009, 157}, - {0x8017, 157}, - {0xc028, 157}, - {0x8002, 158}, - {0x8009, 158}, - {0x8017, 158}, - {0xc028, 158}, - }, - /* 160 */ - { - {0x8003, 152}, - {0x8006, 152}, - {0x800a, 152}, - {0x800f, 152}, - {0x8018, 152}, - {0x801f, 152}, - {0x8029, 152}, - {0xc038, 152}, - {0x8003, 155}, - {0x8006, 155}, - {0x800a, 155}, - {0x800f, 155}, - {0x8018, 155}, - {0x801f, 155}, - {0x8029, 155}, - {0xc038, 155}, - }, - /* 161 */ - { - {0x8003, 157}, - {0x8006, 157}, - {0x800a, 157}, - {0x800f, 157}, - {0x8018, 157}, - {0x801f, 157}, - {0x8029, 157}, - {0xc038, 157}, - {0x8003, 158}, - {0x8006, 158}, - {0x800a, 158}, - {0x800f, 158}, - {0x8018, 158}, - {0x801f, 158}, - {0x8029, 158}, - {0xc038, 158}, - }, - /* 162 */ - { - {0x8001, 165}, - {0xc016, 165}, - {0x8001, 166}, - {0xc016, 166}, - {0x8001, 168}, - {0xc016, 168}, - {0x8001, 174}, - {0xc016, 174}, - {0x8001, 175}, - {0xc016, 175}, - {0x8001, 180}, - {0xc016, 180}, - {0x8001, 182}, - {0xc016, 182}, - {0x8001, 183}, - {0xc016, 183}, - }, - /* 163 */ - { - {0x8002, 165}, - {0x8009, 165}, - {0x8017, 165}, - {0xc028, 165}, - {0x8002, 166}, - {0x8009, 166}, - {0x8017, 166}, - {0xc028, 166}, - {0x8002, 168}, - {0x8009, 168}, - {0x8017, 168}, - {0xc028, 168}, - {0x8002, 174}, - {0x8009, 174}, - {0x8017, 174}, - {0xc028, 174}, - }, - /* 164 */ - { - {0x8003, 165}, - {0x8006, 165}, - {0x800a, 165}, - {0x800f, 165}, - {0x8018, 165}, - {0x801f, 165}, - {0x8029, 165}, - {0xc038, 165}, - {0x8003, 166}, - {0x8006, 166}, - {0x800a, 166}, - {0x800f, 166}, - {0x8018, 166}, - {0x801f, 166}, - {0x8029, 166}, - {0xc038, 166}, - }, - /* 165 */ - { - {0x8003, 168}, - {0x8006, 168}, - {0x800a, 168}, - {0x800f, 168}, - {0x8018, 168}, - {0x801f, 168}, - {0x8029, 168}, - {0xc038, 168}, - {0x8003, 174}, - {0x8006, 174}, - {0x800a, 174}, - {0x800f, 174}, - {0x8018, 174}, - {0x801f, 174}, - {0x8029, 174}, - {0xc038, 174}, - }, - /* 166 */ - { - {0x8002, 175}, - {0x8009, 175}, - {0x8017, 175}, - {0xc028, 175}, - {0x8002, 180}, - {0x8009, 180}, - {0x8017, 180}, - {0xc028, 180}, - {0x8002, 182}, - {0x8009, 182}, - {0x8017, 182}, - {0xc028, 182}, - {0x8002, 183}, - {0x8009, 183}, - {0x8017, 183}, - {0xc028, 183}, - }, - /* 167 */ - { - {0x8003, 175}, - {0x8006, 175}, - {0x800a, 175}, - {0x800f, 175}, - {0x8018, 175}, - {0x801f, 175}, - {0x8029, 175}, - {0xc038, 175}, - {0x8003, 180}, - {0x8006, 180}, - {0x800a, 180}, - {0x800f, 180}, - {0x8018, 180}, - {0x801f, 180}, - {0x8029, 180}, - {0xc038, 180}, - }, - /* 168 */ - { - {0x8003, 182}, - {0x8006, 182}, - {0x800a, 182}, - {0x800f, 182}, - {0x8018, 182}, - {0x801f, 182}, - {0x8029, 182}, - {0xc038, 182}, - {0x8003, 183}, - {0x8006, 183}, - {0x800a, 183}, - {0x800f, 183}, - {0x8018, 183}, - {0x801f, 183}, - {0x8029, 183}, - {0xc038, 183}, - }, - /* 169 */ - { - {0xc000, 188}, - {0xc000, 191}, - {0xc000, 197}, - {0xc000, 231}, - {0xc000, 239}, - {0xb0, 0}, - {0xb2, 0}, - {0xb3, 0}, - {0xb7, 0}, - {0xb8, 0}, - {0xba, 0}, - {0xbb, 0}, - {0xc0, 0}, - {0xc7, 0}, - {0xd0, 0}, - {0xdf, 0}, - }, - /* 170 */ - { - {0x8001, 188}, - {0xc016, 188}, - {0x8001, 191}, - {0xc016, 191}, - {0x8001, 197}, - {0xc016, 197}, - {0x8001, 231}, - {0xc016, 231}, - {0x8001, 239}, - {0xc016, 239}, - {0xc000, 9}, - {0xc000, 142}, - {0xc000, 144}, - {0xc000, 145}, - {0xc000, 148}, - {0xc000, 159}, - }, - /* 171 */ - { - {0x8002, 188}, - {0x8009, 188}, - {0x8017, 188}, - {0xc028, 188}, - {0x8002, 191}, - {0x8009, 191}, - {0x8017, 191}, - {0xc028, 191}, - {0x8002, 197}, - {0x8009, 197}, - {0x8017, 197}, - {0xc028, 197}, - {0x8002, 231}, - {0x8009, 231}, - {0x8017, 231}, - {0xc028, 231}, - }, - /* 172 */ - { - {0x8003, 188}, - {0x8006, 188}, - {0x800a, 188}, - {0x800f, 188}, - {0x8018, 188}, - {0x801f, 188}, - {0x8029, 188}, - {0xc038, 188}, - {0x8003, 191}, - {0x8006, 191}, - {0x800a, 191}, - {0x800f, 191}, - {0x8018, 191}, - {0x801f, 191}, - {0x8029, 191}, - {0xc038, 191}, - }, - /* 173 */ - { - {0x8003, 197}, - {0x8006, 197}, - {0x800a, 197}, - {0x800f, 197}, - {0x8018, 197}, - {0x801f, 197}, - {0x8029, 197}, - {0xc038, 197}, - {0x8003, 231}, - {0x8006, 231}, - {0x800a, 231}, - {0x800f, 231}, - {0x8018, 231}, - {0x801f, 231}, - {0x8029, 231}, - {0xc038, 231}, - }, - /* 174 */ - { - {0x8002, 239}, - {0x8009, 239}, - {0x8017, 239}, - {0xc028, 239}, - {0x8001, 9}, - {0xc016, 9}, - {0x8001, 142}, - {0xc016, 142}, - {0x8001, 144}, - {0xc016, 144}, - {0x8001, 145}, - {0xc016, 145}, - {0x8001, 148}, - {0xc016, 148}, - {0x8001, 159}, - {0xc016, 159}, - }, - /* 175 */ - { - {0x8003, 239}, - {0x8006, 239}, - {0x800a, 239}, - {0x800f, 239}, - {0x8018, 239}, - {0x801f, 239}, - {0x8029, 239}, - {0xc038, 239}, - {0x8002, 9}, - {0x8009, 9}, - {0x8017, 9}, - {0xc028, 9}, - {0x8002, 142}, - {0x8009, 142}, - {0x8017, 142}, - {0xc028, 142}, - }, - /* 176 */ - { - {0x8003, 9}, - {0x8006, 9}, - {0x800a, 9}, - {0x800f, 9}, - {0x8018, 9}, - {0x801f, 9}, - {0x8029, 9}, - {0xc038, 9}, - {0x8003, 142}, - {0x8006, 142}, - {0x800a, 142}, - {0x800f, 142}, - {0x8018, 142}, - {0x801f, 142}, - {0x8029, 142}, - {0xc038, 142}, - }, - /* 177 */ - { - {0x8002, 144}, - {0x8009, 144}, - {0x8017, 144}, - {0xc028, 144}, - {0x8002, 145}, - {0x8009, 145}, - {0x8017, 145}, - {0xc028, 145}, - {0x8002, 148}, - {0x8009, 148}, - {0x8017, 148}, - {0xc028, 148}, - {0x8002, 159}, - {0x8009, 159}, - {0x8017, 159}, - {0xc028, 159}, - }, - /* 178 */ - { - {0x8003, 144}, - {0x8006, 144}, - {0x800a, 144}, - {0x800f, 144}, - {0x8018, 144}, - {0x801f, 144}, - {0x8029, 144}, - {0xc038, 144}, - {0x8003, 145}, - {0x8006, 145}, - {0x800a, 145}, - {0x800f, 145}, - {0x8018, 145}, - {0x801f, 145}, - {0x8029, 145}, - {0xc038, 145}, - }, - /* 179 */ - { - {0x8003, 148}, - {0x8006, 148}, - {0x800a, 148}, - {0x800f, 148}, - {0x8018, 148}, - {0x801f, 148}, - {0x8029, 148}, - {0xc038, 148}, - {0x8003, 159}, - {0x8006, 159}, - {0x800a, 159}, - {0x800f, 159}, - {0x8018, 159}, - {0x801f, 159}, - {0x8029, 159}, - {0xc038, 159}, - }, - /* 180 */ - { - {0xc000, 171}, - {0xc000, 206}, - {0xc000, 215}, - {0xc000, 225}, - {0xc000, 236}, - {0xc000, 237}, - {0xbc, 0}, - {0xbd, 0}, - {0xc1, 0}, - {0xc4, 0}, - {0xc8, 0}, - {0xcb, 0}, - {0xd1, 0}, - {0xd8, 0}, - {0xe0, 0}, - {0xee, 0}, - }, - /* 181 */ - { - {0x8001, 171}, - {0xc016, 171}, - {0x8001, 206}, - {0xc016, 206}, - {0x8001, 215}, - {0xc016, 215}, - {0x8001, 225}, - {0xc016, 225}, - {0x8001, 236}, - {0xc016, 236}, - {0x8001, 237}, - {0xc016, 237}, - {0xc000, 199}, - {0xc000, 207}, - {0xc000, 234}, - {0xc000, 235}, - }, - /* 182 */ - { - {0x8002, 171}, - {0x8009, 171}, - {0x8017, 171}, - {0xc028, 171}, - {0x8002, 206}, - {0x8009, 206}, - {0x8017, 206}, - {0xc028, 206}, - {0x8002, 215}, - {0x8009, 215}, - {0x8017, 215}, - {0xc028, 215}, - {0x8002, 225}, - {0x8009, 225}, - {0x8017, 225}, - {0xc028, 225}, - }, - /* 183 */ - { - {0x8003, 171}, - {0x8006, 171}, - {0x800a, 171}, - {0x800f, 171}, - {0x8018, 171}, - {0x801f, 171}, - {0x8029, 171}, - {0xc038, 171}, - {0x8003, 206}, - {0x8006, 206}, - {0x800a, 206}, - {0x800f, 206}, - {0x8018, 206}, - {0x801f, 206}, - {0x8029, 206}, - {0xc038, 206}, - }, - /* 184 */ - { - {0x8003, 215}, - {0x8006, 215}, - {0x800a, 215}, - {0x800f, 215}, - {0x8018, 215}, - {0x801f, 215}, - {0x8029, 215}, - {0xc038, 215}, - {0x8003, 225}, - {0x8006, 225}, - {0x800a, 225}, - {0x800f, 225}, - {0x8018, 225}, - {0x801f, 225}, - {0x8029, 225}, - {0xc038, 225}, - }, - /* 185 */ - { - {0x8002, 236}, - {0x8009, 236}, - {0x8017, 236}, - {0xc028, 236}, - {0x8002, 237}, - {0x8009, 237}, - {0x8017, 237}, - {0xc028, 237}, - {0x8001, 199}, - {0xc016, 199}, - {0x8001, 207}, - {0xc016, 207}, - {0x8001, 234}, - {0xc016, 234}, - {0x8001, 235}, - {0xc016, 235}, - }, - /* 186 */ - { - {0x8003, 236}, - {0x8006, 236}, - {0x800a, 236}, - {0x800f, 236}, - {0x8018, 236}, - {0x801f, 236}, - {0x8029, 236}, - {0xc038, 236}, - {0x8003, 237}, - {0x8006, 237}, - {0x800a, 237}, - {0x800f, 237}, - {0x8018, 237}, - {0x801f, 237}, - {0x8029, 237}, - {0xc038, 237}, - }, - /* 187 */ - { - {0x8002, 199}, - {0x8009, 199}, - {0x8017, 199}, - {0xc028, 199}, - {0x8002, 207}, - {0x8009, 207}, - {0x8017, 207}, - {0xc028, 207}, - {0x8002, 234}, - {0x8009, 234}, - {0x8017, 234}, - {0xc028, 234}, - {0x8002, 235}, - {0x8009, 235}, - {0x8017, 235}, - {0xc028, 235}, - }, - /* 188 */ - { - {0x8003, 199}, - {0x8006, 199}, - {0x800a, 199}, - {0x800f, 199}, - {0x8018, 199}, - {0x801f, 199}, - {0x8029, 199}, - {0xc038, 199}, - {0x8003, 207}, - {0x8006, 207}, - {0x800a, 207}, - {0x800f, 207}, - {0x8018, 207}, - {0x801f, 207}, - {0x8029, 207}, - {0xc038, 207}, - }, - /* 189 */ - { - {0x8003, 234}, - {0x8006, 234}, - {0x800a, 234}, - {0x800f, 234}, - {0x8018, 234}, - {0x801f, 234}, - {0x8029, 234}, - {0xc038, 234}, - {0x8003, 235}, - {0x8006, 235}, - {0x800a, 235}, - {0x800f, 235}, - {0x8018, 235}, - {0x801f, 235}, - {0x8029, 235}, - {0xc038, 235}, - }, - /* 190 */ - { - {0xc2, 0}, - {0xc3, 0}, - {0xc5, 0}, - {0xc6, 0}, - {0xc9, 0}, - {0xca, 0}, - {0xcc, 0}, - {0xcd, 0}, - {0xd2, 0}, - {0xd5, 0}, - {0xd9, 0}, - {0xdc, 0}, - {0xe1, 0}, - {0xe7, 0}, - {0xef, 0}, - {0xf6, 0}, - }, - /* 191 */ - { - {0xc000, 192}, - {0xc000, 193}, - {0xc000, 200}, - {0xc000, 201}, - {0xc000, 202}, - {0xc000, 205}, - {0xc000, 210}, - {0xc000, 213}, - {0xc000, 218}, - {0xc000, 219}, - {0xc000, 238}, - {0xc000, 240}, - {0xc000, 242}, - {0xc000, 243}, - {0xc000, 255}, - {0xce, 0}, - }, - /* 192 */ - { - {0x8001, 192}, - {0xc016, 192}, - {0x8001, 193}, - {0xc016, 193}, - {0x8001, 200}, - {0xc016, 200}, - {0x8001, 201}, - {0xc016, 201}, - {0x8001, 202}, - {0xc016, 202}, - {0x8001, 205}, - {0xc016, 205}, - {0x8001, 210}, - {0xc016, 210}, - {0x8001, 213}, - {0xc016, 213}, - }, - /* 193 */ - { - {0x8002, 192}, - {0x8009, 192}, - {0x8017, 192}, - {0xc028, 192}, - {0x8002, 193}, - {0x8009, 193}, - {0x8017, 193}, - {0xc028, 193}, - {0x8002, 200}, - {0x8009, 200}, - {0x8017, 200}, - {0xc028, 200}, - {0x8002, 201}, - {0x8009, 201}, - {0x8017, 201}, - {0xc028, 201}, - }, - /* 194 */ - { - {0x8003, 192}, - {0x8006, 192}, - {0x800a, 192}, - {0x800f, 192}, - {0x8018, 192}, - {0x801f, 192}, - {0x8029, 192}, - {0xc038, 192}, - {0x8003, 193}, - {0x8006, 193}, - {0x800a, 193}, - {0x800f, 193}, - {0x8018, 193}, - {0x801f, 193}, - {0x8029, 193}, - {0xc038, 193}, - }, - /* 195 */ - { - {0x8003, 200}, - {0x8006, 200}, - {0x800a, 200}, - {0x800f, 200}, - {0x8018, 200}, - {0x801f, 200}, - {0x8029, 200}, - {0xc038, 200}, - {0x8003, 201}, - {0x8006, 201}, - {0x800a, 201}, - {0x800f, 201}, - {0x8018, 201}, - {0x801f, 201}, - {0x8029, 201}, - {0xc038, 201}, - }, - /* 196 */ - { - {0x8002, 202}, - {0x8009, 202}, - {0x8017, 202}, - {0xc028, 202}, - {0x8002, 205}, - {0x8009, 205}, - {0x8017, 205}, - {0xc028, 205}, - {0x8002, 210}, - {0x8009, 210}, - {0x8017, 210}, - {0xc028, 210}, - {0x8002, 213}, - {0x8009, 213}, - {0x8017, 213}, - {0xc028, 213}, - }, - /* 197 */ - { - {0x8003, 202}, - {0x8006, 202}, - {0x800a, 202}, - {0x800f, 202}, - {0x8018, 202}, - {0x801f, 202}, - {0x8029, 202}, - {0xc038, 202}, - {0x8003, 205}, - {0x8006, 205}, - {0x800a, 205}, - {0x800f, 205}, - {0x8018, 205}, - {0x801f, 205}, - {0x8029, 205}, - {0xc038, 205}, - }, - /* 198 */ - { - {0x8003, 210}, - {0x8006, 210}, - {0x800a, 210}, - {0x800f, 210}, - {0x8018, 210}, - {0x801f, 210}, - {0x8029, 210}, - {0xc038, 210}, - {0x8003, 213}, - {0x8006, 213}, - {0x800a, 213}, - {0x800f, 213}, - {0x8018, 213}, - {0x801f, 213}, - {0x8029, 213}, - {0xc038, 213}, - }, - /* 199 */ - { - {0x8001, 218}, - {0xc016, 218}, - {0x8001, 219}, - {0xc016, 219}, - {0x8001, 238}, - {0xc016, 238}, - {0x8001, 240}, - {0xc016, 240}, - {0x8001, 242}, - {0xc016, 242}, - {0x8001, 243}, - {0xc016, 243}, - {0x8001, 255}, - {0xc016, 255}, - {0xc000, 203}, - {0xc000, 204}, - }, - /* 200 */ - { - {0x8002, 218}, - {0x8009, 218}, - {0x8017, 218}, - {0xc028, 218}, - {0x8002, 219}, - {0x8009, 219}, - {0x8017, 219}, - {0xc028, 219}, - {0x8002, 238}, - {0x8009, 238}, - {0x8017, 238}, - {0xc028, 238}, - {0x8002, 240}, - {0x8009, 240}, - {0x8017, 240}, - {0xc028, 240}, - }, - /* 201 */ - { - {0x8003, 218}, - {0x8006, 218}, - {0x800a, 218}, - {0x800f, 218}, - {0x8018, 218}, - {0x801f, 218}, - {0x8029, 218}, - {0xc038, 218}, - {0x8003, 219}, - {0x8006, 219}, - {0x800a, 219}, - {0x800f, 219}, - {0x8018, 219}, - {0x801f, 219}, - {0x8029, 219}, - {0xc038, 219}, - }, - /* 202 */ - { - {0x8003, 238}, - {0x8006, 238}, - {0x800a, 238}, - {0x800f, 238}, - {0x8018, 238}, - {0x801f, 238}, - {0x8029, 238}, - {0xc038, 238}, - {0x8003, 240}, - {0x8006, 240}, - {0x800a, 240}, - {0x800f, 240}, - {0x8018, 240}, - {0x801f, 240}, - {0x8029, 240}, - {0xc038, 240}, - }, - /* 203 */ - { - {0x8002, 242}, - {0x8009, 242}, - {0x8017, 242}, - {0xc028, 242}, - {0x8002, 243}, - {0x8009, 243}, - {0x8017, 243}, - {0xc028, 243}, - {0x8002, 255}, - {0x8009, 255}, - {0x8017, 255}, - {0xc028, 255}, - {0x8001, 203}, - {0xc016, 203}, - {0x8001, 204}, - {0xc016, 204}, - }, - /* 204 */ - { - {0x8003, 242}, - {0x8006, 242}, - {0x800a, 242}, - {0x800f, 242}, - {0x8018, 242}, - {0x801f, 242}, - {0x8029, 242}, - {0xc038, 242}, - {0x8003, 243}, - {0x8006, 243}, - {0x800a, 243}, - {0x800f, 243}, - {0x8018, 243}, - {0x801f, 243}, - {0x8029, 243}, - {0xc038, 243}, - }, - /* 205 */ - { - {0x8003, 255}, - {0x8006, 255}, - {0x800a, 255}, - {0x800f, 255}, - {0x8018, 255}, - {0x801f, 255}, - {0x8029, 255}, - {0xc038, 255}, - {0x8002, 203}, - {0x8009, 203}, - {0x8017, 203}, - {0xc028, 203}, - {0x8002, 204}, - {0x8009, 204}, - {0x8017, 204}, - {0xc028, 204}, - }, - /* 206 */ - { - {0x8003, 203}, - {0x8006, 203}, - {0x800a, 203}, - {0x800f, 203}, - {0x8018, 203}, - {0x801f, 203}, - {0x8029, 203}, - {0xc038, 203}, - {0x8003, 204}, - {0x8006, 204}, - {0x800a, 204}, - {0x800f, 204}, - {0x8018, 204}, - {0x801f, 204}, - {0x8029, 204}, - {0xc038, 204}, - }, - /* 207 */ - { - {0xd3, 0}, - {0xd4, 0}, - {0xd6, 0}, - {0xd7, 0}, - {0xda, 0}, - {0xdb, 0}, - {0xdd, 0}, - {0xde, 0}, - {0xe2, 0}, - {0xe4, 0}, - {0xe8, 0}, - {0xeb, 0}, - {0xf0, 0}, - {0xf3, 0}, - {0xf7, 0}, - {0xfa, 0}, - }, - /* 208 */ - { - {0xc000, 211}, - {0xc000, 212}, - {0xc000, 214}, - {0xc000, 221}, - {0xc000, 222}, - {0xc000, 223}, - {0xc000, 241}, - {0xc000, 244}, - {0xc000, 245}, - {0xc000, 246}, - {0xc000, 247}, - {0xc000, 248}, - {0xc000, 250}, - {0xc000, 251}, - {0xc000, 252}, - {0xc000, 253}, - }, - /* 209 */ - { - {0x8001, 211}, - {0xc016, 211}, - {0x8001, 212}, - {0xc016, 212}, - {0x8001, 214}, - {0xc016, 214}, - {0x8001, 221}, - {0xc016, 221}, - {0x8001, 222}, - {0xc016, 222}, - {0x8001, 223}, - {0xc016, 223}, - {0x8001, 241}, - {0xc016, 241}, - {0x8001, 244}, - {0xc016, 244}, - }, - /* 210 */ - { - {0x8002, 211}, - {0x8009, 211}, - {0x8017, 211}, - {0xc028, 211}, - {0x8002, 212}, - {0x8009, 212}, - {0x8017, 212}, - {0xc028, 212}, - {0x8002, 214}, - {0x8009, 214}, - {0x8017, 214}, - {0xc028, 214}, - {0x8002, 221}, - {0x8009, 221}, - {0x8017, 221}, - {0xc028, 221}, - }, - /* 211 */ - { - {0x8003, 211}, - {0x8006, 211}, - {0x800a, 211}, - {0x800f, 211}, - {0x8018, 211}, - {0x801f, 211}, - {0x8029, 211}, - {0xc038, 211}, - {0x8003, 212}, - {0x8006, 212}, - {0x800a, 212}, - {0x800f, 212}, - {0x8018, 212}, - {0x801f, 212}, - {0x8029, 212}, - {0xc038, 212}, - }, - /* 212 */ - { - {0x8003, 214}, - {0x8006, 214}, - {0x800a, 214}, - {0x800f, 214}, - {0x8018, 214}, - {0x801f, 214}, - {0x8029, 214}, - {0xc038, 214}, - {0x8003, 221}, - {0x8006, 221}, - {0x800a, 221}, - {0x800f, 221}, - {0x8018, 221}, - {0x801f, 221}, - {0x8029, 221}, - {0xc038, 221}, - }, - /* 213 */ - { - {0x8002, 222}, - {0x8009, 222}, - {0x8017, 222}, - {0xc028, 222}, - {0x8002, 223}, - {0x8009, 223}, - {0x8017, 223}, - {0xc028, 223}, - {0x8002, 241}, - {0x8009, 241}, - {0x8017, 241}, - {0xc028, 241}, - {0x8002, 244}, - {0x8009, 244}, - {0x8017, 244}, - {0xc028, 244}, - }, - /* 214 */ - { - {0x8003, 222}, - {0x8006, 222}, - {0x800a, 222}, - {0x800f, 222}, - {0x8018, 222}, - {0x801f, 222}, - {0x8029, 222}, - {0xc038, 222}, - {0x8003, 223}, - {0x8006, 223}, - {0x800a, 223}, - {0x800f, 223}, - {0x8018, 223}, - {0x801f, 223}, - {0x8029, 223}, - {0xc038, 223}, - }, - /* 215 */ - { - {0x8003, 241}, - {0x8006, 241}, - {0x800a, 241}, - {0x800f, 241}, - {0x8018, 241}, - {0x801f, 241}, - {0x8029, 241}, - {0xc038, 241}, - {0x8003, 244}, - {0x8006, 244}, - {0x800a, 244}, - {0x800f, 244}, - {0x8018, 244}, - {0x801f, 244}, - {0x8029, 244}, - {0xc038, 244}, - }, - /* 216 */ - { - {0x8001, 245}, - {0xc016, 245}, - {0x8001, 246}, - {0xc016, 246}, - {0x8001, 247}, - {0xc016, 247}, - {0x8001, 248}, - {0xc016, 248}, - {0x8001, 250}, - {0xc016, 250}, - {0x8001, 251}, - {0xc016, 251}, - {0x8001, 252}, - {0xc016, 252}, - {0x8001, 253}, - {0xc016, 253}, - }, - /* 217 */ - { - {0x8002, 245}, - {0x8009, 245}, - {0x8017, 245}, - {0xc028, 245}, - {0x8002, 246}, - {0x8009, 246}, - {0x8017, 246}, - {0xc028, 246}, - {0x8002, 247}, - {0x8009, 247}, - {0x8017, 247}, - {0xc028, 247}, - {0x8002, 248}, - {0x8009, 248}, - {0x8017, 248}, - {0xc028, 248}, - }, - /* 218 */ - { - {0x8003, 245}, - {0x8006, 245}, - {0x800a, 245}, - {0x800f, 245}, - {0x8018, 245}, - {0x801f, 245}, - {0x8029, 245}, - {0xc038, 245}, - {0x8003, 246}, - {0x8006, 246}, - {0x800a, 246}, - {0x800f, 246}, - {0x8018, 246}, - {0x801f, 246}, - {0x8029, 246}, - {0xc038, 246}, - }, - /* 219 */ - { - {0x8003, 247}, - {0x8006, 247}, - {0x800a, 247}, - {0x800f, 247}, - {0x8018, 247}, - {0x801f, 247}, - {0x8029, 247}, - {0xc038, 247}, - {0x8003, 248}, - {0x8006, 248}, - {0x800a, 248}, - {0x800f, 248}, - {0x8018, 248}, - {0x801f, 248}, - {0x8029, 248}, - {0xc038, 248}, - }, - /* 220 */ - { - {0x8002, 250}, - {0x8009, 250}, - {0x8017, 250}, - {0xc028, 250}, - {0x8002, 251}, - {0x8009, 251}, - {0x8017, 251}, - {0xc028, 251}, - {0x8002, 252}, - {0x8009, 252}, - {0x8017, 252}, - {0xc028, 252}, - {0x8002, 253}, - {0x8009, 253}, - {0x8017, 253}, - {0xc028, 253}, - }, - /* 221 */ - { - {0x8003, 250}, - {0x8006, 250}, - {0x800a, 250}, - {0x800f, 250}, - {0x8018, 250}, - {0x801f, 250}, - {0x8029, 250}, - {0xc038, 250}, - {0x8003, 251}, - {0x8006, 251}, - {0x800a, 251}, - {0x800f, 251}, - {0x8018, 251}, - {0x801f, 251}, - {0x8029, 251}, - {0xc038, 251}, - }, - /* 222 */ - { - {0x8003, 252}, - {0x8006, 252}, - {0x800a, 252}, - {0x800f, 252}, - {0x8018, 252}, - {0x801f, 252}, - {0x8029, 252}, - {0xc038, 252}, - {0x8003, 253}, - {0x8006, 253}, - {0x800a, 253}, - {0x800f, 253}, - {0x8018, 253}, - {0x801f, 253}, - {0x8029, 253}, - {0xc038, 253}, - }, - /* 223 */ - { - {0xc000, 254}, - {0xe3, 0}, - {0xe5, 0}, - {0xe6, 0}, - {0xe9, 0}, - {0xea, 0}, - {0xec, 0}, - {0xed, 0}, - {0xf1, 0}, - {0xf2, 0}, - {0xf4, 0}, - {0xf5, 0}, - {0xf8, 0}, - {0xf9, 0}, - {0xfb, 0}, - {0xfc, 0}, - }, - /* 224 */ - { - {0x8001, 254}, - {0xc016, 254}, - {0xc000, 2}, - {0xc000, 3}, - {0xc000, 4}, - {0xc000, 5}, - {0xc000, 6}, - {0xc000, 7}, - {0xc000, 8}, - {0xc000, 11}, - {0xc000, 12}, - {0xc000, 14}, - {0xc000, 15}, - {0xc000, 16}, - {0xc000, 17}, - {0xc000, 18}, - }, - /* 225 */ - { - {0x8002, 254}, - {0x8009, 254}, - {0x8017, 254}, - {0xc028, 254}, - {0x8001, 2}, - {0xc016, 2}, - {0x8001, 3}, - {0xc016, 3}, - {0x8001, 4}, - {0xc016, 4}, - {0x8001, 5}, - {0xc016, 5}, - {0x8001, 6}, - {0xc016, 6}, - {0x8001, 7}, - {0xc016, 7}, - }, - /* 226 */ - { - {0x8003, 254}, - {0x8006, 254}, - {0x800a, 254}, - {0x800f, 254}, - {0x8018, 254}, - {0x801f, 254}, - {0x8029, 254}, - {0xc038, 254}, - {0x8002, 2}, - {0x8009, 2}, - {0x8017, 2}, - {0xc028, 2}, - {0x8002, 3}, - {0x8009, 3}, - {0x8017, 3}, - {0xc028, 3}, - }, - /* 227 */ - { - {0x8003, 2}, - {0x8006, 2}, - {0x800a, 2}, - {0x800f, 2}, - {0x8018, 2}, - {0x801f, 2}, - {0x8029, 2}, - {0xc038, 2}, - {0x8003, 3}, - {0x8006, 3}, - {0x800a, 3}, - {0x800f, 3}, - {0x8018, 3}, - {0x801f, 3}, - {0x8029, 3}, - {0xc038, 3}, - }, - /* 228 */ - { - {0x8002, 4}, - {0x8009, 4}, - {0x8017, 4}, - {0xc028, 4}, - {0x8002, 5}, - {0x8009, 5}, - {0x8017, 5}, - {0xc028, 5}, - {0x8002, 6}, - {0x8009, 6}, - {0x8017, 6}, - {0xc028, 6}, - {0x8002, 7}, - {0x8009, 7}, - {0x8017, 7}, - {0xc028, 7}, - }, - /* 229 */ - { - {0x8003, 4}, - {0x8006, 4}, - {0x800a, 4}, - {0x800f, 4}, - {0x8018, 4}, - {0x801f, 4}, - {0x8029, 4}, - {0xc038, 4}, - {0x8003, 5}, - {0x8006, 5}, - {0x800a, 5}, - {0x800f, 5}, - {0x8018, 5}, - {0x801f, 5}, - {0x8029, 5}, - {0xc038, 5}, - }, - /* 230 */ - { - {0x8003, 6}, - {0x8006, 6}, - {0x800a, 6}, - {0x800f, 6}, - {0x8018, 6}, - {0x801f, 6}, - {0x8029, 6}, - {0xc038, 6}, - {0x8003, 7}, - {0x8006, 7}, - {0x800a, 7}, - {0x800f, 7}, - {0x8018, 7}, - {0x801f, 7}, - {0x8029, 7}, - {0xc038, 7}, - }, - /* 231 */ - { - {0x8001, 8}, - {0xc016, 8}, - {0x8001, 11}, - {0xc016, 11}, - {0x8001, 12}, - {0xc016, 12}, - {0x8001, 14}, - {0xc016, 14}, - {0x8001, 15}, - {0xc016, 15}, - {0x8001, 16}, - {0xc016, 16}, - {0x8001, 17}, - {0xc016, 17}, - {0x8001, 18}, - {0xc016, 18}, - }, - /* 232 */ - { - {0x8002, 8}, - {0x8009, 8}, - {0x8017, 8}, - {0xc028, 8}, - {0x8002, 11}, - {0x8009, 11}, - {0x8017, 11}, - {0xc028, 11}, - {0x8002, 12}, - {0x8009, 12}, - {0x8017, 12}, - {0xc028, 12}, - {0x8002, 14}, - {0x8009, 14}, - {0x8017, 14}, - {0xc028, 14}, - }, - /* 233 */ - { - {0x8003, 8}, - {0x8006, 8}, - {0x800a, 8}, - {0x800f, 8}, - {0x8018, 8}, - {0x801f, 8}, - {0x8029, 8}, - {0xc038, 8}, - {0x8003, 11}, - {0x8006, 11}, - {0x800a, 11}, - {0x800f, 11}, - {0x8018, 11}, - {0x801f, 11}, - {0x8029, 11}, - {0xc038, 11}, - }, - /* 234 */ - { - {0x8003, 12}, - {0x8006, 12}, - {0x800a, 12}, - {0x800f, 12}, - {0x8018, 12}, - {0x801f, 12}, - {0x8029, 12}, - {0xc038, 12}, - {0x8003, 14}, - {0x8006, 14}, - {0x800a, 14}, - {0x800f, 14}, - {0x8018, 14}, - {0x801f, 14}, - {0x8029, 14}, - {0xc038, 14}, - }, - /* 235 */ - { - {0x8002, 15}, - {0x8009, 15}, - {0x8017, 15}, - {0xc028, 15}, - {0x8002, 16}, - {0x8009, 16}, - {0x8017, 16}, - {0xc028, 16}, - {0x8002, 17}, - {0x8009, 17}, - {0x8017, 17}, - {0xc028, 17}, - {0x8002, 18}, - {0x8009, 18}, - {0x8017, 18}, - {0xc028, 18}, - }, - /* 236 */ - { - {0x8003, 15}, - {0x8006, 15}, - {0x800a, 15}, - {0x800f, 15}, - {0x8018, 15}, - {0x801f, 15}, - {0x8029, 15}, - {0xc038, 15}, - {0x8003, 16}, - {0x8006, 16}, - {0x800a, 16}, - {0x800f, 16}, - {0x8018, 16}, - {0x801f, 16}, - {0x8029, 16}, - {0xc038, 16}, - }, - /* 237 */ - { - {0x8003, 17}, - {0x8006, 17}, - {0x800a, 17}, - {0x800f, 17}, - {0x8018, 17}, - {0x801f, 17}, - {0x8029, 17}, - {0xc038, 17}, - {0x8003, 18}, - {0x8006, 18}, - {0x800a, 18}, - {0x800f, 18}, - {0x8018, 18}, - {0x801f, 18}, - {0x8029, 18}, - {0xc038, 18}, - }, - /* 238 */ - { - {0xc000, 19}, - {0xc000, 20}, - {0xc000, 21}, - {0xc000, 23}, - {0xc000, 24}, - {0xc000, 25}, - {0xc000, 26}, - {0xc000, 27}, - {0xc000, 28}, - {0xc000, 29}, - {0xc000, 30}, - {0xc000, 31}, - {0xc000, 127}, - {0xc000, 220}, - {0xc000, 249}, - {0xfd, 0}, - }, - /* 239 */ - { - {0x8001, 19}, - {0xc016, 19}, - {0x8001, 20}, - {0xc016, 20}, - {0x8001, 21}, - {0xc016, 21}, - {0x8001, 23}, - {0xc016, 23}, - {0x8001, 24}, - {0xc016, 24}, - {0x8001, 25}, - {0xc016, 25}, - {0x8001, 26}, - {0xc016, 26}, - {0x8001, 27}, - {0xc016, 27}, - }, - /* 240 */ - { - {0x8002, 19}, - {0x8009, 19}, - {0x8017, 19}, - {0xc028, 19}, - {0x8002, 20}, - {0x8009, 20}, - {0x8017, 20}, - {0xc028, 20}, - {0x8002, 21}, - {0x8009, 21}, - {0x8017, 21}, - {0xc028, 21}, - {0x8002, 23}, - {0x8009, 23}, - {0x8017, 23}, - {0xc028, 23}, - }, - /* 241 */ - { - {0x8003, 19}, - {0x8006, 19}, - {0x800a, 19}, - {0x800f, 19}, - {0x8018, 19}, - {0x801f, 19}, - {0x8029, 19}, - {0xc038, 19}, - {0x8003, 20}, - {0x8006, 20}, - {0x800a, 20}, - {0x800f, 20}, - {0x8018, 20}, - {0x801f, 20}, - {0x8029, 20}, - {0xc038, 20}, - }, - /* 242 */ - { - {0x8003, 21}, - {0x8006, 21}, - {0x800a, 21}, - {0x800f, 21}, - {0x8018, 21}, - {0x801f, 21}, - {0x8029, 21}, - {0xc038, 21}, - {0x8003, 23}, - {0x8006, 23}, - {0x800a, 23}, - {0x800f, 23}, - {0x8018, 23}, - {0x801f, 23}, - {0x8029, 23}, - {0xc038, 23}, - }, - /* 243 */ - { - {0x8002, 24}, - {0x8009, 24}, - {0x8017, 24}, - {0xc028, 24}, - {0x8002, 25}, - {0x8009, 25}, - {0x8017, 25}, - {0xc028, 25}, - {0x8002, 26}, - {0x8009, 26}, - {0x8017, 26}, - {0xc028, 26}, - {0x8002, 27}, - {0x8009, 27}, - {0x8017, 27}, - {0xc028, 27}, - }, - /* 244 */ - { - {0x8003, 24}, - {0x8006, 24}, - {0x800a, 24}, - {0x800f, 24}, - {0x8018, 24}, - {0x801f, 24}, - {0x8029, 24}, - {0xc038, 24}, - {0x8003, 25}, - {0x8006, 25}, - {0x800a, 25}, - {0x800f, 25}, - {0x8018, 25}, - {0x801f, 25}, - {0x8029, 25}, - {0xc038, 25}, - }, - /* 245 */ - { - {0x8003, 26}, - {0x8006, 26}, - {0x800a, 26}, - {0x800f, 26}, - {0x8018, 26}, - {0x801f, 26}, - {0x8029, 26}, - {0xc038, 26}, - {0x8003, 27}, - {0x8006, 27}, - {0x800a, 27}, - {0x800f, 27}, - {0x8018, 27}, - {0x801f, 27}, - {0x8029, 27}, - {0xc038, 27}, - }, - /* 246 */ - { - {0x8001, 28}, - {0xc016, 28}, - {0x8001, 29}, - {0xc016, 29}, - {0x8001, 30}, - {0xc016, 30}, - {0x8001, 31}, - {0xc016, 31}, - {0x8001, 127}, - {0xc016, 127}, - {0x8001, 220}, - {0xc016, 220}, - {0x8001, 249}, - {0xc016, 249}, - {0xfe, 0}, - {0xff, 0}, - }, - /* 247 */ - { - {0x8002, 28}, - {0x8009, 28}, - {0x8017, 28}, - {0xc028, 28}, - {0x8002, 29}, - {0x8009, 29}, - {0x8017, 29}, - {0xc028, 29}, - {0x8002, 30}, - {0x8009, 30}, - {0x8017, 30}, - {0xc028, 30}, - {0x8002, 31}, - {0x8009, 31}, - {0x8017, 31}, - {0xc028, 31}, - }, - /* 248 */ - { - {0x8003, 28}, - {0x8006, 28}, - {0x800a, 28}, - {0x800f, 28}, - {0x8018, 28}, - {0x801f, 28}, - {0x8029, 28}, - {0xc038, 28}, - {0x8003, 29}, - {0x8006, 29}, - {0x800a, 29}, - {0x800f, 29}, - {0x8018, 29}, - {0x801f, 29}, - {0x8029, 29}, - {0xc038, 29}, - }, - /* 249 */ - { - {0x8003, 30}, - {0x8006, 30}, - {0x800a, 30}, - {0x800f, 30}, - {0x8018, 30}, - {0x801f, 30}, - {0x8029, 30}, - {0xc038, 30}, - {0x8003, 31}, - {0x8006, 31}, - {0x800a, 31}, - {0x800f, 31}, - {0x8018, 31}, - {0x801f, 31}, - {0x8029, 31}, - {0xc038, 31}, - }, - /* 250 */ - { - {0x8002, 127}, - {0x8009, 127}, - {0x8017, 127}, - {0xc028, 127}, - {0x8002, 220}, - {0x8009, 220}, - {0x8017, 220}, - {0xc028, 220}, - {0x8002, 249}, - {0x8009, 249}, - {0x8017, 249}, - {0xc028, 249}, - {0xc000, 10}, - {0xc000, 13}, - {0xc000, 22}, - {0x100, 0}, - }, - /* 251 */ - { - {0x8003, 127}, - {0x8006, 127}, - {0x800a, 127}, - {0x800f, 127}, - {0x8018, 127}, - {0x801f, 127}, - {0x8029, 127}, - {0xc038, 127}, - {0x8003, 220}, - {0x8006, 220}, - {0x800a, 220}, - {0x800f, 220}, - {0x8018, 220}, - {0x801f, 220}, - {0x8029, 220}, - {0xc038, 220}, - }, - /* 252 */ - { - {0x8003, 249}, - {0x8006, 249}, - {0x800a, 249}, - {0x800f, 249}, - {0x8018, 249}, - {0x801f, 249}, - {0x8029, 249}, - {0xc038, 249}, - {0x8001, 10}, - {0xc016, 10}, - {0x8001, 13}, - {0xc016, 13}, - {0x8001, 22}, - {0xc016, 22}, - {0x100, 0}, - {0x100, 0}, - }, - /* 253 */ - { - {0x8002, 10}, - {0x8009, 10}, - {0x8017, 10}, - {0xc028, 10}, - {0x8002, 13}, - {0x8009, 13}, - {0x8017, 13}, - {0xc028, 13}, - {0x8002, 22}, - {0x8009, 22}, - {0x8017, 22}, - {0xc028, 22}, - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - }, - /* 254 */ - { - {0x8003, 10}, - {0x8006, 10}, - {0x800a, 10}, - {0x800f, 10}, - {0x8018, 10}, - {0x801f, 10}, - {0x8029, 10}, - {0xc038, 10}, - {0x8003, 13}, - {0x8006, 13}, - {0x800a, 13}, - {0x800f, 13}, - {0x8018, 13}, - {0x801f, 13}, - {0x8029, 13}, - {0xc038, 13}, - }, - /* 255 */ - { - {0x8003, 22}, - {0x8006, 22}, - {0x800a, 22}, - {0x800f, 22}, - {0x8018, 22}, - {0x801f, 22}, - {0x8029, 22}, - {0xc038, 22}, - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - }, - /* 256 */ - { - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - {0x100, 0}, - }, + /* 0 */ + { + {0x04, 0}, + {0x05, 0}, + {0x07, 0}, + {0x08, 0}, + {0x0b, 0}, + {0x0c, 0}, + {0x10, 0}, + {0x13, 0}, + {0x19, 0}, + {0x1c, 0}, + {0x20, 0}, + {0x23, 0}, + {0x2a, 0}, + {0x31, 0}, + {0x39, 0}, + {0x4040, 0}, + }, + /* 1 */ + { + {0xc000, 48}, + {0xc000, 49}, + {0xc000, 50}, + {0xc000, 97}, + {0xc000, 99}, + {0xc000, 101}, + {0xc000, 105}, + {0xc000, 111}, + {0xc000, 115}, + {0xc000, 116}, + {0x0d, 0}, + {0x0e, 0}, + {0x11, 0}, + {0x12, 0}, + {0x14, 0}, + {0x15, 0}, + }, + /* 2 */ + { + {0x8001, 48}, + {0xc016, 48}, + {0x8001, 49}, + {0xc016, 49}, + {0x8001, 50}, + {0xc016, 50}, + {0x8001, 97}, + {0xc016, 97}, + {0x8001, 99}, + {0xc016, 99}, + {0x8001, 101}, + {0xc016, 101}, + {0x8001, 105}, + {0xc016, 105}, + {0x8001, 111}, + {0xc016, 111}, + }, + /* 3 */ + { + {0x8002, 48}, + {0x8009, 48}, + {0x8017, 48}, + {0xc028, 48}, + {0x8002, 49}, + {0x8009, 49}, + {0x8017, 49}, + {0xc028, 49}, + {0x8002, 50}, + {0x8009, 50}, + {0x8017, 50}, + {0xc028, 50}, + {0x8002, 97}, + {0x8009, 97}, + {0x8017, 97}, + {0xc028, 97}, + }, + /* 4 */ + { + {0x8003, 48}, + {0x8006, 48}, + {0x800a, 48}, + {0x800f, 48}, + {0x8018, 48}, + {0x801f, 48}, + {0x8029, 48}, + {0xc038, 48}, + {0x8003, 49}, + {0x8006, 49}, + {0x800a, 49}, + {0x800f, 49}, + {0x8018, 49}, + {0x801f, 49}, + {0x8029, 49}, + {0xc038, 49}, + }, + /* 5 */ + { + {0x8003, 50}, + {0x8006, 50}, + {0x800a, 50}, + {0x800f, 50}, + {0x8018, 50}, + {0x801f, 50}, + {0x8029, 50}, + {0xc038, 50}, + {0x8003, 97}, + {0x8006, 97}, + {0x800a, 97}, + {0x800f, 97}, + {0x8018, 97}, + {0x801f, 97}, + {0x8029, 97}, + {0xc038, 97}, + }, + /* 6 */ + { + {0x8002, 99}, + {0x8009, 99}, + {0x8017, 99}, + {0xc028, 99}, + {0x8002, 101}, + {0x8009, 101}, + {0x8017, 101}, + {0xc028, 101}, + {0x8002, 105}, + {0x8009, 105}, + {0x8017, 105}, + {0xc028, 105}, + {0x8002, 111}, + {0x8009, 111}, + {0x8017, 111}, + {0xc028, 111}, + }, + /* 7 */ + { + {0x8003, 99}, + {0x8006, 99}, + {0x800a, 99}, + {0x800f, 99}, + {0x8018, 99}, + {0x801f, 99}, + {0x8029, 99}, + {0xc038, 99}, + {0x8003, 101}, + {0x8006, 101}, + {0x800a, 101}, + {0x800f, 101}, + {0x8018, 101}, + {0x801f, 101}, + {0x8029, 101}, + {0xc038, 101}, + }, + /* 8 */ + { + {0x8003, 105}, + {0x8006, 105}, + {0x800a, 105}, + {0x800f, 105}, + {0x8018, 105}, + {0x801f, 105}, + {0x8029, 105}, + {0xc038, 105}, + {0x8003, 111}, + {0x8006, 111}, + {0x800a, 111}, + {0x800f, 111}, + {0x8018, 111}, + {0x801f, 111}, + {0x8029, 111}, + {0xc038, 111}, + }, + /* 9 */ + { + {0x8001, 115}, + {0xc016, 115}, + {0x8001, 116}, + {0xc016, 116}, + {0xc000, 32}, + {0xc000, 37}, + {0xc000, 45}, + {0xc000, 46}, + {0xc000, 47}, + {0xc000, 51}, + {0xc000, 52}, + {0xc000, 53}, + {0xc000, 54}, + {0xc000, 55}, + {0xc000, 56}, + {0xc000, 57}, + }, + /* 10 */ + { + {0x8002, 115}, + {0x8009, 115}, + {0x8017, 115}, + {0xc028, 115}, + {0x8002, 116}, + {0x8009, 116}, + {0x8017, 116}, + {0xc028, 116}, + {0x8001, 32}, + {0xc016, 32}, + {0x8001, 37}, + {0xc016, 37}, + {0x8001, 45}, + {0xc016, 45}, + {0x8001, 46}, + {0xc016, 46}, + }, + /* 11 */ + { + {0x8003, 115}, + {0x8006, 115}, + {0x800a, 115}, + {0x800f, 115}, + {0x8018, 115}, + {0x801f, 115}, + {0x8029, 115}, + {0xc038, 115}, + {0x8003, 116}, + {0x8006, 116}, + {0x800a, 116}, + {0x800f, 116}, + {0x8018, 116}, + {0x801f, 116}, + {0x8029, 116}, + {0xc038, 116}, + }, + /* 12 */ + { + {0x8002, 32}, + {0x8009, 32}, + {0x8017, 32}, + {0xc028, 32}, + {0x8002, 37}, + {0x8009, 37}, + {0x8017, 37}, + {0xc028, 37}, + {0x8002, 45}, + {0x8009, 45}, + {0x8017, 45}, + {0xc028, 45}, + {0x8002, 46}, + {0x8009, 46}, + {0x8017, 46}, + {0xc028, 46}, + }, + /* 13 */ + { + {0x8003, 32}, + {0x8006, 32}, + {0x800a, 32}, + {0x800f, 32}, + {0x8018, 32}, + {0x801f, 32}, + {0x8029, 32}, + {0xc038, 32}, + {0x8003, 37}, + {0x8006, 37}, + {0x800a, 37}, + {0x800f, 37}, + {0x8018, 37}, + {0x801f, 37}, + {0x8029, 37}, + {0xc038, 37}, + }, + /* 14 */ + { + {0x8003, 45}, + {0x8006, 45}, + {0x800a, 45}, + {0x800f, 45}, + {0x8018, 45}, + {0x801f, 45}, + {0x8029, 45}, + {0xc038, 45}, + {0x8003, 46}, + {0x8006, 46}, + {0x800a, 46}, + {0x800f, 46}, + {0x8018, 46}, + {0x801f, 46}, + {0x8029, 46}, + {0xc038, 46}, + }, + /* 15 */ + { + {0x8001, 47}, + {0xc016, 47}, + {0x8001, 51}, + {0xc016, 51}, + {0x8001, 52}, + {0xc016, 52}, + {0x8001, 53}, + {0xc016, 53}, + {0x8001, 54}, + {0xc016, 54}, + {0x8001, 55}, + {0xc016, 55}, + {0x8001, 56}, + {0xc016, 56}, + {0x8001, 57}, + {0xc016, 57}, + }, + /* 16 */ + { + {0x8002, 47}, + {0x8009, 47}, + {0x8017, 47}, + {0xc028, 47}, + {0x8002, 51}, + {0x8009, 51}, + {0x8017, 51}, + {0xc028, 51}, + {0x8002, 52}, + {0x8009, 52}, + {0x8017, 52}, + {0xc028, 52}, + {0x8002, 53}, + {0x8009, 53}, + {0x8017, 53}, + {0xc028, 53}, + }, + /* 17 */ + { + {0x8003, 47}, + {0x8006, 47}, + {0x800a, 47}, + {0x800f, 47}, + {0x8018, 47}, + {0x801f, 47}, + {0x8029, 47}, + {0xc038, 47}, + {0x8003, 51}, + {0x8006, 51}, + {0x800a, 51}, + {0x800f, 51}, + {0x8018, 51}, + {0x801f, 51}, + {0x8029, 51}, + {0xc038, 51}, + }, + /* 18 */ + { + {0x8003, 52}, + {0x8006, 52}, + {0x800a, 52}, + {0x800f, 52}, + {0x8018, 52}, + {0x801f, 52}, + {0x8029, 52}, + {0xc038, 52}, + {0x8003, 53}, + {0x8006, 53}, + {0x800a, 53}, + {0x800f, 53}, + {0x8018, 53}, + {0x801f, 53}, + {0x8029, 53}, + {0xc038, 53}, + }, + /* 19 */ + { + {0x8002, 54}, + {0x8009, 54}, + {0x8017, 54}, + {0xc028, 54}, + {0x8002, 55}, + {0x8009, 55}, + {0x8017, 55}, + {0xc028, 55}, + {0x8002, 56}, + {0x8009, 56}, + {0x8017, 56}, + {0xc028, 56}, + {0x8002, 57}, + {0x8009, 57}, + {0x8017, 57}, + {0xc028, 57}, + }, + /* 20 */ + { + {0x8003, 54}, + {0x8006, 54}, + {0x800a, 54}, + {0x800f, 54}, + {0x8018, 54}, + {0x801f, 54}, + {0x8029, 54}, + {0xc038, 54}, + {0x8003, 55}, + {0x8006, 55}, + {0x800a, 55}, + {0x800f, 55}, + {0x8018, 55}, + {0x801f, 55}, + {0x8029, 55}, + {0xc038, 55}, + }, + /* 21 */ + { + {0x8003, 56}, + {0x8006, 56}, + {0x800a, 56}, + {0x800f, 56}, + {0x8018, 56}, + {0x801f, 56}, + {0x8029, 56}, + {0xc038, 56}, + {0x8003, 57}, + {0x8006, 57}, + {0x800a, 57}, + {0x800f, 57}, + {0x8018, 57}, + {0x801f, 57}, + {0x8029, 57}, + {0xc038, 57}, + }, + /* 22 */ + { + {0x1a, 0}, + {0x1b, 0}, + {0x1d, 0}, + {0x1e, 0}, + {0x21, 0}, + {0x22, 0}, + {0x24, 0}, + {0x25, 0}, + {0x2b, 0}, + {0x2e, 0}, + {0x32, 0}, + {0x35, 0}, + {0x3a, 0}, + {0x3d, 0}, + {0x41, 0}, + {0x4044, 0}, + }, + /* 23 */ + { + {0xc000, 61}, + {0xc000, 65}, + {0xc000, 95}, + {0xc000, 98}, + {0xc000, 100}, + {0xc000, 102}, + {0xc000, 103}, + {0xc000, 104}, + {0xc000, 108}, + {0xc000, 109}, + {0xc000, 110}, + {0xc000, 112}, + {0xc000, 114}, + {0xc000, 117}, + {0x26, 0}, + {0x27, 0}, + }, + /* 24 */ + { + {0x8001, 61}, + {0xc016, 61}, + {0x8001, 65}, + {0xc016, 65}, + {0x8001, 95}, + {0xc016, 95}, + {0x8001, 98}, + {0xc016, 98}, + {0x8001, 100}, + {0xc016, 100}, + {0x8001, 102}, + {0xc016, 102}, + {0x8001, 103}, + {0xc016, 103}, + {0x8001, 104}, + {0xc016, 104}, + }, + /* 25 */ + { + {0x8002, 61}, + {0x8009, 61}, + {0x8017, 61}, + {0xc028, 61}, + {0x8002, 65}, + {0x8009, 65}, + {0x8017, 65}, + {0xc028, 65}, + {0x8002, 95}, + {0x8009, 95}, + {0x8017, 95}, + {0xc028, 95}, + {0x8002, 98}, + {0x8009, 98}, + {0x8017, 98}, + {0xc028, 98}, + }, + /* 26 */ + { + {0x8003, 61}, + {0x8006, 61}, + {0x800a, 61}, + {0x800f, 61}, + {0x8018, 61}, + {0x801f, 61}, + {0x8029, 61}, + {0xc038, 61}, + {0x8003, 65}, + {0x8006, 65}, + {0x800a, 65}, + {0x800f, 65}, + {0x8018, 65}, + {0x801f, 65}, + {0x8029, 65}, + {0xc038, 65}, + }, + /* 27 */ + { + {0x8003, 95}, + {0x8006, 95}, + {0x800a, 95}, + {0x800f, 95}, + {0x8018, 95}, + {0x801f, 95}, + {0x8029, 95}, + {0xc038, 95}, + {0x8003, 98}, + {0x8006, 98}, + {0x800a, 98}, + {0x800f, 98}, + {0x8018, 98}, + {0x801f, 98}, + {0x8029, 98}, + {0xc038, 98}, + }, + /* 28 */ + { + {0x8002, 100}, + {0x8009, 100}, + {0x8017, 100}, + {0xc028, 100}, + {0x8002, 102}, + {0x8009, 102}, + {0x8017, 102}, + {0xc028, 102}, + {0x8002, 103}, + {0x8009, 103}, + {0x8017, 103}, + {0xc028, 103}, + {0x8002, 104}, + {0x8009, 104}, + {0x8017, 104}, + {0xc028, 104}, + }, + /* 29 */ + { + {0x8003, 100}, + {0x8006, 100}, + {0x800a, 100}, + {0x800f, 100}, + {0x8018, 100}, + {0x801f, 100}, + {0x8029, 100}, + {0xc038, 100}, + {0x8003, 102}, + {0x8006, 102}, + {0x800a, 102}, + {0x800f, 102}, + {0x8018, 102}, + {0x801f, 102}, + {0x8029, 102}, + {0xc038, 102}, + }, + /* 30 */ + { + {0x8003, 103}, + {0x8006, 103}, + {0x800a, 103}, + {0x800f, 103}, + {0x8018, 103}, + {0x801f, 103}, + {0x8029, 103}, + {0xc038, 103}, + {0x8003, 104}, + {0x8006, 104}, + {0x800a, 104}, + {0x800f, 104}, + {0x8018, 104}, + {0x801f, 104}, + {0x8029, 104}, + {0xc038, 104}, + }, + /* 31 */ + { + {0x8001, 108}, + {0xc016, 108}, + {0x8001, 109}, + {0xc016, 109}, + {0x8001, 110}, + {0xc016, 110}, + {0x8001, 112}, + {0xc016, 112}, + {0x8001, 114}, + {0xc016, 114}, + {0x8001, 117}, + {0xc016, 117}, + {0xc000, 58}, + {0xc000, 66}, + {0xc000, 67}, + {0xc000, 68}, + }, + /* 32 */ + { + {0x8002, 108}, + {0x8009, 108}, + {0x8017, 108}, + {0xc028, 108}, + {0x8002, 109}, + {0x8009, 109}, + {0x8017, 109}, + {0xc028, 109}, + {0x8002, 110}, + {0x8009, 110}, + {0x8017, 110}, + {0xc028, 110}, + {0x8002, 112}, + {0x8009, 112}, + {0x8017, 112}, + {0xc028, 112}, + }, + /* 33 */ + { + {0x8003, 108}, + {0x8006, 108}, + {0x800a, 108}, + {0x800f, 108}, + {0x8018, 108}, + {0x801f, 108}, + {0x8029, 108}, + {0xc038, 108}, + {0x8003, 109}, + {0x8006, 109}, + {0x800a, 109}, + {0x800f, 109}, + {0x8018, 109}, + {0x801f, 109}, + {0x8029, 109}, + {0xc038, 109}, + }, + /* 34 */ + { + {0x8003, 110}, + {0x8006, 110}, + {0x800a, 110}, + {0x800f, 110}, + {0x8018, 110}, + {0x801f, 110}, + {0x8029, 110}, + {0xc038, 110}, + {0x8003, 112}, + {0x8006, 112}, + {0x800a, 112}, + {0x800f, 112}, + {0x8018, 112}, + {0x801f, 112}, + {0x8029, 112}, + {0xc038, 112}, + }, + /* 35 */ + { + {0x8002, 114}, + {0x8009, 114}, + {0x8017, 114}, + {0xc028, 114}, + {0x8002, 117}, + {0x8009, 117}, + {0x8017, 117}, + {0xc028, 117}, + {0x8001, 58}, + {0xc016, 58}, + {0x8001, 66}, + {0xc016, 66}, + {0x8001, 67}, + {0xc016, 67}, + {0x8001, 68}, + {0xc016, 68}, + }, + /* 36 */ + { + {0x8003, 114}, + {0x8006, 114}, + {0x800a, 114}, + {0x800f, 114}, + {0x8018, 114}, + {0x801f, 114}, + {0x8029, 114}, + {0xc038, 114}, + {0x8003, 117}, + {0x8006, 117}, + {0x800a, 117}, + {0x800f, 117}, + {0x8018, 117}, + {0x801f, 117}, + {0x8029, 117}, + {0xc038, 117}, + }, + /* 37 */ + { + {0x8002, 58}, + {0x8009, 58}, + {0x8017, 58}, + {0xc028, 58}, + {0x8002, 66}, + {0x8009, 66}, + {0x8017, 66}, + {0xc028, 66}, + {0x8002, 67}, + {0x8009, 67}, + {0x8017, 67}, + {0xc028, 67}, + {0x8002, 68}, + {0x8009, 68}, + {0x8017, 68}, + {0xc028, 68}, + }, + /* 38 */ + { + {0x8003, 58}, + {0x8006, 58}, + {0x800a, 58}, + {0x800f, 58}, + {0x8018, 58}, + {0x801f, 58}, + {0x8029, 58}, + {0xc038, 58}, + {0x8003, 66}, + {0x8006, 66}, + {0x800a, 66}, + {0x800f, 66}, + {0x8018, 66}, + {0x801f, 66}, + {0x8029, 66}, + {0xc038, 66}, + }, + /* 39 */ + { + {0x8003, 67}, + {0x8006, 67}, + {0x800a, 67}, + {0x800f, 67}, + {0x8018, 67}, + {0x801f, 67}, + {0x8029, 67}, + {0xc038, 67}, + {0x8003, 68}, + {0x8006, 68}, + {0x800a, 68}, + {0x800f, 68}, + {0x8018, 68}, + {0x801f, 68}, + {0x8029, 68}, + {0xc038, 68}, + }, + /* 40 */ + { + {0x2c, 0}, + {0x2d, 0}, + {0x2f, 0}, + {0x30, 0}, + {0x33, 0}, + {0x34, 0}, + {0x36, 0}, + {0x37, 0}, + {0x3b, 0}, + {0x3c, 0}, + {0x3e, 0}, + {0x3f, 0}, + {0x42, 0}, + {0x43, 0}, + {0x45, 0}, + {0x4048, 0}, + }, + /* 41 */ + { + {0xc000, 69}, + {0xc000, 70}, + {0xc000, 71}, + {0xc000, 72}, + {0xc000, 73}, + {0xc000, 74}, + {0xc000, 75}, + {0xc000, 76}, + {0xc000, 77}, + {0xc000, 78}, + {0xc000, 79}, + {0xc000, 80}, + {0xc000, 81}, + {0xc000, 82}, + {0xc000, 83}, + {0xc000, 84}, + }, + /* 42 */ + { + {0x8001, 69}, + {0xc016, 69}, + {0x8001, 70}, + {0xc016, 70}, + {0x8001, 71}, + {0xc016, 71}, + {0x8001, 72}, + {0xc016, 72}, + {0x8001, 73}, + {0xc016, 73}, + {0x8001, 74}, + {0xc016, 74}, + {0x8001, 75}, + {0xc016, 75}, + {0x8001, 76}, + {0xc016, 76}, + }, + /* 43 */ + { + {0x8002, 69}, + {0x8009, 69}, + {0x8017, 69}, + {0xc028, 69}, + {0x8002, 70}, + {0x8009, 70}, + {0x8017, 70}, + {0xc028, 70}, + {0x8002, 71}, + {0x8009, 71}, + {0x8017, 71}, + {0xc028, 71}, + {0x8002, 72}, + {0x8009, 72}, + {0x8017, 72}, + {0xc028, 72}, + }, + /* 44 */ + { + {0x8003, 69}, + {0x8006, 69}, + {0x800a, 69}, + {0x800f, 69}, + {0x8018, 69}, + {0x801f, 69}, + {0x8029, 69}, + {0xc038, 69}, + {0x8003, 70}, + {0x8006, 70}, + {0x800a, 70}, + {0x800f, 70}, + {0x8018, 70}, + {0x801f, 70}, + {0x8029, 70}, + {0xc038, 70}, + }, + /* 45 */ + { + {0x8003, 71}, + {0x8006, 71}, + {0x800a, 71}, + {0x800f, 71}, + {0x8018, 71}, + {0x801f, 71}, + {0x8029, 71}, + {0xc038, 71}, + {0x8003, 72}, + {0x8006, 72}, + {0x800a, 72}, + {0x800f, 72}, + {0x8018, 72}, + {0x801f, 72}, + {0x8029, 72}, + {0xc038, 72}, + }, + /* 46 */ + { + {0x8002, 73}, + {0x8009, 73}, + {0x8017, 73}, + {0xc028, 73}, + {0x8002, 74}, + {0x8009, 74}, + {0x8017, 74}, + {0xc028, 74}, + {0x8002, 75}, + {0x8009, 75}, + {0x8017, 75}, + {0xc028, 75}, + {0x8002, 76}, + {0x8009, 76}, + {0x8017, 76}, + {0xc028, 76}, + }, + /* 47 */ + { + {0x8003, 73}, + {0x8006, 73}, + {0x800a, 73}, + {0x800f, 73}, + {0x8018, 73}, + {0x801f, 73}, + {0x8029, 73}, + {0xc038, 73}, + {0x8003, 74}, + {0x8006, 74}, + {0x800a, 74}, + {0x800f, 74}, + {0x8018, 74}, + {0x801f, 74}, + {0x8029, 74}, + {0xc038, 74}, + }, + /* 48 */ + { + {0x8003, 75}, + {0x8006, 75}, + {0x800a, 75}, + {0x800f, 75}, + {0x8018, 75}, + {0x801f, 75}, + {0x8029, 75}, + {0xc038, 75}, + {0x8003, 76}, + {0x8006, 76}, + {0x800a, 76}, + {0x800f, 76}, + {0x8018, 76}, + {0x801f, 76}, + {0x8029, 76}, + {0xc038, 76}, + }, + /* 49 */ + { + {0x8001, 77}, + {0xc016, 77}, + {0x8001, 78}, + {0xc016, 78}, + {0x8001, 79}, + {0xc016, 79}, + {0x8001, 80}, + {0xc016, 80}, + {0x8001, 81}, + {0xc016, 81}, + {0x8001, 82}, + {0xc016, 82}, + {0x8001, 83}, + {0xc016, 83}, + {0x8001, 84}, + {0xc016, 84}, + }, + /* 50 */ + { + {0x8002, 77}, + {0x8009, 77}, + {0x8017, 77}, + {0xc028, 77}, + {0x8002, 78}, + {0x8009, 78}, + {0x8017, 78}, + {0xc028, 78}, + {0x8002, 79}, + {0x8009, 79}, + {0x8017, 79}, + {0xc028, 79}, + {0x8002, 80}, + {0x8009, 80}, + {0x8017, 80}, + {0xc028, 80}, + }, + /* 51 */ + { + {0x8003, 77}, + {0x8006, 77}, + {0x800a, 77}, + {0x800f, 77}, + {0x8018, 77}, + {0x801f, 77}, + {0x8029, 77}, + {0xc038, 77}, + {0x8003, 78}, + {0x8006, 78}, + {0x800a, 78}, + {0x800f, 78}, + {0x8018, 78}, + {0x801f, 78}, + {0x8029, 78}, + {0xc038, 78}, + }, + /* 52 */ + { + {0x8003, 79}, + {0x8006, 79}, + {0x800a, 79}, + {0x800f, 79}, + {0x8018, 79}, + {0x801f, 79}, + {0x8029, 79}, + {0xc038, 79}, + {0x8003, 80}, + {0x8006, 80}, + {0x800a, 80}, + {0x800f, 80}, + {0x8018, 80}, + {0x801f, 80}, + {0x8029, 80}, + {0xc038, 80}, + }, + /* 53 */ + { + {0x8002, 81}, + {0x8009, 81}, + {0x8017, 81}, + {0xc028, 81}, + {0x8002, 82}, + {0x8009, 82}, + {0x8017, 82}, + {0xc028, 82}, + {0x8002, 83}, + {0x8009, 83}, + {0x8017, 83}, + {0xc028, 83}, + {0x8002, 84}, + {0x8009, 84}, + {0x8017, 84}, + {0xc028, 84}, + }, + /* 54 */ + { + {0x8003, 81}, + {0x8006, 81}, + {0x800a, 81}, + {0x800f, 81}, + {0x8018, 81}, + {0x801f, 81}, + {0x8029, 81}, + {0xc038, 81}, + {0x8003, 82}, + {0x8006, 82}, + {0x800a, 82}, + {0x800f, 82}, + {0x8018, 82}, + {0x801f, 82}, + {0x8029, 82}, + {0xc038, 82}, + }, + /* 55 */ + { + {0x8003, 83}, + {0x8006, 83}, + {0x800a, 83}, + {0x800f, 83}, + {0x8018, 83}, + {0x801f, 83}, + {0x8029, 83}, + {0xc038, 83}, + {0x8003, 84}, + {0x8006, 84}, + {0x800a, 84}, + {0x800f, 84}, + {0x8018, 84}, + {0x801f, 84}, + {0x8029, 84}, + {0xc038, 84}, + }, + /* 56 */ + { + {0xc000, 85}, + {0xc000, 86}, + {0xc000, 87}, + {0xc000, 89}, + {0xc000, 106}, + {0xc000, 107}, + {0xc000, 113}, + {0xc000, 118}, + {0xc000, 119}, + {0xc000, 120}, + {0xc000, 121}, + {0xc000, 122}, + {0x46, 0}, + {0x47, 0}, + {0x49, 0}, + {0x404a, 0}, + }, + /* 57 */ + { + {0x8001, 85}, + {0xc016, 85}, + {0x8001, 86}, + {0xc016, 86}, + {0x8001, 87}, + {0xc016, 87}, + {0x8001, 89}, + {0xc016, 89}, + {0x8001, 106}, + {0xc016, 106}, + {0x8001, 107}, + {0xc016, 107}, + {0x8001, 113}, + {0xc016, 113}, + {0x8001, 118}, + {0xc016, 118}, + }, + /* 58 */ + { + {0x8002, 85}, + {0x8009, 85}, + {0x8017, 85}, + {0xc028, 85}, + {0x8002, 86}, + {0x8009, 86}, + {0x8017, 86}, + {0xc028, 86}, + {0x8002, 87}, + {0x8009, 87}, + {0x8017, 87}, + {0xc028, 87}, + {0x8002, 89}, + {0x8009, 89}, + {0x8017, 89}, + {0xc028, 89}, + }, + /* 59 */ + { + {0x8003, 85}, + {0x8006, 85}, + {0x800a, 85}, + {0x800f, 85}, + {0x8018, 85}, + {0x801f, 85}, + {0x8029, 85}, + {0xc038, 85}, + {0x8003, 86}, + {0x8006, 86}, + {0x800a, 86}, + {0x800f, 86}, + {0x8018, 86}, + {0x801f, 86}, + {0x8029, 86}, + {0xc038, 86}, + }, + /* 60 */ + { + {0x8003, 87}, + {0x8006, 87}, + {0x800a, 87}, + {0x800f, 87}, + {0x8018, 87}, + {0x801f, 87}, + {0x8029, 87}, + {0xc038, 87}, + {0x8003, 89}, + {0x8006, 89}, + {0x800a, 89}, + {0x800f, 89}, + {0x8018, 89}, + {0x801f, 89}, + {0x8029, 89}, + {0xc038, 89}, + }, + /* 61 */ + { + {0x8002, 106}, + {0x8009, 106}, + {0x8017, 106}, + {0xc028, 106}, + {0x8002, 107}, + {0x8009, 107}, + {0x8017, 107}, + {0xc028, 107}, + {0x8002, 113}, + {0x8009, 113}, + {0x8017, 113}, + {0xc028, 113}, + {0x8002, 118}, + {0x8009, 118}, + {0x8017, 118}, + {0xc028, 118}, + }, + /* 62 */ + { + {0x8003, 106}, + {0x8006, 106}, + {0x800a, 106}, + {0x800f, 106}, + {0x8018, 106}, + {0x801f, 106}, + {0x8029, 106}, + {0xc038, 106}, + {0x8003, 107}, + {0x8006, 107}, + {0x800a, 107}, + {0x800f, 107}, + {0x8018, 107}, + {0x801f, 107}, + {0x8029, 107}, + {0xc038, 107}, + }, + /* 63 */ + { + {0x8003, 113}, + {0x8006, 113}, + {0x800a, 113}, + {0x800f, 113}, + {0x8018, 113}, + {0x801f, 113}, + {0x8029, 113}, + {0xc038, 113}, + {0x8003, 118}, + {0x8006, 118}, + {0x800a, 118}, + {0x800f, 118}, + {0x8018, 118}, + {0x801f, 118}, + {0x8029, 118}, + {0xc038, 118}, + }, + /* 64 */ + { + {0x8001, 119}, + {0xc016, 119}, + {0x8001, 120}, + {0xc016, 120}, + {0x8001, 121}, + {0xc016, 121}, + {0x8001, 122}, + {0xc016, 122}, + {0xc000, 38}, + {0xc000, 42}, + {0xc000, 44}, + {0xc000, 59}, + {0xc000, 88}, + {0xc000, 90}, + {0x4b, 0}, + {0x4e, 0}, + }, + /* 65 */ + { + {0x8002, 119}, + {0x8009, 119}, + {0x8017, 119}, + {0xc028, 119}, + {0x8002, 120}, + {0x8009, 120}, + {0x8017, 120}, + {0xc028, 120}, + {0x8002, 121}, + {0x8009, 121}, + {0x8017, 121}, + {0xc028, 121}, + {0x8002, 122}, + {0x8009, 122}, + {0x8017, 122}, + {0xc028, 122}, + }, + /* 66 */ + { + {0x8003, 119}, + {0x8006, 119}, + {0x800a, 119}, + {0x800f, 119}, + {0x8018, 119}, + {0x801f, 119}, + {0x8029, 119}, + {0xc038, 119}, + {0x8003, 120}, + {0x8006, 120}, + {0x800a, 120}, + {0x800f, 120}, + {0x8018, 120}, + {0x801f, 120}, + {0x8029, 120}, + {0xc038, 120}, + }, + /* 67 */ + { + {0x8003, 121}, + {0x8006, 121}, + {0x800a, 121}, + {0x800f, 121}, + {0x8018, 121}, + {0x801f, 121}, + {0x8029, 121}, + {0xc038, 121}, + {0x8003, 122}, + {0x8006, 122}, + {0x800a, 122}, + {0x800f, 122}, + {0x8018, 122}, + {0x801f, 122}, + {0x8029, 122}, + {0xc038, 122}, + }, + /* 68 */ + { + {0x8001, 38}, + {0xc016, 38}, + {0x8001, 42}, + {0xc016, 42}, + {0x8001, 44}, + {0xc016, 44}, + {0x8001, 59}, + {0xc016, 59}, + {0x8001, 88}, + {0xc016, 88}, + {0x8001, 90}, + {0xc016, 90}, + {0x4c, 0}, + {0x4d, 0}, + {0x4f, 0}, + {0x51, 0}, + }, + /* 69 */ + { + {0x8002, 38}, + {0x8009, 38}, + {0x8017, 38}, + {0xc028, 38}, + {0x8002, 42}, + {0x8009, 42}, + {0x8017, 42}, + {0xc028, 42}, + {0x8002, 44}, + {0x8009, 44}, + {0x8017, 44}, + {0xc028, 44}, + {0x8002, 59}, + {0x8009, 59}, + {0x8017, 59}, + {0xc028, 59}, + }, + /* 70 */ + { + {0x8003, 38}, + {0x8006, 38}, + {0x800a, 38}, + {0x800f, 38}, + {0x8018, 38}, + {0x801f, 38}, + {0x8029, 38}, + {0xc038, 38}, + {0x8003, 42}, + {0x8006, 42}, + {0x800a, 42}, + {0x800f, 42}, + {0x8018, 42}, + {0x801f, 42}, + {0x8029, 42}, + {0xc038, 42}, + }, + /* 71 */ + { + {0x8003, 44}, + {0x8006, 44}, + {0x800a, 44}, + {0x800f, 44}, + {0x8018, 44}, + {0x801f, 44}, + {0x8029, 44}, + {0xc038, 44}, + {0x8003, 59}, + {0x8006, 59}, + {0x800a, 59}, + {0x800f, 59}, + {0x8018, 59}, + {0x801f, 59}, + {0x8029, 59}, + {0xc038, 59}, + }, + /* 72 */ + { + {0x8002, 88}, + {0x8009, 88}, + {0x8017, 88}, + {0xc028, 88}, + {0x8002, 90}, + {0x8009, 90}, + {0x8017, 90}, + {0xc028, 90}, + {0xc000, 33}, + {0xc000, 34}, + {0xc000, 40}, + {0xc000, 41}, + {0xc000, 63}, + {0x50, 0}, + {0x52, 0}, + {0x54, 0}, + }, + /* 73 */ + { + {0x8003, 88}, + {0x8006, 88}, + {0x800a, 88}, + {0x800f, 88}, + {0x8018, 88}, + {0x801f, 88}, + {0x8029, 88}, + {0xc038, 88}, + {0x8003, 90}, + {0x8006, 90}, + {0x800a, 90}, + {0x800f, 90}, + {0x8018, 90}, + {0x801f, 90}, + {0x8029, 90}, + {0xc038, 90}, + }, + /* 74 */ + { + {0x8001, 33}, + {0xc016, 33}, + {0x8001, 34}, + {0xc016, 34}, + {0x8001, 40}, + {0xc016, 40}, + {0x8001, 41}, + {0xc016, 41}, + {0x8001, 63}, + {0xc016, 63}, + {0xc000, 39}, + {0xc000, 43}, + {0xc000, 124}, + {0x53, 0}, + {0x55, 0}, + {0x58, 0}, + }, + /* 75 */ + { + {0x8002, 33}, + {0x8009, 33}, + {0x8017, 33}, + {0xc028, 33}, + {0x8002, 34}, + {0x8009, 34}, + {0x8017, 34}, + {0xc028, 34}, + {0x8002, 40}, + {0x8009, 40}, + {0x8017, 40}, + {0xc028, 40}, + {0x8002, 41}, + {0x8009, 41}, + {0x8017, 41}, + {0xc028, 41}, + }, + /* 76 */ + { + {0x8003, 33}, + {0x8006, 33}, + {0x800a, 33}, + {0x800f, 33}, + {0x8018, 33}, + {0x801f, 33}, + {0x8029, 33}, + {0xc038, 33}, + {0x8003, 34}, + {0x8006, 34}, + {0x800a, 34}, + {0x800f, 34}, + {0x8018, 34}, + {0x801f, 34}, + {0x8029, 34}, + {0xc038, 34}, + }, + /* 77 */ + { + {0x8003, 40}, + {0x8006, 40}, + {0x800a, 40}, + {0x800f, 40}, + {0x8018, 40}, + {0x801f, 40}, + {0x8029, 40}, + {0xc038, 40}, + {0x8003, 41}, + {0x8006, 41}, + {0x800a, 41}, + {0x800f, 41}, + {0x8018, 41}, + {0x801f, 41}, + {0x8029, 41}, + {0xc038, 41}, + }, + /* 78 */ + { + {0x8002, 63}, + {0x8009, 63}, + {0x8017, 63}, + {0xc028, 63}, + {0x8001, 39}, + {0xc016, 39}, + {0x8001, 43}, + {0xc016, 43}, + {0x8001, 124}, + {0xc016, 124}, + {0xc000, 35}, + {0xc000, 62}, + {0x56, 0}, + {0x57, 0}, + {0x59, 0}, + {0x5a, 0}, + }, + /* 79 */ + { + {0x8003, 63}, + {0x8006, 63}, + {0x800a, 63}, + {0x800f, 63}, + {0x8018, 63}, + {0x801f, 63}, + {0x8029, 63}, + {0xc038, 63}, + {0x8002, 39}, + {0x8009, 39}, + {0x8017, 39}, + {0xc028, 39}, + {0x8002, 43}, + {0x8009, 43}, + {0x8017, 43}, + {0xc028, 43}, + }, + /* 80 */ + { + {0x8003, 39}, + {0x8006, 39}, + {0x800a, 39}, + {0x800f, 39}, + {0x8018, 39}, + {0x801f, 39}, + {0x8029, 39}, + {0xc038, 39}, + {0x8003, 43}, + {0x8006, 43}, + {0x800a, 43}, + {0x800f, 43}, + {0x8018, 43}, + {0x801f, 43}, + {0x8029, 43}, + {0xc038, 43}, + }, + /* 81 */ + { + {0x8002, 124}, + {0x8009, 124}, + {0x8017, 124}, + {0xc028, 124}, + {0x8001, 35}, + {0xc016, 35}, + {0x8001, 62}, + {0xc016, 62}, + {0xc000, 0}, + {0xc000, 36}, + {0xc000, 64}, + {0xc000, 91}, + {0xc000, 93}, + {0xc000, 126}, + {0x5b, 0}, + {0x5c, 0}, + }, + /* 82 */ + { + {0x8003, 124}, + {0x8006, 124}, + {0x800a, 124}, + {0x800f, 124}, + {0x8018, 124}, + {0x801f, 124}, + {0x8029, 124}, + {0xc038, 124}, + {0x8002, 35}, + {0x8009, 35}, + {0x8017, 35}, + {0xc028, 35}, + {0x8002, 62}, + {0x8009, 62}, + {0x8017, 62}, + {0xc028, 62}, + }, + /* 83 */ + { + {0x8003, 35}, + {0x8006, 35}, + {0x800a, 35}, + {0x800f, 35}, + {0x8018, 35}, + {0x801f, 35}, + {0x8029, 35}, + {0xc038, 35}, + {0x8003, 62}, + {0x8006, 62}, + {0x800a, 62}, + {0x800f, 62}, + {0x8018, 62}, + {0x801f, 62}, + {0x8029, 62}, + {0xc038, 62}, + }, + /* 84 */ + { + {0x8001, 0}, + {0xc016, 0}, + {0x8001, 36}, + {0xc016, 36}, + {0x8001, 64}, + {0xc016, 64}, + {0x8001, 91}, + {0xc016, 91}, + {0x8001, 93}, + {0xc016, 93}, + {0x8001, 126}, + {0xc016, 126}, + {0xc000, 94}, + {0xc000, 125}, + {0x5d, 0}, + {0x5e, 0}, + }, + /* 85 */ + { + {0x8002, 0}, + {0x8009, 0}, + {0x8017, 0}, + {0xc028, 0}, + {0x8002, 36}, + {0x8009, 36}, + {0x8017, 36}, + {0xc028, 36}, + {0x8002, 64}, + {0x8009, 64}, + {0x8017, 64}, + {0xc028, 64}, + {0x8002, 91}, + {0x8009, 91}, + {0x8017, 91}, + {0xc028, 91}, + }, + /* 86 */ + { + {0x8003, 0}, + {0x8006, 0}, + {0x800a, 0}, + {0x800f, 0}, + {0x8018, 0}, + {0x801f, 0}, + {0x8029, 0}, + {0xc038, 0}, + {0x8003, 36}, + {0x8006, 36}, + {0x800a, 36}, + {0x800f, 36}, + {0x8018, 36}, + {0x801f, 36}, + {0x8029, 36}, + {0xc038, 36}, + }, + /* 87 */ + { + {0x8003, 64}, + {0x8006, 64}, + {0x800a, 64}, + {0x800f, 64}, + {0x8018, 64}, + {0x801f, 64}, + {0x8029, 64}, + {0xc038, 64}, + {0x8003, 91}, + {0x8006, 91}, + {0x800a, 91}, + {0x800f, 91}, + {0x8018, 91}, + {0x801f, 91}, + {0x8029, 91}, + {0xc038, 91}, + }, + /* 88 */ + { + {0x8002, 93}, + {0x8009, 93}, + {0x8017, 93}, + {0xc028, 93}, + {0x8002, 126}, + {0x8009, 126}, + {0x8017, 126}, + {0xc028, 126}, + {0x8001, 94}, + {0xc016, 94}, + {0x8001, 125}, + {0xc016, 125}, + {0xc000, 60}, + {0xc000, 96}, + {0xc000, 123}, + {0x5f, 0}, + }, + /* 89 */ + { + {0x8003, 93}, + {0x8006, 93}, + {0x800a, 93}, + {0x800f, 93}, + {0x8018, 93}, + {0x801f, 93}, + {0x8029, 93}, + {0xc038, 93}, + {0x8003, 126}, + {0x8006, 126}, + {0x800a, 126}, + {0x800f, 126}, + {0x8018, 126}, + {0x801f, 126}, + {0x8029, 126}, + {0xc038, 126}, + }, + /* 90 */ + { + {0x8002, 94}, + {0x8009, 94}, + {0x8017, 94}, + {0xc028, 94}, + {0x8002, 125}, + {0x8009, 125}, + {0x8017, 125}, + {0xc028, 125}, + {0x8001, 60}, + {0xc016, 60}, + {0x8001, 96}, + {0xc016, 96}, + {0x8001, 123}, + {0xc016, 123}, + {0x60, 0}, + {0x6e, 0}, + }, + /* 91 */ + { + {0x8003, 94}, + {0x8006, 94}, + {0x800a, 94}, + {0x800f, 94}, + {0x8018, 94}, + {0x801f, 94}, + {0x8029, 94}, + {0xc038, 94}, + {0x8003, 125}, + {0x8006, 125}, + {0x800a, 125}, + {0x800f, 125}, + {0x8018, 125}, + {0x801f, 125}, + {0x8029, 125}, + {0xc038, 125}, + }, + /* 92 */ + { + {0x8002, 60}, + {0x8009, 60}, + {0x8017, 60}, + {0xc028, 60}, + {0x8002, 96}, + {0x8009, 96}, + {0x8017, 96}, + {0xc028, 96}, + {0x8002, 123}, + {0x8009, 123}, + {0x8017, 123}, + {0xc028, 123}, + {0x61, 0}, + {0x65, 0}, + {0x6f, 0}, + {0x85, 0}, + }, + /* 93 */ + { + {0x8003, 60}, + {0x8006, 60}, + {0x800a, 60}, + {0x800f, 60}, + {0x8018, 60}, + {0x801f, 60}, + {0x8029, 60}, + {0xc038, 60}, + {0x8003, 96}, + {0x8006, 96}, + {0x800a, 96}, + {0x800f, 96}, + {0x8018, 96}, + {0x801f, 96}, + {0x8029, 96}, + {0xc038, 96}, + }, + /* 94 */ + { + {0x8003, 123}, + {0x8006, 123}, + {0x800a, 123}, + {0x800f, 123}, + {0x8018, 123}, + {0x801f, 123}, + {0x8029, 123}, + {0xc038, 123}, + {0x62, 0}, + {0x63, 0}, + {0x66, 0}, + {0x69, 0}, + {0x70, 0}, + {0x77, 0}, + {0x86, 0}, + {0x99, 0}, + }, + /* 95 */ + { + {0xc000, 92}, + {0xc000, 195}, + {0xc000, 208}, + {0x64, 0}, + {0x67, 0}, + {0x68, 0}, + {0x6a, 0}, + {0x6b, 0}, + {0x71, 0}, + {0x74, 0}, + {0x78, 0}, + {0x7e, 0}, + {0x87, 0}, + {0x8e, 0}, + {0x9a, 0}, + {0xa9, 0}, + }, + /* 96 */ + { + {0x8001, 92}, + {0xc016, 92}, + {0x8001, 195}, + {0xc016, 195}, + {0x8001, 208}, + {0xc016, 208}, + {0xc000, 128}, + {0xc000, 130}, + {0xc000, 131}, + {0xc000, 162}, + {0xc000, 184}, + {0xc000, 194}, + {0xc000, 224}, + {0xc000, 226}, + {0x6c, 0}, + {0x6d, 0}, + }, + /* 97 */ + { + {0x8002, 92}, + {0x8009, 92}, + {0x8017, 92}, + {0xc028, 92}, + {0x8002, 195}, + {0x8009, 195}, + {0x8017, 195}, + {0xc028, 195}, + {0x8002, 208}, + {0x8009, 208}, + {0x8017, 208}, + {0xc028, 208}, + {0x8001, 128}, + {0xc016, 128}, + {0x8001, 130}, + {0xc016, 130}, + }, + /* 98 */ + { + {0x8003, 92}, + {0x8006, 92}, + {0x800a, 92}, + {0x800f, 92}, + {0x8018, 92}, + {0x801f, 92}, + {0x8029, 92}, + {0xc038, 92}, + {0x8003, 195}, + {0x8006, 195}, + {0x800a, 195}, + {0x800f, 195}, + {0x8018, 195}, + {0x801f, 195}, + {0x8029, 195}, + {0xc038, 195}, + }, + /* 99 */ + { + {0x8003, 208}, + {0x8006, 208}, + {0x800a, 208}, + {0x800f, 208}, + {0x8018, 208}, + {0x801f, 208}, + {0x8029, 208}, + {0xc038, 208}, + {0x8002, 128}, + {0x8009, 128}, + {0x8017, 128}, + {0xc028, 128}, + {0x8002, 130}, + {0x8009, 130}, + {0x8017, 130}, + {0xc028, 130}, + }, + /* 100 */ + { + {0x8003, 128}, + {0x8006, 128}, + {0x800a, 128}, + {0x800f, 128}, + {0x8018, 128}, + {0x801f, 128}, + {0x8029, 128}, + {0xc038, 128}, + {0x8003, 130}, + {0x8006, 130}, + {0x800a, 130}, + {0x800f, 130}, + {0x8018, 130}, + {0x801f, 130}, + {0x8029, 130}, + {0xc038, 130}, + }, + /* 101 */ + { + {0x8001, 131}, + {0xc016, 131}, + {0x8001, 162}, + {0xc016, 162}, + {0x8001, 184}, + {0xc016, 184}, + {0x8001, 194}, + {0xc016, 194}, + {0x8001, 224}, + {0xc016, 224}, + {0x8001, 226}, + {0xc016, 226}, + {0xc000, 153}, + {0xc000, 161}, + {0xc000, 167}, + {0xc000, 172}, + }, + /* 102 */ + { + {0x8002, 131}, + {0x8009, 131}, + {0x8017, 131}, + {0xc028, 131}, + {0x8002, 162}, + {0x8009, 162}, + {0x8017, 162}, + {0xc028, 162}, + {0x8002, 184}, + {0x8009, 184}, + {0x8017, 184}, + {0xc028, 184}, + {0x8002, 194}, + {0x8009, 194}, + {0x8017, 194}, + {0xc028, 194}, + }, + /* 103 */ + { + {0x8003, 131}, + {0x8006, 131}, + {0x800a, 131}, + {0x800f, 131}, + {0x8018, 131}, + {0x801f, 131}, + {0x8029, 131}, + {0xc038, 131}, + {0x8003, 162}, + {0x8006, 162}, + {0x800a, 162}, + {0x800f, 162}, + {0x8018, 162}, + {0x801f, 162}, + {0x8029, 162}, + {0xc038, 162}, + }, + /* 104 */ + { + {0x8003, 184}, + {0x8006, 184}, + {0x800a, 184}, + {0x800f, 184}, + {0x8018, 184}, + {0x801f, 184}, + {0x8029, 184}, + {0xc038, 184}, + {0x8003, 194}, + {0x8006, 194}, + {0x800a, 194}, + {0x800f, 194}, + {0x8018, 194}, + {0x801f, 194}, + {0x8029, 194}, + {0xc038, 194}, + }, + /* 105 */ + { + {0x8002, 224}, + {0x8009, 224}, + {0x8017, 224}, + {0xc028, 224}, + {0x8002, 226}, + {0x8009, 226}, + {0x8017, 226}, + {0xc028, 226}, + {0x8001, 153}, + {0xc016, 153}, + {0x8001, 161}, + {0xc016, 161}, + {0x8001, 167}, + {0xc016, 167}, + {0x8001, 172}, + {0xc016, 172}, + }, + /* 106 */ + { + {0x8003, 224}, + {0x8006, 224}, + {0x800a, 224}, + {0x800f, 224}, + {0x8018, 224}, + {0x801f, 224}, + {0x8029, 224}, + {0xc038, 224}, + {0x8003, 226}, + {0x8006, 226}, + {0x800a, 226}, + {0x800f, 226}, + {0x8018, 226}, + {0x801f, 226}, + {0x8029, 226}, + {0xc038, 226}, + }, + /* 107 */ + { + {0x8002, 153}, + {0x8009, 153}, + {0x8017, 153}, + {0xc028, 153}, + {0x8002, 161}, + {0x8009, 161}, + {0x8017, 161}, + {0xc028, 161}, + {0x8002, 167}, + {0x8009, 167}, + {0x8017, 167}, + {0xc028, 167}, + {0x8002, 172}, + {0x8009, 172}, + {0x8017, 172}, + {0xc028, 172}, + }, + /* 108 */ + { + {0x8003, 153}, + {0x8006, 153}, + {0x800a, 153}, + {0x800f, 153}, + {0x8018, 153}, + {0x801f, 153}, + {0x8029, 153}, + {0xc038, 153}, + {0x8003, 161}, + {0x8006, 161}, + {0x800a, 161}, + {0x800f, 161}, + {0x8018, 161}, + {0x801f, 161}, + {0x8029, 161}, + {0xc038, 161}, + }, + /* 109 */ + { + {0x8003, 167}, + {0x8006, 167}, + {0x800a, 167}, + {0x800f, 167}, + {0x8018, 167}, + {0x801f, 167}, + {0x8029, 167}, + {0xc038, 167}, + {0x8003, 172}, + {0x8006, 172}, + {0x800a, 172}, + {0x800f, 172}, + {0x8018, 172}, + {0x801f, 172}, + {0x8029, 172}, + {0xc038, 172}, + }, + /* 110 */ + { + {0x72, 0}, + {0x73, 0}, + {0x75, 0}, + {0x76, 0}, + {0x79, 0}, + {0x7b, 0}, + {0x7f, 0}, + {0x82, 0}, + {0x88, 0}, + {0x8b, 0}, + {0x8f, 0}, + {0x92, 0}, + {0x9b, 0}, + {0xa2, 0}, + {0xaa, 0}, + {0xb4, 0}, + }, + /* 111 */ + { + {0xc000, 176}, + {0xc000, 177}, + {0xc000, 179}, + {0xc000, 209}, + {0xc000, 216}, + {0xc000, 217}, + {0xc000, 227}, + {0xc000, 229}, + {0xc000, 230}, + {0x7a, 0}, + {0x7c, 0}, + {0x7d, 0}, + {0x80, 0}, + {0x81, 0}, + {0x83, 0}, + {0x84, 0}, + }, + /* 112 */ + { + {0x8001, 176}, + {0xc016, 176}, + {0x8001, 177}, + {0xc016, 177}, + {0x8001, 179}, + {0xc016, 179}, + {0x8001, 209}, + {0xc016, 209}, + {0x8001, 216}, + {0xc016, 216}, + {0x8001, 217}, + {0xc016, 217}, + {0x8001, 227}, + {0xc016, 227}, + {0x8001, 229}, + {0xc016, 229}, + }, + /* 113 */ + { + {0x8002, 176}, + {0x8009, 176}, + {0x8017, 176}, + {0xc028, 176}, + {0x8002, 177}, + {0x8009, 177}, + {0x8017, 177}, + {0xc028, 177}, + {0x8002, 179}, + {0x8009, 179}, + {0x8017, 179}, + {0xc028, 179}, + {0x8002, 209}, + {0x8009, 209}, + {0x8017, 209}, + {0xc028, 209}, + }, + /* 114 */ + { + {0x8003, 176}, + {0x8006, 176}, + {0x800a, 176}, + {0x800f, 176}, + {0x8018, 176}, + {0x801f, 176}, + {0x8029, 176}, + {0xc038, 176}, + {0x8003, 177}, + {0x8006, 177}, + {0x800a, 177}, + {0x800f, 177}, + {0x8018, 177}, + {0x801f, 177}, + {0x8029, 177}, + {0xc038, 177}, + }, + /* 115 */ + { + {0x8003, 179}, + {0x8006, 179}, + {0x800a, 179}, + {0x800f, 179}, + {0x8018, 179}, + {0x801f, 179}, + {0x8029, 179}, + {0xc038, 179}, + {0x8003, 209}, + {0x8006, 209}, + {0x800a, 209}, + {0x800f, 209}, + {0x8018, 209}, + {0x801f, 209}, + {0x8029, 209}, + {0xc038, 209}, + }, + /* 116 */ + { + {0x8002, 216}, + {0x8009, 216}, + {0x8017, 216}, + {0xc028, 216}, + {0x8002, 217}, + {0x8009, 217}, + {0x8017, 217}, + {0xc028, 217}, + {0x8002, 227}, + {0x8009, 227}, + {0x8017, 227}, + {0xc028, 227}, + {0x8002, 229}, + {0x8009, 229}, + {0x8017, 229}, + {0xc028, 229}, + }, + /* 117 */ + { + {0x8003, 216}, + {0x8006, 216}, + {0x800a, 216}, + {0x800f, 216}, + {0x8018, 216}, + {0x801f, 216}, + {0x8029, 216}, + {0xc038, 216}, + {0x8003, 217}, + {0x8006, 217}, + {0x800a, 217}, + {0x800f, 217}, + {0x8018, 217}, + {0x801f, 217}, + {0x8029, 217}, + {0xc038, 217}, + }, + /* 118 */ + { + {0x8003, 227}, + {0x8006, 227}, + {0x800a, 227}, + {0x800f, 227}, + {0x8018, 227}, + {0x801f, 227}, + {0x8029, 227}, + {0xc038, 227}, + {0x8003, 229}, + {0x8006, 229}, + {0x800a, 229}, + {0x800f, 229}, + {0x8018, 229}, + {0x801f, 229}, + {0x8029, 229}, + {0xc038, 229}, + }, + /* 119 */ + { + {0x8001, 230}, + {0xc016, 230}, + {0xc000, 129}, + {0xc000, 132}, + {0xc000, 133}, + {0xc000, 134}, + {0xc000, 136}, + {0xc000, 146}, + {0xc000, 154}, + {0xc000, 156}, + {0xc000, 160}, + {0xc000, 163}, + {0xc000, 164}, + {0xc000, 169}, + {0xc000, 170}, + {0xc000, 173}, + }, + /* 120 */ + { + {0x8002, 230}, + {0x8009, 230}, + {0x8017, 230}, + {0xc028, 230}, + {0x8001, 129}, + {0xc016, 129}, + {0x8001, 132}, + {0xc016, 132}, + {0x8001, 133}, + {0xc016, 133}, + {0x8001, 134}, + {0xc016, 134}, + {0x8001, 136}, + {0xc016, 136}, + {0x8001, 146}, + {0xc016, 146}, + }, + /* 121 */ + { + {0x8003, 230}, + {0x8006, 230}, + {0x800a, 230}, + {0x800f, 230}, + {0x8018, 230}, + {0x801f, 230}, + {0x8029, 230}, + {0xc038, 230}, + {0x8002, 129}, + {0x8009, 129}, + {0x8017, 129}, + {0xc028, 129}, + {0x8002, 132}, + {0x8009, 132}, + {0x8017, 132}, + {0xc028, 132}, + }, + /* 122 */ + { + {0x8003, 129}, + {0x8006, 129}, + {0x800a, 129}, + {0x800f, 129}, + {0x8018, 129}, + {0x801f, 129}, + {0x8029, 129}, + {0xc038, 129}, + {0x8003, 132}, + {0x8006, 132}, + {0x800a, 132}, + {0x800f, 132}, + {0x8018, 132}, + {0x801f, 132}, + {0x8029, 132}, + {0xc038, 132}, + }, + /* 123 */ + { + {0x8002, 133}, + {0x8009, 133}, + {0x8017, 133}, + {0xc028, 133}, + {0x8002, 134}, + {0x8009, 134}, + {0x8017, 134}, + {0xc028, 134}, + {0x8002, 136}, + {0x8009, 136}, + {0x8017, 136}, + {0xc028, 136}, + {0x8002, 146}, + {0x8009, 146}, + {0x8017, 146}, + {0xc028, 146}, + }, + /* 124 */ + { + {0x8003, 133}, + {0x8006, 133}, + {0x800a, 133}, + {0x800f, 133}, + {0x8018, 133}, + {0x801f, 133}, + {0x8029, 133}, + {0xc038, 133}, + {0x8003, 134}, + {0x8006, 134}, + {0x800a, 134}, + {0x800f, 134}, + {0x8018, 134}, + {0x801f, 134}, + {0x8029, 134}, + {0xc038, 134}, + }, + /* 125 */ + { + {0x8003, 136}, + {0x8006, 136}, + {0x800a, 136}, + {0x800f, 136}, + {0x8018, 136}, + {0x801f, 136}, + {0x8029, 136}, + {0xc038, 136}, + {0x8003, 146}, + {0x8006, 146}, + {0x800a, 146}, + {0x800f, 146}, + {0x8018, 146}, + {0x801f, 146}, + {0x8029, 146}, + {0xc038, 146}, + }, + /* 126 */ + { + {0x8001, 154}, + {0xc016, 154}, + {0x8001, 156}, + {0xc016, 156}, + {0x8001, 160}, + {0xc016, 160}, + {0x8001, 163}, + {0xc016, 163}, + {0x8001, 164}, + {0xc016, 164}, + {0x8001, 169}, + {0xc016, 169}, + {0x8001, 170}, + {0xc016, 170}, + {0x8001, 173}, + {0xc016, 173}, + }, + /* 127 */ + { + {0x8002, 154}, + {0x8009, 154}, + {0x8017, 154}, + {0xc028, 154}, + {0x8002, 156}, + {0x8009, 156}, + {0x8017, 156}, + {0xc028, 156}, + {0x8002, 160}, + {0x8009, 160}, + {0x8017, 160}, + {0xc028, 160}, + {0x8002, 163}, + {0x8009, 163}, + {0x8017, 163}, + {0xc028, 163}, + }, + /* 128 */ + { + {0x8003, 154}, + {0x8006, 154}, + {0x800a, 154}, + {0x800f, 154}, + {0x8018, 154}, + {0x801f, 154}, + {0x8029, 154}, + {0xc038, 154}, + {0x8003, 156}, + {0x8006, 156}, + {0x800a, 156}, + {0x800f, 156}, + {0x8018, 156}, + {0x801f, 156}, + {0x8029, 156}, + {0xc038, 156}, + }, + /* 129 */ + { + {0x8003, 160}, + {0x8006, 160}, + {0x800a, 160}, + {0x800f, 160}, + {0x8018, 160}, + {0x801f, 160}, + {0x8029, 160}, + {0xc038, 160}, + {0x8003, 163}, + {0x8006, 163}, + {0x800a, 163}, + {0x800f, 163}, + {0x8018, 163}, + {0x801f, 163}, + {0x8029, 163}, + {0xc038, 163}, + }, + /* 130 */ + { + {0x8002, 164}, + {0x8009, 164}, + {0x8017, 164}, + {0xc028, 164}, + {0x8002, 169}, + {0x8009, 169}, + {0x8017, 169}, + {0xc028, 169}, + {0x8002, 170}, + {0x8009, 170}, + {0x8017, 170}, + {0xc028, 170}, + {0x8002, 173}, + {0x8009, 173}, + {0x8017, 173}, + {0xc028, 173}, + }, + /* 131 */ + { + {0x8003, 164}, + {0x8006, 164}, + {0x800a, 164}, + {0x800f, 164}, + {0x8018, 164}, + {0x801f, 164}, + {0x8029, 164}, + {0xc038, 164}, + {0x8003, 169}, + {0x8006, 169}, + {0x800a, 169}, + {0x800f, 169}, + {0x8018, 169}, + {0x801f, 169}, + {0x8029, 169}, + {0xc038, 169}, + }, + /* 132 */ + { + {0x8003, 170}, + {0x8006, 170}, + {0x800a, 170}, + {0x800f, 170}, + {0x8018, 170}, + {0x801f, 170}, + {0x8029, 170}, + {0xc038, 170}, + {0x8003, 173}, + {0x8006, 173}, + {0x800a, 173}, + {0x800f, 173}, + {0x8018, 173}, + {0x801f, 173}, + {0x8029, 173}, + {0xc038, 173}, + }, + /* 133 */ + { + {0x89, 0}, + {0x8a, 0}, + {0x8c, 0}, + {0x8d, 0}, + {0x90, 0}, + {0x91, 0}, + {0x93, 0}, + {0x96, 0}, + {0x9c, 0}, + {0x9f, 0}, + {0xa3, 0}, + {0xa6, 0}, + {0xab, 0}, + {0xae, 0}, + {0xb5, 0}, + {0xbe, 0}, + }, + /* 134 */ + { + {0xc000, 178}, + {0xc000, 181}, + {0xc000, 185}, + {0xc000, 186}, + {0xc000, 187}, + {0xc000, 189}, + {0xc000, 190}, + {0xc000, 196}, + {0xc000, 198}, + {0xc000, 228}, + {0xc000, 232}, + {0xc000, 233}, + {0x94, 0}, + {0x95, 0}, + {0x97, 0}, + {0x98, 0}, + }, + /* 135 */ + { + {0x8001, 178}, + {0xc016, 178}, + {0x8001, 181}, + {0xc016, 181}, + {0x8001, 185}, + {0xc016, 185}, + {0x8001, 186}, + {0xc016, 186}, + {0x8001, 187}, + {0xc016, 187}, + {0x8001, 189}, + {0xc016, 189}, + {0x8001, 190}, + {0xc016, 190}, + {0x8001, 196}, + {0xc016, 196}, + }, + /* 136 */ + { + {0x8002, 178}, + {0x8009, 178}, + {0x8017, 178}, + {0xc028, 178}, + {0x8002, 181}, + {0x8009, 181}, + {0x8017, 181}, + {0xc028, 181}, + {0x8002, 185}, + {0x8009, 185}, + {0x8017, 185}, + {0xc028, 185}, + {0x8002, 186}, + {0x8009, 186}, + {0x8017, 186}, + {0xc028, 186}, + }, + /* 137 */ + { + {0x8003, 178}, + {0x8006, 178}, + {0x800a, 178}, + {0x800f, 178}, + {0x8018, 178}, + {0x801f, 178}, + {0x8029, 178}, + {0xc038, 178}, + {0x8003, 181}, + {0x8006, 181}, + {0x800a, 181}, + {0x800f, 181}, + {0x8018, 181}, + {0x801f, 181}, + {0x8029, 181}, + {0xc038, 181}, + }, + /* 138 */ + { + {0x8003, 185}, + {0x8006, 185}, + {0x800a, 185}, + {0x800f, 185}, + {0x8018, 185}, + {0x801f, 185}, + {0x8029, 185}, + {0xc038, 185}, + {0x8003, 186}, + {0x8006, 186}, + {0x800a, 186}, + {0x800f, 186}, + {0x8018, 186}, + {0x801f, 186}, + {0x8029, 186}, + {0xc038, 186}, + }, + /* 139 */ + { + {0x8002, 187}, + {0x8009, 187}, + {0x8017, 187}, + {0xc028, 187}, + {0x8002, 189}, + {0x8009, 189}, + {0x8017, 189}, + {0xc028, 189}, + {0x8002, 190}, + {0x8009, 190}, + {0x8017, 190}, + {0xc028, 190}, + {0x8002, 196}, + {0x8009, 196}, + {0x8017, 196}, + {0xc028, 196}, + }, + /* 140 */ + { + {0x8003, 187}, + {0x8006, 187}, + {0x800a, 187}, + {0x800f, 187}, + {0x8018, 187}, + {0x801f, 187}, + {0x8029, 187}, + {0xc038, 187}, + {0x8003, 189}, + {0x8006, 189}, + {0x800a, 189}, + {0x800f, 189}, + {0x8018, 189}, + {0x801f, 189}, + {0x8029, 189}, + {0xc038, 189}, + }, + /* 141 */ + { + {0x8003, 190}, + {0x8006, 190}, + {0x800a, 190}, + {0x800f, 190}, + {0x8018, 190}, + {0x801f, 190}, + {0x8029, 190}, + {0xc038, 190}, + {0x8003, 196}, + {0x8006, 196}, + {0x800a, 196}, + {0x800f, 196}, + {0x8018, 196}, + {0x801f, 196}, + {0x8029, 196}, + {0xc038, 196}, + }, + /* 142 */ + { + {0x8001, 198}, + {0xc016, 198}, + {0x8001, 228}, + {0xc016, 228}, + {0x8001, 232}, + {0xc016, 232}, + {0x8001, 233}, + {0xc016, 233}, + {0xc000, 1}, + {0xc000, 135}, + {0xc000, 137}, + {0xc000, 138}, + {0xc000, 139}, + {0xc000, 140}, + {0xc000, 141}, + {0xc000, 143}, + }, + /* 143 */ + { + {0x8002, 198}, + {0x8009, 198}, + {0x8017, 198}, + {0xc028, 198}, + {0x8002, 228}, + {0x8009, 228}, + {0x8017, 228}, + {0xc028, 228}, + {0x8002, 232}, + {0x8009, 232}, + {0x8017, 232}, + {0xc028, 232}, + {0x8002, 233}, + {0x8009, 233}, + {0x8017, 233}, + {0xc028, 233}, + }, + /* 144 */ + { + {0x8003, 198}, + {0x8006, 198}, + {0x800a, 198}, + {0x800f, 198}, + {0x8018, 198}, + {0x801f, 198}, + {0x8029, 198}, + {0xc038, 198}, + {0x8003, 228}, + {0x8006, 228}, + {0x800a, 228}, + {0x800f, 228}, + {0x8018, 228}, + {0x801f, 228}, + {0x8029, 228}, + {0xc038, 228}, + }, + /* 145 */ + { + {0x8003, 232}, + {0x8006, 232}, + {0x800a, 232}, + {0x800f, 232}, + {0x8018, 232}, + {0x801f, 232}, + {0x8029, 232}, + {0xc038, 232}, + {0x8003, 233}, + {0x8006, 233}, + {0x800a, 233}, + {0x800f, 233}, + {0x8018, 233}, + {0x801f, 233}, + {0x8029, 233}, + {0xc038, 233}, + }, + /* 146 */ + { + {0x8001, 1}, + {0xc016, 1}, + {0x8001, 135}, + {0xc016, 135}, + {0x8001, 137}, + {0xc016, 137}, + {0x8001, 138}, + {0xc016, 138}, + {0x8001, 139}, + {0xc016, 139}, + {0x8001, 140}, + {0xc016, 140}, + {0x8001, 141}, + {0xc016, 141}, + {0x8001, 143}, + {0xc016, 143}, + }, + /* 147 */ + { + {0x8002, 1}, + {0x8009, 1}, + {0x8017, 1}, + {0xc028, 1}, + {0x8002, 135}, + {0x8009, 135}, + {0x8017, 135}, + {0xc028, 135}, + {0x8002, 137}, + {0x8009, 137}, + {0x8017, 137}, + {0xc028, 137}, + {0x8002, 138}, + {0x8009, 138}, + {0x8017, 138}, + {0xc028, 138}, + }, + /* 148 */ + { + {0x8003, 1}, + {0x8006, 1}, + {0x800a, 1}, + {0x800f, 1}, + {0x8018, 1}, + {0x801f, 1}, + {0x8029, 1}, + {0xc038, 1}, + {0x8003, 135}, + {0x8006, 135}, + {0x800a, 135}, + {0x800f, 135}, + {0x8018, 135}, + {0x801f, 135}, + {0x8029, 135}, + {0xc038, 135}, + }, + /* 149 */ + { + {0x8003, 137}, + {0x8006, 137}, + {0x800a, 137}, + {0x800f, 137}, + {0x8018, 137}, + {0x801f, 137}, + {0x8029, 137}, + {0xc038, 137}, + {0x8003, 138}, + {0x8006, 138}, + {0x800a, 138}, + {0x800f, 138}, + {0x8018, 138}, + {0x801f, 138}, + {0x8029, 138}, + {0xc038, 138}, + }, + /* 150 */ + { + {0x8002, 139}, + {0x8009, 139}, + {0x8017, 139}, + {0xc028, 139}, + {0x8002, 140}, + {0x8009, 140}, + {0x8017, 140}, + {0xc028, 140}, + {0x8002, 141}, + {0x8009, 141}, + {0x8017, 141}, + {0xc028, 141}, + {0x8002, 143}, + {0x8009, 143}, + {0x8017, 143}, + {0xc028, 143}, + }, + /* 151 */ + { + {0x8003, 139}, + {0x8006, 139}, + {0x800a, 139}, + {0x800f, 139}, + {0x8018, 139}, + {0x801f, 139}, + {0x8029, 139}, + {0xc038, 139}, + {0x8003, 140}, + {0x8006, 140}, + {0x800a, 140}, + {0x800f, 140}, + {0x8018, 140}, + {0x801f, 140}, + {0x8029, 140}, + {0xc038, 140}, + }, + /* 152 */ + { + {0x8003, 141}, + {0x8006, 141}, + {0x800a, 141}, + {0x800f, 141}, + {0x8018, 141}, + {0x801f, 141}, + {0x8029, 141}, + {0xc038, 141}, + {0x8003, 143}, + {0x8006, 143}, + {0x800a, 143}, + {0x800f, 143}, + {0x8018, 143}, + {0x801f, 143}, + {0x8029, 143}, + {0xc038, 143}, + }, + /* 153 */ + { + {0x9d, 0}, + {0x9e, 0}, + {0xa0, 0}, + {0xa1, 0}, + {0xa4, 0}, + {0xa5, 0}, + {0xa7, 0}, + {0xa8, 0}, + {0xac, 0}, + {0xad, 0}, + {0xaf, 0}, + {0xb1, 0}, + {0xb6, 0}, + {0xb9, 0}, + {0xbf, 0}, + {0xcf, 0}, + }, + /* 154 */ + { + {0xc000, 147}, + {0xc000, 149}, + {0xc000, 150}, + {0xc000, 151}, + {0xc000, 152}, + {0xc000, 155}, + {0xc000, 157}, + {0xc000, 158}, + {0xc000, 165}, + {0xc000, 166}, + {0xc000, 168}, + {0xc000, 174}, + {0xc000, 175}, + {0xc000, 180}, + {0xc000, 182}, + {0xc000, 183}, + }, + /* 155 */ + { + {0x8001, 147}, + {0xc016, 147}, + {0x8001, 149}, + {0xc016, 149}, + {0x8001, 150}, + {0xc016, 150}, + {0x8001, 151}, + {0xc016, 151}, + {0x8001, 152}, + {0xc016, 152}, + {0x8001, 155}, + {0xc016, 155}, + {0x8001, 157}, + {0xc016, 157}, + {0x8001, 158}, + {0xc016, 158}, + }, + /* 156 */ + { + {0x8002, 147}, + {0x8009, 147}, + {0x8017, 147}, + {0xc028, 147}, + {0x8002, 149}, + {0x8009, 149}, + {0x8017, 149}, + {0xc028, 149}, + {0x8002, 150}, + {0x8009, 150}, + {0x8017, 150}, + {0xc028, 150}, + {0x8002, 151}, + {0x8009, 151}, + {0x8017, 151}, + {0xc028, 151}, + }, + /* 157 */ + { + {0x8003, 147}, + {0x8006, 147}, + {0x800a, 147}, + {0x800f, 147}, + {0x8018, 147}, + {0x801f, 147}, + {0x8029, 147}, + {0xc038, 147}, + {0x8003, 149}, + {0x8006, 149}, + {0x800a, 149}, + {0x800f, 149}, + {0x8018, 149}, + {0x801f, 149}, + {0x8029, 149}, + {0xc038, 149}, + }, + /* 158 */ + { + {0x8003, 150}, + {0x8006, 150}, + {0x800a, 150}, + {0x800f, 150}, + {0x8018, 150}, + {0x801f, 150}, + {0x8029, 150}, + {0xc038, 150}, + {0x8003, 151}, + {0x8006, 151}, + {0x800a, 151}, + {0x800f, 151}, + {0x8018, 151}, + {0x801f, 151}, + {0x8029, 151}, + {0xc038, 151}, + }, + /* 159 */ + { + {0x8002, 152}, + {0x8009, 152}, + {0x8017, 152}, + {0xc028, 152}, + {0x8002, 155}, + {0x8009, 155}, + {0x8017, 155}, + {0xc028, 155}, + {0x8002, 157}, + {0x8009, 157}, + {0x8017, 157}, + {0xc028, 157}, + {0x8002, 158}, + {0x8009, 158}, + {0x8017, 158}, + {0xc028, 158}, + }, + /* 160 */ + { + {0x8003, 152}, + {0x8006, 152}, + {0x800a, 152}, + {0x800f, 152}, + {0x8018, 152}, + {0x801f, 152}, + {0x8029, 152}, + {0xc038, 152}, + {0x8003, 155}, + {0x8006, 155}, + {0x800a, 155}, + {0x800f, 155}, + {0x8018, 155}, + {0x801f, 155}, + {0x8029, 155}, + {0xc038, 155}, + }, + /* 161 */ + { + {0x8003, 157}, + {0x8006, 157}, + {0x800a, 157}, + {0x800f, 157}, + {0x8018, 157}, + {0x801f, 157}, + {0x8029, 157}, + {0xc038, 157}, + {0x8003, 158}, + {0x8006, 158}, + {0x800a, 158}, + {0x800f, 158}, + {0x8018, 158}, + {0x801f, 158}, + {0x8029, 158}, + {0xc038, 158}, + }, + /* 162 */ + { + {0x8001, 165}, + {0xc016, 165}, + {0x8001, 166}, + {0xc016, 166}, + {0x8001, 168}, + {0xc016, 168}, + {0x8001, 174}, + {0xc016, 174}, + {0x8001, 175}, + {0xc016, 175}, + {0x8001, 180}, + {0xc016, 180}, + {0x8001, 182}, + {0xc016, 182}, + {0x8001, 183}, + {0xc016, 183}, + }, + /* 163 */ + { + {0x8002, 165}, + {0x8009, 165}, + {0x8017, 165}, + {0xc028, 165}, + {0x8002, 166}, + {0x8009, 166}, + {0x8017, 166}, + {0xc028, 166}, + {0x8002, 168}, + {0x8009, 168}, + {0x8017, 168}, + {0xc028, 168}, + {0x8002, 174}, + {0x8009, 174}, + {0x8017, 174}, + {0xc028, 174}, + }, + /* 164 */ + { + {0x8003, 165}, + {0x8006, 165}, + {0x800a, 165}, + {0x800f, 165}, + {0x8018, 165}, + {0x801f, 165}, + {0x8029, 165}, + {0xc038, 165}, + {0x8003, 166}, + {0x8006, 166}, + {0x800a, 166}, + {0x800f, 166}, + {0x8018, 166}, + {0x801f, 166}, + {0x8029, 166}, + {0xc038, 166}, + }, + /* 165 */ + { + {0x8003, 168}, + {0x8006, 168}, + {0x800a, 168}, + {0x800f, 168}, + {0x8018, 168}, + {0x801f, 168}, + {0x8029, 168}, + {0xc038, 168}, + {0x8003, 174}, + {0x8006, 174}, + {0x800a, 174}, + {0x800f, 174}, + {0x8018, 174}, + {0x801f, 174}, + {0x8029, 174}, + {0xc038, 174}, + }, + /* 166 */ + { + {0x8002, 175}, + {0x8009, 175}, + {0x8017, 175}, + {0xc028, 175}, + {0x8002, 180}, + {0x8009, 180}, + {0x8017, 180}, + {0xc028, 180}, + {0x8002, 182}, + {0x8009, 182}, + {0x8017, 182}, + {0xc028, 182}, + {0x8002, 183}, + {0x8009, 183}, + {0x8017, 183}, + {0xc028, 183}, + }, + /* 167 */ + { + {0x8003, 175}, + {0x8006, 175}, + {0x800a, 175}, + {0x800f, 175}, + {0x8018, 175}, + {0x801f, 175}, + {0x8029, 175}, + {0xc038, 175}, + {0x8003, 180}, + {0x8006, 180}, + {0x800a, 180}, + {0x800f, 180}, + {0x8018, 180}, + {0x801f, 180}, + {0x8029, 180}, + {0xc038, 180}, + }, + /* 168 */ + { + {0x8003, 182}, + {0x8006, 182}, + {0x800a, 182}, + {0x800f, 182}, + {0x8018, 182}, + {0x801f, 182}, + {0x8029, 182}, + {0xc038, 182}, + {0x8003, 183}, + {0x8006, 183}, + {0x800a, 183}, + {0x800f, 183}, + {0x8018, 183}, + {0x801f, 183}, + {0x8029, 183}, + {0xc038, 183}, + }, + /* 169 */ + { + {0xc000, 188}, + {0xc000, 191}, + {0xc000, 197}, + {0xc000, 231}, + {0xc000, 239}, + {0xb0, 0}, + {0xb2, 0}, + {0xb3, 0}, + {0xb7, 0}, + {0xb8, 0}, + {0xba, 0}, + {0xbb, 0}, + {0xc0, 0}, + {0xc7, 0}, + {0xd0, 0}, + {0xdf, 0}, + }, + /* 170 */ + { + {0x8001, 188}, + {0xc016, 188}, + {0x8001, 191}, + {0xc016, 191}, + {0x8001, 197}, + {0xc016, 197}, + {0x8001, 231}, + {0xc016, 231}, + {0x8001, 239}, + {0xc016, 239}, + {0xc000, 9}, + {0xc000, 142}, + {0xc000, 144}, + {0xc000, 145}, + {0xc000, 148}, + {0xc000, 159}, + }, + /* 171 */ + { + {0x8002, 188}, + {0x8009, 188}, + {0x8017, 188}, + {0xc028, 188}, + {0x8002, 191}, + {0x8009, 191}, + {0x8017, 191}, + {0xc028, 191}, + {0x8002, 197}, + {0x8009, 197}, + {0x8017, 197}, + {0xc028, 197}, + {0x8002, 231}, + {0x8009, 231}, + {0x8017, 231}, + {0xc028, 231}, + }, + /* 172 */ + { + {0x8003, 188}, + {0x8006, 188}, + {0x800a, 188}, + {0x800f, 188}, + {0x8018, 188}, + {0x801f, 188}, + {0x8029, 188}, + {0xc038, 188}, + {0x8003, 191}, + {0x8006, 191}, + {0x800a, 191}, + {0x800f, 191}, + {0x8018, 191}, + {0x801f, 191}, + {0x8029, 191}, + {0xc038, 191}, + }, + /* 173 */ + { + {0x8003, 197}, + {0x8006, 197}, + {0x800a, 197}, + {0x800f, 197}, + {0x8018, 197}, + {0x801f, 197}, + {0x8029, 197}, + {0xc038, 197}, + {0x8003, 231}, + {0x8006, 231}, + {0x800a, 231}, + {0x800f, 231}, + {0x8018, 231}, + {0x801f, 231}, + {0x8029, 231}, + {0xc038, 231}, + }, + /* 174 */ + { + {0x8002, 239}, + {0x8009, 239}, + {0x8017, 239}, + {0xc028, 239}, + {0x8001, 9}, + {0xc016, 9}, + {0x8001, 142}, + {0xc016, 142}, + {0x8001, 144}, + {0xc016, 144}, + {0x8001, 145}, + {0xc016, 145}, + {0x8001, 148}, + {0xc016, 148}, + {0x8001, 159}, + {0xc016, 159}, + }, + /* 175 */ + { + {0x8003, 239}, + {0x8006, 239}, + {0x800a, 239}, + {0x800f, 239}, + {0x8018, 239}, + {0x801f, 239}, + {0x8029, 239}, + {0xc038, 239}, + {0x8002, 9}, + {0x8009, 9}, + {0x8017, 9}, + {0xc028, 9}, + {0x8002, 142}, + {0x8009, 142}, + {0x8017, 142}, + {0xc028, 142}, + }, + /* 176 */ + { + {0x8003, 9}, + {0x8006, 9}, + {0x800a, 9}, + {0x800f, 9}, + {0x8018, 9}, + {0x801f, 9}, + {0x8029, 9}, + {0xc038, 9}, + {0x8003, 142}, + {0x8006, 142}, + {0x800a, 142}, + {0x800f, 142}, + {0x8018, 142}, + {0x801f, 142}, + {0x8029, 142}, + {0xc038, 142}, + }, + /* 177 */ + { + {0x8002, 144}, + {0x8009, 144}, + {0x8017, 144}, + {0xc028, 144}, + {0x8002, 145}, + {0x8009, 145}, + {0x8017, 145}, + {0xc028, 145}, + {0x8002, 148}, + {0x8009, 148}, + {0x8017, 148}, + {0xc028, 148}, + {0x8002, 159}, + {0x8009, 159}, + {0x8017, 159}, + {0xc028, 159}, + }, + /* 178 */ + { + {0x8003, 144}, + {0x8006, 144}, + {0x800a, 144}, + {0x800f, 144}, + {0x8018, 144}, + {0x801f, 144}, + {0x8029, 144}, + {0xc038, 144}, + {0x8003, 145}, + {0x8006, 145}, + {0x800a, 145}, + {0x800f, 145}, + {0x8018, 145}, + {0x801f, 145}, + {0x8029, 145}, + {0xc038, 145}, + }, + /* 179 */ + { + {0x8003, 148}, + {0x8006, 148}, + {0x800a, 148}, + {0x800f, 148}, + {0x8018, 148}, + {0x801f, 148}, + {0x8029, 148}, + {0xc038, 148}, + {0x8003, 159}, + {0x8006, 159}, + {0x800a, 159}, + {0x800f, 159}, + {0x8018, 159}, + {0x801f, 159}, + {0x8029, 159}, + {0xc038, 159}, + }, + /* 180 */ + { + {0xc000, 171}, + {0xc000, 206}, + {0xc000, 215}, + {0xc000, 225}, + {0xc000, 236}, + {0xc000, 237}, + {0xbc, 0}, + {0xbd, 0}, + {0xc1, 0}, + {0xc4, 0}, + {0xc8, 0}, + {0xcb, 0}, + {0xd1, 0}, + {0xd8, 0}, + {0xe0, 0}, + {0xee, 0}, + }, + /* 181 */ + { + {0x8001, 171}, + {0xc016, 171}, + {0x8001, 206}, + {0xc016, 206}, + {0x8001, 215}, + {0xc016, 215}, + {0x8001, 225}, + {0xc016, 225}, + {0x8001, 236}, + {0xc016, 236}, + {0x8001, 237}, + {0xc016, 237}, + {0xc000, 199}, + {0xc000, 207}, + {0xc000, 234}, + {0xc000, 235}, + }, + /* 182 */ + { + {0x8002, 171}, + {0x8009, 171}, + {0x8017, 171}, + {0xc028, 171}, + {0x8002, 206}, + {0x8009, 206}, + {0x8017, 206}, + {0xc028, 206}, + {0x8002, 215}, + {0x8009, 215}, + {0x8017, 215}, + {0xc028, 215}, + {0x8002, 225}, + {0x8009, 225}, + {0x8017, 225}, + {0xc028, 225}, + }, + /* 183 */ + { + {0x8003, 171}, + {0x8006, 171}, + {0x800a, 171}, + {0x800f, 171}, + {0x8018, 171}, + {0x801f, 171}, + {0x8029, 171}, + {0xc038, 171}, + {0x8003, 206}, + {0x8006, 206}, + {0x800a, 206}, + {0x800f, 206}, + {0x8018, 206}, + {0x801f, 206}, + {0x8029, 206}, + {0xc038, 206}, + }, + /* 184 */ + { + {0x8003, 215}, + {0x8006, 215}, + {0x800a, 215}, + {0x800f, 215}, + {0x8018, 215}, + {0x801f, 215}, + {0x8029, 215}, + {0xc038, 215}, + {0x8003, 225}, + {0x8006, 225}, + {0x800a, 225}, + {0x800f, 225}, + {0x8018, 225}, + {0x801f, 225}, + {0x8029, 225}, + {0xc038, 225}, + }, + /* 185 */ + { + {0x8002, 236}, + {0x8009, 236}, + {0x8017, 236}, + {0xc028, 236}, + {0x8002, 237}, + {0x8009, 237}, + {0x8017, 237}, + {0xc028, 237}, + {0x8001, 199}, + {0xc016, 199}, + {0x8001, 207}, + {0xc016, 207}, + {0x8001, 234}, + {0xc016, 234}, + {0x8001, 235}, + {0xc016, 235}, + }, + /* 186 */ + { + {0x8003, 236}, + {0x8006, 236}, + {0x800a, 236}, + {0x800f, 236}, + {0x8018, 236}, + {0x801f, 236}, + {0x8029, 236}, + {0xc038, 236}, + {0x8003, 237}, + {0x8006, 237}, + {0x800a, 237}, + {0x800f, 237}, + {0x8018, 237}, + {0x801f, 237}, + {0x8029, 237}, + {0xc038, 237}, + }, + /* 187 */ + { + {0x8002, 199}, + {0x8009, 199}, + {0x8017, 199}, + {0xc028, 199}, + {0x8002, 207}, + {0x8009, 207}, + {0x8017, 207}, + {0xc028, 207}, + {0x8002, 234}, + {0x8009, 234}, + {0x8017, 234}, + {0xc028, 234}, + {0x8002, 235}, + {0x8009, 235}, + {0x8017, 235}, + {0xc028, 235}, + }, + /* 188 */ + { + {0x8003, 199}, + {0x8006, 199}, + {0x800a, 199}, + {0x800f, 199}, + {0x8018, 199}, + {0x801f, 199}, + {0x8029, 199}, + {0xc038, 199}, + {0x8003, 207}, + {0x8006, 207}, + {0x800a, 207}, + {0x800f, 207}, + {0x8018, 207}, + {0x801f, 207}, + {0x8029, 207}, + {0xc038, 207}, + }, + /* 189 */ + { + {0x8003, 234}, + {0x8006, 234}, + {0x800a, 234}, + {0x800f, 234}, + {0x8018, 234}, + {0x801f, 234}, + {0x8029, 234}, + {0xc038, 234}, + {0x8003, 235}, + {0x8006, 235}, + {0x800a, 235}, + {0x800f, 235}, + {0x8018, 235}, + {0x801f, 235}, + {0x8029, 235}, + {0xc038, 235}, + }, + /* 190 */ + { + {0xc2, 0}, + {0xc3, 0}, + {0xc5, 0}, + {0xc6, 0}, + {0xc9, 0}, + {0xca, 0}, + {0xcc, 0}, + {0xcd, 0}, + {0xd2, 0}, + {0xd5, 0}, + {0xd9, 0}, + {0xdc, 0}, + {0xe1, 0}, + {0xe7, 0}, + {0xef, 0}, + {0xf6, 0}, + }, + /* 191 */ + { + {0xc000, 192}, + {0xc000, 193}, + {0xc000, 200}, + {0xc000, 201}, + {0xc000, 202}, + {0xc000, 205}, + {0xc000, 210}, + {0xc000, 213}, + {0xc000, 218}, + {0xc000, 219}, + {0xc000, 238}, + {0xc000, 240}, + {0xc000, 242}, + {0xc000, 243}, + {0xc000, 255}, + {0xce, 0}, + }, + /* 192 */ + { + {0x8001, 192}, + {0xc016, 192}, + {0x8001, 193}, + {0xc016, 193}, + {0x8001, 200}, + {0xc016, 200}, + {0x8001, 201}, + {0xc016, 201}, + {0x8001, 202}, + {0xc016, 202}, + {0x8001, 205}, + {0xc016, 205}, + {0x8001, 210}, + {0xc016, 210}, + {0x8001, 213}, + {0xc016, 213}, + }, + /* 193 */ + { + {0x8002, 192}, + {0x8009, 192}, + {0x8017, 192}, + {0xc028, 192}, + {0x8002, 193}, + {0x8009, 193}, + {0x8017, 193}, + {0xc028, 193}, + {0x8002, 200}, + {0x8009, 200}, + {0x8017, 200}, + {0xc028, 200}, + {0x8002, 201}, + {0x8009, 201}, + {0x8017, 201}, + {0xc028, 201}, + }, + /* 194 */ + { + {0x8003, 192}, + {0x8006, 192}, + {0x800a, 192}, + {0x800f, 192}, + {0x8018, 192}, + {0x801f, 192}, + {0x8029, 192}, + {0xc038, 192}, + {0x8003, 193}, + {0x8006, 193}, + {0x800a, 193}, + {0x800f, 193}, + {0x8018, 193}, + {0x801f, 193}, + {0x8029, 193}, + {0xc038, 193}, + }, + /* 195 */ + { + {0x8003, 200}, + {0x8006, 200}, + {0x800a, 200}, + {0x800f, 200}, + {0x8018, 200}, + {0x801f, 200}, + {0x8029, 200}, + {0xc038, 200}, + {0x8003, 201}, + {0x8006, 201}, + {0x800a, 201}, + {0x800f, 201}, + {0x8018, 201}, + {0x801f, 201}, + {0x8029, 201}, + {0xc038, 201}, + }, + /* 196 */ + { + {0x8002, 202}, + {0x8009, 202}, + {0x8017, 202}, + {0xc028, 202}, + {0x8002, 205}, + {0x8009, 205}, + {0x8017, 205}, + {0xc028, 205}, + {0x8002, 210}, + {0x8009, 210}, + {0x8017, 210}, + {0xc028, 210}, + {0x8002, 213}, + {0x8009, 213}, + {0x8017, 213}, + {0xc028, 213}, + }, + /* 197 */ + { + {0x8003, 202}, + {0x8006, 202}, + {0x800a, 202}, + {0x800f, 202}, + {0x8018, 202}, + {0x801f, 202}, + {0x8029, 202}, + {0xc038, 202}, + {0x8003, 205}, + {0x8006, 205}, + {0x800a, 205}, + {0x800f, 205}, + {0x8018, 205}, + {0x801f, 205}, + {0x8029, 205}, + {0xc038, 205}, + }, + /* 198 */ + { + {0x8003, 210}, + {0x8006, 210}, + {0x800a, 210}, + {0x800f, 210}, + {0x8018, 210}, + {0x801f, 210}, + {0x8029, 210}, + {0xc038, 210}, + {0x8003, 213}, + {0x8006, 213}, + {0x800a, 213}, + {0x800f, 213}, + {0x8018, 213}, + {0x801f, 213}, + {0x8029, 213}, + {0xc038, 213}, + }, + /* 199 */ + { + {0x8001, 218}, + {0xc016, 218}, + {0x8001, 219}, + {0xc016, 219}, + {0x8001, 238}, + {0xc016, 238}, + {0x8001, 240}, + {0xc016, 240}, + {0x8001, 242}, + {0xc016, 242}, + {0x8001, 243}, + {0xc016, 243}, + {0x8001, 255}, + {0xc016, 255}, + {0xc000, 203}, + {0xc000, 204}, + }, + /* 200 */ + { + {0x8002, 218}, + {0x8009, 218}, + {0x8017, 218}, + {0xc028, 218}, + {0x8002, 219}, + {0x8009, 219}, + {0x8017, 219}, + {0xc028, 219}, + {0x8002, 238}, + {0x8009, 238}, + {0x8017, 238}, + {0xc028, 238}, + {0x8002, 240}, + {0x8009, 240}, + {0x8017, 240}, + {0xc028, 240}, + }, + /* 201 */ + { + {0x8003, 218}, + {0x8006, 218}, + {0x800a, 218}, + {0x800f, 218}, + {0x8018, 218}, + {0x801f, 218}, + {0x8029, 218}, + {0xc038, 218}, + {0x8003, 219}, + {0x8006, 219}, + {0x800a, 219}, + {0x800f, 219}, + {0x8018, 219}, + {0x801f, 219}, + {0x8029, 219}, + {0xc038, 219}, + }, + /* 202 */ + { + {0x8003, 238}, + {0x8006, 238}, + {0x800a, 238}, + {0x800f, 238}, + {0x8018, 238}, + {0x801f, 238}, + {0x8029, 238}, + {0xc038, 238}, + {0x8003, 240}, + {0x8006, 240}, + {0x800a, 240}, + {0x800f, 240}, + {0x8018, 240}, + {0x801f, 240}, + {0x8029, 240}, + {0xc038, 240}, + }, + /* 203 */ + { + {0x8002, 242}, + {0x8009, 242}, + {0x8017, 242}, + {0xc028, 242}, + {0x8002, 243}, + {0x8009, 243}, + {0x8017, 243}, + {0xc028, 243}, + {0x8002, 255}, + {0x8009, 255}, + {0x8017, 255}, + {0xc028, 255}, + {0x8001, 203}, + {0xc016, 203}, + {0x8001, 204}, + {0xc016, 204}, + }, + /* 204 */ + { + {0x8003, 242}, + {0x8006, 242}, + {0x800a, 242}, + {0x800f, 242}, + {0x8018, 242}, + {0x801f, 242}, + {0x8029, 242}, + {0xc038, 242}, + {0x8003, 243}, + {0x8006, 243}, + {0x800a, 243}, + {0x800f, 243}, + {0x8018, 243}, + {0x801f, 243}, + {0x8029, 243}, + {0xc038, 243}, + }, + /* 205 */ + { + {0x8003, 255}, + {0x8006, 255}, + {0x800a, 255}, + {0x800f, 255}, + {0x8018, 255}, + {0x801f, 255}, + {0x8029, 255}, + {0xc038, 255}, + {0x8002, 203}, + {0x8009, 203}, + {0x8017, 203}, + {0xc028, 203}, + {0x8002, 204}, + {0x8009, 204}, + {0x8017, 204}, + {0xc028, 204}, + }, + /* 206 */ + { + {0x8003, 203}, + {0x8006, 203}, + {0x800a, 203}, + {0x800f, 203}, + {0x8018, 203}, + {0x801f, 203}, + {0x8029, 203}, + {0xc038, 203}, + {0x8003, 204}, + {0x8006, 204}, + {0x800a, 204}, + {0x800f, 204}, + {0x8018, 204}, + {0x801f, 204}, + {0x8029, 204}, + {0xc038, 204}, + }, + /* 207 */ + { + {0xd3, 0}, + {0xd4, 0}, + {0xd6, 0}, + {0xd7, 0}, + {0xda, 0}, + {0xdb, 0}, + {0xdd, 0}, + {0xde, 0}, + {0xe2, 0}, + {0xe4, 0}, + {0xe8, 0}, + {0xeb, 0}, + {0xf0, 0}, + {0xf3, 0}, + {0xf7, 0}, + {0xfa, 0}, + }, + /* 208 */ + { + {0xc000, 211}, + {0xc000, 212}, + {0xc000, 214}, + {0xc000, 221}, + {0xc000, 222}, + {0xc000, 223}, + {0xc000, 241}, + {0xc000, 244}, + {0xc000, 245}, + {0xc000, 246}, + {0xc000, 247}, + {0xc000, 248}, + {0xc000, 250}, + {0xc000, 251}, + {0xc000, 252}, + {0xc000, 253}, + }, + /* 209 */ + { + {0x8001, 211}, + {0xc016, 211}, + {0x8001, 212}, + {0xc016, 212}, + {0x8001, 214}, + {0xc016, 214}, + {0x8001, 221}, + {0xc016, 221}, + {0x8001, 222}, + {0xc016, 222}, + {0x8001, 223}, + {0xc016, 223}, + {0x8001, 241}, + {0xc016, 241}, + {0x8001, 244}, + {0xc016, 244}, + }, + /* 210 */ + { + {0x8002, 211}, + {0x8009, 211}, + {0x8017, 211}, + {0xc028, 211}, + {0x8002, 212}, + {0x8009, 212}, + {0x8017, 212}, + {0xc028, 212}, + {0x8002, 214}, + {0x8009, 214}, + {0x8017, 214}, + {0xc028, 214}, + {0x8002, 221}, + {0x8009, 221}, + {0x8017, 221}, + {0xc028, 221}, + }, + /* 211 */ + { + {0x8003, 211}, + {0x8006, 211}, + {0x800a, 211}, + {0x800f, 211}, + {0x8018, 211}, + {0x801f, 211}, + {0x8029, 211}, + {0xc038, 211}, + {0x8003, 212}, + {0x8006, 212}, + {0x800a, 212}, + {0x800f, 212}, + {0x8018, 212}, + {0x801f, 212}, + {0x8029, 212}, + {0xc038, 212}, + }, + /* 212 */ + { + {0x8003, 214}, + {0x8006, 214}, + {0x800a, 214}, + {0x800f, 214}, + {0x8018, 214}, + {0x801f, 214}, + {0x8029, 214}, + {0xc038, 214}, + {0x8003, 221}, + {0x8006, 221}, + {0x800a, 221}, + {0x800f, 221}, + {0x8018, 221}, + {0x801f, 221}, + {0x8029, 221}, + {0xc038, 221}, + }, + /* 213 */ + { + {0x8002, 222}, + {0x8009, 222}, + {0x8017, 222}, + {0xc028, 222}, + {0x8002, 223}, + {0x8009, 223}, + {0x8017, 223}, + {0xc028, 223}, + {0x8002, 241}, + {0x8009, 241}, + {0x8017, 241}, + {0xc028, 241}, + {0x8002, 244}, + {0x8009, 244}, + {0x8017, 244}, + {0xc028, 244}, + }, + /* 214 */ + { + {0x8003, 222}, + {0x8006, 222}, + {0x800a, 222}, + {0x800f, 222}, + {0x8018, 222}, + {0x801f, 222}, + {0x8029, 222}, + {0xc038, 222}, + {0x8003, 223}, + {0x8006, 223}, + {0x800a, 223}, + {0x800f, 223}, + {0x8018, 223}, + {0x801f, 223}, + {0x8029, 223}, + {0xc038, 223}, + }, + /* 215 */ + { + {0x8003, 241}, + {0x8006, 241}, + {0x800a, 241}, + {0x800f, 241}, + {0x8018, 241}, + {0x801f, 241}, + {0x8029, 241}, + {0xc038, 241}, + {0x8003, 244}, + {0x8006, 244}, + {0x800a, 244}, + {0x800f, 244}, + {0x8018, 244}, + {0x801f, 244}, + {0x8029, 244}, + {0xc038, 244}, + }, + /* 216 */ + { + {0x8001, 245}, + {0xc016, 245}, + {0x8001, 246}, + {0xc016, 246}, + {0x8001, 247}, + {0xc016, 247}, + {0x8001, 248}, + {0xc016, 248}, + {0x8001, 250}, + {0xc016, 250}, + {0x8001, 251}, + {0xc016, 251}, + {0x8001, 252}, + {0xc016, 252}, + {0x8001, 253}, + {0xc016, 253}, + }, + /* 217 */ + { + {0x8002, 245}, + {0x8009, 245}, + {0x8017, 245}, + {0xc028, 245}, + {0x8002, 246}, + {0x8009, 246}, + {0x8017, 246}, + {0xc028, 246}, + {0x8002, 247}, + {0x8009, 247}, + {0x8017, 247}, + {0xc028, 247}, + {0x8002, 248}, + {0x8009, 248}, + {0x8017, 248}, + {0xc028, 248}, + }, + /* 218 */ + { + {0x8003, 245}, + {0x8006, 245}, + {0x800a, 245}, + {0x800f, 245}, + {0x8018, 245}, + {0x801f, 245}, + {0x8029, 245}, + {0xc038, 245}, + {0x8003, 246}, + {0x8006, 246}, + {0x800a, 246}, + {0x800f, 246}, + {0x8018, 246}, + {0x801f, 246}, + {0x8029, 246}, + {0xc038, 246}, + }, + /* 219 */ + { + {0x8003, 247}, + {0x8006, 247}, + {0x800a, 247}, + {0x800f, 247}, + {0x8018, 247}, + {0x801f, 247}, + {0x8029, 247}, + {0xc038, 247}, + {0x8003, 248}, + {0x8006, 248}, + {0x800a, 248}, + {0x800f, 248}, + {0x8018, 248}, + {0x801f, 248}, + {0x8029, 248}, + {0xc038, 248}, + }, + /* 220 */ + { + {0x8002, 250}, + {0x8009, 250}, + {0x8017, 250}, + {0xc028, 250}, + {0x8002, 251}, + {0x8009, 251}, + {0x8017, 251}, + {0xc028, 251}, + {0x8002, 252}, + {0x8009, 252}, + {0x8017, 252}, + {0xc028, 252}, + {0x8002, 253}, + {0x8009, 253}, + {0x8017, 253}, + {0xc028, 253}, + }, + /* 221 */ + { + {0x8003, 250}, + {0x8006, 250}, + {0x800a, 250}, + {0x800f, 250}, + {0x8018, 250}, + {0x801f, 250}, + {0x8029, 250}, + {0xc038, 250}, + {0x8003, 251}, + {0x8006, 251}, + {0x800a, 251}, + {0x800f, 251}, + {0x8018, 251}, + {0x801f, 251}, + {0x8029, 251}, + {0xc038, 251}, + }, + /* 222 */ + { + {0x8003, 252}, + {0x8006, 252}, + {0x800a, 252}, + {0x800f, 252}, + {0x8018, 252}, + {0x801f, 252}, + {0x8029, 252}, + {0xc038, 252}, + {0x8003, 253}, + {0x8006, 253}, + {0x800a, 253}, + {0x800f, 253}, + {0x8018, 253}, + {0x801f, 253}, + {0x8029, 253}, + {0xc038, 253}, + }, + /* 223 */ + { + {0xc000, 254}, + {0xe3, 0}, + {0xe5, 0}, + {0xe6, 0}, + {0xe9, 0}, + {0xea, 0}, + {0xec, 0}, + {0xed, 0}, + {0xf1, 0}, + {0xf2, 0}, + {0xf4, 0}, + {0xf5, 0}, + {0xf8, 0}, + {0xf9, 0}, + {0xfb, 0}, + {0xfc, 0}, + }, + /* 224 */ + { + {0x8001, 254}, + {0xc016, 254}, + {0xc000, 2}, + {0xc000, 3}, + {0xc000, 4}, + {0xc000, 5}, + {0xc000, 6}, + {0xc000, 7}, + {0xc000, 8}, + {0xc000, 11}, + {0xc000, 12}, + {0xc000, 14}, + {0xc000, 15}, + {0xc000, 16}, + {0xc000, 17}, + {0xc000, 18}, + }, + /* 225 */ + { + {0x8002, 254}, + {0x8009, 254}, + {0x8017, 254}, + {0xc028, 254}, + {0x8001, 2}, + {0xc016, 2}, + {0x8001, 3}, + {0xc016, 3}, + {0x8001, 4}, + {0xc016, 4}, + {0x8001, 5}, + {0xc016, 5}, + {0x8001, 6}, + {0xc016, 6}, + {0x8001, 7}, + {0xc016, 7}, + }, + /* 226 */ + { + {0x8003, 254}, + {0x8006, 254}, + {0x800a, 254}, + {0x800f, 254}, + {0x8018, 254}, + {0x801f, 254}, + {0x8029, 254}, + {0xc038, 254}, + {0x8002, 2}, + {0x8009, 2}, + {0x8017, 2}, + {0xc028, 2}, + {0x8002, 3}, + {0x8009, 3}, + {0x8017, 3}, + {0xc028, 3}, + }, + /* 227 */ + { + {0x8003, 2}, + {0x8006, 2}, + {0x800a, 2}, + {0x800f, 2}, + {0x8018, 2}, + {0x801f, 2}, + {0x8029, 2}, + {0xc038, 2}, + {0x8003, 3}, + {0x8006, 3}, + {0x800a, 3}, + {0x800f, 3}, + {0x8018, 3}, + {0x801f, 3}, + {0x8029, 3}, + {0xc038, 3}, + }, + /* 228 */ + { + {0x8002, 4}, + {0x8009, 4}, + {0x8017, 4}, + {0xc028, 4}, + {0x8002, 5}, + {0x8009, 5}, + {0x8017, 5}, + {0xc028, 5}, + {0x8002, 6}, + {0x8009, 6}, + {0x8017, 6}, + {0xc028, 6}, + {0x8002, 7}, + {0x8009, 7}, + {0x8017, 7}, + {0xc028, 7}, + }, + /* 229 */ + { + {0x8003, 4}, + {0x8006, 4}, + {0x800a, 4}, + {0x800f, 4}, + {0x8018, 4}, + {0x801f, 4}, + {0x8029, 4}, + {0xc038, 4}, + {0x8003, 5}, + {0x8006, 5}, + {0x800a, 5}, + {0x800f, 5}, + {0x8018, 5}, + {0x801f, 5}, + {0x8029, 5}, + {0xc038, 5}, + }, + /* 230 */ + { + {0x8003, 6}, + {0x8006, 6}, + {0x800a, 6}, + {0x800f, 6}, + {0x8018, 6}, + {0x801f, 6}, + {0x8029, 6}, + {0xc038, 6}, + {0x8003, 7}, + {0x8006, 7}, + {0x800a, 7}, + {0x800f, 7}, + {0x8018, 7}, + {0x801f, 7}, + {0x8029, 7}, + {0xc038, 7}, + }, + /* 231 */ + { + {0x8001, 8}, + {0xc016, 8}, + {0x8001, 11}, + {0xc016, 11}, + {0x8001, 12}, + {0xc016, 12}, + {0x8001, 14}, + {0xc016, 14}, + {0x8001, 15}, + {0xc016, 15}, + {0x8001, 16}, + {0xc016, 16}, + {0x8001, 17}, + {0xc016, 17}, + {0x8001, 18}, + {0xc016, 18}, + }, + /* 232 */ + { + {0x8002, 8}, + {0x8009, 8}, + {0x8017, 8}, + {0xc028, 8}, + {0x8002, 11}, + {0x8009, 11}, + {0x8017, 11}, + {0xc028, 11}, + {0x8002, 12}, + {0x8009, 12}, + {0x8017, 12}, + {0xc028, 12}, + {0x8002, 14}, + {0x8009, 14}, + {0x8017, 14}, + {0xc028, 14}, + }, + /* 233 */ + { + {0x8003, 8}, + {0x8006, 8}, + {0x800a, 8}, + {0x800f, 8}, + {0x8018, 8}, + {0x801f, 8}, + {0x8029, 8}, + {0xc038, 8}, + {0x8003, 11}, + {0x8006, 11}, + {0x800a, 11}, + {0x800f, 11}, + {0x8018, 11}, + {0x801f, 11}, + {0x8029, 11}, + {0xc038, 11}, + }, + /* 234 */ + { + {0x8003, 12}, + {0x8006, 12}, + {0x800a, 12}, + {0x800f, 12}, + {0x8018, 12}, + {0x801f, 12}, + {0x8029, 12}, + {0xc038, 12}, + {0x8003, 14}, + {0x8006, 14}, + {0x800a, 14}, + {0x800f, 14}, + {0x8018, 14}, + {0x801f, 14}, + {0x8029, 14}, + {0xc038, 14}, + }, + /* 235 */ + { + {0x8002, 15}, + {0x8009, 15}, + {0x8017, 15}, + {0xc028, 15}, + {0x8002, 16}, + {0x8009, 16}, + {0x8017, 16}, + {0xc028, 16}, + {0x8002, 17}, + {0x8009, 17}, + {0x8017, 17}, + {0xc028, 17}, + {0x8002, 18}, + {0x8009, 18}, + {0x8017, 18}, + {0xc028, 18}, + }, + /* 236 */ + { + {0x8003, 15}, + {0x8006, 15}, + {0x800a, 15}, + {0x800f, 15}, + {0x8018, 15}, + {0x801f, 15}, + {0x8029, 15}, + {0xc038, 15}, + {0x8003, 16}, + {0x8006, 16}, + {0x800a, 16}, + {0x800f, 16}, + {0x8018, 16}, + {0x801f, 16}, + {0x8029, 16}, + {0xc038, 16}, + }, + /* 237 */ + { + {0x8003, 17}, + {0x8006, 17}, + {0x800a, 17}, + {0x800f, 17}, + {0x8018, 17}, + {0x801f, 17}, + {0x8029, 17}, + {0xc038, 17}, + {0x8003, 18}, + {0x8006, 18}, + {0x800a, 18}, + {0x800f, 18}, + {0x8018, 18}, + {0x801f, 18}, + {0x8029, 18}, + {0xc038, 18}, + }, + /* 238 */ + { + {0xc000, 19}, + {0xc000, 20}, + {0xc000, 21}, + {0xc000, 23}, + {0xc000, 24}, + {0xc000, 25}, + {0xc000, 26}, + {0xc000, 27}, + {0xc000, 28}, + {0xc000, 29}, + {0xc000, 30}, + {0xc000, 31}, + {0xc000, 127}, + {0xc000, 220}, + {0xc000, 249}, + {0xfd, 0}, + }, + /* 239 */ + { + {0x8001, 19}, + {0xc016, 19}, + {0x8001, 20}, + {0xc016, 20}, + {0x8001, 21}, + {0xc016, 21}, + {0x8001, 23}, + {0xc016, 23}, + {0x8001, 24}, + {0xc016, 24}, + {0x8001, 25}, + {0xc016, 25}, + {0x8001, 26}, + {0xc016, 26}, + {0x8001, 27}, + {0xc016, 27}, + }, + /* 240 */ + { + {0x8002, 19}, + {0x8009, 19}, + {0x8017, 19}, + {0xc028, 19}, + {0x8002, 20}, + {0x8009, 20}, + {0x8017, 20}, + {0xc028, 20}, + {0x8002, 21}, + {0x8009, 21}, + {0x8017, 21}, + {0xc028, 21}, + {0x8002, 23}, + {0x8009, 23}, + {0x8017, 23}, + {0xc028, 23}, + }, + /* 241 */ + { + {0x8003, 19}, + {0x8006, 19}, + {0x800a, 19}, + {0x800f, 19}, + {0x8018, 19}, + {0x801f, 19}, + {0x8029, 19}, + {0xc038, 19}, + {0x8003, 20}, + {0x8006, 20}, + {0x800a, 20}, + {0x800f, 20}, + {0x8018, 20}, + {0x801f, 20}, + {0x8029, 20}, + {0xc038, 20}, + }, + /* 242 */ + { + {0x8003, 21}, + {0x8006, 21}, + {0x800a, 21}, + {0x800f, 21}, + {0x8018, 21}, + {0x801f, 21}, + {0x8029, 21}, + {0xc038, 21}, + {0x8003, 23}, + {0x8006, 23}, + {0x800a, 23}, + {0x800f, 23}, + {0x8018, 23}, + {0x801f, 23}, + {0x8029, 23}, + {0xc038, 23}, + }, + /* 243 */ + { + {0x8002, 24}, + {0x8009, 24}, + {0x8017, 24}, + {0xc028, 24}, + {0x8002, 25}, + {0x8009, 25}, + {0x8017, 25}, + {0xc028, 25}, + {0x8002, 26}, + {0x8009, 26}, + {0x8017, 26}, + {0xc028, 26}, + {0x8002, 27}, + {0x8009, 27}, + {0x8017, 27}, + {0xc028, 27}, + }, + /* 244 */ + { + {0x8003, 24}, + {0x8006, 24}, + {0x800a, 24}, + {0x800f, 24}, + {0x8018, 24}, + {0x801f, 24}, + {0x8029, 24}, + {0xc038, 24}, + {0x8003, 25}, + {0x8006, 25}, + {0x800a, 25}, + {0x800f, 25}, + {0x8018, 25}, + {0x801f, 25}, + {0x8029, 25}, + {0xc038, 25}, + }, + /* 245 */ + { + {0x8003, 26}, + {0x8006, 26}, + {0x800a, 26}, + {0x800f, 26}, + {0x8018, 26}, + {0x801f, 26}, + {0x8029, 26}, + {0xc038, 26}, + {0x8003, 27}, + {0x8006, 27}, + {0x800a, 27}, + {0x800f, 27}, + {0x8018, 27}, + {0x801f, 27}, + {0x8029, 27}, + {0xc038, 27}, + }, + /* 246 */ + { + {0x8001, 28}, + {0xc016, 28}, + {0x8001, 29}, + {0xc016, 29}, + {0x8001, 30}, + {0xc016, 30}, + {0x8001, 31}, + {0xc016, 31}, + {0x8001, 127}, + {0xc016, 127}, + {0x8001, 220}, + {0xc016, 220}, + {0x8001, 249}, + {0xc016, 249}, + {0xfe, 0}, + {0xff, 0}, + }, + /* 247 */ + { + {0x8002, 28}, + {0x8009, 28}, + {0x8017, 28}, + {0xc028, 28}, + {0x8002, 29}, + {0x8009, 29}, + {0x8017, 29}, + {0xc028, 29}, + {0x8002, 30}, + {0x8009, 30}, + {0x8017, 30}, + {0xc028, 30}, + {0x8002, 31}, + {0x8009, 31}, + {0x8017, 31}, + {0xc028, 31}, + }, + /* 248 */ + { + {0x8003, 28}, + {0x8006, 28}, + {0x800a, 28}, + {0x800f, 28}, + {0x8018, 28}, + {0x801f, 28}, + {0x8029, 28}, + {0xc038, 28}, + {0x8003, 29}, + {0x8006, 29}, + {0x800a, 29}, + {0x800f, 29}, + {0x8018, 29}, + {0x801f, 29}, + {0x8029, 29}, + {0xc038, 29}, + }, + /* 249 */ + { + {0x8003, 30}, + {0x8006, 30}, + {0x800a, 30}, + {0x800f, 30}, + {0x8018, 30}, + {0x801f, 30}, + {0x8029, 30}, + {0xc038, 30}, + {0x8003, 31}, + {0x8006, 31}, + {0x800a, 31}, + {0x800f, 31}, + {0x8018, 31}, + {0x801f, 31}, + {0x8029, 31}, + {0xc038, 31}, + }, + /* 250 */ + { + {0x8002, 127}, + {0x8009, 127}, + {0x8017, 127}, + {0xc028, 127}, + {0x8002, 220}, + {0x8009, 220}, + {0x8017, 220}, + {0xc028, 220}, + {0x8002, 249}, + {0x8009, 249}, + {0x8017, 249}, + {0xc028, 249}, + {0xc000, 10}, + {0xc000, 13}, + {0xc000, 22}, + {0x100, 0}, + }, + /* 251 */ + { + {0x8003, 127}, + {0x8006, 127}, + {0x800a, 127}, + {0x800f, 127}, + {0x8018, 127}, + {0x801f, 127}, + {0x8029, 127}, + {0xc038, 127}, + {0x8003, 220}, + {0x8006, 220}, + {0x800a, 220}, + {0x800f, 220}, + {0x8018, 220}, + {0x801f, 220}, + {0x8029, 220}, + {0xc038, 220}, + }, + /* 252 */ + { + {0x8003, 249}, + {0x8006, 249}, + {0x800a, 249}, + {0x800f, 249}, + {0x8018, 249}, + {0x801f, 249}, + {0x8029, 249}, + {0xc038, 249}, + {0x8001, 10}, + {0xc016, 10}, + {0x8001, 13}, + {0xc016, 13}, + {0x8001, 22}, + {0xc016, 22}, + {0x100, 0}, + {0x100, 0}, + }, + /* 253 */ + { + {0x8002, 10}, + {0x8009, 10}, + {0x8017, 10}, + {0xc028, 10}, + {0x8002, 13}, + {0x8009, 13}, + {0x8017, 13}, + {0xc028, 13}, + {0x8002, 22}, + {0x8009, 22}, + {0x8017, 22}, + {0xc028, 22}, + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + }, + /* 254 */ + { + {0x8003, 10}, + {0x8006, 10}, + {0x800a, 10}, + {0x800f, 10}, + {0x8018, 10}, + {0x801f, 10}, + {0x8029, 10}, + {0xc038, 10}, + {0x8003, 13}, + {0x8006, 13}, + {0x800a, 13}, + {0x800f, 13}, + {0x8018, 13}, + {0x801f, 13}, + {0x8029, 13}, + {0xc038, 13}, + }, + /* 255 */ + { + {0x8003, 22}, + {0x8006, 22}, + {0x800a, 22}, + {0x800f, 22}, + {0x8018, 22}, + {0x801f, 22}, + {0x8029, 22}, + {0xc038, 22}, + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + }, + /* 256 */ + { + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + {0x100, 0}, + }, }; diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_range.c b/deps/ngtcp2/nghttp3/lib/nghttp3_range.c index 0ce71480d72fec..af810a2c5929db 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_range.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_range.c @@ -34,11 +34,13 @@ void nghttp3_range_init(nghttp3_range *r, uint64_t begin, uint64_t end) { nghttp3_range nghttp3_range_intersect(const nghttp3_range *a, const nghttp3_range *b) { nghttp3_range r = {0, 0}; - uint64_t begin = nghttp3_max(a->begin, b->begin); - uint64_t end = nghttp3_min(a->end, b->end); + uint64_t begin = nghttp3_max_uint64(a->begin, b->begin); + uint64_t end = nghttp3_min_uint64(a->end, b->end); + if (begin < end) { nghttp3_range_init(&r, begin, end); } + return r; } diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_range.h b/deps/ngtcp2/nghttp3/lib/nghttp3_range.h index 20dab69aa62db5..e52e1966b870ec 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_range.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_range.h @@ -28,7 +28,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -59,7 +59,7 @@ uint64_t nghttp3_range_len(const nghttp3_range *r); /* * nghttp3_range_eq returns nonzero if |a| equals |b|, such that - * a->begin == b->begin, and a->end == b->end hold. + * a->begin == b->begin and a->end == b->end hold. */ int nghttp3_range_eq(const nghttp3_range *a, const nghttp3_range *b); @@ -78,4 +78,4 @@ void nghttp3_range_cut(nghttp3_range *left, nghttp3_range *right, */ int nghttp3_range_not_after(const nghttp3_range *a, const nghttp3_range *b); -#endif /* NGHTTP3_RANGE_H */ +#endif /* !defined(NGHTTP3_RANGE_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_rcbuf.h b/deps/ngtcp2/nghttp3/lib/nghttp3_rcbuf.h index f589c377bf6ea7..97f83234ab5a8e 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_rcbuf.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_rcbuf.h @@ -28,7 +28,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -78,4 +78,4 @@ int nghttp3_rcbuf_new2(nghttp3_rcbuf **rcbuf_ptr, const uint8_t *src, */ void nghttp3_rcbuf_del(nghttp3_rcbuf *rcbuf); -#endif /* NGHTTP3_RCBUF_H */ +#endif /* !defined(NGHTTP3_RCBUF_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_ringbuf.c b/deps/ngtcp2/nghttp3/lib/nghttp3_ringbuf.c index 38b5460837190b..7d3ab39bf82a7f 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_ringbuf.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_ringbuf.c @@ -29,29 +29,27 @@ #include #ifdef WIN32 # include -#endif +#endif /* defined(WIN32) */ #include "nghttp3_macro.h" -#if defined(_MSC_VER) && _MSC_VER < 1941 && !defined(__clang__) && \ - (defined(_M_ARM) || defined(_M_ARM64)) -unsigned int __popcnt(unsigned int x) { - unsigned int c = 0; - for (; x; ++c) { - x &= x - 1; - } - return c; +static int ispow2(size_t n) { +#if defined(_MSC_VER) && !defined(__clang__) && \ + (defined(_M_ARM) || (defined(_M_ARM64) && _MSC_VER < 1941)) + return n && !(n & (n - 1)); +#elif defined(WIN32) + return 1 == __popcnt((unsigned int)n); +#else /* !((defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || \ + (defined(_M_ARM64) && _MSC_VER < 1941))) || defined(WIN32)) */ + return 1 == __builtin_popcount((unsigned int)n); +#endif /* !((defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || \ + (defined(_M_ARM64) && _MSC_VER < 1941))) || defined(WIN32)) */ } -#endif int nghttp3_ringbuf_init(nghttp3_ringbuf *rb, size_t nmemb, size_t size, const nghttp3_mem *mem) { if (nmemb) { -#ifdef WIN32 - assert(1 == __popcnt((unsigned int)nmemb)); -#else - assert(1 == __builtin_popcount((unsigned int)nmemb)); -#endif + assert(ispow2(nmemb)); rb->buf = nghttp3_mem_malloc(mem, nmemb * size); if (rb->buf == NULL) { @@ -80,7 +78,7 @@ void nghttp3_ringbuf_free(nghttp3_ringbuf *rb) { void *nghttp3_ringbuf_push_front(nghttp3_ringbuf *rb) { rb->first = (rb->first - 1) & (rb->nmemb - 1); - rb->len = nghttp3_min(rb->nmemb, rb->len + 1); + rb->len = nghttp3_min_size(rb->nmemb, rb->len + 1); return (void *)&rb->buf[rb->first * rb->size]; } @@ -127,11 +125,7 @@ int nghttp3_ringbuf_reserve(nghttp3_ringbuf *rb, size_t nmemb) { return 0; } -#ifdef WIN32 - assert(1 == __popcnt((unsigned int)nmemb)); -#else - assert(1 == __builtin_popcount((unsigned int)nmemb)); -#endif + assert(ispow2(nmemb)); buf = nghttp3_mem_malloc(rb->mem, nmemb * rb->size); if (buf == NULL) { diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_ringbuf.h b/deps/ngtcp2/nghttp3/lib/nghttp3_ringbuf.h index 8e05ec55b24724..b154290a51d5a5 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_ringbuf.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_ringbuf.h @@ -28,7 +28,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -110,4 +110,4 @@ int nghttp3_ringbuf_full(nghttp3_ringbuf *rb); int nghttp3_ringbuf_reserve(nghttp3_ringbuf *rb, size_t nmemb); -#endif /* NGHTTP3_RINGBUF_H */ +#endif /* !defined(NGHTTP3_RINGBUF_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_str.c b/deps/ngtcp2/nghttp3/lib/nghttp3_str.c index 3782aa72cd6e81..fc131404d13754 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_str.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_str.c @@ -36,70 +36,70 @@ uint8_t *nghttp3_cpymem(uint8_t *dest, const uint8_t *src, size_t n) { /* Generated by gendowncasetbl.py */ static const uint8_t DOWNCASE_TBL[] = { - 0 /* NUL */, 1 /* SOH */, 2 /* STX */, 3 /* ETX */, - 4 /* EOT */, 5 /* ENQ */, 6 /* ACK */, 7 /* BEL */, - 8 /* BS */, 9 /* HT */, 10 /* LF */, 11 /* VT */, - 12 /* FF */, 13 /* CR */, 14 /* SO */, 15 /* SI */, - 16 /* DLE */, 17 /* DC1 */, 18 /* DC2 */, 19 /* DC3 */, - 20 /* DC4 */, 21 /* NAK */, 22 /* SYN */, 23 /* ETB */, - 24 /* CAN */, 25 /* EM */, 26 /* SUB */, 27 /* ESC */, - 28 /* FS */, 29 /* GS */, 30 /* RS */, 31 /* US */, - 32 /* SPC */, 33 /* ! */, 34 /* " */, 35 /* # */, - 36 /* $ */, 37 /* % */, 38 /* & */, 39 /* ' */, - 40 /* ( */, 41 /* ) */, 42 /* * */, 43 /* + */, - 44 /* , */, 45 /* - */, 46 /* . */, 47 /* / */, - 48 /* 0 */, 49 /* 1 */, 50 /* 2 */, 51 /* 3 */, - 52 /* 4 */, 53 /* 5 */, 54 /* 6 */, 55 /* 7 */, - 56 /* 8 */, 57 /* 9 */, 58 /* : */, 59 /* ; */, - 60 /* < */, 61 /* = */, 62 /* > */, 63 /* ? */, - 64 /* @ */, 97 /* A */, 98 /* B */, 99 /* C */, - 100 /* D */, 101 /* E */, 102 /* F */, 103 /* G */, - 104 /* H */, 105 /* I */, 106 /* J */, 107 /* K */, - 108 /* L */, 109 /* M */, 110 /* N */, 111 /* O */, - 112 /* P */, 113 /* Q */, 114 /* R */, 115 /* S */, - 116 /* T */, 117 /* U */, 118 /* V */, 119 /* W */, - 120 /* X */, 121 /* Y */, 122 /* Z */, 91 /* [ */, - 92 /* \ */, 93 /* ] */, 94 /* ^ */, 95 /* _ */, - 96 /* ` */, 97 /* a */, 98 /* b */, 99 /* c */, - 100 /* d */, 101 /* e */, 102 /* f */, 103 /* g */, - 104 /* h */, 105 /* i */, 106 /* j */, 107 /* k */, - 108 /* l */, 109 /* m */, 110 /* n */, 111 /* o */, - 112 /* p */, 113 /* q */, 114 /* r */, 115 /* s */, - 116 /* t */, 117 /* u */, 118 /* v */, 119 /* w */, - 120 /* x */, 121 /* y */, 122 /* z */, 123 /* { */, - 124 /* | */, 125 /* } */, 126 /* ~ */, 127 /* DEL */, - 128 /* 0x80 */, 129 /* 0x81 */, 130 /* 0x82 */, 131 /* 0x83 */, - 132 /* 0x84 */, 133 /* 0x85 */, 134 /* 0x86 */, 135 /* 0x87 */, - 136 /* 0x88 */, 137 /* 0x89 */, 138 /* 0x8a */, 139 /* 0x8b */, - 140 /* 0x8c */, 141 /* 0x8d */, 142 /* 0x8e */, 143 /* 0x8f */, - 144 /* 0x90 */, 145 /* 0x91 */, 146 /* 0x92 */, 147 /* 0x93 */, - 148 /* 0x94 */, 149 /* 0x95 */, 150 /* 0x96 */, 151 /* 0x97 */, - 152 /* 0x98 */, 153 /* 0x99 */, 154 /* 0x9a */, 155 /* 0x9b */, - 156 /* 0x9c */, 157 /* 0x9d */, 158 /* 0x9e */, 159 /* 0x9f */, - 160 /* 0xa0 */, 161 /* 0xa1 */, 162 /* 0xa2 */, 163 /* 0xa3 */, - 164 /* 0xa4 */, 165 /* 0xa5 */, 166 /* 0xa6 */, 167 /* 0xa7 */, - 168 /* 0xa8 */, 169 /* 0xa9 */, 170 /* 0xaa */, 171 /* 0xab */, - 172 /* 0xac */, 173 /* 0xad */, 174 /* 0xae */, 175 /* 0xaf */, - 176 /* 0xb0 */, 177 /* 0xb1 */, 178 /* 0xb2 */, 179 /* 0xb3 */, - 180 /* 0xb4 */, 181 /* 0xb5 */, 182 /* 0xb6 */, 183 /* 0xb7 */, - 184 /* 0xb8 */, 185 /* 0xb9 */, 186 /* 0xba */, 187 /* 0xbb */, - 188 /* 0xbc */, 189 /* 0xbd */, 190 /* 0xbe */, 191 /* 0xbf */, - 192 /* 0xc0 */, 193 /* 0xc1 */, 194 /* 0xc2 */, 195 /* 0xc3 */, - 196 /* 0xc4 */, 197 /* 0xc5 */, 198 /* 0xc6 */, 199 /* 0xc7 */, - 200 /* 0xc8 */, 201 /* 0xc9 */, 202 /* 0xca */, 203 /* 0xcb */, - 204 /* 0xcc */, 205 /* 0xcd */, 206 /* 0xce */, 207 /* 0xcf */, - 208 /* 0xd0 */, 209 /* 0xd1 */, 210 /* 0xd2 */, 211 /* 0xd3 */, - 212 /* 0xd4 */, 213 /* 0xd5 */, 214 /* 0xd6 */, 215 /* 0xd7 */, - 216 /* 0xd8 */, 217 /* 0xd9 */, 218 /* 0xda */, 219 /* 0xdb */, - 220 /* 0xdc */, 221 /* 0xdd */, 222 /* 0xde */, 223 /* 0xdf */, - 224 /* 0xe0 */, 225 /* 0xe1 */, 226 /* 0xe2 */, 227 /* 0xe3 */, - 228 /* 0xe4 */, 229 /* 0xe5 */, 230 /* 0xe6 */, 231 /* 0xe7 */, - 232 /* 0xe8 */, 233 /* 0xe9 */, 234 /* 0xea */, 235 /* 0xeb */, - 236 /* 0xec */, 237 /* 0xed */, 238 /* 0xee */, 239 /* 0xef */, - 240 /* 0xf0 */, 241 /* 0xf1 */, 242 /* 0xf2 */, 243 /* 0xf3 */, - 244 /* 0xf4 */, 245 /* 0xf5 */, 246 /* 0xf6 */, 247 /* 0xf7 */, - 248 /* 0xf8 */, 249 /* 0xf9 */, 250 /* 0xfa */, 251 /* 0xfb */, - 252 /* 0xfc */, 253 /* 0xfd */, 254 /* 0xfe */, 255 /* 0xff */, + 0 /* NUL */, 1 /* SOH */, 2 /* STX */, 3 /* ETX */, + 4 /* EOT */, 5 /* ENQ */, 6 /* ACK */, 7 /* BEL */, + 8 /* BS */, 9 /* HT */, 10 /* LF */, 11 /* VT */, + 12 /* FF */, 13 /* CR */, 14 /* SO */, 15 /* SI */, + 16 /* DLE */, 17 /* DC1 */, 18 /* DC2 */, 19 /* DC3 */, + 20 /* DC4 */, 21 /* NAK */, 22 /* SYN */, 23 /* ETB */, + 24 /* CAN */, 25 /* EM */, 26 /* SUB */, 27 /* ESC */, + 28 /* FS */, 29 /* GS */, 30 /* RS */, 31 /* US */, + 32 /* SPC */, 33 /* ! */, 34 /* " */, 35 /* # */, + 36 /* $ */, 37 /* % */, 38 /* & */, 39 /* ' */, + 40 /* ( */, 41 /* ) */, 42 /* * */, 43 /* + */, + 44 /* , */, 45 /* - */, 46 /* . */, 47 /* / */, + 48 /* 0 */, 49 /* 1 */, 50 /* 2 */, 51 /* 3 */, + 52 /* 4 */, 53 /* 5 */, 54 /* 6 */, 55 /* 7 */, + 56 /* 8 */, 57 /* 9 */, 58 /* : */, 59 /* ; */, + 60 /* < */, 61 /* = */, 62 /* > */, 63 /* ? */, + 64 /* @ */, 97 /* A */, 98 /* B */, 99 /* C */, + 100 /* D */, 101 /* E */, 102 /* F */, 103 /* G */, + 104 /* H */, 105 /* I */, 106 /* J */, 107 /* K */, + 108 /* L */, 109 /* M */, 110 /* N */, 111 /* O */, + 112 /* P */, 113 /* Q */, 114 /* R */, 115 /* S */, + 116 /* T */, 117 /* U */, 118 /* V */, 119 /* W */, + 120 /* X */, 121 /* Y */, 122 /* Z */, 91 /* [ */, + 92 /* \ */, 93 /* ] */, 94 /* ^ */, 95 /* _ */, + 96 /* ` */, 97 /* a */, 98 /* b */, 99 /* c */, + 100 /* d */, 101 /* e */, 102 /* f */, 103 /* g */, + 104 /* h */, 105 /* i */, 106 /* j */, 107 /* k */, + 108 /* l */, 109 /* m */, 110 /* n */, 111 /* o */, + 112 /* p */, 113 /* q */, 114 /* r */, 115 /* s */, + 116 /* t */, 117 /* u */, 118 /* v */, 119 /* w */, + 120 /* x */, 121 /* y */, 122 /* z */, 123 /* { */, + 124 /* | */, 125 /* } */, 126 /* ~ */, 127 /* DEL */, + 128 /* 0x80 */, 129 /* 0x81 */, 130 /* 0x82 */, 131 /* 0x83 */, + 132 /* 0x84 */, 133 /* 0x85 */, 134 /* 0x86 */, 135 /* 0x87 */, + 136 /* 0x88 */, 137 /* 0x89 */, 138 /* 0x8a */, 139 /* 0x8b */, + 140 /* 0x8c */, 141 /* 0x8d */, 142 /* 0x8e */, 143 /* 0x8f */, + 144 /* 0x90 */, 145 /* 0x91 */, 146 /* 0x92 */, 147 /* 0x93 */, + 148 /* 0x94 */, 149 /* 0x95 */, 150 /* 0x96 */, 151 /* 0x97 */, + 152 /* 0x98 */, 153 /* 0x99 */, 154 /* 0x9a */, 155 /* 0x9b */, + 156 /* 0x9c */, 157 /* 0x9d */, 158 /* 0x9e */, 159 /* 0x9f */, + 160 /* 0xa0 */, 161 /* 0xa1 */, 162 /* 0xa2 */, 163 /* 0xa3 */, + 164 /* 0xa4 */, 165 /* 0xa5 */, 166 /* 0xa6 */, 167 /* 0xa7 */, + 168 /* 0xa8 */, 169 /* 0xa9 */, 170 /* 0xaa */, 171 /* 0xab */, + 172 /* 0xac */, 173 /* 0xad */, 174 /* 0xae */, 175 /* 0xaf */, + 176 /* 0xb0 */, 177 /* 0xb1 */, 178 /* 0xb2 */, 179 /* 0xb3 */, + 180 /* 0xb4 */, 181 /* 0xb5 */, 182 /* 0xb6 */, 183 /* 0xb7 */, + 184 /* 0xb8 */, 185 /* 0xb9 */, 186 /* 0xba */, 187 /* 0xbb */, + 188 /* 0xbc */, 189 /* 0xbd */, 190 /* 0xbe */, 191 /* 0xbf */, + 192 /* 0xc0 */, 193 /* 0xc1 */, 194 /* 0xc2 */, 195 /* 0xc3 */, + 196 /* 0xc4 */, 197 /* 0xc5 */, 198 /* 0xc6 */, 199 /* 0xc7 */, + 200 /* 0xc8 */, 201 /* 0xc9 */, 202 /* 0xca */, 203 /* 0xcb */, + 204 /* 0xcc */, 205 /* 0xcd */, 206 /* 0xce */, 207 /* 0xcf */, + 208 /* 0xd0 */, 209 /* 0xd1 */, 210 /* 0xd2 */, 211 /* 0xd3 */, + 212 /* 0xd4 */, 213 /* 0xd5 */, 214 /* 0xd6 */, 215 /* 0xd7 */, + 216 /* 0xd8 */, 217 /* 0xd9 */, 218 /* 0xda */, 219 /* 0xdb */, + 220 /* 0xdc */, 221 /* 0xdd */, 222 /* 0xde */, 223 /* 0xdf */, + 224 /* 0xe0 */, 225 /* 0xe1 */, 226 /* 0xe2 */, 227 /* 0xe3 */, + 228 /* 0xe4 */, 229 /* 0xe5 */, 230 /* 0xe6 */, 231 /* 0xe7 */, + 232 /* 0xe8 */, 233 /* 0xe9 */, 234 /* 0xea */, 235 /* 0xeb */, + 236 /* 0xec */, 237 /* 0xed */, 238 /* 0xee */, 239 /* 0xef */, + 240 /* 0xf0 */, 241 /* 0xf1 */, 242 /* 0xf2 */, 243 /* 0xf3 */, + 244 /* 0xf4 */, 245 /* 0xf5 */, 246 /* 0xf6 */, 247 /* 0xf7 */, + 248 /* 0xf8 */, 249 /* 0xf9 */, 250 /* 0xfa */, 251 /* 0xfb */, + 252 /* 0xfc */, 253 /* 0xfd */, 254 /* 0xfe */, 255 /* 0xff */, }; void nghttp3_downcase(uint8_t *s, size_t len) { diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_str.h b/deps/ngtcp2/nghttp3/lib/nghttp3_str.h index 19c1d2c71b559b..280749a3a9a3d9 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_str.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_str.h @@ -29,7 +29,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -37,4 +37,4 @@ uint8_t *nghttp3_cpymem(uint8_t *dest, const uint8_t *src, size_t n); void nghttp3_downcase(uint8_t *s, size_t len); -#endif /* NGHTTP3_STR_H */ +#endif /* !defined(NGHTTP3_STR_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_stream.c b/deps/ngtcp2/nghttp3/lib/nghttp3_stream.c index 6188a141dd123b..328cddd488fd6f 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_stream.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_stream.c @@ -73,7 +73,6 @@ int nghttp3_stream_new(nghttp3_stream **pstream, int64_t stream_id, stream->qpack_blocked_pe.index = NGHTTP3_PQ_BAD_INDEX; stream->mem = mem; - stream->tx.offset = 0; stream->rx.http.status_code = -1; stream->rx.http.content_length = -1; stream->rx.http.pri.urgency = NGHTTP3_DEFAULT_URGENCY; @@ -180,48 +179,44 @@ void nghttp3_stream_read_state_reset(nghttp3_stream_read_state *rstate) { } nghttp3_ssize nghttp3_read_varint(nghttp3_varint_read_state *rvint, - const uint8_t *src, size_t srclen, int fin) { - size_t nread = 0; - size_t n; - size_t i; + const uint8_t *begin, const uint8_t *end, + int fin) { + const uint8_t *orig_begin = begin; + size_t len; - assert(srclen > 0); + assert(begin != end); if (rvint->left == 0) { assert(rvint->acc == 0); - rvint->left = nghttp3_get_varintlen(src); - if (rvint->left <= srclen) { - rvint->acc = nghttp3_get_varint(&nread, src); - rvint->left = 0; - return (nghttp3_ssize)nread; + len = nghttp3_get_varintlen(begin); + if (len <= (size_t)(end - begin)) { + nghttp3_get_varint(&rvint->acc, begin); + return (nghttp3_ssize)len; } if (fin) { return NGHTTP3_ERR_INVALID_ARGUMENT; } - rvint->acc = nghttp3_get_varint_fb(src); - nread = 1; - ++src; - --srclen; - --rvint->left; + rvint->acc = nghttp3_get_varint_fb(begin++); + rvint->left = len - 1; } - n = nghttp3_min(rvint->left, srclen); + len = nghttp3_min_size(rvint->left, (size_t)(end - begin)); + end = begin + len; - for (i = 0; i < n; ++i) { - rvint->acc = (rvint->acc << 8) + src[i]; + for (; begin != end;) { + rvint->acc = (rvint->acc << 8) + *begin++; } - rvint->left -= n; - nread += n; + rvint->left -= len; if (fin && rvint->left) { return NGHTTP3_ERR_INVALID_ARGUMENT; } - return (nghttp3_ssize)nread; + return (nghttp3_ssize)(begin - orig_begin); } int nghttp3_stream_frq_add(nghttp3_stream *stream, @@ -231,7 +226,8 @@ int nghttp3_stream_frq_add(nghttp3_stream *stream, int rv; if (nghttp3_ringbuf_full(frq)) { - size_t nlen = nghttp3_max(NGHTTP3_MIN_RBLEN, nghttp3_ringbuf_len(frq) * 2); + size_t nlen = + nghttp3_max_size(NGHTTP3_MIN_RBLEN, nghttp3_ringbuf_len(frq) * 2); rv = nghttp3_ringbuf_reserve(frq, nlen); if (rv != 0) { return rv; @@ -444,8 +440,8 @@ int nghttp3_stream_write_headers(nghttp3_stream *stream, assert(conn); return nghttp3_stream_write_header_block( - stream, &conn->qenc, conn->tx.qenc, &conn->tx.qpack.rbuf, - &conn->tx.qpack.ebuf, NGHTTP3_FRAME_HEADERS, fr->nva, fr->nvlen); + stream, &conn->qenc, conn->tx.qenc, &conn->tx.qpack.rbuf, + &conn->tx.qpack.ebuf, NGHTTP3_FRAME_HEADERS, fr->nva, fr->nvlen); } int nghttp3_stream_write_header_block(nghttp3_stream *stream, @@ -738,7 +734,7 @@ int nghttp3_stream_outq_add(nghttp3_stream *stream, } if (nghttp3_ringbuf_full(outq)) { - size_t nlen = nghttp3_max(NGHTTP3_MIN_RBLEN, len * 2); + size_t nlen = nghttp3_max_size(NGHTTP3_MIN_RBLEN, len * 2); rv = nghttp3_ringbuf_reserve(outq, nlen); if (rv != 0) { return rv; @@ -770,8 +766,8 @@ int nghttp3_stream_ensure_chunk(nghttp3_stream *stream, size_t need) { ; if (n == NGHTTP3_STREAM_MIN_CHUNK_SIZE) { - p = (uint8_t *)nghttp3_objalloc_chunk_len_get(stream->out_chunk_objalloc, - n); + p = + (uint8_t *)nghttp3_objalloc_chunk_len_get(stream->out_chunk_objalloc, n); } else { p = nghttp3_mem_malloc(stream->mem, n); } @@ -780,7 +776,7 @@ int nghttp3_stream_ensure_chunk(nghttp3_stream *stream, size_t need) { } if (nghttp3_ringbuf_full(chunks)) { - size_t nlen = nghttp3_max(NGHTTP3_MIN_RBLEN, len * 2); + size_t nlen = nghttp3_max_size(NGHTTP3_MIN_RBLEN, len * 2); rv = nghttp3_ringbuf_reserve(chunks, nlen); if (rv != 0) { return rv; @@ -928,9 +924,8 @@ static void stream_pop_outq_entry(nghttp3_stream *stream, nghttp3_ringbuf_pop_front(&stream->outq); } -int nghttp3_stream_add_ack_offset(nghttp3_stream *stream, uint64_t n) { +int nghttp3_stream_update_ack_offset(nghttp3_stream *stream, uint64_t offset) { nghttp3_ringbuf *outq = &stream->outq; - uint64_t offset = stream->ack_offset + n; size_t buflen; size_t npopped = 0; uint64_t nack; @@ -941,24 +936,25 @@ int nghttp3_stream_add_ack_offset(nghttp3_stream *stream, uint64_t n) { tbuf = nghttp3_ringbuf_get(outq, 0); buflen = nghttp3_buf_len(&tbuf->buf); - if (tbuf->type == NGHTTP3_BUF_TYPE_ALIEN) { - nack = nghttp3_min(offset, (uint64_t)buflen) - stream->ack_done; - if (stream->callbacks.acked_data) { - rv = stream->callbacks.acked_data(stream, stream->node.id, nack, - stream->user_data); - if (rv != 0) { - return NGHTTP3_ERR_CALLBACK_FAILURE; - } + /* For NGHTTP3_BUF_TYPE_ALIEN, we never add 0 length buffer. */ + if (tbuf->type == NGHTTP3_BUF_TYPE_ALIEN && stream->ack_offset < offset && + stream->callbacks.acked_data) { + nack = nghttp3_min_uint64(offset, stream->ack_base + buflen) - + stream->ack_offset; + + rv = stream->callbacks.acked_data(stream, stream->node.id, nack, + stream->user_data); + if (rv != 0) { + return NGHTTP3_ERR_CALLBACK_FAILURE; } - stream->ack_done += nack; } - if (offset >= buflen) { + if (offset >= stream->ack_base + buflen) { stream_pop_outq_entry(stream, tbuf); - offset -= buflen; + stream->ack_base += buflen; + stream->ack_offset = stream->ack_base; ++npopped; - stream->ack_done = 0; if (stream->outq_idx + 1 == npopped) { stream->outq_offset = 0; @@ -996,7 +992,7 @@ int nghttp3_stream_buffer_data(nghttp3_stream *stream, const uint8_t *data, if (len) { buf = nghttp3_ringbuf_get(inq, len - 1); bufleft = nghttp3_buf_left(buf); - nwrite = nghttp3_min(datalen, bufleft); + nwrite = nghttp3_min_size(datalen, bufleft); buf->last = nghttp3_cpymem(buf->last, data, nwrite); data += nwrite; datalen -= nwrite; @@ -1005,7 +1001,7 @@ int nghttp3_stream_buffer_data(nghttp3_stream *stream, const uint8_t *data, for (; datalen;) { if (nghttp3_ringbuf_full(inq)) { size_t nlen = - nghttp3_max(NGHTTP3_MIN_RBLEN, nghttp3_ringbuf_len(inq) * 2); + nghttp3_max_size(NGHTTP3_MIN_RBLEN, nghttp3_ringbuf_len(inq) * 2); rv = nghttp3_ringbuf_reserve(inq, nlen); if (rv != 0) { return rv; @@ -1020,7 +1016,7 @@ int nghttp3_stream_buffer_data(nghttp3_stream *stream, const uint8_t *data, buf = nghttp3_ringbuf_push_back(inq); nghttp3_buf_wrap_init(buf, rawbuf, 16384); bufleft = nghttp3_buf_left(buf); - nwrite = nghttp3_min(datalen, bufleft); + nwrite = nghttp3_min_size(datalen, bufleft); buf->last = nghttp3_cpymem(buf->last, data, nwrite); data += nwrite; datalen -= nwrite; diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_stream.h b/deps/ngtcp2/nghttp3/lib/nghttp3_stream.h index 03a57697b232b3..7d296febf9135f 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_stream.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_stream.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -88,8 +88,8 @@ typedef struct nghttp3_varint_read_state { typedef struct nghttp3_stream_read_state { nghttp3_varint_read_state rvint; nghttp3_frame fr; - int state; int64_t left; + int state; } nghttp3_stream_read_state; /* NGHTTP3_STREAM_FLAG_NONE indicates that no flag is set. */ @@ -186,9 +186,6 @@ typedef struct nghttp3_stream_callbacks { } nghttp3_stream_callbacks; typedef struct nghttp3_http_state { - /* status_code is HTTP status code received. This field is used - if connection is initialized as client. */ - int32_t status_code; /* content_length is the value of received content-length header field. */ int64_t content_length; @@ -196,6 +193,9 @@ typedef struct nghttp3_http_state { far. */ int64_t recv_content_length; nghttp3_pri pri; + /* status_code is HTTP status code received. This field is used + if connection is initialized as client. */ + int32_t status_code; uint32_t flags; } nghttp3_http_state; @@ -226,13 +226,12 @@ struct nghttp3_stream { /* outq_offset is write offset relative to the element at outq_idx in outq. */ uint64_t outq_offset; - /* ack_offset is offset acknowledged by peer relative to the first - element in outq. */ + /* ack_base is the number of bytes acknowledged by a remote + endpoint where the first element in outq is positioned at. */ + uint64_t ack_base; + /* ack_offset is the number of bytes acknowledged by a remote + endpoint so far. */ uint64_t ack_offset; - /* ack_done is the number of bytes notified to an application that - they are acknowledged inside the first outq element if it is of - type NGHTTP3_BUF_TYPE_ALIEN. */ - uint64_t ack_done; uint64_t unscheduled_nwrite; nghttp3_stream_type type; nghttp3_stream_read_state rstate; @@ -283,7 +282,8 @@ void nghttp3_varint_read_state_reset(nghttp3_varint_read_state *rvint); void nghttp3_stream_read_state_reset(nghttp3_stream_read_state *rstate); nghttp3_ssize nghttp3_read_varint(nghttp3_varint_read_state *rvint, - const uint8_t *src, size_t srclen, int fin); + const uint8_t *begin, const uint8_t *end, + int fin); int nghttp3_stream_frq_add(nghttp3_stream *stream, const nghttp3_frame_entry *frent); @@ -336,7 +336,11 @@ void nghttp3_stream_add_outq_offset(nghttp3_stream *stream, size_t n); */ int nghttp3_stream_outq_write_done(nghttp3_stream *stream); -int nghttp3_stream_add_ack_offset(nghttp3_stream *stream, uint64_t n); +/* + * nghttp2_stream_update_ack_offset updates the last acknowledged + * offset to |offset|. + */ +int nghttp3_stream_update_ack_offset(nghttp3_stream *stream, uint64_t offset); /* * nghttp3_stream_is_active returns nonzero if |stream| is active. In @@ -390,4 +394,4 @@ int nghttp3_client_stream_uni(int64_t stream_id); */ int nghttp3_server_stream_uni(int64_t stream_id); -#endif /* NGHTTP3_STREAM_H */ +#endif /* !defined(NGHTTP3_STREAM_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_tnode.c b/deps/ngtcp2/nghttp3/lib/nghttp3_tnode.c index d9c5e598699512..eae847e7a9236a 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_tnode.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_tnode.c @@ -73,8 +73,8 @@ int nghttp3_tnode_schedule(nghttp3_tnode *tnode, nghttp3_pq *pq, if (tnode->pe.index == NGHTTP3_PQ_BAD_INDEX) { tnode->cycle = - pq_get_first_cycle(pq) + - ((nwrite == 0 || !tnode->pri.inc) ? 0 : nghttp3_max(1, penalty)); + pq_get_first_cycle(pq) + + ((nwrite == 0 || !tnode->pri.inc) ? 0 : nghttp3_max_uint64(1, penalty)); } else if (nwrite > 0) { if (!tnode->pri.inc || nghttp3_pq_size(pq) == 1) { return 0; @@ -82,7 +82,7 @@ int nghttp3_tnode_schedule(nghttp3_tnode *tnode, nghttp3_pq *pq, nghttp3_pq_remove(pq, &tnode->pe); tnode->pe.index = NGHTTP3_PQ_BAD_INDEX; - tnode->cycle += nghttp3_max(1, penalty); + tnode->cycle += nghttp3_max_uint64(1, penalty); } else { return 0; } diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_tnode.h b/deps/ngtcp2/nghttp3/lib/nghttp3_tnode.h index 1abc1e62519381..c13af52fdc6bc7 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_tnode.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_tnode.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -63,4 +63,4 @@ int nghttp3_tnode_schedule(nghttp3_tnode *tnode, nghttp3_pq *pq, */ int nghttp3_tnode_is_scheduled(nghttp3_tnode *tnode); -#endif /* NGHTTP3_TNODE_H */ +#endif /* !defined(NGHTTP3_TNODE_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_unreachable.c b/deps/ngtcp2/nghttp3/lib/nghttp3_unreachable.c index 6fea89b802b12d..8adeeb4931dc57 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_unreachable.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_unreachable.c @@ -29,11 +29,11 @@ #include #ifdef HAVE_UNISTD_H # include -#endif /* HAVE_UNISTD_H */ +#endif /* defined(HAVE_UNISTD_H) */ #include #ifdef WIN32 # include -#endif /* WIN32 */ +#endif /* defined(WIN32) */ void nghttp3_unreachable_fail(const char *file, int line, const char *func) { char *buf; @@ -62,9 +62,9 @@ void nghttp3_unreachable_fail(const char *file, int line, const char *func) { #ifndef WIN32 while (write(STDERR_FILENO, buf, (size_t)rv) == -1 && errno == EINTR) ; -#else /* WIN32 */ +#else /* defined(WIN32) */ _write(_fileno(stderr), buf, (unsigned int)rv); -#endif /* WIN32 */ +#endif /* defined(WIN32) */ free(buf); diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_unreachable.h b/deps/ngtcp2/nghttp3/lib/nghttp3_unreachable.h index 6360f52d3aa857..c609d7ed72f3cb 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_unreachable.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_unreachable.h @@ -28,26 +28,26 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include #ifdef __FILE_NAME__ # define NGHTTP3_FILE_NAME __FILE_NAME__ -#else /* !__FILE_NAME__ */ +#else /* !defined(__FILE_NAME__) */ # define NGHTTP3_FILE_NAME "(file)" -#endif /* !__FILE_NAME__ */ +#endif /* !defined(__FILE_NAME__) */ #define nghttp3_unreachable() \ nghttp3_unreachable_fail(NGHTTP3_FILE_NAME, __LINE__, __func__) #ifdef _MSC_VER __declspec(noreturn) -#endif /* _MSC_VER */ +#endif /* defined(_MSC_VER) */ void nghttp3_unreachable_fail(const char *file, int line, const char *func) #ifndef _MSC_VER __attribute__((noreturn)) -#endif /* !_MSC_VER */ +#endif /* !defined(_MSC_VER) */ ; -#endif /* NGHTTP3_UNREACHABLE_H */ +#endif /* !defined(NGHTTP3_UNREACHABLE_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_vec.h b/deps/ngtcp2/nghttp3/lib/nghttp3_vec.h index 473d1467310062..f36eabc1052621 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_vec.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_vec.h @@ -28,7 +28,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -38,4 +38,4 @@ */ int64_t nghttp3_vec_len_varint(const nghttp3_vec *vec, size_t n); -#endif /* NGHTTP3_VEC_H */ +#endif /* !defined(NGHTTP3_VEC_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_version.c b/deps/ngtcp2/nghttp3/lib/nghttp3_version.c index c460cc72835b1d..939821d84eac3d 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_version.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_version.c @@ -24,7 +24,7 @@ */ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include diff --git a/deps/ngtcp2/nghttp3/lib/sfparse/COPYING b/deps/ngtcp2/nghttp3/lib/sfparse/COPYING new file mode 100644 index 00000000000000..8212d82d83ab74 --- /dev/null +++ b/deps/ngtcp2/nghttp3/lib/sfparse/COPYING @@ -0,0 +1,22 @@ +The MIT License + +Copyright (c) 2023 sfparse contributors + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/deps/ngtcp2/nghttp3/lib/sfparse.c b/deps/ngtcp2/nghttp3/lib/sfparse/sfparse.c similarity index 55% rename from deps/ngtcp2/nghttp3/lib/sfparse.c rename to deps/ngtcp2/nghttp3/lib/sfparse/sfparse.c index efa2850c9d661d..d0328cf40c21ea 100644 --- a/deps/ngtcp2/nghttp3/lib/sfparse.c +++ b/deps/ngtcp2/nghttp3/lib/sfparse/sfparse.c @@ -135,6 +135,70 @@ UCALPHA_CASES: \ LCALPHA_CASES +#define TOKEN_CASES \ + case '!': \ + case '#': \ + case '$': \ + case '%': \ + case '&': \ + case '\'': \ + case '*': \ + case '+': \ + case '-': \ + case '.': \ + case '/': \ + DIGIT_CASES: \ + case ':': \ + UCALPHA_CASES: \ + case '^': \ + case '_': \ + case '`': \ + LCALPHA_CASES: \ + case '|': \ + case '~' + +#define LCHEXALPHA_CASES \ + case 'a': \ + case 'b': \ + case 'c': \ + case 'd': \ + case 'e': \ + case 'f' + +#define X00_1F_CASES \ + case 0x00: \ + case 0x01: \ + case 0x02: \ + case 0x03: \ + case 0x04: \ + case 0x05: \ + case 0x06: \ + case 0x07: \ + case 0x08: \ + case 0x09: \ + case 0x0a: \ + case 0x0b: \ + case 0x0c: \ + case 0x0d: \ + case 0x0e: \ + case 0x0f: \ + case 0x10: \ + case 0x11: \ + case 0x12: \ + case 0x13: \ + case 0x14: \ + case 0x15: \ + case 0x16: \ + case 0x17: \ + case 0x18: \ + case 0x19: \ + case 0x1a: \ + case 0x1b: \ + case 0x1c: \ + case 0x1d: \ + case 0x1e: \ + case 0x1f + #define X20_21_CASES \ case ' ': \ case '!' @@ -175,6 +239,137 @@ case '}': \ case '~' +#define X7F_FF_CASES \ + case 0x7f: \ + case 0x80: \ + case 0x81: \ + case 0x82: \ + case 0x83: \ + case 0x84: \ + case 0x85: \ + case 0x86: \ + case 0x87: \ + case 0x88: \ + case 0x89: \ + case 0x8a: \ + case 0x8b: \ + case 0x8c: \ + case 0x8d: \ + case 0x8e: \ + case 0x8f: \ + case 0x90: \ + case 0x91: \ + case 0x92: \ + case 0x93: \ + case 0x94: \ + case 0x95: \ + case 0x96: \ + case 0x97: \ + case 0x98: \ + case 0x99: \ + case 0x9a: \ + case 0x9b: \ + case 0x9c: \ + case 0x9d: \ + case 0x9e: \ + case 0x9f: \ + case 0xa0: \ + case 0xa1: \ + case 0xa2: \ + case 0xa3: \ + case 0xa4: \ + case 0xa5: \ + case 0xa6: \ + case 0xa7: \ + case 0xa8: \ + case 0xa9: \ + case 0xaa: \ + case 0xab: \ + case 0xac: \ + case 0xad: \ + case 0xae: \ + case 0xaf: \ + case 0xb0: \ + case 0xb1: \ + case 0xb2: \ + case 0xb3: \ + case 0xb4: \ + case 0xb5: \ + case 0xb6: \ + case 0xb7: \ + case 0xb8: \ + case 0xb9: \ + case 0xba: \ + case 0xbb: \ + case 0xbc: \ + case 0xbd: \ + case 0xbe: \ + case 0xbf: \ + case 0xc0: \ + case 0xc1: \ + case 0xc2: \ + case 0xc3: \ + case 0xc4: \ + case 0xc5: \ + case 0xc6: \ + case 0xc7: \ + case 0xc8: \ + case 0xc9: \ + case 0xca: \ + case 0xcb: \ + case 0xcc: \ + case 0xcd: \ + case 0xce: \ + case 0xcf: \ + case 0xd0: \ + case 0xd1: \ + case 0xd2: \ + case 0xd3: \ + case 0xd4: \ + case 0xd5: \ + case 0xd6: \ + case 0xd7: \ + case 0xd8: \ + case 0xd9: \ + case 0xda: \ + case 0xdb: \ + case 0xdc: \ + case 0xdd: \ + case 0xde: \ + case 0xdf: \ + case 0xe0: \ + case 0xe1: \ + case 0xe2: \ + case 0xe3: \ + case 0xe4: \ + case 0xe5: \ + case 0xe6: \ + case 0xe7: \ + case 0xe8: \ + case 0xe9: \ + case 0xea: \ + case 0xeb: \ + case 0xec: \ + case 0xed: \ + case 0xee: \ + case 0xef: \ + case 0xf0: \ + case 0xf1: \ + case 0xf2: \ + case 0xf3: \ + case 0xf4: \ + case 0xf5: \ + case 0xf6: \ + case 0xf7: \ + case 0xf8: \ + case 0xf9: \ + case 0xfa: \ + case 0xfb: \ + case 0xfc: \ + case 0xfd: \ + case 0xfe: \ + case 0xff + static int is_ws(uint8_t c) { switch (c) { case ' ': @@ -431,25 +626,7 @@ static int parser_token(sf_parser *sfp, sf_value *dest) { for (; !parser_eof(sfp); ++sfp->pos) { switch (*sfp->pos) { - case '!': - case '#': - case '$': - case '%': - case '&': - case '\'': - case '*': - case '+': - case '-': - case '.': - case '^': - case '_': - case '`': - case '|': - case '~': - case ':': - case '/': - DIGIT_CASES: - ALPHA_CASES: + TOKEN_CASES: continue; } @@ -487,58 +664,30 @@ static int parser_byteseq(sf_parser *sfp, sf_value *dest) { case 1: return SF_ERR_PARSE_ERROR; case 2: - switch (*(sfp->pos - 1)) { - case 'A': - case 'Q': - case 'g': - case 'w': - break; - default: - return SF_ERR_PARSE_ERROR; - } - ++sfp->pos; - if (parser_eof(sfp) || *sfp->pos != '=') { + if (parser_eof(sfp)) { return SF_ERR_PARSE_ERROR; } + if (*sfp->pos == '=') { + ++sfp->pos; + } + break; case 3: - switch (*(sfp->pos - 1)) { - case 'A': - case 'E': - case 'I': - case 'M': - case 'Q': - case 'U': - case 'Y': - case 'c': - case 'g': - case 'k': - case 'o': - case 's': - case 'w': - case '0': - case '4': - case '8': - break; - default: - return SF_ERR_PARSE_ERROR; - } + ++sfp->pos; break; } - ++sfp->pos; - if (parser_eof(sfp) || *sfp->pos != ':') { return SF_ERR_PARSE_ERROR; } goto fin; case ':': - if ((sfp->pos - base) & 0x3) { + if (((sfp->pos - base) & 0x3) == 1) { return SF_ERR_PARSE_ERROR; } @@ -599,6 +748,169 @@ static int parser_boolean(sf_parser *sfp, sf_value *dest) { return 0; } +static int pctdecode(uint8_t *pc, const uint8_t **ppos) { + uint8_t c, b = **ppos; + + switch (b) { + DIGIT_CASES: + c = (uint8_t)((b - '0') << 4); + + break; + LCHEXALPHA_CASES: + c = (uint8_t)((b - 'a' + 10) << 4); + + break; + default: + return -1; + } + + b = *++*ppos; + + switch (b) { + DIGIT_CASES: + c |= (uint8_t)(b - '0'); + + break; + LCHEXALPHA_CASES: + c |= (uint8_t)(b - 'a' + 10); + + break; + default: + return -1; + } + + *pc = c; + ++*ppos; + + return 0; +} + +/* Start of utf8 dfa */ +/* Copyright (c) 2008-2010 Bjoern Hoehrmann + * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. + * + * Copyright (c) 2008-2009 Bjoern Hoehrmann + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#define UTF8_ACCEPT 0 +#define UTF8_REJECT 12 + +/* clang-format off */ +static const uint8_t utf8d[] = { + /* + * The first part of the table maps bytes to character classes that + * to reduce the size of the transition table and create bitmasks. + */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, + + /* + * The second part is a transition table that maps a combination + * of a state of the automaton and a character class to a state. + */ + 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, + 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, + 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, + 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, + 12,36,12,12,12,12,12,12,12,12,12,12, +}; +/* clang-format on */ + +static void utf8_decode(uint32_t *state, uint8_t byte) { + *state = utf8d[256 + *state + utf8d[byte]]; +} + +/* End of utf8 dfa */ + +static int parser_dispstring(sf_parser *sfp, sf_value *dest) { + const uint8_t *base; + uint8_t c; + uint32_t utf8state = UTF8_ACCEPT; + + assert('%' == *sfp->pos); + + ++sfp->pos; + + if (parser_eof(sfp) || *sfp->pos != '"') { + return SF_ERR_PARSE_ERROR; + } + + base = ++sfp->pos; + + for (; !parser_eof(sfp);) { + switch (*sfp->pos) { + X00_1F_CASES: + X7F_FF_CASES: + return SF_ERR_PARSE_ERROR; + case '%': + ++sfp->pos; + + if (sfp->pos + 2 > sfp->end) { + return SF_ERR_PARSE_ERROR; + } + + if (pctdecode(&c, &sfp->pos) != 0) { + return SF_ERR_PARSE_ERROR; + } + + utf8_decode(&utf8state, c); + if (utf8state == UTF8_REJECT) { + return SF_ERR_PARSE_ERROR; + } + + break; + case '"': + if (utf8state != UTF8_ACCEPT) { + return SF_ERR_PARSE_ERROR; + } + + if (dest) { + dest->type = SF_TYPE_DISPSTRING; + dest->flags = SF_VALUE_FLAG_NONE; + dest->vec.len = (size_t)(sfp->pos - base); + dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base; + } + + ++sfp->pos; + + return 0; + default: + if (utf8state != UTF8_ACCEPT) { + return SF_ERR_PARSE_ERROR; + } + + ++sfp->pos; + } + } + + return SF_ERR_PARSE_ERROR; +} + static int parser_bare_item(sf_parser *sfp, sf_value *dest) { switch (*sfp->pos) { case '"': @@ -615,6 +927,8 @@ static int parser_bare_item(sf_parser *sfp, sf_value *dest) { case '*': ALPHA_CASES: return parser_token(sfp, dest); + case '%': + return parser_dispstring(sfp, dest); default: return SF_ERR_PARSE_ERROR; } @@ -1037,7 +1351,7 @@ void sf_unescape(sf_vec *dest, const sf_vec *src) { size_t len, slen; if (src->len == 0) { - *dest = *src; + dest->len = 0; return; } @@ -1049,16 +1363,12 @@ void sf_unescape(sf_vec *dest, const sf_vec *src) { for (;;) { q = memchr(p, '\\', len); if (q == NULL) { - if (len == src->len) { - *dest = *src; - - return; - } - memcpy(o, p, len); o += len; - break; + dest->len = (size_t)(o - dest->base); + + return; } slen = (size_t)(q - p); @@ -1069,8 +1379,6 @@ void sf_unescape(sf_vec *dest, const sf_vec *src) { *o++ = *p++; len -= slen + 2; } - - dest->len = (size_t)(o - dest->base); } void sf_base64decode(sf_vec *dest, const sf_vec *src) { @@ -1093,20 +1401,22 @@ void sf_base64decode(sf_vec *dest, const sf_vec *src) { uint8_t *o; const uint8_t *p, *end; uint32_t n; - size_t i; + size_t i, left; int idx; - assert((src->len & 0x3) == 0); - if (src->len == 0) { - *dest = *src; + dest->len = 0; return; } o = dest->base; p = src->base; - end = src->base + src->len; + left = src->len & 0x3; + if (left == 0 && src->base[src->len - 1] == '=') { + left = 4; + } + end = src->base + src->len - left; for (; p != end;) { n = 0; @@ -1114,33 +1424,94 @@ void sf_base64decode(sf_vec *dest, const sf_vec *src) { for (i = 1; i <= 4; ++i, ++p) { idx = index_tbl[*p]; - if (idx == -1) { - assert(i > 2); + assert(idx != -1); - if (i == 3) { - assert(*p == '=' && *(p + 1) == '=' && p + 2 == end); + n += (uint32_t)(idx << (24 - i * 6)); + } - *o++ = (uint8_t)(n >> 16); + *o++ = (uint8_t)(n >> 16); + *o++ = (n >> 8) & 0xffu; + *o++ = n & 0xffu; + } - goto fin; - } + switch (left) { + case 0: + goto fin; + case 1: + assert(0); + abort(); + case 3: + if (src->base[src->len - 1] == '=') { + left = 2; + } - assert(*p == '=' && p + 1 == end); + break; + case 4: + assert('=' == src->base[src->len - 1]); - *o++ = (uint8_t)(n >> 16); - *o++ = (n >> 8) & 0xffu; + if (src->base[src->len - 2] == '=') { + left = 2; + } else { + left = 3; + } - goto fin; - } + break; + } - n += (uint32_t)(idx << (24 - i * 6)); - } + switch (left) { + case 2: + *o = (uint8_t)(index_tbl[*p++] << 2); + *o++ |= (uint8_t)(index_tbl[*p++] >> 4); - *o++ = (uint8_t)(n >> 16); + break; + case 3: + n = (uint32_t)(index_tbl[*p++] << 10); + n += (uint32_t)(index_tbl[*p++] << 4); + n += (uint32_t)(index_tbl[*p++] >> 2); *o++ = (n >> 8) & 0xffu; *o++ = n & 0xffu; + + break; } fin: dest->len = (size_t)(o - dest->base); } + +void sf_pctdecode(sf_vec *dest, const sf_vec *src) { + const uint8_t *p, *q; + uint8_t *o; + size_t len, slen; + + if (src->len == 0) { + dest->len = 0; + + return; + } + + o = dest->base; + p = src->base; + len = src->len; + + for (;;) { + q = memchr(p, '%', len); + if (q == NULL) { + memcpy(o, p, len); + o += len; + + dest->len = (size_t)(o - dest->base); + + return; + } + + slen = (size_t)(q - p); + memcpy(o, p, slen); + o += slen; + + p = q + 1; + + pctdecode(o++, &p); + + len -= slen + 3; + } +} diff --git a/deps/ngtcp2/nghttp3/lib/sfparse.h b/deps/ngtcp2/nghttp3/lib/sfparse/sfparse.h similarity index 91% rename from deps/ngtcp2/nghttp3/lib/sfparse.h rename to deps/ngtcp2/nghttp3/lib/sfparse/sfparse.h index 1474db1429acea..01cc947d4d61bc 100644 --- a/deps/ngtcp2/nghttp3/lib/sfparse.h +++ b/deps/ngtcp2/nghttp3/lib/sfparse/sfparse.h @@ -85,7 +85,11 @@ typedef enum sf_type { /** * :enum:`SF_TYPE_DATE` indicates date type. */ - SF_TYPE_DATE + SF_TYPE_DATE, + /** + * :enum:`SF_TYPE_DISPSTRING` indicates display string type. + */ + SF_TYPE_DISPSTRING } sf_type; /** @@ -197,8 +201,8 @@ typedef struct sf_value { /** * :member:`vec` contains sequence of bytes if :member:`type` is * either :enum:`sf_type.SF_TYPE_STRING`, - * :enum:`sf_type.SF_TYPE_TOKEN`, or - * :enum:`sf_type.SF_TYPE_BYTESEQ`. + * :enum:`sf_type.SF_TYPE_TOKEN`, :enum:`sf_type.SF_TYPE_BYTESEQ`, + * or :enum:`sf_type.SF_TYPE_DISPSTRING`. * * For :enum:`sf_type.SF_TYPE_STRING`, this field contains one or * more escaped characters if :member:`flags` has @@ -209,6 +213,10 @@ typedef struct sf_value { * encoded string. To decode this byte string, use * `sf_base64decode`. * + * For :enum:`sf_type.SF_TYPE_DISPSTRING`, this field may contain + * percent-encoded UTF-8 byte sequences. To decode it, use + * `sf_pctdecode`. + * * If :member:`vec.len ` == 0, :member:`vec.base * ` is guaranteed to be NULL. */ @@ -372,10 +380,6 @@ int sf_parser_inner_list(sf_parser *sfp, sf_value *dest); * :member:`dest->base ` must point to the buffer that * has sufficient space to store the unescaped string. * - * If there is no escape character in |src|, |*src| is assigned to - * |*dest|. This includes the case that :member:`src->len - * ` == 0. - * * This function sets the length of unescaped string to * :member:`dest->len `. */ @@ -394,14 +398,29 @@ void sf_unescape(sf_vec *dest, const sf_vec *src); * :member:`dest->base ` must point to the buffer that * has sufficient space to store the decoded byte string. * - * If :member:`src->len ` == 0, |*src| is assigned to - * |*dest|. - * * This function sets the length of decoded byte string to * :member:`dest->len `. */ void sf_base64decode(sf_vec *dest, const sf_vec *src); +/** + * @function + * + * `sf_pctdecode` decodes percent-encoded string |src| and writes the + * result into |dest|. |src| should be the pointer to + * :member:`sf_value.vec` of type :enum:`sf_type.SF_TYPE_DISPSTRING` + * produced by either `sf_parser_dict`, `sf_parser_list`, + * `sf_parser_inner_list`, `sf_parser_item`, or `sf_parser_param`, + * otherwise the behavior is undefined. + * + * :member:`dest->base ` must point to the buffer that + * has sufficient space to store the decoded byte string. + * + * This function sets the length of decoded byte string to + * :member:`dest->len `. + */ +void sf_pctdecode(sf_vec *dest, const sf_vec *src); + #ifdef __cplusplus } #endif diff --git a/deps/ngtcp2/ngtcp2.gyp b/deps/ngtcp2/ngtcp2.gyp index 0f2929b75478f1..68013580874b09 100644 --- a/deps/ngtcp2/ngtcp2.gyp +++ b/deps/ngtcp2/ngtcp2.gyp @@ -13,7 +13,6 @@ 'ngtcp2/lib/ngtcp2_cid.c', 'ngtcp2/lib/ngtcp2_conn.c', 'ngtcp2/lib/ngtcp2_conv.c', - 'ngtcp2/lib/ngtcp2_conversion.c', 'ngtcp2/lib/ngtcp2_crypto.c', 'ngtcp2/lib/ngtcp2_err.c', 'ngtcp2/lib/ngtcp2_frame_chain.c', @@ -37,8 +36,10 @@ 'ngtcp2/lib/ngtcp2_rob.c', 'ngtcp2/lib/ngtcp2_rst.c', 'ngtcp2/lib/ngtcp2_rtb.c', + 'ngtcp2/lib/ngtcp2_settings.c', 'ngtcp2/lib/ngtcp2_str.c', 'ngtcp2/lib/ngtcp2_strm.c', + 'ngtcp2/lib/ngtcp2_transport_params.c', 'ngtcp2/lib/ngtcp2_unreachable.c', 'ngtcp2/lib/ngtcp2_vec.c', 'ngtcp2/lib/ngtcp2_version.c', diff --git a/deps/ngtcp2/ngtcp2/crypto/boringssl/boringssl.c b/deps/ngtcp2/ngtcp2/crypto/boringssl/boringssl.c index 50b89110e36ff7..283063f738e2af 100644 --- a/deps/ngtcp2/ngtcp2/crypto/boringssl/boringssl.c +++ b/deps/ngtcp2/ngtcp2/crypto/boringssl/boringssl.c @@ -24,7 +24,7 @@ */ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include #include @@ -52,15 +52,15 @@ typedef struct ngtcp2_crypto_boringssl_cipher { } ngtcp2_crypto_boringssl_cipher; static ngtcp2_crypto_boringssl_cipher crypto_cipher_aes_128 = { - NGTCP2_CRYPTO_BORINGSSL_CIPHER_TYPE_AES_128, + NGTCP2_CRYPTO_BORINGSSL_CIPHER_TYPE_AES_128, }; static ngtcp2_crypto_boringssl_cipher crypto_cipher_aes_256 = { - NGTCP2_CRYPTO_BORINGSSL_CIPHER_TYPE_AES_256, + NGTCP2_CRYPTO_BORINGSSL_CIPHER_TYPE_AES_256, }; static ngtcp2_crypto_boringssl_cipher crypto_cipher_chacha20 = { - NGTCP2_CRYPTO_BORINGSSL_CIPHER_TYPE_CHACHA20, + NGTCP2_CRYPTO_BORINGSSL_CIPHER_TYPE_CHACHA20, }; ngtcp2_crypto_aead *ngtcp2_crypto_aead_aes_128_gcm(ngtcp2_crypto_aead *aead) { @@ -175,7 +175,7 @@ static ngtcp2_crypto_ctx *crypto_ctx_cipher_id(ngtcp2_crypto_ctx *ctx, ctx->hp.native_handle = (void *)crypto_cipher_id_get_hp(cipher_id); ctx->max_encryption = crypto_cipher_id_get_aead_max_encryption(cipher_id); ctx->max_decryption_failure = - crypto_cipher_id_get_aead_max_decryption_failure(cipher_id); + crypto_cipher_id_get_aead_max_decryption_failure(cipher_id); return ctx; } @@ -413,12 +413,12 @@ int ngtcp2_crypto_hp_mask(uint8_t *dest, const ngtcp2_crypto_cipher *hp, AES_ecb_encrypt(sample, dest, &ctx->aes_key, 1); return 0; case NGTCP2_CRYPTO_BORINGSSL_CIPHER_TYPE_CHACHA20: -#if defined(WORDS_BIGENDIAN) +#ifdef WORDS_BIGENDIAN counter = (uint32_t)sample[0] + (uint32_t)(sample[1] << 8) + (uint32_t)(sample[2] << 16) + (uint32_t)(sample[3] << 24); -#else /* !WORDS_BIGENDIAN */ +#else /* !defined(WORDS_BIGENDIAN) */ memcpy(&counter, sample, sizeof(counter)); -#endif /* !WORDS_BIGENDIAN */ +#endif /* !defined(WORDS_BIGENDIAN) */ CRYPTO_chacha_20(dest, PLAINTEXT, sizeof(PLAINTEXT) - 1, ctx->key, sample + sizeof(counter), counter); return 0; @@ -429,17 +429,16 @@ int ngtcp2_crypto_hp_mask(uint8_t *dest, const ngtcp2_crypto_cipher *hp, } int ngtcp2_crypto_read_write_crypto_data( - ngtcp2_conn *conn, ngtcp2_encryption_level encryption_level, - const uint8_t *data, size_t datalen) { + ngtcp2_conn *conn, ngtcp2_encryption_level encryption_level, + const uint8_t *data, size_t datalen) { SSL *ssl = ngtcp2_conn_get_tls_native_handle(conn); int rv; int err; if (SSL_provide_quic_data( - ssl, - ngtcp2_crypto_boringssl_from_ngtcp2_encryption_level( - encryption_level), - data, datalen) != 1) { + ssl, + ngtcp2_crypto_boringssl_from_ngtcp2_encryption_level(encryption_level), + data, datalen) != 1) { return -1; } @@ -522,7 +521,7 @@ int ngtcp2_crypto_set_local_transport_params(void *tls, const uint8_t *buf, } ngtcp2_encryption_level ngtcp2_crypto_boringssl_from_ssl_encryption_level( - enum ssl_encryption_level_t ssl_level) { + enum ssl_encryption_level_t ssl_level) { switch (ssl_level) { case ssl_encryption_initial: return NGTCP2_ENCRYPTION_LEVEL_INITIAL; @@ -540,7 +539,7 @@ ngtcp2_encryption_level ngtcp2_crypto_boringssl_from_ssl_encryption_level( enum ssl_encryption_level_t ngtcp2_crypto_boringssl_from_ngtcp2_encryption_level( - ngtcp2_encryption_level encryption_level) { + ngtcp2_encryption_level encryption_level) { switch (encryption_level) { case NGTCP2_ENCRYPTION_LEVEL_INITIAL: return ssl_encryption_initial; @@ -582,7 +581,7 @@ static int set_read_secret(SSL *ssl, enum ssl_encryption_level_t bssl_level, ngtcp2_crypto_conn_ref *conn_ref = SSL_get_app_data(ssl); ngtcp2_conn *conn = conn_ref->get_conn(conn_ref); ngtcp2_encryption_level level = - ngtcp2_crypto_boringssl_from_ssl_encryption_level(bssl_level); + ngtcp2_crypto_boringssl_from_ssl_encryption_level(bssl_level); (void)cipher; if (ngtcp2_crypto_derive_and_install_rx_key(conn, NULL, NULL, NULL, level, @@ -599,7 +598,7 @@ static int set_write_secret(SSL *ssl, enum ssl_encryption_level_t bssl_level, ngtcp2_crypto_conn_ref *conn_ref = SSL_get_app_data(ssl); ngtcp2_conn *conn = conn_ref->get_conn(conn_ref); ngtcp2_encryption_level level = - ngtcp2_crypto_boringssl_from_ssl_encryption_level(bssl_level); + ngtcp2_crypto_boringssl_from_ssl_encryption_level(bssl_level); (void)cipher; if (ngtcp2_crypto_derive_and_install_tx_key(conn, NULL, NULL, NULL, level, @@ -615,7 +614,7 @@ static int add_handshake_data(SSL *ssl, enum ssl_encryption_level_t bssl_level, ngtcp2_crypto_conn_ref *conn_ref = SSL_get_app_data(ssl); ngtcp2_conn *conn = conn_ref->get_conn(conn_ref); ngtcp2_encryption_level level = - ngtcp2_crypto_boringssl_from_ssl_encryption_level(bssl_level); + ngtcp2_crypto_boringssl_from_ssl_encryption_level(bssl_level); int rv; rv = ngtcp2_conn_submit_crypto_data(conn, level, data, datalen); @@ -644,8 +643,8 @@ static int send_alert(SSL *ssl, enum ssl_encryption_level_t bssl_level, } static SSL_QUIC_METHOD quic_method = { - set_read_secret, set_write_secret, add_handshake_data, - flush_flight, send_alert, + set_read_secret, set_write_secret, add_handshake_data, + flush_flight, send_alert, }; static void crypto_boringssl_configure_context(SSL_CTX *ssl_ctx) { diff --git a/deps/ngtcp2/ngtcp2/crypto/includes/ngtcp2/ngtcp2_crypto.h b/deps/ngtcp2/ngtcp2/crypto/includes/ngtcp2/ngtcp2_crypto.h index 06427d7a7cac70..68093d18b71b14 100644 --- a/deps/ngtcp2/ngtcp2/crypto/includes/ngtcp2/ngtcp2_crypto.h +++ b/deps/ngtcp2/ngtcp2/crypto/includes/ngtcp2/ngtcp2_crypto.h @@ -29,14 +29,45 @@ #ifdef __cplusplus extern "C" { -#endif +#endif /* defined(__cplusplus) */ #ifdef WIN32 # ifndef WIN32_LEAN_AND_MEAN # define WIN32_LEAN_AND_MEAN -# endif +# endif /* !defined(WIN32_LEAN_AND_MEAN) */ # include -#endif /* WIN32 */ +#endif /* defined(WIN32) */ + +/** + * @macrosection + * + * ngtcp2 crypto library error codes + */ + +/** + * @macro + * + * :macro:`NGTCP2_CRYPTO_ERR_INTERNAL` indicates an internal error. + */ +#define NGTCP2_CRYPTO_ERR_INTERNAL -201 + +/** + * @macro + * + * :macro:`NGTCP2_CRYPTO_ERR_UNREADABLE_TOKEN` indicates that a token + * is unreadable because it is not correctly formatted; or verifying + * the integrity protection failed. + */ +#define NGTCP2_CRYPTO_ERR_UNREADABLE_TOKEN -202 + +/** + * @macro + * + * :macro:`NGTCP2_CRYPTO_ERR_VERIFY_TOKEN` indicates that a token does + * not probe the client address; or the token validity has expired; or + * it contains invalid Connection ID. + */ +#define NGTCP2_CRYPTO_ERR_VERIFY_TOKEN -203 /** * @function @@ -135,12 +166,9 @@ ngtcp2_crypto_hkdf_extract(uint8_t *dest, const ngtcp2_crypto_md *md, * * This function returns 0 if it succeeds, or -1. */ -NGTCP2_EXTERN int ngtcp2_crypto_hkdf_expand(uint8_t *dest, size_t destlen, - const ngtcp2_crypto_md *md, - const uint8_t *secret, - size_t secretlen, - const uint8_t *info, - size_t infolen); +NGTCP2_EXTERN int ngtcp2_crypto_hkdf_expand( + uint8_t *dest, size_t destlen, const ngtcp2_crypto_md *md, + const uint8_t *secret, size_t secretlen, const uint8_t *info, size_t infolen); /** * @function @@ -318,8 +346,8 @@ ngtcp2_crypto_hp_mask_cb(uint8_t *dest, const ngtcp2_crypto_cipher *hp, * This function returns 0 if it succeeds, or -1. */ NGTCP2_EXTERN int ngtcp2_crypto_derive_and_install_rx_key( - ngtcp2_conn *conn, uint8_t *key, uint8_t *iv, uint8_t *hp, - ngtcp2_encryption_level level, const uint8_t *secret, size_t secretlen); + ngtcp2_conn *conn, uint8_t *key, uint8_t *iv, uint8_t *hp, + ngtcp2_encryption_level level, const uint8_t *secret, size_t secretlen); /** * @function @@ -365,8 +393,8 @@ NGTCP2_EXTERN int ngtcp2_crypto_derive_and_install_rx_key( * This function returns 0 if it succeeds, or -1. */ NGTCP2_EXTERN int ngtcp2_crypto_derive_and_install_tx_key( - ngtcp2_conn *conn, uint8_t *key, uint8_t *iv, uint8_t *hp, - ngtcp2_encryption_level level, const uint8_t *secret, size_t secretlen); + ngtcp2_conn *conn, uint8_t *key, uint8_t *iv, uint8_t *hp, + ngtcp2_encryption_level level, const uint8_t *secret, size_t secretlen); /** * @function @@ -405,11 +433,11 @@ NGTCP2_EXTERN int ngtcp2_crypto_derive_and_install_tx_key( * This function returns 0 if it succeeds, or -1. */ NGTCP2_EXTERN int ngtcp2_crypto_update_key( - ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, - ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_key, uint8_t *rx_iv, - ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_key, uint8_t *tx_iv, - const uint8_t *current_rx_secret, const uint8_t *current_tx_secret, - size_t secretlen); + ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, + ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_key, uint8_t *rx_iv, + ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_key, uint8_t *tx_iv, + const uint8_t *current_rx_secret, const uint8_t *current_tx_secret, + size_t secretlen); /** * @function @@ -422,11 +450,11 @@ NGTCP2_EXTERN int ngtcp2_crypto_update_key( * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`. */ NGTCP2_EXTERN int ngtcp2_crypto_update_key_cb( - ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, - ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_iv, - ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_iv, - const uint8_t *current_rx_secret, const uint8_t *current_tx_secret, - size_t secretlen, void *user_data); + ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, + ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_iv, + ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_iv, + const uint8_t *current_rx_secret, const uint8_t *current_tx_secret, + size_t secretlen, void *user_data); /** * @function @@ -514,8 +542,8 @@ ngtcp2_crypto_read_write_crypto_data(ngtcp2_conn *conn, * codes. */ NGTCP2_EXTERN int ngtcp2_crypto_recv_crypto_data_cb( - ngtcp2_conn *conn, ngtcp2_encryption_level encryption_level, - uint64_t offset, const uint8_t *data, size_t datalen, void *user_data); + ngtcp2_conn *conn, ngtcp2_encryption_level encryption_level, uint64_t offset, + const uint8_t *data, size_t datalen, void *user_data); /** * @function @@ -529,8 +557,8 @@ NGTCP2_EXTERN int ngtcp2_crypto_recv_crypto_data_cb( * This function returns 0 if it succeeds, or -1. */ NGTCP2_EXTERN int ngtcp2_crypto_generate_stateless_reset_token( - uint8_t *token, const uint8_t *secret, size_t secretlen, - const ngtcp2_cid *cid); + uint8_t *token, const uint8_t *secret, size_t secretlen, + const ngtcp2_cid *cid); /** * @macro @@ -540,7 +568,7 @@ NGTCP2_EXTERN int ngtcp2_crypto_generate_stateless_reset_token( * `ngtcp2_crypto_generate_retry_token` or * `ngtcp2_crypto_generate_regular_token`. */ -#define NGTCP2_CRYPTO_TOKEN_RAND_DATALEN 32 +#define NGTCP2_CRYPTO_TOKEN_RAND_DATALEN 16 /** * @macro @@ -550,6 +578,14 @@ NGTCP2_EXTERN int ngtcp2_crypto_generate_stateless_reset_token( */ #define NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY 0xb6 +/** + * @macro + * + * :macro:`NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY2` is the magic byte for + * Retry token generated by `ngtcp2_crypto_generate_retry_token2`. + */ +#define NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY2 0xb7 + /** * @macro * @@ -569,6 +605,17 @@ NGTCP2_EXTERN int ngtcp2_crypto_generate_stateless_reset_token( sizeof(ngtcp2_tstamp) + /* aead tag = */ 16 + \ NGTCP2_CRYPTO_TOKEN_RAND_DATALEN) +/** + * @macro + * + * :macro:`NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN2` is the maximum length of + * a token generated by `ngtcp2_crypto_generate_retry_token2`. + */ +#define NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN2 \ + (/* magic = */ 1 + sizeof(ngtcp2_sockaddr_union) + /* cid len = */ 1 + \ + NGTCP2_MAX_CIDLEN + sizeof(ngtcp2_tstamp) + /* aead tag = */ 16 + \ + NGTCP2_CRYPTO_TOKEN_RAND_DATALEN) + /** * @macro * @@ -595,13 +642,15 @@ NGTCP2_EXTERN int ngtcp2_crypto_generate_stateless_reset_token( * is a Destination Connection ID in Initial packet sent by client. * |ts| is the timestamp when the token is generated. * + * See also `ngtcp2_crypto_generate_retry_token2`. + * * This function returns the length of generated token if it succeeds, * or -1. */ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_crypto_generate_retry_token( - uint8_t *token, const uint8_t *secret, size_t secretlen, uint32_t version, - const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, - const ngtcp2_cid *retry_scid, const ngtcp2_cid *odcid, ngtcp2_tstamp ts); + uint8_t *token, const uint8_t *secret, size_t secretlen, uint32_t version, + const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, + const ngtcp2_cid *retry_scid, const ngtcp2_cid *odcid, ngtcp2_tstamp ts); /** * @function @@ -622,10 +671,76 @@ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_crypto_generate_retry_token( * This function returns 0 if it succeeds, or -1. */ NGTCP2_EXTERN int ngtcp2_crypto_verify_retry_token( - ngtcp2_cid *odcid, const uint8_t *token, size_t tokenlen, - const uint8_t *secret, size_t secretlen, uint32_t version, - const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, - const ngtcp2_cid *dcid, ngtcp2_duration timeout, ngtcp2_tstamp ts); + ngtcp2_cid *odcid, const uint8_t *token, size_t tokenlen, + const uint8_t *secret, size_t secretlen, uint32_t version, + const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, + const ngtcp2_cid *dcid, ngtcp2_duration timeout, ngtcp2_tstamp ts); + +/** + * @function + * + * `ngtcp2_crypto_generate_retry_token2` generates a token in the + * buffer pointed by |token| that is sent with Retry packet. The + * buffer pointed by |token| must have at least + * :macro:`NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN2` bytes long. The + * successfully generated token starts with + * :macro:`NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY2`. |secret| of length + * |secretlen| is a keying material to generate keys to encrypt the + * token. |version| is QUIC version. |remote_addr| of length + * |remote_addrlen| is an address of client. |retry_scid| is a Source + * Connection ID chosen by server, and set in Retry packet. |odcid| + * is a Destination Connection ID in Initial packet sent by client. + * |ts| is the timestamp when the token is generated. + * + * Use this function instead of `ngtcp2_crypto_generate_retry_token` + * if more detailed error handling is required when verifying the + * token. `ngtcp2_crypto_verify_retry_token2` must be used to verify + * the token. + * + * This function returns the length of generated token if it succeeds, + * or -1. + */ +NGTCP2_EXTERN ngtcp2_ssize ngtcp2_crypto_generate_retry_token2( + uint8_t *token, const uint8_t *secret, size_t secretlen, uint32_t version, + const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, + const ngtcp2_cid *retry_scid, const ngtcp2_cid *odcid, ngtcp2_tstamp ts); + +/** + * @function + * + * `ngtcp2_crypto_verify_retry_token2` verifies Retry token stored in + * the buffer pointed by |token| of length |tokenlen|. |secret| of + * length |secretlen| is a keying material to generate keys to decrypt + * the token. |version| is QUIC version of the Initial packet that + * contains this token. |remote_addr| of length |remote_addrlen| is + * an address of client. |dcid| is a Destination Connection ID in + * Initial packet sent by client. |timeout| is the period during + * which the token is valid. |ts| is the current timestamp. When + * validation succeeds, the extracted Destination Connection ID (which + * is the Destination Connection ID in Initial packet sent by client + * that triggered Retry packet) is stored in the buffer pointed by + * |odcid|. + * + * The token must be generated by + * `ngtcp2_crypto_generate_retry_token2`. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_CRYPTO_ERR_UNREADABLE_TOKEN` + * A token is badly formatted; or verifying the integrity + * protection failed. + * :macro:`NGTCP2_CRYPTO_ERR_VERIFY_TOKEN` + * A token does not probe the client address; or the token + * validity has expired; or it contains invalid Connection ID. + * :macro:`NGTCP2_CRYPTO_ERR_INTERNAL` + * Internal error occurred. + */ +NGTCP2_EXTERN int ngtcp2_crypto_verify_retry_token2( + ngtcp2_cid *odcid, const uint8_t *token, size_t tokenlen, + const uint8_t *secret, size_t secretlen, uint32_t version, + const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, + const ngtcp2_cid *dcid, ngtcp2_duration timeout, ngtcp2_tstamp ts); /** * @function @@ -644,9 +759,9 @@ NGTCP2_EXTERN int ngtcp2_crypto_verify_retry_token( * or -1. */ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_crypto_generate_regular_token( - uint8_t *token, const uint8_t *secret, size_t secretlen, - const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, - ngtcp2_tstamp ts); + uint8_t *token, const uint8_t *secret, size_t secretlen, + const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, + ngtcp2_tstamp ts); /** * @function @@ -661,9 +776,9 @@ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_crypto_generate_regular_token( * This function returns 0 if it succeeds, or -1. */ NGTCP2_EXTERN int ngtcp2_crypto_verify_regular_token( - const uint8_t *token, size_t tokenlen, const uint8_t *secret, - size_t secretlen, const ngtcp2_sockaddr *remote_addr, - ngtcp2_socklen remote_addrlen, ngtcp2_duration timeout, ngtcp2_tstamp ts); + const uint8_t *token, size_t tokenlen, const uint8_t *secret, + size_t secretlen, const ngtcp2_sockaddr *remote_addr, + ngtcp2_socklen remote_addrlen, ngtcp2_duration timeout, ngtcp2_tstamp ts); /** * @function @@ -685,9 +800,9 @@ NGTCP2_EXTERN int ngtcp2_crypto_verify_regular_token( * This function returns 0 if it succeeds, or -1. */ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_crypto_write_connection_close( - uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid, - const ngtcp2_cid *scid, uint64_t error_code, const uint8_t *reason, - size_t reasonlen); + uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid, + const ngtcp2_cid *scid, uint64_t error_code, const uint8_t *reason, + size_t reasonlen); /** * @function @@ -705,9 +820,9 @@ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_crypto_write_connection_close( * This function returns 0 if it succeeds, or -1. */ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_crypto_write_retry( - uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid, - const ngtcp2_cid *scid, const ngtcp2_cid *odcid, const uint8_t *token, - size_t tokenlen); + uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid, + const ngtcp2_cid *scid, const ngtcp2_cid *odcid, const uint8_t *token, + size_t tokenlen); /** * @function @@ -759,7 +874,7 @@ ngtcp2_crypto_aead_ctx_free(ngtcp2_crypto_aead_ctx *aead_ctx); * :member:`ngtcp2_callbacks.delete_crypto_aead_ctx` field. */ NGTCP2_EXTERN void ngtcp2_crypto_delete_crypto_aead_ctx_cb( - ngtcp2_conn *conn, ngtcp2_crypto_aead_ctx *aead_ctx, void *user_data); + ngtcp2_conn *conn, ngtcp2_crypto_aead_ctx *aead_ctx, void *user_data); /** * @function @@ -771,7 +886,7 @@ NGTCP2_EXTERN void ngtcp2_crypto_delete_crypto_aead_ctx_cb( * :member:`ngtcp2_callbacks.delete_crypto_cipher_ctx` field. */ NGTCP2_EXTERN void ngtcp2_crypto_delete_crypto_cipher_ctx_cb( - ngtcp2_conn *conn, ngtcp2_crypto_cipher_ctx *cipher_ctx, void *user_data); + ngtcp2_conn *conn, ngtcp2_crypto_cipher_ctx *cipher_ctx, void *user_data); /** * @function @@ -813,7 +928,7 @@ typedef struct ngtcp2_crypto_conn_ref ngtcp2_crypto_conn_ref; * must return non-NULL :type:`ngtcp2_conn` object. */ typedef ngtcp2_conn *(*ngtcp2_crypto_get_conn)( - ngtcp2_crypto_conn_ref *conn_ref); + ngtcp2_crypto_conn_ref *conn_ref); /** * @struct @@ -836,6 +951,6 @@ typedef struct ngtcp2_crypto_conn_ref { #ifdef __cplusplus } -#endif +#endif /* defined(__cplusplus) */ -#endif /* NGTCP2_CRYPTO_H */ +#endif /* !defined(NGTCP2_CRYPTO_H) */ diff --git a/deps/ngtcp2/ngtcp2/crypto/includes/ngtcp2/ngtcp2_crypto_boringssl.h b/deps/ngtcp2/ngtcp2/crypto/includes/ngtcp2/ngtcp2_crypto_boringssl.h index 43a3c36f03a382..89b26f0bc0e346 100644 --- a/deps/ngtcp2/ngtcp2/crypto/includes/ngtcp2/ngtcp2_crypto_boringssl.h +++ b/deps/ngtcp2/ngtcp2/crypto/includes/ngtcp2/ngtcp2_crypto_boringssl.h @@ -31,7 +31,7 @@ #ifdef __cplusplus extern "C" { -#endif +#endif /* defined(__cplusplus) */ /** * @function @@ -42,7 +42,7 @@ extern "C" { */ NGTCP2_EXTERN ngtcp2_encryption_level ngtcp2_crypto_boringssl_from_ssl_encryption_level( - enum ssl_encryption_level_t ssl_level); + enum ssl_encryption_level_t ssl_level); /** * @function @@ -53,7 +53,7 @@ ngtcp2_crypto_boringssl_from_ssl_encryption_level( */ NGTCP2_EXTERN enum ssl_encryption_level_t ngtcp2_crypto_boringssl_from_ngtcp2_encryption_level( - ngtcp2_encryption_level encryption_level); + ngtcp2_encryption_level encryption_level); /** * @function @@ -99,6 +99,6 @@ ngtcp2_crypto_boringssl_configure_client_context(SSL_CTX *ssl_ctx); #ifdef __cplusplus } -#endif +#endif /* defined(__cplusplus) */ -#endif /* NGTCP2_CRYPTO_BORINGSSL_H */ +#endif /* !defined(NGTCP2_CRYPTO_BORINGSSL_H) */ diff --git a/deps/ngtcp2/ngtcp2/crypto/includes/ngtcp2/ngtcp2_crypto_picotls.h b/deps/ngtcp2/ngtcp2/crypto/includes/ngtcp2/ngtcp2_crypto_picotls.h index 61020bb3a8f376..d3f2f978e79923 100644 --- a/deps/ngtcp2/ngtcp2/crypto/includes/ngtcp2/ngtcp2_crypto_picotls.h +++ b/deps/ngtcp2/ngtcp2/crypto/includes/ngtcp2/ngtcp2_crypto_picotls.h @@ -31,7 +31,7 @@ #ifdef __cplusplus extern "C" { -#endif +#endif /* defined(__cplusplus) */ /** * @struct @@ -79,7 +79,7 @@ ngtcp2_crypto_picotls_from_epoch(size_t epoch); * Picotls backend. */ NGTCP2_EXTERN size_t ngtcp2_crypto_picotls_from_ngtcp2_encryption_level( - ngtcp2_encryption_level encryption_level); + ngtcp2_encryption_level encryption_level); /** * @function @@ -160,7 +160,7 @@ ngtcp2_crypto_picotls_configure_client_context(ptls_context_t *ctx); * It returns 0 if it succeeds, or -1. */ NGTCP2_EXTERN int ngtcp2_crypto_picotls_configure_server_session( - ngtcp2_crypto_picotls_ctx *cptls); + ngtcp2_crypto_picotls_ctx *cptls); /** * @function @@ -224,8 +224,8 @@ ngtcp2_crypto_picotls_deconfigure_session(ngtcp2_crypto_picotls_ctx *cptls); * :macro:`NGTCP2_TLSEXT_QUIC_TRANSPORT_PARAMETERS_V1`. */ NGTCP2_EXTERN int ngtcp2_crypto_picotls_collect_extension( - ptls_t *ptls, struct st_ptls_handshake_properties_t *properties, - uint16_t type); + ptls_t *ptls, struct st_ptls_handshake_properties_t *properties, + uint16_t type); /** * @function @@ -236,11 +236,11 @@ NGTCP2_EXTERN int ngtcp2_crypto_picotls_collect_extension( * extensions are ignored. */ NGTCP2_EXTERN int ngtcp2_crypto_picotls_collected_extensions( - ptls_t *ptls, struct st_ptls_handshake_properties_t *properties, - ptls_raw_extension_t *extensions); + ptls_t *ptls, struct st_ptls_handshake_properties_t *properties, + ptls_raw_extension_t *extensions); #ifdef __cplusplus } -#endif +#endif /* defined(__cplusplus) */ -#endif /* NGTCP2_CRYPTO_PICOTLS_H */ +#endif /* !defined(NGTCP2_CRYPTO_PICOTLS_H) */ diff --git a/deps/ngtcp2/ngtcp2/crypto/includes/ngtcp2/ngtcp2_crypto_quictls.h b/deps/ngtcp2/ngtcp2/crypto/includes/ngtcp2/ngtcp2_crypto_quictls.h index b25c13b81c8b18..22e3eda0c4160a 100644 --- a/deps/ngtcp2/ngtcp2/crypto/includes/ngtcp2/ngtcp2_crypto_quictls.h +++ b/deps/ngtcp2/ngtcp2/crypto/includes/ngtcp2/ngtcp2_crypto_quictls.h @@ -31,7 +31,7 @@ #ifdef __cplusplus extern "C" { -#endif +#endif /* defined(__cplusplus) */ /** * @macrosection @@ -70,7 +70,7 @@ extern "C" { */ NGTCP2_EXTERN ngtcp2_encryption_level ngtcp2_crypto_quictls_from_ossl_encryption_level( - OSSL_ENCRYPTION_LEVEL ossl_level); + OSSL_ENCRYPTION_LEVEL ossl_level); /** * @function @@ -81,7 +81,7 @@ ngtcp2_crypto_quictls_from_ossl_encryption_level( */ NGTCP2_EXTERN OSSL_ENCRYPTION_LEVEL ngtcp2_crypto_quictls_from_ngtcp2_encryption_level( - ngtcp2_encryption_level encryption_level); + ngtcp2_encryption_level encryption_level); /** * @function @@ -142,6 +142,6 @@ NGTCP2_EXTERN int ngtcp2_crypto_quictls_init(void); #ifdef __cplusplus } -#endif +#endif /* defined(__cplusplus) */ -#endif /* NGTCP2_CRYPTO_QUICTLS_H */ +#endif /* !defined(NGTCP2_CRYPTO_QUICTLS_H) */ diff --git a/deps/ngtcp2/ngtcp2/crypto/includes/ngtcp2/ngtcp2_crypto_wolfssl.h b/deps/ngtcp2/ngtcp2/crypto/includes/ngtcp2/ngtcp2_crypto_wolfssl.h index e1d621adce94d8..e95056de5926a9 100644 --- a/deps/ngtcp2/ngtcp2/crypto/includes/ngtcp2/ngtcp2_crypto_wolfssl.h +++ b/deps/ngtcp2/ngtcp2/crypto/includes/ngtcp2/ngtcp2_crypto_wolfssl.h @@ -33,7 +33,7 @@ #ifdef __cplusplus extern "C" { -#endif +#endif /* defined(__cplusplus) */ /** * @function @@ -44,7 +44,7 @@ extern "C" { */ NGTCP2_EXTERN ngtcp2_encryption_level ngtcp2_crypto_wolfssl_from_wolfssl_encryption_level( - WOLFSSL_ENCRYPTION_LEVEL wolfssl_level); + WOLFSSL_ENCRYPTION_LEVEL wolfssl_level); /** * @function @@ -55,7 +55,7 @@ ngtcp2_crypto_wolfssl_from_wolfssl_encryption_level( */ NGTCP2_EXTERN WOLFSSL_ENCRYPTION_LEVEL ngtcp2_crypto_wolfssl_from_ngtcp2_encryption_level( - ngtcp2_encryption_level encryption_level); + ngtcp2_encryption_level encryption_level); /** * @function @@ -101,6 +101,6 @@ ngtcp2_crypto_wolfssl_configure_client_context(WOLFSSL_CTX *ssl_ctx); #ifdef __cplusplus } -#endif +#endif /* defined(__cplusplus) */ -#endif /* NGTCP2_CRYPTO_WOLFSSL_H */ +#endif /* !defined(NGTCP2_CRYPTO_WOLFSSL_H) */ diff --git a/deps/ngtcp2/ngtcp2/crypto/picotls/picotls.c b/deps/ngtcp2/ngtcp2/crypto/picotls/picotls.c index 35bfb7b2f8fa19..c3f14f5d444bac 100644 --- a/deps/ngtcp2/ngtcp2/crypto/picotls/picotls.c +++ b/deps/ngtcp2/ngtcp2/crypto/picotls/picotls.c @@ -24,7 +24,7 @@ */ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include #include @@ -79,7 +79,7 @@ crypto_cipher_suite_get_aead_max_encryption(ptls_cipher_suite_t *cs) { if (cs->aead == &ptls_openssl_chacha20poly1305) { return NGTCP2_CRYPTO_MAX_ENCRYPTION_CHACHA20_POLY1305; } -#endif /* PTLS_OPENSSL_HAVE_CHACHA20_POLY1305 */ +#endif /* defined(PTLS_OPENSSL_HAVE_CHACHA20_POLY1305) */ return 0; } @@ -95,7 +95,7 @@ crypto_cipher_suite_get_aead_max_decryption_failure(ptls_cipher_suite_t *cs) { if (cs->aead == &ptls_openssl_chacha20poly1305) { return NGTCP2_CRYPTO_MAX_DECRYPTION_FAILURE_CHACHA20_POLY1305; } -#endif /* PTLS_OPENSSL_HAVE_CHACHA20_POLY1305 */ +#endif /* defined(PTLS_OPENSSL_HAVE_CHACHA20_POLY1305) */ return 0; } @@ -114,7 +114,7 @@ crypto_cipher_suite_get_hp(ptls_cipher_suite_t *cs) { if (cs->aead == &ptls_openssl_chacha20poly1305) { return &ptls_openssl_chacha20; } -#endif /* PTLS_OPENSSL_HAVE_CHACHA20_POLY1305 */ +#endif /* defined(PTLS_OPENSSL_HAVE_CHACHA20_POLY1305) */ return NULL; } @@ -124,7 +124,7 @@ static int supported_cipher_suite(ptls_cipher_suite_t *cs) { cs->aead == &ptls_openssl_aes256gcm #ifdef PTLS_OPENSSL_HAVE_CHACHA20_POLY1305 || cs->aead == &ptls_openssl_chacha20poly1305 -#endif /* PTLS_OPENSSL_HAVE_CHACHA20_POLY1305 */ +#endif /* defined(PTLS_OPENSSL_HAVE_CHACHA20_POLY1305) */ ; } diff --git a/deps/ngtcp2/ngtcp2/crypto/quictls/quictls.c b/deps/ngtcp2/ngtcp2/crypto/quictls/quictls.c index 330ca687b44666..592e5a86535356 100644 --- a/deps/ngtcp2/ngtcp2/crypto/quictls/quictls.c +++ b/deps/ngtcp2/ngtcp2/crypto/quictls/quictls.c @@ -24,7 +24,7 @@ */ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -339,7 +339,7 @@ static ngtcp2_crypto_ctx *crypto_ctx_cipher_id(ngtcp2_crypto_ctx *ctx, ctx->hp.native_handle = (void *)crypto_cipher_id_get_hp(cipher_id); ctx->max_encryption = crypto_cipher_id_get_aead_max_encryption(cipher_id); ctx->max_decryption_failure = - crypto_cipher_id_get_aead_max_decryption_failure(cipher_id); + crypto_cipher_id_get_aead_max_decryption_failure(cipher_id); return ctx; } @@ -527,14 +527,14 @@ int ngtcp2_crypto_hkdf_extract(uint8_t *dest, const ngtcp2_crypto_md *md, EVP_KDF_CTX *kctx = EVP_KDF_CTX_new(kdf); int mode = EVP_KDF_HKDF_MODE_EXTRACT_ONLY; OSSL_PARAM params[] = { - OSSL_PARAM_construct_int(OSSL_KDF_PARAM_MODE, &mode), - OSSL_PARAM_construct_utf8_string(OSSL_KDF_PARAM_DIGEST, - (char *)EVP_MD_get0_name(prf), 0), - OSSL_PARAM_construct_octet_string(OSSL_KDF_PARAM_KEY, (void *)secret, - secretlen), - OSSL_PARAM_construct_octet_string(OSSL_KDF_PARAM_SALT, (void *)salt, - saltlen), - OSSL_PARAM_construct_end(), + OSSL_PARAM_construct_int(OSSL_KDF_PARAM_MODE, &mode), + OSSL_PARAM_construct_utf8_string(OSSL_KDF_PARAM_DIGEST, + (char *)EVP_MD_get0_name(prf), 0), + OSSL_PARAM_construct_octet_string(OSSL_KDF_PARAM_KEY, (void *)secret, + secretlen), + OSSL_PARAM_construct_octet_string(OSSL_KDF_PARAM_SALT, (void *)salt, + saltlen), + OSSL_PARAM_construct_end(), }; int rv = 0; @@ -584,14 +584,14 @@ int ngtcp2_crypto_hkdf_expand(uint8_t *dest, size_t destlen, EVP_KDF_CTX *kctx = EVP_KDF_CTX_new(kdf); int mode = EVP_KDF_HKDF_MODE_EXPAND_ONLY; OSSL_PARAM params[] = { - OSSL_PARAM_construct_int(OSSL_KDF_PARAM_MODE, &mode), - OSSL_PARAM_construct_utf8_string(OSSL_KDF_PARAM_DIGEST, - (char *)EVP_MD_get0_name(prf), 0), - OSSL_PARAM_construct_octet_string(OSSL_KDF_PARAM_KEY, (void *)secret, - secretlen), - OSSL_PARAM_construct_octet_string(OSSL_KDF_PARAM_INFO, (void *)info, - infolen), - OSSL_PARAM_construct_end(), + OSSL_PARAM_construct_int(OSSL_KDF_PARAM_MODE, &mode), + OSSL_PARAM_construct_utf8_string(OSSL_KDF_PARAM_DIGEST, + (char *)EVP_MD_get0_name(prf), 0), + OSSL_PARAM_construct_octet_string(OSSL_KDF_PARAM_KEY, (void *)secret, + secretlen), + OSSL_PARAM_construct_octet_string(OSSL_KDF_PARAM_INFO, (void *)info, + infolen), + OSSL_PARAM_construct_end(), }; int rv = 0; @@ -639,15 +639,15 @@ int ngtcp2_crypto_hkdf(uint8_t *dest, size_t destlen, EVP_KDF *kdf = crypto_kdf_hkdf(); EVP_KDF_CTX *kctx = EVP_KDF_CTX_new(kdf); OSSL_PARAM params[] = { - OSSL_PARAM_construct_utf8_string(OSSL_KDF_PARAM_DIGEST, - (char *)EVP_MD_get0_name(prf), 0), - OSSL_PARAM_construct_octet_string(OSSL_KDF_PARAM_KEY, (void *)secret, - secretlen), - OSSL_PARAM_construct_octet_string(OSSL_KDF_PARAM_SALT, (void *)salt, - saltlen), - OSSL_PARAM_construct_octet_string(OSSL_KDF_PARAM_INFO, (void *)info, - infolen), - OSSL_PARAM_construct_end(), + OSSL_PARAM_construct_utf8_string(OSSL_KDF_PARAM_DIGEST, + (char *)EVP_MD_get0_name(prf), 0), + OSSL_PARAM_construct_octet_string(OSSL_KDF_PARAM_KEY, (void *)secret, + secretlen), + OSSL_PARAM_construct_octet_string(OSSL_KDF_PARAM_SALT, (void *)salt, + saltlen), + OSSL_PARAM_construct_octet_string(OSSL_KDF_PARAM_INFO, (void *)info, + infolen), + OSSL_PARAM_construct_end(), }; int rv = 0; @@ -672,7 +672,7 @@ int ngtcp2_crypto_hkdf(uint8_t *dest, size_t destlen, if (EVP_PKEY_derive_init(pctx) != 1 || EVP_PKEY_CTX_hkdf_mode(pctx, EVP_PKEY_HKDEF_MODE_EXTRACT_AND_EXPAND) != - 1 || + 1 || EVP_PKEY_CTX_set_hkdf_md(pctx, prf) != 1 || EVP_PKEY_CTX_set1_hkdf_salt(pctx, salt, (int)saltlen) != 1 || EVP_PKEY_CTX_set1_hkdf_key(pctx, secret, (int)secretlen) != 1 || @@ -699,9 +699,9 @@ int ngtcp2_crypto_encrypt(uint8_t *dest, const ngtcp2_crypto_aead *aead, int len; #if OPENSSL_VERSION_NUMBER >= 0x30000000L OSSL_PARAM params[] = { - OSSL_PARAM_construct_octet_string(OSSL_CIPHER_PARAM_AEAD_TAG, - dest + plaintextlen, taglen), - OSSL_PARAM_construct_end(), + OSSL_PARAM_construct_octet_string(OSSL_CIPHER_PARAM_AEAD_TAG, + dest + plaintextlen, taglen), + OSSL_PARAM_construct_end(), }; #endif /* OPENSSL_VERSION_NUMBER >= 0x30000000L */ @@ -794,16 +794,16 @@ int ngtcp2_crypto_hp_mask(uint8_t *dest, const ngtcp2_crypto_cipher *hp, } int ngtcp2_crypto_read_write_crypto_data( - ngtcp2_conn *conn, ngtcp2_encryption_level encryption_level, - const uint8_t *data, size_t datalen) { + ngtcp2_conn *conn, ngtcp2_encryption_level encryption_level, + const uint8_t *data, size_t datalen) { SSL *ssl = ngtcp2_conn_get_tls_native_handle(conn); int rv; int err; if (SSL_provide_quic_data( - ssl, - ngtcp2_crypto_quictls_from_ngtcp2_encryption_level(encryption_level), - data, datalen) != 1) { + ssl, + ngtcp2_crypto_quictls_from_ngtcp2_encryption_level(encryption_level), + data, datalen) != 1) { return -1; } @@ -874,7 +874,7 @@ int ngtcp2_crypto_set_local_transport_params(void *tls, const uint8_t *buf, } ngtcp2_encryption_level ngtcp2_crypto_quictls_from_ossl_encryption_level( - OSSL_ENCRYPTION_LEVEL ossl_level) { + OSSL_ENCRYPTION_LEVEL ossl_level) { switch (ossl_level) { case ssl_encryption_initial: return NGTCP2_ENCRYPTION_LEVEL_INITIAL; @@ -892,7 +892,7 @@ ngtcp2_encryption_level ngtcp2_crypto_quictls_from_ossl_encryption_level( OSSL_ENCRYPTION_LEVEL ngtcp2_crypto_quictls_from_ngtcp2_encryption_level( - ngtcp2_encryption_level encryption_level) { + ngtcp2_encryption_level encryption_level) { switch (encryption_level) { case NGTCP2_ENCRYPTION_LEVEL_INITIAL: return ssl_encryption_initial; @@ -934,7 +934,7 @@ static int set_encryption_secrets(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level, ngtcp2_crypto_conn_ref *conn_ref = SSL_get_app_data(ssl); ngtcp2_conn *conn = conn_ref->get_conn(conn_ref); ngtcp2_encryption_level level = - ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level); + ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level); if (rx_secret && ngtcp2_crypto_derive_and_install_rx_key(conn, NULL, NULL, NULL, level, @@ -956,7 +956,7 @@ static int add_handshake_data(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level, ngtcp2_crypto_conn_ref *conn_ref = SSL_get_app_data(ssl); ngtcp2_conn *conn = conn_ref->get_conn(conn_ref); ngtcp2_encryption_level level = - ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level); + ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level); int rv; rv = ngtcp2_conn_submit_crypto_data(conn, level, data, datalen); @@ -973,8 +973,7 @@ static int flush_flight(SSL *ssl) { return 1; } -static int send_alert(SSL *ssl, enum ssl_encryption_level_t level, - uint8_t alert) { +static int send_alert(SSL *ssl, OSSL_ENCRYPTION_LEVEL level, uint8_t alert) { ngtcp2_crypto_conn_ref *conn_ref = SSL_get_app_data(ssl); ngtcp2_conn *conn = conn_ref->get_conn(conn_ref); (void)level; @@ -985,14 +984,14 @@ static int send_alert(SSL *ssl, enum ssl_encryption_level_t level, } static SSL_QUIC_METHOD quic_method = { - set_encryption_secrets, - add_handshake_data, - flush_flight, - send_alert, + set_encryption_secrets, + add_handshake_data, + flush_flight, + send_alert, #ifdef LIBRESSL_VERSION_NUMBER - NULL, - NULL, -#endif /* LIBRESSL_VERSION_NUMBER */ + NULL, + NULL, +#endif /* defined(LIBRESSL_VERSION_NUMBER) */ }; static void crypto_quictls_configure_context(SSL_CTX *ssl_ctx) { diff --git a/deps/ngtcp2/ngtcp2/crypto/shared.c b/deps/ngtcp2/ngtcp2/crypto/shared.c index 162094a375cb8b..98cd4de7e8097d 100644 --- a/deps/ngtcp2/ngtcp2/crypto/shared.c +++ b/deps/ngtcp2/ngtcp2/crypto/shared.c @@ -27,9 +27,9 @@ #ifdef WIN32 # include # include -#else +#elif defined(HAVE_NETINET_IN_H) # include -#endif +#endif /* defined(HAVE_NETINET_IN_H) */ #include #include @@ -110,13 +110,11 @@ int ngtcp2_crypto_derive_initial_secrets(uint8_t *rx_secret, uint8_t *tx_secret, } if (ngtcp2_crypto_hkdf_expand_label( - client_secret, NGTCP2_CRYPTO_INITIAL_SECRETLEN, &ctx.md, - initial_secret, NGTCP2_CRYPTO_INITIAL_SECRETLEN, CLABEL, - sizeof(CLABEL) - 1) != 0 || + client_secret, NGTCP2_CRYPTO_INITIAL_SECRETLEN, &ctx.md, initial_secret, + NGTCP2_CRYPTO_INITIAL_SECRETLEN, CLABEL, sizeof(CLABEL) - 1) != 0 || ngtcp2_crypto_hkdf_expand_label( - server_secret, NGTCP2_CRYPTO_INITIAL_SECRETLEN, &ctx.md, - initial_secret, NGTCP2_CRYPTO_INITIAL_SECRETLEN, SLABEL, - sizeof(SLABEL) - 1) != 0) { + server_secret, NGTCP2_CRYPTO_INITIAL_SECRETLEN, &ctx.md, initial_secret, + NGTCP2_CRYPTO_INITIAL_SECRETLEN, SLABEL, sizeof(SLABEL) - 1) != 0) { return -1; } @@ -125,13 +123,13 @@ int ngtcp2_crypto_derive_initial_secrets(uint8_t *rx_secret, uint8_t *tx_secret, size_t ngtcp2_crypto_packet_protection_ivlen(const ngtcp2_crypto_aead *aead) { size_t noncelen = ngtcp2_crypto_aead_noncelen(aead); - return ngtcp2_max(8, noncelen); + return ngtcp2_max_size(8, noncelen); } int ngtcp2_crypto_derive_packet_protection_key( - uint8_t *key, uint8_t *iv, uint8_t *hp_key, uint32_t version, - const ngtcp2_crypto_aead *aead, const ngtcp2_crypto_md *md, - const uint8_t *secret, size_t secretlen) { + uint8_t *key, uint8_t *iv, uint8_t *hp_key, uint32_t version, + const ngtcp2_crypto_aead *aead, const ngtcp2_crypto_md *md, + const uint8_t *secret, size_t secretlen) { static const uint8_t KEY_LABEL_V1[] = "quic key"; static const uint8_t IV_LABEL_V1[] = "quic iv"; static const uint8_t HP_KEY_LABEL_V1[] = "quic hp"; @@ -305,8 +303,8 @@ int ngtcp2_crypto_derive_and_install_rx_key(ngtcp2_conn *conn, uint8_t *key, } break; case NGTCP2_ENCRYPTION_LEVEL_HANDSHAKE: - rv = ngtcp2_conn_install_rx_handshake_key(conn, &aead_ctx, iv, ivlen, - &hp_ctx); + rv = + ngtcp2_conn_install_rx_handshake_key(conn, &aead_ctx, iv, ivlen, &hp_ctx); if (rv != 0) { goto fail; } @@ -455,8 +453,8 @@ int ngtcp2_crypto_derive_and_install_tx_key(ngtcp2_conn *conn, uint8_t *key, } break; case NGTCP2_ENCRYPTION_LEVEL_HANDSHAKE: - rv = ngtcp2_conn_install_tx_handshake_key(conn, &aead_ctx, iv, ivlen, - &hp_ctx); + rv = + ngtcp2_conn_install_tx_handshake_key(conn, &aead_ctx, iv, ivlen, &hp_ctx); if (rv != 0) { goto fail; } @@ -489,10 +487,10 @@ int ngtcp2_crypto_derive_and_install_tx_key(ngtcp2_conn *conn, uint8_t *key, } int ngtcp2_crypto_derive_and_install_initial_key( - ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, - uint8_t *initial_secret, uint8_t *rx_key, uint8_t *rx_iv, - uint8_t *rx_hp_key, uint8_t *tx_key, uint8_t *tx_iv, uint8_t *tx_hp_key, - uint32_t version, const ngtcp2_cid *client_dcid) { + ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, + uint8_t *initial_secret, uint8_t *rx_key, uint8_t *rx_iv, uint8_t *rx_hp_key, + uint8_t *tx_key, uint8_t *tx_iv, uint8_t *tx_hp_key, uint32_t version, + const ngtcp2_cid *client_dcid) { uint8_t rx_secretbuf[NGTCP2_CRYPTO_INITIAL_SECRETLEN]; uint8_t tx_secretbuf[NGTCP2_CRYPTO_INITIAL_SECRETLEN]; uint8_t initial_secretbuf[NGTCP2_CRYPTO_INITIAL_SECRETLEN]; @@ -548,21 +546,20 @@ int ngtcp2_crypto_derive_and_install_initial_key( ngtcp2_conn_set_initial_crypto_ctx(conn, &ctx); if (ngtcp2_crypto_derive_initial_secrets( - rx_secret, tx_secret, initial_secret, version, client_dcid, - server ? NGTCP2_CRYPTO_SIDE_SERVER : NGTCP2_CRYPTO_SIDE_CLIENT) != - 0) { + rx_secret, tx_secret, initial_secret, version, client_dcid, + server ? NGTCP2_CRYPTO_SIDE_SERVER : NGTCP2_CRYPTO_SIDE_CLIENT) != 0) { return -1; } if (ngtcp2_crypto_derive_packet_protection_key( - rx_key, rx_iv, rx_hp_key, version, &ctx.aead, &ctx.md, rx_secret, - NGTCP2_CRYPTO_INITIAL_SECRETLEN) != 0) { + rx_key, rx_iv, rx_hp_key, version, &ctx.aead, &ctx.md, rx_secret, + NGTCP2_CRYPTO_INITIAL_SECRETLEN) != 0) { return -1; } if (ngtcp2_crypto_derive_packet_protection_key( - tx_key, tx_iv, tx_hp_key, version, &ctx.aead, &ctx.md, tx_secret, - NGTCP2_CRYPTO_INITIAL_SECRETLEN) != 0) { + tx_key, tx_iv, tx_hp_key, version, &ctx.aead, &ctx.md, tx_secret, + NGTCP2_CRYPTO_INITIAL_SECRETLEN) != 0) { return -1; } @@ -631,10 +628,10 @@ int ngtcp2_crypto_derive_and_install_initial_key( } int ngtcp2_crypto_derive_and_install_vneg_initial_key( - ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, - uint8_t *initial_secret, uint8_t *rx_key, uint8_t *rx_iv, - uint8_t *rx_hp_key, uint8_t *tx_key, uint8_t *tx_iv, uint8_t *tx_hp_key, - uint32_t version, const ngtcp2_cid *client_dcid) { + ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, + uint8_t *initial_secret, uint8_t *rx_key, uint8_t *rx_iv, uint8_t *rx_hp_key, + uint8_t *tx_key, uint8_t *tx_iv, uint8_t *tx_hp_key, uint32_t version, + const ngtcp2_cid *client_dcid) { uint8_t rx_secretbuf[NGTCP2_CRYPTO_INITIAL_SECRETLEN]; uint8_t tx_secretbuf[NGTCP2_CRYPTO_INITIAL_SECRETLEN]; uint8_t initial_secretbuf[NGTCP2_CRYPTO_INITIAL_SECRETLEN]; @@ -682,21 +679,20 @@ int ngtcp2_crypto_derive_and_install_vneg_initial_key( } if (ngtcp2_crypto_derive_initial_secrets( - rx_secret, tx_secret, initial_secret, version, client_dcid, - server ? NGTCP2_CRYPTO_SIDE_SERVER : NGTCP2_CRYPTO_SIDE_CLIENT) != - 0) { + rx_secret, tx_secret, initial_secret, version, client_dcid, + server ? NGTCP2_CRYPTO_SIDE_SERVER : NGTCP2_CRYPTO_SIDE_CLIENT) != 0) { return -1; } if (ngtcp2_crypto_derive_packet_protection_key( - rx_key, rx_iv, rx_hp_key, version, &ctx->aead, &ctx->md, rx_secret, - NGTCP2_CRYPTO_INITIAL_SECRETLEN) != 0) { + rx_key, rx_iv, rx_hp_key, version, &ctx->aead, &ctx->md, rx_secret, + NGTCP2_CRYPTO_INITIAL_SECRETLEN) != 0) { return -1; } if (ngtcp2_crypto_derive_packet_protection_key( - tx_key, tx_iv, tx_hp_key, version, &ctx->aead, &ctx->md, tx_secret, - NGTCP2_CRYPTO_INITIAL_SECRETLEN) != 0) { + tx_key, tx_iv, tx_hp_key, version, &ctx->aead, &ctx->md, tx_secret, + NGTCP2_CRYPTO_INITIAL_SECRETLEN) != 0) { return -1; } @@ -721,8 +717,8 @@ int ngtcp2_crypto_derive_and_install_vneg_initial_key( } rv = ngtcp2_conn_install_vneg_initial_key( - conn, version, &rx_aead_ctx, rx_iv, &rx_hp_ctx, &tx_aead_ctx, tx_iv, - &tx_hp_ctx, NGTCP2_CRYPTO_INITIAL_IVLEN); + conn, version, &rx_aead_ctx, rx_iv, &rx_hp_ctx, &tx_aead_ctx, tx_iv, + &tx_hp_ctx, NGTCP2_CRYPTO_INITIAL_IVLEN); if (rv != 0) { goto fail; } @@ -739,11 +735,11 @@ int ngtcp2_crypto_derive_and_install_vneg_initial_key( } int ngtcp2_crypto_update_key( - ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, - ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_key, uint8_t *rx_iv, - ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_key, uint8_t *tx_iv, - const uint8_t *current_rx_secret, const uint8_t *current_tx_secret, - size_t secretlen) { + ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, + ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_key, uint8_t *rx_iv, + ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_key, uint8_t *tx_iv, + const uint8_t *current_rx_secret, const uint8_t *current_tx_secret, + size_t secretlen) { const ngtcp2_crypto_ctx *ctx = ngtcp2_conn_get_crypto_ctx(conn); const ngtcp2_crypto_aead *aead = &ctx->aead; const ngtcp2_crypto_md *md = &ctx->md; @@ -756,7 +752,7 @@ int ngtcp2_crypto_update_key( } if (ngtcp2_crypto_derive_packet_protection_key( - rx_key, rx_iv, NULL, version, aead, md, rx_secret, secretlen) != 0) { + rx_key, rx_iv, NULL, version, aead, md, rx_secret, secretlen) != 0) { return -1; } @@ -766,7 +762,7 @@ int ngtcp2_crypto_update_key( } if (ngtcp2_crypto_derive_packet_protection_key( - tx_key, tx_iv, NULL, version, aead, md, tx_secret, secretlen) != 0) { + tx_key, tx_iv, NULL, version, aead, md, tx_secret, secretlen) != 0) { return -1; } @@ -819,20 +815,19 @@ int ngtcp2_crypto_hp_mask_cb(uint8_t *dest, const ngtcp2_crypto_cipher *hp, } int ngtcp2_crypto_update_key_cb( - ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, - ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_iv, - ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_iv, - const uint8_t *current_rx_secret, const uint8_t *current_tx_secret, - size_t secretlen, void *user_data) { + ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, + ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_iv, + ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_iv, + const uint8_t *current_rx_secret, const uint8_t *current_tx_secret, + size_t secretlen, void *user_data) { uint8_t rx_key[64]; uint8_t tx_key[64]; (void)conn; (void)user_data; - if (ngtcp2_crypto_update_key(conn, rx_secret, tx_secret, rx_aead_ctx, rx_key, - rx_iv, tx_aead_ctx, tx_key, tx_iv, - current_rx_secret, current_tx_secret, - secretlen) != 0) { + if (ngtcp2_crypto_update_key( + conn, rx_secret, tx_secret, rx_aead_ctx, rx_key, rx_iv, tx_aead_ctx, + tx_key, tx_iv, current_rx_secret, current_tx_secret, secretlen) != 0) { return NGTCP2_ERR_CALLBACK_FAILURE; } return 0; @@ -920,13 +915,14 @@ static size_t crypto_generate_retry_token_aad(uint8_t *dest, uint32_t version, static const uint8_t retry_token_info_prefix[] = "retry_token"; ngtcp2_ssize ngtcp2_crypto_generate_retry_token( - uint8_t *token, const uint8_t *secret, size_t secretlen, uint32_t version, - const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, - const ngtcp2_cid *retry_scid, const ngtcp2_cid *odcid, ngtcp2_tstamp ts) { - uint8_t plaintext[NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN]; + uint8_t *token, const uint8_t *secret, size_t secretlen, uint32_t version, + const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, + const ngtcp2_cid *retry_scid, const ngtcp2_cid *odcid, ngtcp2_tstamp ts) { + uint8_t + plaintext[/* cid len = */ 1 + NGTCP2_MAX_CIDLEN + sizeof(ngtcp2_tstamp)]; uint8_t rand_data[NGTCP2_CRYPTO_TOKEN_RAND_DATALEN]; - uint8_t key[32]; - uint8_t iv[32]; + uint8_t key[16]; + uint8_t iv[12]; size_t keylen; size_t ivlen; ngtcp2_crypto_aead aead; @@ -934,7 +930,7 @@ ngtcp2_ssize ngtcp2_crypto_generate_retry_token( ngtcp2_crypto_aead_ctx aead_ctx; size_t plaintextlen; uint8_t - aad[sizeof(version) + sizeof(ngtcp2_sockaddr_union) + NGTCP2_MAX_CIDLEN]; + aad[sizeof(version) + sizeof(ngtcp2_sockaddr_union) + NGTCP2_MAX_CIDLEN]; size_t aadlen; uint8_t *p = plaintext; ngtcp2_tstamp ts_be = ngtcp2_htonl64(ts); @@ -962,8 +958,8 @@ ngtcp2_ssize ngtcp2_crypto_generate_retry_token( keylen = ngtcp2_crypto_aead_keylen(&aead); ivlen = ngtcp2_crypto_aead_noncelen(&aead); - assert(sizeof(key) >= keylen); - assert(sizeof(iv) >= ivlen); + assert(sizeof(key) == keylen); + assert(sizeof(iv) == ivlen); if (crypto_derive_token_key(key, keylen, iv, ivlen, &md, secret, secretlen, rand_data, sizeof(rand_data), @@ -999,21 +995,21 @@ ngtcp2_ssize ngtcp2_crypto_generate_retry_token( } int ngtcp2_crypto_verify_retry_token( - ngtcp2_cid *odcid, const uint8_t *token, size_t tokenlen, - const uint8_t *secret, size_t secretlen, uint32_t version, - const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, - const ngtcp2_cid *dcid, ngtcp2_duration timeout, ngtcp2_tstamp ts) { + ngtcp2_cid *odcid, const uint8_t *token, size_t tokenlen, + const uint8_t *secret, size_t secretlen, uint32_t version, + const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, + const ngtcp2_cid *dcid, ngtcp2_duration timeout, ngtcp2_tstamp ts) { uint8_t - plaintext[/* cid len = */ 1 + NGTCP2_MAX_CIDLEN + sizeof(ngtcp2_tstamp)]; - uint8_t key[32]; - uint8_t iv[32]; + plaintext[/* cid len = */ 1 + NGTCP2_MAX_CIDLEN + sizeof(ngtcp2_tstamp)]; + uint8_t key[16]; + uint8_t iv[12]; size_t keylen; size_t ivlen; ngtcp2_crypto_aead_ctx aead_ctx; ngtcp2_crypto_aead aead; ngtcp2_crypto_md md; uint8_t - aad[sizeof(version) + sizeof(ngtcp2_sockaddr_union) + NGTCP2_MAX_CIDLEN]; + aad[sizeof(version) + sizeof(ngtcp2_sockaddr_union) + NGTCP2_MAX_CIDLEN]; size_t aadlen; const uint8_t *rand_data; const uint8_t *ciphertext; @@ -1039,6 +1035,9 @@ int ngtcp2_crypto_verify_retry_token( keylen = ngtcp2_crypto_aead_keylen(&aead); ivlen = ngtcp2_crypto_aead_noncelen(&aead); + assert(sizeof(key) == keylen); + assert(sizeof(iv) == ivlen); + if (crypto_derive_token_key(key, keylen, iv, ivlen, &md, secret, secretlen, rand_data, NGTCP2_CRYPTO_TOKEN_RAND_DATALEN, retry_token_info_prefix, @@ -1081,6 +1080,206 @@ int ngtcp2_crypto_verify_retry_token( return 0; } +static size_t crypto_generate_retry_token_aad2(uint8_t *dest, uint32_t version, + const ngtcp2_cid *retry_scid) { + uint8_t *p = dest; + + version = ngtcp2_htonl(version); + memcpy(p, &version, sizeof(version)); + p += sizeof(version); + memcpy(p, retry_scid->data, retry_scid->datalen); + p += retry_scid->datalen; + + return (size_t)(p - dest); +} + +static const uint8_t retry_token_info_prefix2[] = "retry_token2"; + +ngtcp2_ssize ngtcp2_crypto_generate_retry_token2( + uint8_t *token, const uint8_t *secret, size_t secretlen, uint32_t version, + const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, + const ngtcp2_cid *retry_scid, const ngtcp2_cid *odcid, ngtcp2_tstamp ts) { + uint8_t plaintext[sizeof(ngtcp2_sockaddr_union) + /* cid len = */ 1 + + NGTCP2_MAX_CIDLEN + sizeof(ngtcp2_tstamp)]; + uint8_t rand_data[NGTCP2_CRYPTO_TOKEN_RAND_DATALEN]; + uint8_t key[16]; + uint8_t iv[12]; + size_t keylen; + size_t ivlen; + ngtcp2_crypto_aead aead; + ngtcp2_crypto_md md; + ngtcp2_crypto_aead_ctx aead_ctx; + uint8_t aad[sizeof(version) + NGTCP2_MAX_CIDLEN]; + size_t aadlen; + uint8_t *p = plaintext; + ngtcp2_tstamp ts_be = ngtcp2_htonl64(ts); + int rv; + + assert((size_t)remote_addrlen <= sizeof(ngtcp2_sockaddr_union)); + + memset(plaintext, 0, sizeof(plaintext)); + + memcpy(p, remote_addr, (size_t)remote_addrlen); + p += sizeof(ngtcp2_sockaddr_union); + *p++ = (uint8_t)odcid->datalen; + memcpy(p, odcid->data, odcid->datalen); + p += NGTCP2_MAX_CIDLEN; + memcpy(p, &ts_be, sizeof(ts_be)); + + assert((size_t)(p + sizeof(ts_be) - plaintext) == sizeof(plaintext)); + + if (ngtcp2_crypto_random(rand_data, sizeof(rand_data)) != 0) { + return -1; + } + + ngtcp2_crypto_aead_aes_128_gcm(&aead); + ngtcp2_crypto_md_sha256(&md); + + keylen = ngtcp2_crypto_aead_keylen(&aead); + ivlen = ngtcp2_crypto_aead_noncelen(&aead); + + assert(sizeof(key) == keylen); + assert(sizeof(iv) == ivlen); + + if (crypto_derive_token_key(key, keylen, iv, ivlen, &md, secret, secretlen, + rand_data, sizeof(rand_data), + retry_token_info_prefix2, + sizeof(retry_token_info_prefix2) - 1) != 0) { + return -1; + } + + aadlen = crypto_generate_retry_token_aad2(aad, version, retry_scid); + + p = token; + *p++ = NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY2; + + if (ngtcp2_crypto_aead_ctx_encrypt_init(&aead_ctx, &aead, key, ivlen) != 0) { + return -1; + } + + rv = ngtcp2_crypto_encrypt(p, &aead, &aead_ctx, plaintext, sizeof(plaintext), + iv, ivlen, aad, aadlen); + + ngtcp2_crypto_aead_ctx_free(&aead_ctx); + + if (rv != 0) { + return -1; + } + + p += sizeof(plaintext) + aead.max_overhead; + memcpy(p, rand_data, sizeof(rand_data)); + p += sizeof(rand_data); + + return p - token; +} + +int ngtcp2_crypto_verify_retry_token2( + ngtcp2_cid *odcid, const uint8_t *token, size_t tokenlen, + const uint8_t *secret, size_t secretlen, uint32_t version, + const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, + const ngtcp2_cid *dcid, ngtcp2_duration timeout, ngtcp2_tstamp ts) { + uint8_t plaintext[sizeof(ngtcp2_sockaddr_union) + /* cid len = */ 1 + + NGTCP2_MAX_CIDLEN + sizeof(ngtcp2_tstamp)]; + uint8_t key[16]; + uint8_t iv[12]; + size_t keylen; + size_t ivlen; + ngtcp2_crypto_aead_ctx aead_ctx; + ngtcp2_crypto_aead aead; + ngtcp2_crypto_md md; + uint8_t aad[sizeof(version) + NGTCP2_MAX_CIDLEN]; + size_t aadlen; + const uint8_t *rand_data; + const uint8_t *ciphertext; + size_t ciphertextlen; + size_t cil; + int rv; + ngtcp2_tstamp gen_ts; + ngtcp2_sockaddr_union addr; + size_t addrlen; + uint8_t *p; + + assert((size_t)remote_addrlen <= sizeof(ngtcp2_sockaddr_union)); + + if (tokenlen != NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN2 || + token[0] != NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY2) { + return NGTCP2_CRYPTO_ERR_UNREADABLE_TOKEN; + } + + rand_data = token + tokenlen - NGTCP2_CRYPTO_TOKEN_RAND_DATALEN; + ciphertext = token + 1; + ciphertextlen = (size_t)(rand_data - ciphertext); + + ngtcp2_crypto_aead_aes_128_gcm(&aead); + ngtcp2_crypto_md_sha256(&md); + + keylen = ngtcp2_crypto_aead_keylen(&aead); + ivlen = ngtcp2_crypto_aead_noncelen(&aead); + + assert(sizeof(key) == keylen); + assert(sizeof(iv) == ivlen); + + if (crypto_derive_token_key(key, keylen, iv, ivlen, &md, secret, secretlen, + rand_data, NGTCP2_CRYPTO_TOKEN_RAND_DATALEN, + retry_token_info_prefix2, + sizeof(retry_token_info_prefix2) - 1) != 0) { + return NGTCP2_CRYPTO_ERR_INTERNAL; + } + + aadlen = crypto_generate_retry_token_aad2(aad, version, dcid); + + if (ngtcp2_crypto_aead_ctx_decrypt_init(&aead_ctx, &aead, key, ivlen) != 0) { + return NGTCP2_CRYPTO_ERR_INTERNAL; + } + + rv = ngtcp2_crypto_decrypt(plaintext, &aead, &aead_ctx, ciphertext, + ciphertextlen, iv, ivlen, aad, aadlen); + + ngtcp2_crypto_aead_ctx_free(&aead_ctx); + + if (rv != 0) { + return NGTCP2_CRYPTO_ERR_UNREADABLE_TOKEN; + } + + p = plaintext; + + memcpy(&addr, p, sizeof(addr)); + + switch (addr.sa.sa_family) { + case NGTCP2_AF_INET: + addrlen = sizeof(ngtcp2_sockaddr_in); + break; + case NGTCP2_AF_INET6: + addrlen = sizeof(ngtcp2_sockaddr_in6); + break; + default: + return NGTCP2_CRYPTO_ERR_VERIFY_TOKEN; + } + + if (addrlen != (size_t)remote_addrlen || + memcmp(&addr, remote_addr, addrlen) != 0) { + return NGTCP2_CRYPTO_ERR_VERIFY_TOKEN; + } + + p += sizeof(addr); + cil = *p++; + + if (cil != 0 && (cil < NGTCP2_MIN_CIDLEN || cil > NGTCP2_MAX_CIDLEN)) { + return NGTCP2_CRYPTO_ERR_VERIFY_TOKEN; + } + + memcpy(&gen_ts, p + NGTCP2_MAX_CIDLEN, sizeof(gen_ts)); + + gen_ts = ngtcp2_ntohl64(gen_ts); + if (gen_ts + timeout <= ts) { + return NGTCP2_CRYPTO_ERR_VERIFY_TOKEN; + } + + ngtcp2_cid_init(odcid, p, cil); + + return 0; +} + static size_t crypto_generate_regular_token_aad(uint8_t *dest, const ngtcp2_sockaddr *sa) { const uint8_t *addr; @@ -1093,7 +1292,7 @@ static size_t crypto_generate_regular_token_aad(uint8_t *dest, break; case NGTCP2_AF_INET6: addr = - (const uint8_t *)&((const ngtcp2_sockaddr_in6 *)(void *)sa)->sin6_addr; + (const uint8_t *)&((const ngtcp2_sockaddr_in6 *)(void *)sa)->sin6_addr; addrlen = sizeof(((const ngtcp2_sockaddr_in6 *)(void *)sa)->sin6_addr); break; default: @@ -1109,13 +1308,13 @@ static size_t crypto_generate_regular_token_aad(uint8_t *dest, static const uint8_t regular_token_info_prefix[] = "regular_token"; ngtcp2_ssize ngtcp2_crypto_generate_regular_token( - uint8_t *token, const uint8_t *secret, size_t secretlen, - const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, - ngtcp2_tstamp ts) { + uint8_t *token, const uint8_t *secret, size_t secretlen, + const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, + ngtcp2_tstamp ts) { uint8_t plaintext[sizeof(ngtcp2_tstamp)]; uint8_t rand_data[NGTCP2_CRYPTO_TOKEN_RAND_DATALEN]; - uint8_t key[32]; - uint8_t iv[32]; + uint8_t key[16]; + uint8_t iv[12]; size_t keylen; size_t ivlen; ngtcp2_crypto_aead aead; @@ -1144,8 +1343,8 @@ ngtcp2_ssize ngtcp2_crypto_generate_regular_token( keylen = ngtcp2_crypto_aead_keylen(&aead); ivlen = ngtcp2_crypto_aead_noncelen(&aead); - assert(sizeof(key) >= keylen); - assert(sizeof(iv) >= ivlen); + assert(sizeof(key) == keylen); + assert(sizeof(iv) == ivlen); if (crypto_derive_token_key(key, keylen, iv, ivlen, &md, secret, secretlen, rand_data, sizeof(rand_data), @@ -1186,8 +1385,8 @@ int ngtcp2_crypto_verify_regular_token(const uint8_t *token, size_t tokenlen, ngtcp2_duration timeout, ngtcp2_tstamp ts) { uint8_t plaintext[sizeof(ngtcp2_tstamp)]; - uint8_t key[32]; - uint8_t iv[32]; + uint8_t key[16]; + uint8_t iv[12]; size_t keylen; size_t ivlen; ngtcp2_crypto_aead_ctx aead_ctx; @@ -1217,6 +1416,9 @@ int ngtcp2_crypto_verify_regular_token(const uint8_t *token, size_t tokenlen, keylen = ngtcp2_crypto_aead_keylen(&aead); ivlen = ngtcp2_crypto_aead_noncelen(&aead); + assert(sizeof(key) == keylen); + assert(sizeof(iv) == ivlen); + if (crypto_derive_token_key(key, keylen, iv, ivlen, &md, secret, secretlen, rand_data, NGTCP2_CRYPTO_TOKEN_RAND_DATALEN, regular_token_info_prefix, @@ -1250,9 +1452,9 @@ int ngtcp2_crypto_verify_regular_token(const uint8_t *token, size_t tokenlen, } ngtcp2_ssize ngtcp2_crypto_write_connection_close( - uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid, - const ngtcp2_cid *scid, uint64_t error_code, const uint8_t *reason, - size_t reasonlen) { + uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid, + const ngtcp2_cid *scid, uint64_t error_code, const uint8_t *reason, + size_t reasonlen) { uint8_t rx_secret[NGTCP2_CRYPTO_INITIAL_SECRETLEN]; uint8_t tx_secret[NGTCP2_CRYPTO_INITIAL_SECRETLEN]; uint8_t initial_secret[NGTCP2_CRYPTO_INITIAL_SECRETLEN]; @@ -1273,8 +1475,8 @@ ngtcp2_ssize ngtcp2_crypto_write_connection_close( } if (ngtcp2_crypto_derive_packet_protection_key( - tx_key, tx_iv, tx_hp_key, version, &ctx.aead, &ctx.md, tx_secret, - NGTCP2_CRYPTO_INITIAL_SECRETLEN) != 0) { + tx_key, tx_iv, tx_hp_key, version, &ctx.aead, &ctx.md, tx_secret, + NGTCP2_CRYPTO_INITIAL_SECRETLEN) != 0) { return -1; } @@ -1290,9 +1492,9 @@ ngtcp2_ssize ngtcp2_crypto_write_connection_close( } spktlen = ngtcp2_pkt_write_connection_close( - dest, destlen, version, dcid, scid, error_code, reason, reasonlen, - ngtcp2_crypto_encrypt_cb, &ctx.aead, &aead_ctx, tx_iv, - ngtcp2_crypto_hp_mask_cb, &ctx.hp, &hp_ctx); + dest, destlen, version, dcid, scid, error_code, reason, reasonlen, + ngtcp2_crypto_encrypt_cb, &ctx.aead, &aead_ctx, tx_iv, + ngtcp2_crypto_hp_mask_cb, &ctx.hp, &hp_ctx); if (spktlen < 0) { spktlen = -1; } @@ -1352,8 +1554,8 @@ int ngtcp2_crypto_client_initial_cb(ngtcp2_conn *conn, void *user_data) { (void)user_data; if (ngtcp2_crypto_derive_and_install_initial_key( - conn, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - ngtcp2_conn_get_client_chosen_version(conn), dcid) != 0) { + conn, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + ngtcp2_conn_get_client_chosen_version(conn), dcid) != 0) { return NGTCP2_ERR_CALLBACK_FAILURE; } @@ -1362,7 +1564,7 @@ int ngtcp2_crypto_client_initial_cb(ngtcp2_conn *conn, void *user_data) { } if (ngtcp2_crypto_read_write_crypto_data( - conn, NGTCP2_ENCRYPTION_LEVEL_INITIAL, NULL, 0) != 0) { + conn, NGTCP2_ENCRYPTION_LEVEL_INITIAL, NULL, 0) != 0) { return NGTCP2_ERR_CALLBACK_FAILURE; } @@ -1374,8 +1576,8 @@ int ngtcp2_crypto_recv_retry_cb(ngtcp2_conn *conn, const ngtcp2_pkt_hd *hd, (void)user_data; if (ngtcp2_crypto_derive_and_install_initial_key( - conn, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - ngtcp2_conn_get_client_chosen_version(conn), &hd->scid) != 0) { + conn, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + ngtcp2_conn_get_client_chosen_version(conn), &hd->scid) != 0) { return NGTCP2_ERR_CALLBACK_FAILURE; } @@ -1388,8 +1590,8 @@ int ngtcp2_crypto_recv_client_initial_cb(ngtcp2_conn *conn, (void)user_data; if (ngtcp2_crypto_derive_and_install_initial_key( - conn, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - ngtcp2_conn_get_client_chosen_version(conn), dcid) != 0) { + conn, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + ngtcp2_conn_get_client_chosen_version(conn), dcid) != 0) { return NGTCP2_ERR_CALLBACK_FAILURE; } @@ -1402,8 +1604,8 @@ int ngtcp2_crypto_version_negotiation_cb(ngtcp2_conn *conn, uint32_t version, (void)user_data; if (ngtcp2_crypto_derive_and_install_vneg_initial_key( - conn, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, version, - client_dcid) != 0) { + conn, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, version, + client_dcid) != 0) { return NGTCP2_ERR_CALLBACK_FAILURE; } @@ -1420,7 +1622,7 @@ void ngtcp2_crypto_delete_crypto_aead_ctx_cb(ngtcp2_conn *conn, } void ngtcp2_crypto_delete_crypto_cipher_ctx_cb( - ngtcp2_conn *conn, ngtcp2_crypto_cipher_ctx *cipher_ctx, void *user_data) { + ngtcp2_conn *conn, ngtcp2_crypto_cipher_ctx *cipher_ctx, void *user_data) { (void)conn; (void)user_data; diff --git a/deps/ngtcp2/ngtcp2/crypto/shared.h b/deps/ngtcp2/ngtcp2/crypto/shared.h index d69fd21212d7d2..34158d3d02dbc0 100644 --- a/deps/ngtcp2/ngtcp2/crypto/shared.h +++ b/deps/ngtcp2/ngtcp2/crypto/shared.h @@ -22,12 +22,12 @@ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef NGTCP2_SHARED_H -#define NGTCP2_SHARED_H +#ifndef SHARED_H +#define SHARED_H #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -286,10 +286,10 @@ int ngtcp2_crypto_set_remote_transport_params(ngtcp2_conn *conn, void *tls); * This function returns 0 if it succeeds, or -1. */ int ngtcp2_crypto_derive_and_install_initial_key( - ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, - uint8_t *initial_secret, uint8_t *rx_key, uint8_t *rx_iv, uint8_t *rx_hp, - uint8_t *tx_key, uint8_t *tx_iv, uint8_t *tx_hp, uint32_t version, - const ngtcp2_cid *client_dcid); + ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, + uint8_t *initial_secret, uint8_t *rx_key, uint8_t *rx_iv, uint8_t *rx_hp, + uint8_t *tx_key, uint8_t *tx_iv, uint8_t *tx_hp, uint32_t version, + const ngtcp2_cid *client_dcid); /** * @function @@ -337,10 +337,10 @@ int ngtcp2_crypto_derive_and_install_initial_key( * This function returns 0 if it succeeds, or -1. */ int ngtcp2_crypto_derive_and_install_vneg_initial_key( - ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, - uint8_t *initial_secret, uint8_t *rx_key, uint8_t *rx_iv, uint8_t *rx_hp, - uint8_t *tx_key, uint8_t *tx_iv, uint8_t *tx_hp, uint32_t version, - const ngtcp2_cid *client_dcid); + ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, + uint8_t *initial_secret, uint8_t *rx_key, uint8_t *rx_iv, uint8_t *rx_hp, + uint8_t *tx_key, uint8_t *tx_iv, uint8_t *tx_hp, uint32_t version, + const ngtcp2_cid *client_dcid); /** * @function @@ -394,4 +394,4 @@ int ngtcp2_crypto_hkdf_expand_label(uint8_t *dest, size_t destlen, const uint8_t *secret, size_t secretlen, const uint8_t *label, size_t labellen); -#endif /* NGTCP2_SHARED_H */ +#endif /* !defined(SHARED_H) */ diff --git a/deps/ngtcp2/ngtcp2/crypto/wolfssl/wolfssl.c b/deps/ngtcp2/ngtcp2/crypto/wolfssl/wolfssl.c index 2b7b5321863915..bc9d9d84a862c5 100644 --- a/deps/ngtcp2/ngtcp2/crypto/wolfssl/wolfssl.c +++ b/deps/ngtcp2/ngtcp2/crypto/wolfssl/wolfssl.c @@ -24,7 +24,7 @@ */ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -39,9 +39,9 @@ #define PRINTF_DEBUG 0 #if PRINTF_DEBUG # define DEBUG_MSG(...) fprintf(stderr, __VA_ARGS__) -#else +#else /* !PRINTF_DEBUG */ # define DEBUG_MSG(...) (void)0 -#endif +#endif /* !PRINTF_DEBUG */ ngtcp2_crypto_aead *ngtcp2_crypto_aead_aes_128_gcm(ngtcp2_crypto_aead *aead) { return ngtcp2_crypto_aead_init(aead, (void *)wolfSSL_EVP_aes_128_gcm()); @@ -65,7 +65,7 @@ ngtcp2_crypto_aead *ngtcp2_crypto_aead_init(ngtcp2_crypto_aead *aead, void *aead_native_handle) { aead->native_handle = aead_native_handle; aead->max_overhead = wolfSSL_quic_get_aead_tag_len( - (const WOLFSSL_EVP_CIPHER *)(aead_native_handle)); + (const WOLFSSL_EVP_CIPHER *)(aead_native_handle)); return aead; } @@ -124,7 +124,7 @@ ngtcp2_crypto_ctx *ngtcp2_crypto_ctx_tls(ngtcp2_crypto_ctx *ctx, ctx->hp.native_handle = (void *)wolfSSL_quic_get_hp(ssl); ctx->max_encryption = crypto_aead_get_aead_max_encryption(aead); ctx->max_decryption_failure = - crypto_aead_get_aead_max_decryption_failure(aead); + crypto_aead_get_aead_max_decryption_failure(aead); return ctx; } @@ -203,7 +203,7 @@ int ngtcp2_crypto_cipher_ctx_encrypt_init(ngtcp2_crypto_cipher_ctx *cipher_ctx, WOLFSSL_EVP_CIPHER_CTX *actx; actx = - wolfSSL_quic_crypt_new(cipher->native_handle, key, NULL, /* encrypt */ 1); + wolfSSL_quic_crypt_new(cipher->native_handle, key, NULL, /* encrypt */ 1); if (actx == NULL) { return -1; } @@ -279,7 +279,6 @@ int ngtcp2_crypto_decrypt(uint8_t *dest, const ngtcp2_crypto_aead *aead, if (wolfSSL_quic_aead_decrypt(dest, aead_ctx->native_handle, ciphertext, ciphertextlen, nonce, aad, aadlen) != WOLFSSL_SUCCESS) { - DEBUG_MSG("WOLFSSL: decrypt FAILED\n"); return -1; } @@ -296,11 +295,11 @@ int ngtcp2_crypto_hp_mask(uint8_t *dest, const ngtcp2_crypto_cipher *hp, (void)hp; if (wolfSSL_EVP_EncryptInit_ex(actx, NULL, NULL, NULL, sample) != - WOLFSSL_SUCCESS || + WOLFSSL_SUCCESS || wolfSSL_EVP_CipherUpdate(actx, dest, &len, PLAINTEXT, sizeof(PLAINTEXT) - 1) != WOLFSSL_SUCCESS || wolfSSL_EVP_EncryptFinal_ex(actx, dest + sizeof(PLAINTEXT) - 1, &len) != - WOLFSSL_SUCCESS) { + WOLFSSL_SUCCESS) { DEBUG_MSG("WOLFSSL: hp_mask FAILED\n"); return -1; } @@ -309,11 +308,11 @@ int ngtcp2_crypto_hp_mask(uint8_t *dest, const ngtcp2_crypto_cipher *hp, } int ngtcp2_crypto_read_write_crypto_data( - ngtcp2_conn *conn, ngtcp2_encryption_level encryption_level, - const uint8_t *data, size_t datalen) { + ngtcp2_conn *conn, ngtcp2_encryption_level encryption_level, + const uint8_t *data, size_t datalen) { WOLFSSL *ssl = ngtcp2_conn_get_tls_native_handle(conn); WOLFSSL_ENCRYPTION_LEVEL level = - ngtcp2_crypto_wolfssl_from_ngtcp2_encryption_level(encryption_level); + ngtcp2_crypto_wolfssl_from_ngtcp2_encryption_level(encryption_level); int rv; int err; @@ -397,7 +396,7 @@ int ngtcp2_crypto_set_local_transport_params(void *tls, const uint8_t *buf, } ngtcp2_encryption_level ngtcp2_crypto_wolfssl_from_wolfssl_encryption_level( - WOLFSSL_ENCRYPTION_LEVEL wolfssl_level) { + WOLFSSL_ENCRYPTION_LEVEL wolfssl_level) { switch (wolfssl_level) { case wolfssl_encryption_initial: return NGTCP2_ENCRYPTION_LEVEL_INITIAL; @@ -415,7 +414,7 @@ ngtcp2_encryption_level ngtcp2_crypto_wolfssl_from_wolfssl_encryption_level( WOLFSSL_ENCRYPTION_LEVEL ngtcp2_crypto_wolfssl_from_ngtcp2_encryption_level( - ngtcp2_encryption_level encryption_level) { + ngtcp2_encryption_level encryption_level) { switch (encryption_level) { case NGTCP2_ENCRYPTION_LEVEL_INITIAL: return wolfssl_encryption_initial; @@ -458,7 +457,7 @@ static int set_encryption_secrets(WOLFSSL *ssl, ngtcp2_crypto_conn_ref *conn_ref = SSL_get_app_data(ssl); ngtcp2_conn *conn = conn_ref->get_conn(conn_ref); ngtcp2_encryption_level level = - ngtcp2_crypto_wolfssl_from_wolfssl_encryption_level(wolfssl_level); + ngtcp2_crypto_wolfssl_from_wolfssl_encryption_level(wolfssl_level); DEBUG_MSG("WOLFSSL: set encryption secrets, level=%d, rxlen=%lu, txlen=%lu\n", wolfssl_level, rx_secret ? secretlen : 0, @@ -484,7 +483,7 @@ static int add_handshake_data(WOLFSSL *ssl, ngtcp2_crypto_conn_ref *conn_ref = SSL_get_app_data(ssl); ngtcp2_conn *conn = conn_ref->get_conn(conn_ref); ngtcp2_encryption_level level = - ngtcp2_crypto_wolfssl_from_wolfssl_encryption_level(wolfssl_level); + ngtcp2_crypto_wolfssl_from_wolfssl_encryption_level(wolfssl_level); int rv; DEBUG_MSG("WOLFSSL: add handshake data, level=%d len=%lu\n", wolfssl_level, @@ -516,10 +515,10 @@ static int send_alert(WOLFSSL *ssl, enum wolfssl_encryption_level_t level, } static WOLFSSL_QUIC_METHOD quic_method = { - set_encryption_secrets, - add_handshake_data, - flush_flight, - send_alert, + set_encryption_secrets, + add_handshake_data, + flush_flight, + send_alert, }; static void crypto_wolfssl_configure_context(WOLFSSL_CTX *ssl_ctx) { @@ -532,7 +531,7 @@ int ngtcp2_crypto_wolfssl_configure_server_context(WOLFSSL_CTX *ssl_ctx) { crypto_wolfssl_configure_context(ssl_ctx); #if PRINTF_DEBUG wolfSSL_Debugging_ON(); -#endif +#endif /* PRINTF_DEBUG */ return 0; } @@ -541,6 +540,6 @@ int ngtcp2_crypto_wolfssl_configure_client_context(WOLFSSL_CTX *ssl_ctx) { wolfSSL_CTX_UseSessionTicket(ssl_ctx); #if PRINTF_DEBUG wolfSSL_Debugging_ON(); -#endif +#endif /* PRINTF_DEBUG */ return 0; } diff --git a/deps/ngtcp2/ngtcp2/lib/includes/ngtcp2/ngtcp2.h b/deps/ngtcp2/ngtcp2/lib/includes/ngtcp2/ngtcp2.h index 72c8142a5a5aa7..acc79be6e0b375 100644 --- a/deps/ngtcp2/ngtcp2/lib/includes/ngtcp2/ngtcp2.h +++ b/deps/ngtcp2/ngtcp2/lib/includes/ngtcp2/ngtcp2.h @@ -30,12 +30,12 @@ libcurl) */ #if (defined(_WIN32) || defined(__WIN32__)) && !defined(WIN32) # define WIN32 -#endif +#endif /* (defined(_WIN32) || defined(__WIN32__)) && !defined(WIN32) */ #ifdef _MSC_VER # pragma warning(push) # pragma warning(disable : 4324) -#endif +#endif /* defined(_MSC_VER) */ #include #if defined(_MSC_VER) && (_MSC_VER < 1800) @@ -43,9 +43,9 @@ compliant. See compiler macros and version number in https://sourceforge.net/p/predef/wiki/Compilers/ */ # include -#else /* !defined(_MSC_VER) || (_MSC_VER >= 1800) */ +#else /* !(defined(_MSC_VER) && (_MSC_VER < 1800)) */ # include -#endif /* !defined(_MSC_VER) || (_MSC_VER >= 1800) */ +#endif /* !(defined(_MSC_VER) && (_MSC_VER < 1800)) */ #include #include #include @@ -54,13 +54,13 @@ # ifdef WIN32 # ifndef WIN32_LEAN_AND_MEAN # define WIN32_LEAN_AND_MEAN -# endif /* WIN32_LEAN_AND_MEAN */ +# endif /* !defined(WIN32_LEAN_AND_MEAN) */ # include -# else /* !WIN32 */ +# else /* !defined(WIN32) */ # include # include -# endif /* !WIN32 */ -#endif /* NGTCP2_USE_GENERIC_SOCKADDR */ +# endif /* !defined(WIN32) */ +#endif /* !defined(NGTCP2_USE_GENERIC_SOCKADDR) */ #include @@ -69,26 +69,26 @@ #elif defined(WIN32) # ifdef BUILDING_NGTCP2 # define NGTCP2_EXTERN __declspec(dllexport) -# else /* !BUILDING_NGTCP2 */ +# else /* !defined(BUILDING_NGTCP2) */ # define NGTCP2_EXTERN __declspec(dllimport) -# endif /* !BUILDING_NGTCP2 */ -#else /* !defined(WIN32) */ +# endif /* !defined(BUILDING_NGTCP2) */ +#else /* !(defined(NGTCP2_STATICLIB) || defined(WIN32)) */ # ifdef BUILDING_NGTCP2 # define NGTCP2_EXTERN __attribute__((visibility("default"))) -# else /* !BUILDING_NGTCP2 */ +# else /* !defined(BUILDING_NGTCP2) */ # define NGTCP2_EXTERN -# endif /* !BUILDING_NGTCP2 */ -#endif /* !defined(WIN32) */ +# endif /* !defined(BUILDING_NGTCP2) */ +#endif /* !(defined(NGTCP2_STATICLIB) || defined(WIN32)) */ #ifdef _MSC_VER # define NGTCP2_ALIGN(N) __declspec(align(N)) -#else /* !_MSC_VER */ +#else /* !defined(_MSC_VER) */ # define NGTCP2_ALIGN(N) __attribute__((aligned(N))) -#endif /* !_MSC_VER */ +#endif /* !defined(_MSC_VER) */ #ifdef __cplusplus extern "C" { -#endif +#endif /* defined(__cplusplus) */ /** * @typedef @@ -1229,11 +1229,11 @@ typedef struct ngtcp2_pkt_stateless_reset { #ifdef NGTCP2_USE_GENERIC_SOCKADDR # ifndef NGTCP2_AF_INET # error NGTCP2_AF_INET must be defined -# endif /* !NGTCP2_AF_INET */ +# endif /* !defined(NGTCP2_AF_INET) */ # ifndef NGTCP2_AF_INET6 # error NGTCP2_AF_INET6 must be defined -# endif /* !NGTCP2_AF_INET6 */ +# endif /* !defined(NGTCP2_AF_INET6) */ typedef unsigned short int ngtcp2_sa_family; typedef uint16_t ngtcp2_in_port; @@ -1267,7 +1267,7 @@ typedef struct ngtcp2_sockaddr_in6 { } ngtcp2_sockaddr_in6; typedef uint32_t ngtcp2_socklen; -#else /* !NGTCP2_USE_GENERIC_SOCKADDR */ +#else /* !defined(NGTCP2_USE_GENERIC_SOCKADDR) */ # define NGTCP2_AF_INET AF_INET # define NGTCP2_AF_INET6 AF_INET6 @@ -1303,7 +1303,7 @@ typedef struct sockaddr_in6 ngtcp2_sockaddr_in6; * uint32_t. */ typedef socklen_t ngtcp2_socklen; -#endif /* !NGTCP2_USE_GENERIC_SOCKADDR */ +#endif /* !defined(NGTCP2_USE_GENERIC_SOCKADDR) */ /** * @struct @@ -1689,7 +1689,8 @@ typedef enum ngtcp2_token_type { } ngtcp2_token_type; #define NGTCP2_SETTINGS_V1 1 -#define NGTCP2_SETTINGS_VERSION NGTCP2_SETTINGS_V1 +#define NGTCP2_SETTINGS_V2 2 +#define NGTCP2_SETTINGS_VERSION NGTCP2_SETTINGS_V2 /** * @struct @@ -1723,8 +1724,7 @@ typedef struct ngtcp2_settings { ngtcp2_printf log_printf; /** * :member:`max_tx_udp_payload_size` is the maximum size of UDP - * datagram payload that the local endpoint transmits. It is used - * by congestion controller to compute congestion window. + * datagram payload that the local endpoint transmits. */ size_t max_tx_udp_payload_size; /** @@ -1777,6 +1777,13 @@ typedef struct ngtcp2_settings { * or :member:`ngtcp2_transport_params.initial_max_stream_data_uni`, * depending on the type of stream. The window size is scaled up to * the value specified in this field. + * + * Please note that the auto-tuning is done per stream. Even if the + * previous stream gets larger window as a result of auto-tuning, + * the new stream still starts with the initial value set in + * transport parameters. This might become a bottleneck if + * congestion window of a remote server is wide open. If this + * causes an issue, do not enable auto-tuning. */ uint64_t max_stream_window; /** @@ -1873,10 +1880,29 @@ typedef struct ngtcp2_settings { */ uint8_t no_pmtud; /** - * :member:`pkt_num` is the initial packet number for each packet - * number space. It must be in range [0, INT32_MAX], inclusive. + * :member:`initial_pkt_num` is the initial packet number for each + * packet number space. It must be in range [0, INT32_MAX], + * inclusive. */ uint32_t initial_pkt_num; + /* The following fields have been added since NGTCP2_SETTINGS_V2. */ + /** + * :member:`pmtud_probes` is the array of UDP datagram payload size + * to probe during Path MTU Discovery. The discovery is done in the + * order appeared in this array. The size must be strictly larger + * than 1200, otherwise the behavior is undefined. The maximum + * value in this array should be set to + * :member:`max_tx_udp_payload_size`. If this field is not set, the + * predefined PMTUD probes are made. This field has been available + * since v1.4.0. + */ + const uint16_t *pmtud_probes; + /** + * :member:`pmtud_probeslen` is the number of elements that are + * contained in the array pointed by :member:`pmtud_probes`. This + * field has been available since v1.4.0. + */ + size_t pmtud_probeslen; } ngtcp2_settings; /** @@ -2080,8 +2106,8 @@ typedef struct ngtcp2_crypto_ctx { * Buffer is too small. */ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_transport_params_encode_versioned( - uint8_t *dest, size_t destlen, int transport_params_version, - const ngtcp2_transport_params *params); + uint8_t *dest, size_t destlen, int transport_params_version, + const ngtcp2_transport_params *params); /** * @function @@ -2324,8 +2350,8 @@ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_pkt_decode_hd_short(ngtcp2_pkt_hd *dest, * :macro:`NGTCP2_MIN_STATELESS_RESET_RANDLEN`. */ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_pkt_write_stateless_reset( - uint8_t *dest, size_t destlen, const uint8_t *stateless_reset_token, - const uint8_t *rand, size_t randlen); + uint8_t *dest, size_t destlen, const uint8_t *stateless_reset_token, + const uint8_t *rand, size_t randlen); /** * @function @@ -2347,9 +2373,9 @@ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_pkt_write_stateless_reset( * Buffer is too small. */ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_pkt_write_version_negotiation( - uint8_t *dest, size_t destlen, uint8_t unused_random, const uint8_t *dcid, - size_t dcidlen, const uint8_t *scid, size_t scidlen, const uint32_t *sv, - size_t nsv); + uint8_t *dest, size_t destlen, uint8_t unused_random, const uint8_t *dcid, + size_t dcidlen, const uint8_t *scid, size_t scidlen, const uint32_t *sv, + size_t nsv); /** * @struct @@ -2770,8 +2796,8 @@ typedef int (*ngtcp2_stream_reset)(ngtcp2_conn *conn, int64_t stream_id, * call return immediately. */ typedef int (*ngtcp2_acked_stream_data_offset)( - ngtcp2_conn *conn, int64_t stream_id, uint64_t offset, uint64_t datalen, - void *user_data, void *stream_user_data); + ngtcp2_conn *conn, int64_t stream_id, uint64_t offset, uint64_t datalen, + void *user_data, void *stream_user_data); /** * @functypedef @@ -2893,11 +2919,11 @@ typedef int (*ngtcp2_remove_connection_id)(ngtcp2_conn *conn, * immediately. */ typedef int (*ngtcp2_update_key)( - ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, - ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_iv, - ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_iv, - const uint8_t *current_rx_secret, const uint8_t *current_tx_secret, - size_t secretlen, void *user_data); + ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, + ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_iv, + ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_iv, + const uint8_t *current_rx_secret, const uint8_t *current_tx_secret, + size_t secretlen, void *user_data); /** * @macrosection @@ -3024,8 +3050,8 @@ typedef enum ngtcp2_connection_id_status_type { * immediately. */ typedef int (*ngtcp2_connection_id_status)( - ngtcp2_conn *conn, ngtcp2_connection_id_status_type type, uint64_t seq, - const ngtcp2_cid *cid, const uint8_t *token, void *user_data); + ngtcp2_conn *conn, ngtcp2_connection_id_status_type type, uint64_t seq, + const ngtcp2_cid *cid, const uint8_t *token, void *user_data); /** * @functypedef @@ -3064,7 +3090,7 @@ typedef void (*ngtcp2_delete_crypto_aead_ctx)(ngtcp2_conn *conn, * `. */ typedef void (*ngtcp2_delete_crypto_cipher_ctx)( - ngtcp2_conn *conn, ngtcp2_crypto_cipher_ctx *cipher_ctx, void *user_data); + ngtcp2_conn *conn, ngtcp2_crypto_cipher_ctx *cipher_ctx, void *user_data); /** * @macrosection @@ -3510,12 +3536,12 @@ typedef struct ngtcp2_callbacks { * Callback function failed. */ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_pkt_write_connection_close( - uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid, - const ngtcp2_cid *scid, uint64_t error_code, const uint8_t *reason, - size_t reasonlen, ngtcp2_encrypt encrypt, const ngtcp2_crypto_aead *aead, - const ngtcp2_crypto_aead_ctx *aead_ctx, const uint8_t *iv, - ngtcp2_hp_mask hp_mask, const ngtcp2_crypto_cipher *hp, - const ngtcp2_crypto_cipher_ctx *hp_ctx); + uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid, + const ngtcp2_cid *scid, uint64_t error_code, const uint8_t *reason, + size_t reasonlen, ngtcp2_encrypt encrypt, const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx, const uint8_t *iv, + ngtcp2_hp_mask hp_mask, const ngtcp2_crypto_cipher *hp, + const ngtcp2_crypto_cipher_ctx *hp_ctx); /** * @function @@ -3542,10 +3568,10 @@ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_pkt_write_connection_close( * :macro:`NGTCP2_MIN_INITIAL_DCIDLEN`. */ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_pkt_write_retry( - uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid, - const ngtcp2_cid *scid, const ngtcp2_cid *odcid, const uint8_t *token, - size_t tokenlen, ngtcp2_encrypt encrypt, const ngtcp2_crypto_aead *aead, - const ngtcp2_crypto_aead_ctx *aead_ctx); + uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid, + const ngtcp2_cid *scid, const ngtcp2_cid *odcid, const uint8_t *token, + size_t tokenlen, ngtcp2_encrypt encrypt, const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx); /** * @function @@ -3594,12 +3620,12 @@ NGTCP2_EXTERN int ngtcp2_accept(ngtcp2_pkt_hd *dest, const uint8_t *pkt, * Out of memory. */ NGTCP2_EXTERN int ngtcp2_conn_client_new_versioned( - ngtcp2_conn **pconn, const ngtcp2_cid *dcid, const ngtcp2_cid *scid, - const ngtcp2_path *path, uint32_t client_chosen_version, - int callbacks_version, const ngtcp2_callbacks *callbacks, - int settings_version, const ngtcp2_settings *settings, - int transport_params_version, const ngtcp2_transport_params *params, - const ngtcp2_mem *mem, void *user_data); + ngtcp2_conn **pconn, const ngtcp2_cid *dcid, const ngtcp2_cid *scid, + const ngtcp2_path *path, uint32_t client_chosen_version, + int callbacks_version, const ngtcp2_callbacks *callbacks, + int settings_version, const ngtcp2_settings *settings, + int transport_params_version, const ngtcp2_transport_params *params, + const ngtcp2_mem *mem, void *user_data); /** * @function @@ -3629,12 +3655,12 @@ NGTCP2_EXTERN int ngtcp2_conn_client_new_versioned( * Out of memory. */ NGTCP2_EXTERN int ngtcp2_conn_server_new_versioned( - ngtcp2_conn **pconn, const ngtcp2_cid *dcid, const ngtcp2_cid *scid, - const ngtcp2_path *path, uint32_t client_chosen_version, - int callbacks_version, const ngtcp2_callbacks *callbacks, - int settings_version, const ngtcp2_settings *settings, - int transport_params_version, const ngtcp2_transport_params *params, - const ngtcp2_mem *mem, void *user_data); + ngtcp2_conn **pconn, const ngtcp2_cid *dcid, const ngtcp2_cid *scid, + const ngtcp2_path *path, uint32_t client_chosen_version, + int callbacks_version, const ngtcp2_callbacks *callbacks, + int settings_version, const ngtcp2_settings *settings, + int transport_params_version, const ngtcp2_transport_params *params, + const ngtcp2_mem *mem, void *user_data); /** * @function @@ -3698,8 +3724,8 @@ ngtcp2_conn_read_pkt_versioned(ngtcp2_conn *conn, const ngtcp2_path *path, * and :macro:`NGTCP2_WRITE_STREAM_FLAG_NONE` as flags. */ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_write_pkt_versioned( - ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, - ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, ngtcp2_tstamp ts); + ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, ngtcp2_tstamp ts); /** * @function @@ -3755,10 +3781,10 @@ NGTCP2_EXTERN int ngtcp2_conn_get_handshake_completed(ngtcp2_conn *conn); * Out of memory. */ NGTCP2_EXTERN int ngtcp2_conn_install_initial_key( - ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *rx_aead_ctx, - const uint8_t *rx_iv, const ngtcp2_crypto_cipher_ctx *rx_hp_ctx, - const ngtcp2_crypto_aead_ctx *tx_aead_ctx, const uint8_t *tx_iv, - const ngtcp2_crypto_cipher_ctx *tx_hp_ctx, size_t ivlen); + ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *rx_aead_ctx, + const uint8_t *rx_iv, const ngtcp2_crypto_cipher_ctx *rx_hp_ctx, + const ngtcp2_crypto_aead_ctx *tx_aead_ctx, const uint8_t *tx_iv, + const ngtcp2_crypto_cipher_ctx *tx_hp_ctx, size_t ivlen); /** * @function @@ -3790,11 +3816,11 @@ NGTCP2_EXTERN int ngtcp2_conn_install_initial_key( * Out of memory. */ NGTCP2_EXTERN int ngtcp2_conn_install_vneg_initial_key( - ngtcp2_conn *conn, uint32_t version, - const ngtcp2_crypto_aead_ctx *rx_aead_ctx, const uint8_t *rx_iv, - const ngtcp2_crypto_cipher_ctx *rx_hp_ctx, - const ngtcp2_crypto_aead_ctx *tx_aead_ctx, const uint8_t *tx_iv, - const ngtcp2_crypto_cipher_ctx *tx_hp_ctx, size_t ivlen); + ngtcp2_conn *conn, uint32_t version, + const ngtcp2_crypto_aead_ctx *rx_aead_ctx, const uint8_t *rx_iv, + const ngtcp2_crypto_cipher_ctx *rx_hp_ctx, + const ngtcp2_crypto_aead_ctx *tx_aead_ctx, const uint8_t *tx_iv, + const ngtcp2_crypto_cipher_ctx *tx_hp_ctx, size_t ivlen); /** * @function @@ -3821,8 +3847,8 @@ NGTCP2_EXTERN int ngtcp2_conn_install_vneg_initial_key( * Out of memory. */ NGTCP2_EXTERN int ngtcp2_conn_install_rx_handshake_key( - ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *aead_ctx, - const uint8_t *iv, size_t ivlen, const ngtcp2_crypto_cipher_ctx *hp_ctx); + ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *aead_ctx, const uint8_t *iv, + size_t ivlen, const ngtcp2_crypto_cipher_ctx *hp_ctx); /** * @function @@ -3849,8 +3875,8 @@ NGTCP2_EXTERN int ngtcp2_conn_install_rx_handshake_key( * Out of memory. */ NGTCP2_EXTERN int ngtcp2_conn_install_tx_handshake_key( - ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *aead_ctx, - const uint8_t *iv, size_t ivlen, const ngtcp2_crypto_cipher_ctx *hp_ctx); + ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *aead_ctx, const uint8_t *iv, + size_t ivlen, const ngtcp2_crypto_cipher_ctx *hp_ctx); /** * @function @@ -3876,8 +3902,8 @@ NGTCP2_EXTERN int ngtcp2_conn_install_tx_handshake_key( * Out of memory. */ NGTCP2_EXTERN int ngtcp2_conn_install_0rtt_key( - ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *aead_ctx, - const uint8_t *iv, size_t ivlen, const ngtcp2_crypto_cipher_ctx *hp_ctx); + ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *aead_ctx, const uint8_t *iv, + size_t ivlen, const ngtcp2_crypto_cipher_ctx *hp_ctx); /** * @function @@ -3906,9 +3932,9 @@ NGTCP2_EXTERN int ngtcp2_conn_install_0rtt_key( * Out of memory. */ NGTCP2_EXTERN int ngtcp2_conn_install_rx_key( - ngtcp2_conn *conn, const uint8_t *secret, size_t secretlen, - const ngtcp2_crypto_aead_ctx *aead_ctx, const uint8_t *iv, size_t ivlen, - const ngtcp2_crypto_cipher_ctx *hp_ctx); + ngtcp2_conn *conn, const uint8_t *secret, size_t secretlen, + const ngtcp2_crypto_aead_ctx *aead_ctx, const uint8_t *iv, size_t ivlen, + const ngtcp2_crypto_cipher_ctx *hp_ctx); /** * @function @@ -3937,9 +3963,9 @@ NGTCP2_EXTERN int ngtcp2_conn_install_rx_key( * Out of memory. */ NGTCP2_EXTERN int ngtcp2_conn_install_tx_key( - ngtcp2_conn *conn, const uint8_t *secret, size_t secretlen, - const ngtcp2_crypto_aead_ctx *aead_ctx, const uint8_t *iv, size_t ivlen, - const ngtcp2_crypto_cipher_ctx *hp_ctx); + ngtcp2_conn *conn, const uint8_t *secret, size_t secretlen, + const ngtcp2_crypto_aead_ctx *aead_ctx, const uint8_t *iv, size_t ivlen, + const ngtcp2_crypto_cipher_ctx *hp_ctx); /** * @function @@ -4063,7 +4089,7 @@ NGTCP2_EXTERN ngtcp2_duration ngtcp2_conn_get_pto(ngtcp2_conn *conn); * User callback failed */ NGTCP2_EXTERN int ngtcp2_conn_decode_and_set_remote_transport_params( - ngtcp2_conn *conn, const uint8_t *data, size_t datalen); + ngtcp2_conn *conn, const uint8_t *data, size_t datalen); /** * @function @@ -4148,7 +4174,7 @@ ngtcp2_ssize ngtcp2_conn_encode_0rtt_transport_params(ngtcp2_conn *conn, * The input is malformed. */ NGTCP2_EXTERN int ngtcp2_conn_decode_and_set_0rtt_transport_params( - ngtcp2_conn *conn, const uint8_t *data, size_t datalen); + ngtcp2_conn *conn, const uint8_t *data, size_t datalen); /** * @function @@ -4168,8 +4194,8 @@ NGTCP2_EXTERN int ngtcp2_conn_decode_and_set_0rtt_transport_params( * `ngtcp2_conn_install_tx_handshake_key` has been called. */ NGTCP2_EXTERN int ngtcp2_conn_set_local_transport_params_versioned( - ngtcp2_conn *conn, int transport_params_version, - const ngtcp2_transport_params *params); + ngtcp2_conn *conn, int transport_params_version, + const ngtcp2_transport_params *params); /** * @function @@ -4193,7 +4219,7 @@ ngtcp2_conn_get_local_transport_params(ngtcp2_conn *conn); * Buffer is too small. */ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_encode_local_transport_params( - ngtcp2_conn *conn, uint8_t *dest, size_t destlen); + ngtcp2_conn *conn, uint8_t *dest, size_t destlen); /** * @function @@ -4374,10 +4400,10 @@ NGTCP2_EXTERN int ngtcp2_conn_shutdown_stream_read(ngtcp2_conn *conn, * conveniently accepts a single buffer. */ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_write_stream_versioned( - ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, - ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, ngtcp2_ssize *pdatalen, - uint32_t flags, int64_t stream_id, const uint8_t *data, size_t datalen, - ngtcp2_tstamp ts); + ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, ngtcp2_ssize *pdatalen, + uint32_t flags, int64_t stream_id, const uint8_t *data, size_t datalen, + ngtcp2_tstamp ts); /** * @function @@ -4388,7 +4414,8 @@ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_write_stream_versioned( * handshake as well. * * |destlen| should be at least - * :member:`ngtcp2_settings.max_tx_udp_payload_size`. + * :member:`ngtcp2_settings.max_tx_udp_payload_size`. It must be at + * least :macro:`NGTCP2_MAX_UDP_PAYLOAD_SIZE`. * * Specifying -1 to |stream_id| means no new stream data to send. * @@ -4418,8 +4445,10 @@ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_write_stream_versioned( * In that case, |*pdatalen| would be -1 if |pdatalen| is not * ``NULL``. * - * If |flags| & :macro:`NGTCP2_WRITE_STREAM_FLAG_FIN` is nonzero, and - * 0 length STREAM frame is successfully serialized, |*pdatalen| would + * Empty data is treated specially, and it is only accepted if no + * data, including the empty data, is submitted to a stream or + * :macro:`NGTCP2_WRITE_STREAM_FLAG_FIN` is set in |flags|. If 0 + * length STREAM frame is successfully serialized, |*pdatalen| would * be 0. * * The number of data encoded in STREAM frame is stored in |*pdatalen| @@ -4525,10 +4554,10 @@ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_write_stream_versioned( * sending it makes QUIC connection enter the closing state. */ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_writev_stream_versioned( - ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, - ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, ngtcp2_ssize *pdatalen, - uint32_t flags, int64_t stream_id, const ngtcp2_vec *datav, size_t datavcnt, - ngtcp2_tstamp ts); + ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, ngtcp2_ssize *pdatalen, + uint32_t flags, int64_t stream_id, const ngtcp2_vec *datav, size_t datavcnt, + ngtcp2_tstamp ts); /** * @macrosection @@ -4559,10 +4588,10 @@ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_writev_stream_versioned( * conveniently accepts a single buffer. */ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_write_datagram_versioned( - ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, - ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, int *paccepted, - uint32_t flags, uint64_t dgram_id, const uint8_t *data, size_t datalen, - ngtcp2_tstamp ts); + ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, int *paccepted, + uint32_t flags, uint64_t dgram_id, const uint8_t *data, size_t datalen, + ngtcp2_tstamp ts); /** * @function @@ -4573,7 +4602,8 @@ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_write_datagram_versioned( * as well. * * |destlen| should be at least - * :member:`ngtcp2_settings.max_tx_udp_payload_size`. + * :member:`ngtcp2_settings.max_tx_udp_payload_size`. It must be at + * least :macro:`NGTCP2_MAX_UDP_PAYLOAD_SIZE`. * * For |path| and |pi| parameters, refer to * `ngtcp2_conn_writev_stream`. @@ -4655,10 +4685,10 @@ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_write_datagram_versioned( * sending it makes QUIC connection enter the closing state. */ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_writev_datagram_versioned( - ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, - ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, int *paccepted, - uint32_t flags, uint64_t dgram_id, const ngtcp2_vec *datav, size_t datavcnt, - ngtcp2_tstamp ts); + ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, int *paccepted, + uint32_t flags, uint64_t dgram_id, const ngtcp2_vec *datav, size_t datavcnt, + ngtcp2_tstamp ts); /** * @function @@ -4992,7 +5022,7 @@ ngtcp2_conn_get_path_max_tx_udp_payload_size(ngtcp2_conn *conn); * Out of memory */ NGTCP2_EXTERN int ngtcp2_conn_initiate_immediate_migration( - ngtcp2_conn *conn, const ngtcp2_path *path, ngtcp2_tstamp ts); + ngtcp2_conn *conn, const ngtcp2_path *path, ngtcp2_tstamp ts); /** * @function @@ -5193,7 +5223,19 @@ typedef enum ngtcp2_ccerr_type { * transport error, and it indicates that connection is closed * because of idle timeout. */ - NGTCP2_CCERR_TYPE_IDLE_CLOSE + NGTCP2_CCERR_TYPE_IDLE_CLOSE, + /** + * :enum:`NGTCP2_CCERR_TYPE_DROP_CONN` is a special case of QUIC + * transport error, and it indicates that connection should be + * dropped without sending a CONNECTION_CLOSE frame. + */ + NGTCP2_CCERR_TYPE_DROP_CONN, + /** + * :enum:`NGTCP2_CCERR_TYPE_RETRY` is a special case of QUIC + * transport error, and it indicates that RETRY packet should be + * sent to a client. + */ + NGTCP2_CCERR_TYPE_RETRY } ngtcp2_ccerr_type; /** @@ -5284,6 +5326,18 @@ NGTCP2_EXTERN void ngtcp2_ccerr_set_transport_error(ngtcp2_ccerr *ccerr, * :member:`ccerr->error_code ` to * :macro:`NGTCP2_NO_ERROR`. * + * If |liberr| is :macro:`NGTCP2_ERR_DROP_CONN`, :member:`ccerr->type + * ` is set to + * :enum:`ngtcp2_ccerr_type.NGTCP2_CCERR_TYPE_DROP_CONN`, and + * :member:`ccerr->error_code ` to + * :macro:`NGTCP2_NO_ERROR`. + * + * If |liberr| is :macro:`NGTCP2_ERR_RETRY`, :member:`ccerr->type + * ` is set to + * :enum:`ngtcp2_ccerr_type.NGTCP2_CCERR_TYPE_RETRY`, and + * :member:`ccerr->error_type ` to + * :macro:`NGTCP2_NO_ERROR`. + * * Otherwise, :member:`ccerr->type ` is set to * :enum:`ngtcp2_ccerr_type.NGTCP2_CCERR_TYPE_TRANSPORT`, and * :member:`ccerr->error_code ` is set to an @@ -5380,9 +5434,9 @@ NGTCP2_EXTERN void ngtcp2_ccerr_set_application_error(ngtcp2_ccerr *ccerr, * User callback failed */ NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_write_connection_close_versioned( - ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, - ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, - const ngtcp2_ccerr *ccerr, ngtcp2_tstamp ts); + ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, const ngtcp2_ccerr *ccerr, + ngtcp2_tstamp ts); /** * @function @@ -5734,8 +5788,8 @@ NGTCP2_EXTERN uint32_t ngtcp2_select_version(const uint32_t *preferred_versions, #define ngtcp2_conn_write_stream(CONN, PATH, PI, DEST, DESTLEN, PDATALEN, \ FLAGS, STREAM_ID, DATA, DATALEN, TS) \ ngtcp2_conn_write_stream_versioned( \ - (CONN), (PATH), NGTCP2_PKT_INFO_VERSION, (PI), (DEST), (DESTLEN), \ - (PDATALEN), (FLAGS), (STREAM_ID), (DATA), (DATALEN), (TS)) + (CONN), (PATH), NGTCP2_PKT_INFO_VERSION, (PI), (DEST), (DESTLEN), \ + (PDATALEN), (FLAGS), (STREAM_ID), (DATA), (DATALEN), (TS)) /* * `ngtcp2_conn_writev_stream` is a wrapper around @@ -5745,8 +5799,8 @@ NGTCP2_EXTERN uint32_t ngtcp2_select_version(const uint32_t *preferred_versions, #define ngtcp2_conn_writev_stream(CONN, PATH, PI, DEST, DESTLEN, PDATALEN, \ FLAGS, STREAM_ID, DATAV, DATAVCNT, TS) \ ngtcp2_conn_writev_stream_versioned( \ - (CONN), (PATH), NGTCP2_PKT_INFO_VERSION, (PI), (DEST), (DESTLEN), \ - (PDATALEN), (FLAGS), (STREAM_ID), (DATAV), (DATAVCNT), (TS)) + (CONN), (PATH), NGTCP2_PKT_INFO_VERSION, (PI), (DEST), (DESTLEN), \ + (PDATALEN), (FLAGS), (STREAM_ID), (DATAV), (DATAVCNT), (TS)) /* * `ngtcp2_conn_write_datagram` is a wrapper around @@ -5756,8 +5810,8 @@ NGTCP2_EXTERN uint32_t ngtcp2_select_version(const uint32_t *preferred_versions, #define ngtcp2_conn_write_datagram(CONN, PATH, PI, DEST, DESTLEN, PACCEPTED, \ FLAGS, DGRAM_ID, DATA, DATALEN, TS) \ ngtcp2_conn_write_datagram_versioned( \ - (CONN), (PATH), NGTCP2_PKT_INFO_VERSION, (PI), (DEST), (DESTLEN), \ - (PACCEPTED), (FLAGS), (DGRAM_ID), (DATA), (DATALEN), (TS)) + (CONN), (PATH), NGTCP2_PKT_INFO_VERSION, (PI), (DEST), (DESTLEN), \ + (PACCEPTED), (FLAGS), (DGRAM_ID), (DATA), (DATALEN), (TS)) /* * `ngtcp2_conn_writev_datagram` is a wrapper around @@ -5767,8 +5821,8 @@ NGTCP2_EXTERN uint32_t ngtcp2_select_version(const uint32_t *preferred_versions, #define ngtcp2_conn_writev_datagram(CONN, PATH, PI, DEST, DESTLEN, PACCEPTED, \ FLAGS, DGRAM_ID, DATAV, DATAVCNT, TS) \ ngtcp2_conn_writev_datagram_versioned( \ - (CONN), (PATH), NGTCP2_PKT_INFO_VERSION, (PI), (DEST), (DESTLEN), \ - (PACCEPTED), (FLAGS), (DGRAM_ID), (DATAV), (DATAVCNT), (TS)) + (CONN), (PATH), NGTCP2_PKT_INFO_VERSION, (PI), (DEST), (DESTLEN), \ + (PACCEPTED), (FLAGS), (DGRAM_ID), (DATAV), (DATAVCNT), (TS)) /* * `ngtcp2_conn_write_connection_close` is a wrapper around @@ -5778,8 +5832,8 @@ NGTCP2_EXTERN uint32_t ngtcp2_select_version(const uint32_t *preferred_versions, #define ngtcp2_conn_write_connection_close(CONN, PATH, PI, DEST, DESTLEN, \ CCERR, TS) \ ngtcp2_conn_write_connection_close_versioned( \ - (CONN), (PATH), NGTCP2_PKT_INFO_VERSION, (PI), (DEST), (DESTLEN), \ - (CCERR), (TS)) + (CONN), (PATH), NGTCP2_PKT_INFO_VERSION, (PI), (DEST), (DESTLEN), (CCERR), \ + (TS)) /* * `ngtcp2_transport_params_encode` is a wrapper around @@ -5788,7 +5842,7 @@ NGTCP2_EXTERN uint32_t ngtcp2_select_version(const uint32_t *preferred_versions, */ #define ngtcp2_transport_params_encode(DEST, DESTLEN, PARAMS) \ ngtcp2_transport_params_encode_versioned( \ - (DEST), (DESTLEN), NGTCP2_TRANSPORT_PARAMS_VERSION, (PARAMS)) + (DEST), (DESTLEN), NGTCP2_TRANSPORT_PARAMS_VERSION, (PARAMS)) /* * `ngtcp2_transport_params_decode` is a wrapper around @@ -5807,9 +5861,9 @@ NGTCP2_EXTERN uint32_t ngtcp2_select_version(const uint32_t *preferred_versions, #define ngtcp2_conn_client_new(PCONN, DCID, SCID, PATH, VERSION, CALLBACKS, \ SETTINGS, PARAMS, MEM, USER_DATA) \ ngtcp2_conn_client_new_versioned( \ - (PCONN), (DCID), (SCID), (PATH), (VERSION), NGTCP2_CALLBACKS_VERSION, \ - (CALLBACKS), NGTCP2_SETTINGS_VERSION, (SETTINGS), \ - NGTCP2_TRANSPORT_PARAMS_VERSION, (PARAMS), (MEM), (USER_DATA)) + (PCONN), (DCID), (SCID), (PATH), (VERSION), NGTCP2_CALLBACKS_VERSION, \ + (CALLBACKS), NGTCP2_SETTINGS_VERSION, (SETTINGS), \ + NGTCP2_TRANSPORT_PARAMS_VERSION, (PARAMS), (MEM), (USER_DATA)) /* * `ngtcp2_conn_server_new` is a wrapper around @@ -5819,9 +5873,9 @@ NGTCP2_EXTERN uint32_t ngtcp2_select_version(const uint32_t *preferred_versions, #define ngtcp2_conn_server_new(PCONN, DCID, SCID, PATH, VERSION, CALLBACKS, \ SETTINGS, PARAMS, MEM, USER_DATA) \ ngtcp2_conn_server_new_versioned( \ - (PCONN), (DCID), (SCID), (PATH), (VERSION), NGTCP2_CALLBACKS_VERSION, \ - (CALLBACKS), NGTCP2_SETTINGS_VERSION, (SETTINGS), \ - NGTCP2_TRANSPORT_PARAMS_VERSION, (PARAMS), (MEM), (USER_DATA)) + (PCONN), (DCID), (SCID), (PATH), (VERSION), NGTCP2_CALLBACKS_VERSION, \ + (CALLBACKS), NGTCP2_SETTINGS_VERSION, (SETTINGS), \ + NGTCP2_TRANSPORT_PARAMS_VERSION, (PARAMS), (MEM), (USER_DATA)) /* * `ngtcp2_conn_set_local_transport_params` is a wrapper around @@ -5830,7 +5884,7 @@ NGTCP2_EXTERN uint32_t ngtcp2_select_version(const uint32_t *preferred_versions, */ #define ngtcp2_conn_set_local_transport_params(CONN, PARAMS) \ ngtcp2_conn_set_local_transport_params_versioned( \ - (CONN), NGTCP2_TRANSPORT_PARAMS_VERSION, (PARAMS)) + (CONN), NGTCP2_TRANSPORT_PARAMS_VERSION, (PARAMS)) /* * `ngtcp2_transport_params_default` is a wrapper around @@ -5859,10 +5913,10 @@ NGTCP2_EXTERN uint32_t ngtcp2_select_version(const uint32_t *preferred_versions, #ifdef _MSC_VER # pragma warning(pop) -#endif +#endif /* defined(_MSC_VER) */ #ifdef __cplusplus } -#endif +#endif /* defined(__cplusplus) */ -#endif /* NGTCP2_H */ +#endif /* !defined(NGTCP2_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/includes/ngtcp2/version.h b/deps/ngtcp2/ngtcp2/lib/includes/ngtcp2/version.h index 801c6cb2681386..e9e7f9bf86e742 100644 --- a/deps/ngtcp2/ngtcp2/lib/includes/ngtcp2/version.h +++ b/deps/ngtcp2/ngtcp2/lib/includes/ngtcp2/version.h @@ -22,8 +22,8 @@ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef VERSION_H -#define VERSION_H +#ifndef NGTCP2_VERSION_H +#define NGTCP2_VERSION_H /** * @macrosection @@ -36,7 +36,7 @@ * * Version number of the ngtcp2 library release. */ -#define NGTCP2_VERSION "1.3.0" +#define NGTCP2_VERSION "1.9.1" /** * @macro @@ -46,6 +46,6 @@ * number, 8 bits for minor and 8 bits for patch. Version 1.2.3 * becomes 0x010203. */ -#define NGTCP2_VERSION_NUM 0x010300 +#define NGTCP2_VERSION_NUM 0x010901 -#endif /* VERSION_H */ +#endif /* !defined(NGTCP2_VERSION_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_acktr.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_acktr.c index d4778d66accf31..0e5bfe069e9fab 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_acktr.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_acktr.c @@ -29,7 +29,7 @@ #include "ngtcp2_macro.h" #include "ngtcp2_tstamp.h" -ngtcp2_objalloc_def(acktr_entry, ngtcp2_acktr_entry, oplent); +ngtcp2_objalloc_def(acktr_entry, ngtcp2_acktr_entry, oplent) static void acktr_entry_init(ngtcp2_acktr_entry *ent, int64_t pkt_num, ngtcp2_tstamp tstamp) { @@ -56,41 +56,26 @@ void ngtcp2_acktr_entry_objalloc_del(ngtcp2_acktr_entry *ent, ngtcp2_objalloc_acktr_entry_release(objalloc, ent); } -static int greater(const ngtcp2_ksl_key *lhs, const ngtcp2_ksl_key *rhs) { - return *(int64_t *)lhs > *(int64_t *)rhs; -} - -int ngtcp2_acktr_init(ngtcp2_acktr *acktr, ngtcp2_log *log, - const ngtcp2_mem *mem) { - int rv; +void ngtcp2_acktr_init(ngtcp2_acktr *acktr, ngtcp2_log *log, + const ngtcp2_mem *mem) { + ngtcp2_objalloc_acktr_entry_init(&acktr->objalloc, NGTCP2_ACKTR_MAX_ENT + 1, + mem); - ngtcp2_objalloc_acktr_entry_init(&acktr->objalloc, 32, mem); + ngtcp2_static_ringbuf_acks_init(&acktr->acks); - rv = ngtcp2_ringbuf_init(&acktr->acks, 32, sizeof(ngtcp2_acktr_ack_entry), - mem); - if (rv != 0) { - goto fail_acks_init; - } - - ngtcp2_ksl_init(&acktr->ents, greater, sizeof(int64_t), mem); + ngtcp2_ksl_init(&acktr->ents, ngtcp2_ksl_int64_greater, + ngtcp2_ksl_int64_greater_search, sizeof(int64_t), mem); acktr->log = log; - acktr->mem = mem; acktr->flags = NGTCP2_ACKTR_FLAG_NONE; acktr->first_unacked_ts = UINT64_MAX; acktr->rx_npkt = 0; - - return 0; - -fail_acks_init: - ngtcp2_objalloc_free(&acktr->objalloc); - return rv; } void ngtcp2_acktr_free(ngtcp2_acktr *acktr) { #ifdef NOMEMPOOL ngtcp2_ksl_it it; -#endif /* NOMEMPOOL */ +#endif /* defined(NOMEMPOOL) */ if (acktr == NULL) { return; @@ -101,12 +86,10 @@ void ngtcp2_acktr_free(ngtcp2_acktr *acktr) { ngtcp2_ksl_it_next(&it)) { ngtcp2_acktr_entry_objalloc_del(ngtcp2_ksl_it_get(&it), &acktr->objalloc); } -#endif /* NOMEMPOOL */ +#endif /* defined(NOMEMPOOL) */ ngtcp2_ksl_free(&acktr->ents); - ngtcp2_ringbuf_free(&acktr->acks); - ngtcp2_objalloc_free(&acktr->objalloc); } @@ -213,11 +196,11 @@ void ngtcp2_acktr_forget(ngtcp2_acktr *acktr, ngtcp2_acktr_entry *ent) { } } -ngtcp2_ksl_it ngtcp2_acktr_get(ngtcp2_acktr *acktr) { +ngtcp2_ksl_it ngtcp2_acktr_get(const ngtcp2_acktr *acktr) { return ngtcp2_ksl_begin(&acktr->ents); } -int ngtcp2_acktr_empty(ngtcp2_acktr *acktr) { +int ngtcp2_acktr_empty(const ngtcp2_acktr *acktr) { ngtcp2_ksl_it it = ngtcp2_ksl_begin(&acktr->ents); return ngtcp2_ksl_it_end(&it); } @@ -225,7 +208,7 @@ int ngtcp2_acktr_empty(ngtcp2_acktr *acktr) { ngtcp2_acktr_ack_entry *ngtcp2_acktr_add_ack(ngtcp2_acktr *acktr, int64_t pkt_num, int64_t largest_ack) { - ngtcp2_acktr_ack_entry *ent = ngtcp2_ringbuf_push_front(&acktr->acks); + ngtcp2_acktr_ack_entry *ent = ngtcp2_ringbuf_push_front(&acktr->acks.rb); ent->largest_ack = largest_ack; ent->pkt_num = pkt_num; @@ -266,8 +249,10 @@ static void acktr_on_ack(ngtcp2_acktr *acktr, ngtcp2_ringbuf *rb, ngtcp2_ksl_it_prev(&it); ent = ngtcp2_ksl_it_get(&it); - if (ent->pkt_num > ack_ent->largest_ack && - ack_ent->largest_ack >= ent->pkt_num - (int64_t)(ent->len - 1)) { + + assert(ent->pkt_num > ack_ent->largest_ack); + + if (ack_ent->largest_ack + (int64_t)ent->len > ent->pkt_num) { ent->len = (size_t)(ent->pkt_num - ack_ent->largest_ack); } } @@ -279,7 +264,7 @@ void ngtcp2_acktr_recv_ack(ngtcp2_acktr *acktr, const ngtcp2_ack *fr) { ngtcp2_acktr_ack_entry *ent; int64_t largest_ack = fr->largest_ack, min_ack; size_t i, j; - ngtcp2_ringbuf *rb = &acktr->acks; + ngtcp2_ringbuf *rb = &acktr->acks.rb; size_t nacks = ngtcp2_ringbuf_len(rb); /* Assume that ngtcp2_pkt_validate_ack(fr) returns 0 */ @@ -295,7 +280,7 @@ void ngtcp2_acktr_recv_ack(ngtcp2_acktr *acktr, const ngtcp2_ack *fr) { min_ack = largest_ack - (int64_t)fr->first_ack_range; - if (min_ack <= ent->pkt_num && ent->pkt_num <= largest_ack) { + if (min_ack <= ent->pkt_num) { acktr_on_ack(acktr, rb, j); return; } @@ -306,8 +291,7 @@ void ngtcp2_acktr_recv_ack(ngtcp2_acktr *acktr, const ngtcp2_ack *fr) { for (;;) { if (ent->pkt_num > largest_ack) { - ++j; - if (j == nacks) { + if (++j == nacks) { return; } ent = ngtcp2_ringbuf_get(rb, j); @@ -330,7 +314,7 @@ void ngtcp2_acktr_commit_ack(ngtcp2_acktr *acktr) { acktr->rx_npkt = 0; } -int ngtcp2_acktr_require_active_ack(ngtcp2_acktr *acktr, +int ngtcp2_acktr_require_active_ack(const ngtcp2_acktr *acktr, ngtcp2_duration max_ack_delay, ngtcp2_tstamp ts) { return ngtcp2_tstamp_elapsed(acktr->first_unacked_ts, max_ack_delay, ts); diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_acktr.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_acktr.h index 809fb692adc3c8..16aee42f275f67 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_acktr.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_acktr.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -39,7 +39,7 @@ /* NGTCP2_ACKTR_MAX_ENT is the maximum number of ngtcp2_acktr_entry which ngtcp2_acktr stores. */ -#define NGTCP2_ACKTR_MAX_ENT 1024 +#define NGTCP2_ACKTR_MAX_ENT (NGTCP2_MAX_ACK_RANGES + 1) typedef struct ngtcp2_log ngtcp2_log; @@ -65,7 +65,7 @@ typedef struct ngtcp2_acktr_entry { }; } ngtcp2_acktr_entry; -ngtcp2_objalloc_decl(acktr_entry, ngtcp2_acktr_entry, oplent); +ngtcp2_objalloc_decl(acktr_entry, ngtcp2_acktr_entry, oplent) /* * ngtcp2_acktr_entry_objalloc_new allocates memory for ent, and @@ -108,17 +108,18 @@ typedef struct ngtcp2_acktr_ack_entry { expired and canceled. */ #define NGTCP2_ACKTR_FLAG_CANCEL_TIMER 0x0100u +ngtcp2_static_ringbuf_def(acks, 32, sizeof(ngtcp2_acktr_ack_entry)) + /* * ngtcp2_acktr tracks received packets which we have to send ack. */ typedef struct ngtcp2_acktr { ngtcp2_objalloc objalloc; - ngtcp2_ringbuf acks; + ngtcp2_static_ringbuf_acks acks; /* ents includes ngtcp2_acktr_entry sorted by decreasing order of packet number. */ ngtcp2_ksl ents; ngtcp2_log *log; - const ngtcp2_mem *mem; /* flags is bitwise OR of zero, or more of NGTCP2_ACKTR_FLAG_*. */ uint16_t flags; /* first_unacked_ts is timestamp when ngtcp2_acktr_entry is added @@ -131,15 +132,9 @@ typedef struct ngtcp2_acktr { /* * ngtcp2_acktr_init initializes |acktr|. - * - * This function returns 0 if it succeeds, or one of the following - * negative error codes: - * - * NGTCP2_ERR_NOMEM - * Out of memory. */ -int ngtcp2_acktr_init(ngtcp2_acktr *acktr, ngtcp2_log *log, - const ngtcp2_mem *mem); +void ngtcp2_acktr_init(ngtcp2_acktr *acktr, ngtcp2_log *log, + const ngtcp2_mem *mem); /* * ngtcp2_acktr_free frees resources allocated for |acktr|. It frees @@ -174,13 +169,13 @@ void ngtcp2_acktr_forget(ngtcp2_acktr *acktr, ngtcp2_acktr_entry *ent); * has the largest packet number to be acked. If there is no entry, * returned value satisfies ngtcp2_ksl_it_end(&it) != 0. */ -ngtcp2_ksl_it ngtcp2_acktr_get(ngtcp2_acktr *acktr); +ngtcp2_ksl_it ngtcp2_acktr_get(const ngtcp2_acktr *acktr); /* * ngtcp2_acktr_empty returns nonzero if it has no packet to * acknowledge. */ -int ngtcp2_acktr_empty(ngtcp2_acktr *acktr); +int ngtcp2_acktr_empty(const ngtcp2_acktr *acktr); /* * ngtcp2_acktr_add_ack records outgoing ACK frame whose largest @@ -208,7 +203,7 @@ void ngtcp2_acktr_commit_ack(ngtcp2_acktr *acktr); * ngtcp2_acktr_require_active_ack returns nonzero if ACK frame should * be generated actively. */ -int ngtcp2_acktr_require_active_ack(ngtcp2_acktr *acktr, +int ngtcp2_acktr_require_active_ack(const ngtcp2_acktr *acktr, ngtcp2_duration max_ack_delay, ngtcp2_tstamp ts); @@ -218,4 +213,4 @@ int ngtcp2_acktr_require_active_ack(ngtcp2_acktr *acktr, */ void ngtcp2_acktr_immediate_ack(ngtcp2_acktr *acktr); -#endif /* NGTCP2_ACKTR_H */ +#endif /* !defined(NGTCP2_ACKTR_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_addr.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_addr.h index 8e3a9f591d9977..65ee7cd9f3006b 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_addr.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_addr.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -68,4 +68,4 @@ uint32_t ngtcp2_addr_compare(const ngtcp2_addr *a, const ngtcp2_addr *b); */ int ngtcp2_addr_empty(const ngtcp2_addr *addr); -#endif /* NGTCP2_ADDR_H */ +#endif /* !defined(NGTCP2_ADDR_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_balloc.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_balloc.c index 5cc39ee3b03da4..4a6797689fcc01 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_balloc.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_balloc.c @@ -66,7 +66,7 @@ int ngtcp2_balloc_get(ngtcp2_balloc *balloc, void **pbuf, size_t n) { if (ngtcp2_buf_left(&balloc->buf) < n) { p = ngtcp2_mem_malloc(balloc->mem, - sizeof(ngtcp2_memblock_hd) + 0x10u + balloc->blklen); + sizeof(ngtcp2_memblock_hd) + 0x8u + balloc->blklen); if (p == NULL) { return NGTCP2_ERR_NOMEM; } @@ -75,10 +75,10 @@ int ngtcp2_balloc_get(ngtcp2_balloc *balloc, void **pbuf, size_t n) { hd->next = balloc->head; balloc->head = hd; ngtcp2_buf_init( - &balloc->buf, - (uint8_t *)(((uintptr_t)p + sizeof(ngtcp2_memblock_hd) + 0xfu) & - ~(uintptr_t)0xfu), - balloc->blklen); + &balloc->buf, + (uint8_t *)(((uintptr_t)p + sizeof(ngtcp2_memblock_hd) + 0xfu) & + ~(uintptr_t)0xfu), + balloc->blklen); } assert(((uintptr_t)balloc->buf.last & 0xfu) == 0); diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_balloc.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_balloc.h index 1fb16325d9953d..c0e2a3f7562968 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_balloc.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_balloc.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -39,7 +39,10 @@ typedef struct ngtcp2_memblock_hd ngtcp2_memblock_hd; * ngtcp2_memblock_hd is the header of memory block. */ struct ngtcp2_memblock_hd { - ngtcp2_memblock_hd *next; + union { + ngtcp2_memblock_hd *next; + uint64_t pad; + }; }; /* @@ -60,7 +63,7 @@ typedef struct ngtcp2_balloc { /* * ngtcp2_balloc_init initializes |balloc| with |blklen| which is the - * size of memory block. + * size of memory block. |blklen| must be divisible by 16. */ void ngtcp2_balloc_init(ngtcp2_balloc *balloc, size_t blklen, const ngtcp2_mem *mem); @@ -88,4 +91,4 @@ int ngtcp2_balloc_get(ngtcp2_balloc *balloc, void **pbuf, size_t n); */ void ngtcp2_balloc_clear(ngtcp2_balloc *balloc); -#endif /* NGTCP2_BALLOC_H */ +#endif /* !defined(NGTCP2_BALLOC_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_bbr.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_bbr.c index 27c4667c03924a..8777ca4c8a1632 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_bbr.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_bbr.c @@ -39,8 +39,9 @@ #define NGTCP2_BBR_EXTRA_ACKED_FILTERLEN 10 #define NGTCP2_BBR_STARTUP_PACING_GAIN_H 277 +#define NGTCP2_BBR_DRAIN_PACING_GAIN_H 35 -#define NGTCP2_BBR_STARTUP_CWND_GAIN_H 200 +#define NGTCP2_BBR_DEFAULT_CWND_GAIN_H 200 #define NGTCP2_BBR_PROBE_RTT_CWND_GAIN_H 50 @@ -72,7 +73,7 @@ static void bbr_reset_lower_bounds(ngtcp2_cc_bbr *bbr); static void bbr_init_round_counting(ngtcp2_cc_bbr *bbr); -static void bbr_init_full_pipe(ngtcp2_cc_bbr *bbr); +static void bbr_reset_full_bw(ngtcp2_cc_bbr *bbr); static void bbr_init_pacing_rate(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat); @@ -84,8 +85,7 @@ static void bbr_set_pacing_rate(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat); static void bbr_enter_startup(ngtcp2_cc_bbr *bbr); -static void bbr_check_startup_done(ngtcp2_cc_bbr *bbr, - const ngtcp2_cc_ack *ack); +static void bbr_check_startup_done(ngtcp2_cc_bbr *bbr); static void bbr_update_on_ack(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts); @@ -148,16 +148,17 @@ static void bbr_start_probe_bw_cruise(ngtcp2_cc_bbr *bbr); static void bbr_start_probe_bw_refill(ngtcp2_cc_bbr *bbr); -static void bbr_start_probe_bw_up(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, - ngtcp2_tstamp ts); +static void bbr_start_probe_bw_up(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat); static void bbr_update_probe_bw_cycle_phase(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts); -static int bbr_check_time_to_cruise(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, - ngtcp2_tstamp ts); +static int bbr_is_time_to_cruise(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +static int bbr_is_time_to_go_down(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat); static int bbr_has_elapsed_in_phase(ngtcp2_cc_bbr *bbr, ngtcp2_duration interval, ngtcp2_tstamp ts); @@ -175,9 +176,8 @@ static void bbr_probe_inflight_hi_upward(ngtcp2_cc_bbr *bbr, static void bbr_adapt_upper_bounds(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts); -static int bbr_check_time_to_probe_bw(ngtcp2_cc_bbr *bbr, - ngtcp2_conn_stat *cstat, - ngtcp2_tstamp ts); +static int bbr_is_time_to_probe_bw(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); static void bbr_pick_probe_wait(ngtcp2_cc_bbr *bbr); @@ -197,6 +197,8 @@ static void bbr_handle_inflight_too_high(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, const ngtcp2_rs *rs, ngtcp2_tstamp ts); +static void bbr_note_loss(ngtcp2_cc_bbr *bbr); + static void bbr_handle_lost_packet(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, const ngtcp2_cc_pkt *pkt, ngtcp2_tstamp ts); @@ -227,15 +229,14 @@ static void bbr_handle_restart_from_idle(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts); -static uint64_t bbr_bdp_multiple(ngtcp2_cc_bbr *bbr, uint64_t bw, - uint64_t gain_h); +static uint64_t bbr_bdp_multiple(ngtcp2_cc_bbr *bbr, uint64_t gain_h); static uint64_t bbr_quantization_budget(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, uint64_t inflight); static uint64_t bbr_inflight(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, - uint64_t bw, uint64_t gain_h); + uint64_t gain_h); static void bbr_update_max_inflight(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat); @@ -247,10 +248,6 @@ static uint64_t min_pipe_cwnd(size_t max_udp_payload_size); static void bbr_advance_max_bw_filter(ngtcp2_cc_bbr *bbr); -static void bbr_modulate_cwnd_for_recovery(ngtcp2_cc_bbr *bbr, - ngtcp2_conn_stat *cstat, - const ngtcp2_cc_ack *ack); - static void bbr_save_cwnd(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat); static void bbr_restore_cwnd(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat); @@ -272,7 +269,7 @@ static int in_congestion_recovery(const ngtcp2_conn_stat *cstat, ngtcp2_tstamp sent_time); static void bbr_handle_recovery(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, - const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts); + const ngtcp2_cc_ack *ack); static void bbr_on_init(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, ngtcp2_tstamp initial_ts) { @@ -289,11 +286,12 @@ static void bbr_on_init(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, bbr->idle_restart = 0; bbr->extra_acked_interval_start = initial_ts; bbr->extra_acked_delivered = 0; + bbr->full_bw_reached = 0; bbr_reset_congestion_signals(bbr); bbr_reset_lower_bounds(bbr); bbr_init_round_counting(bbr); - bbr_init_full_pipe(bbr); + bbr_reset_full_bw(bbr); bbr_init_pacing_rate(bbr, cstat); bbr_enter_startup(bbr); @@ -326,23 +324,17 @@ static void bbr_on_init(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, bbr->bw_probe_up_acks = 0; bbr->inflight_hi = UINT64_MAX; - bbr->bw_hi = UINT64_MAX; bbr->probe_rtt_expired = 0; bbr->probe_rtt_min_delay = UINT64_MAX; bbr->probe_rtt_min_stamp = initial_ts; bbr->in_loss_recovery = 0; - bbr->packet_conservation = 0; + bbr->round_count_at_recovery = UINT64_MAX; bbr->max_inflight = 0; bbr->congestion_recovery_start_ts = UINT64_MAX; - bbr->congestion_recovery_next_round_delivered = 0; - - bbr->prior_inflight_lo = 0; - bbr->prior_inflight_hi = 0; - bbr->prior_bw_lo = 0; } static void bbr_reset_congestion_signals(ngtcp2_cc_bbr *bbr) { @@ -362,48 +354,53 @@ static void bbr_init_round_counting(ngtcp2_cc_bbr *bbr) { bbr->round_count = 0; } -static void bbr_init_full_pipe(ngtcp2_cc_bbr *bbr) { - bbr->filled_pipe = 0; +static void bbr_reset_full_bw(ngtcp2_cc_bbr *bbr) { bbr->full_bw = 0; bbr->full_bw_count = 0; + bbr->full_bw_now = 0; } -static void bbr_check_startup_full_bandwidth(ngtcp2_cc_bbr *bbr) { - if (bbr->filled_pipe || !bbr->round_start || bbr->rst->rs.is_app_limited) { +static void bbr_check_full_bw_reached(ngtcp2_cc_bbr *bbr, + ngtcp2_conn_stat *cstat) { + if (bbr->full_bw_now || bbr->rst->rs.is_app_limited) { return; } - if (bbr->max_bw * 100 >= bbr->full_bw * 125) { - bbr->full_bw = bbr->max_bw; - bbr->full_bw_count = 0; - } - - ++bbr->full_bw_count; - - if (bbr->full_bw_count >= 3) { - bbr->filled_pipe = 1; + if (cstat->delivery_rate_sec * 100 >= bbr->full_bw * 125) { + bbr_reset_full_bw(bbr); + bbr->full_bw = cstat->delivery_rate_sec; - ngtcp2_log_info(bbr->cc.log, NGTCP2_LOG_EVENT_CCA, - "bbr filled pipe, full_bw=%" PRIu64, bbr->full_bw); + return; } -} -static void bbr_check_startup_high_loss(ngtcp2_cc_bbr *bbr, - const ngtcp2_cc_ack *ack) { - if (bbr->filled_pipe || !bbr->round_start || bbr->rst->rs.is_app_limited) { + if (!bbr->round_start) { return; } - if (bbr->loss_events_in_round <= 3) { + ++bbr->full_bw_count; + + bbr->full_bw_now = bbr->full_bw_count >= 3; + if (!bbr->full_bw_now) { return; } - /* loss_thresh = 2% */ - if (bbr->bytes_lost_in_round * 100 <= ack->prior_bytes_in_flight * 2) { + bbr->full_bw_reached = 1; + + ngtcp2_log_info(bbr->cc.log, NGTCP2_LOG_EVENT_CCA, + "bbr reached full bandwidth, full_bw=%" PRIu64, bbr->full_bw); +} + +static void bbr_check_startup_high_loss(ngtcp2_cc_bbr *bbr) { + if (bbr->full_bw_reached || bbr->loss_events_in_round <= 6 || + (bbr->in_loss_recovery && + bbr->round_count <= bbr->round_count_at_recovery) || + !is_inflight_too_high(&bbr->rst->rs)) { return; } - bbr->filled_pipe = 1; + bbr->full_bw_reached = 1; + bbr->inflight_hi = ngtcp2_max_uint64(bbr_bdp_multiple(bbr, bbr->cwnd_gain_h), + bbr->inflight_latest); } static void bbr_init_pacing_rate(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat) { @@ -423,7 +420,7 @@ static void bbr_set_pacing_rate_with_gain(ngtcp2_cc_bbr *bbr, interval = NGTCP2_SECONDS * 100 * 100 / pacing_gain_h / bbr->bw / (100 - NGTCP2_BBR_PACING_MARGIN_PERCENT); - if (bbr->filled_pipe || interval < cstat->pacing_interval) { + if (bbr->full_bw_reached || interval < cstat->pacing_interval) { cstat->pacing_interval = interval; } } @@ -437,15 +434,13 @@ static void bbr_enter_startup(ngtcp2_cc_bbr *bbr) { bbr->state = NGTCP2_BBR_STATE_STARTUP; bbr->pacing_gain_h = NGTCP2_BBR_STARTUP_PACING_GAIN_H; - bbr->cwnd_gain_h = NGTCP2_BBR_STARTUP_CWND_GAIN_H; + bbr->cwnd_gain_h = NGTCP2_BBR_DEFAULT_CWND_GAIN_H; } -static void bbr_check_startup_done(ngtcp2_cc_bbr *bbr, - const ngtcp2_cc_ack *ack) { - bbr_check_startup_full_bandwidth(bbr); - bbr_check_startup_high_loss(bbr, ack); +static void bbr_check_startup_done(ngtcp2_cc_bbr *bbr) { + bbr_check_startup_high_loss(bbr); - if (bbr->state == NGTCP2_BBR_STATE_STARTUP && bbr->filled_pipe) { + if (bbr->state == NGTCP2_BBR_STATE_STARTUP && bbr->full_bw_reached) { bbr_enter_drain(bbr); } } @@ -468,7 +463,8 @@ static void bbr_update_model_and_state(ngtcp2_cc_bbr *bbr, bbr_update_latest_delivery_signals(bbr, cstat); bbr_update_congestion_signals(bbr, cstat, ack); bbr_update_ack_aggregation(bbr, cstat, ack, ts); - bbr_check_startup_done(bbr, ack); + bbr_check_full_bw_reached(bbr, cstat); + bbr_check_startup_done(bbr); bbr_check_drain(bbr, cstat, ts); bbr_update_probe_bw_cycle_phase(bbr, cstat, ack, ts); bbr_update_min_rtt(bbr, ack, ts); @@ -493,9 +489,9 @@ static void bbr_update_on_loss(ngtcp2_cc_bbr *cc, ngtcp2_conn_stat *cstat, static void bbr_update_latest_delivery_signals(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat) { bbr->loss_round_start = 0; - bbr->bw_latest = ngtcp2_max(bbr->bw_latest, cstat->delivery_rate_sec); + bbr->bw_latest = ngtcp2_max_uint64(bbr->bw_latest, cstat->delivery_rate_sec); bbr->inflight_latest = - ngtcp2_max(bbr->inflight_latest, bbr->rst->rs.delivered); + ngtcp2_max_uint64(bbr->inflight_latest, bbr->rst->rs.delivered); if (bbr->rst->rs.prior_delivered >= bbr->loss_round_delivered) { bbr->loss_round_delivered = bbr->rst->delivered; @@ -519,11 +515,6 @@ static void bbr_update_congestion_signals(ngtcp2_cc_bbr *bbr, if (ack->bytes_lost) { bbr->bytes_lost_in_round += ack->bytes_lost; ++bbr->loss_events_in_round; - - if (!bbr->loss_in_round) { - bbr->loss_in_round = 1; - bbr->loss_round_delivered = bbr->rst->delivered; - } } if (!bbr->loss_round_start) { @@ -558,16 +549,15 @@ static void bbr_init_lower_bounds(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat) { } static void bbr_loss_lower_bounds(ngtcp2_cc_bbr *bbr) { - bbr->bw_lo = ngtcp2_max(bbr->bw_latest, bbr->bw_lo * NGTCP2_BBR_BETA_NUMER / - NGTCP2_BBR_BETA_DENOM); - bbr->inflight_lo = ngtcp2_max(bbr->inflight_latest, - bbr->inflight_lo * NGTCP2_BBR_BETA_NUMER / - NGTCP2_BBR_BETA_DENOM); + bbr->bw_lo = ngtcp2_max_uint64( + bbr->bw_latest, bbr->bw_lo * NGTCP2_BBR_BETA_NUMER / NGTCP2_BBR_BETA_DENOM); + bbr->inflight_lo = ngtcp2_max_uint64( + bbr->inflight_latest, + bbr->inflight_lo * NGTCP2_BBR_BETA_NUMER / NGTCP2_BBR_BETA_DENOM); } static void bbr_bound_bw_for_model(ngtcp2_cc_bbr *bbr) { - bbr->bw = ngtcp2_min(bbr->max_bw, bbr->bw_lo); - bbr->bw = ngtcp2_min(bbr->bw, bbr->bw_hi); + bbr->bw = ngtcp2_min_uint64(bbr->max_bw, bbr->bw_lo); } static void bbr_update_max_bw(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, @@ -633,7 +623,13 @@ static void bbr_update_ack_aggregation(ngtcp2_cc_bbr *bbr, bbr->extra_acked_delivered += ack->bytes_delivered; extra = bbr->extra_acked_delivered - expected_delivered; - extra = ngtcp2_min(extra, cstat->cwnd); + extra = ngtcp2_min_uint64(extra, cstat->cwnd); + + if (bbr->full_bw_reached) { + bbr->extra_acked_filter.window_length = NGTCP2_BBR_EXTRA_ACKED_FILTERLEN; + } else { + bbr->extra_acked_filter.window_length = 1; + } ngtcp2_window_filter_update(&bbr->extra_acked_filter, extra, bbr->round_count); @@ -645,14 +641,14 @@ static void bbr_enter_drain(ngtcp2_cc_bbr *bbr) { ngtcp2_log_info(bbr->cc.log, NGTCP2_LOG_EVENT_CCA, "bbr enter Drain"); bbr->state = NGTCP2_BBR_STATE_DRAIN; - bbr->pacing_gain_h = 100 * 100 / NGTCP2_BBR_STARTUP_CWND_GAIN_H; - bbr->cwnd_gain_h = NGTCP2_BBR_STARTUP_CWND_GAIN_H; + bbr->pacing_gain_h = NGTCP2_BBR_DRAIN_PACING_GAIN_H; + bbr->cwnd_gain_h = NGTCP2_BBR_DEFAULT_CWND_GAIN_H; } static void bbr_check_drain(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts) { if (bbr->state == NGTCP2_BBR_STATE_DRAIN && - cstat->bytes_in_flight <= bbr_inflight(bbr, cstat, bbr->bw, 100)) { + cstat->bytes_in_flight <= bbr_inflight(bbr, cstat, 100)) { bbr_enter_probe_bw(bbr, ts); } } @@ -677,7 +673,7 @@ static void bbr_start_probe_bw_down(ngtcp2_cc_bbr *bbr, ngtcp2_tstamp ts) { bbr->state = NGTCP2_BBR_STATE_PROBE_BW_DOWN; bbr->pacing_gain_h = 90; - bbr->cwnd_gain_h = 200; + bbr->cwnd_gain_h = NGTCP2_BBR_DEFAULT_CWND_GAIN_H; } static void bbr_start_probe_bw_cruise(ngtcp2_cc_bbr *bbr) { @@ -686,7 +682,7 @@ static void bbr_start_probe_bw_cruise(ngtcp2_cc_bbr *bbr) { bbr->state = NGTCP2_BBR_STATE_PROBE_BW_CRUISE; bbr->pacing_gain_h = 100; - bbr->cwnd_gain_h = 200; + bbr->cwnd_gain_h = NGTCP2_BBR_DEFAULT_CWND_GAIN_H; } static void bbr_start_probe_bw_refill(ngtcp2_cc_bbr *bbr) { @@ -703,18 +699,18 @@ static void bbr_start_probe_bw_refill(ngtcp2_cc_bbr *bbr) { bbr->state = NGTCP2_BBR_STATE_PROBE_BW_REFILL; bbr->pacing_gain_h = 100; - bbr->cwnd_gain_h = 200; + bbr->cwnd_gain_h = NGTCP2_BBR_DEFAULT_CWND_GAIN_H; } -static void bbr_start_probe_bw_up(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, - ngtcp2_tstamp ts) { +static void bbr_start_probe_bw_up(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat) { ngtcp2_log_info(bbr->cc.log, NGTCP2_LOG_EVENT_CCA, "bbr start ProbeBW_UP"); bbr->ack_phase = NGTCP2_BBR_ACK_PHASE_ACKS_PROBE_STARTING; bbr_start_round(bbr); + bbr_reset_full_bw(bbr); - bbr->cycle_stamp = ts; + bbr->full_bw = cstat->delivery_rate_sec; bbr->state = NGTCP2_BBR_STATE_PROBE_BW_UP; bbr->pacing_gain_h = 125; bbr->cwnd_gain_h = 225; @@ -726,7 +722,7 @@ static void bbr_update_probe_bw_cycle_phase(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts) { - if (!bbr->filled_pipe) { + if (!bbr->full_bw_reached) { return; } @@ -738,17 +734,17 @@ static void bbr_update_probe_bw_cycle_phase(ngtcp2_cc_bbr *bbr, switch (bbr->state) { case NGTCP2_BBR_STATE_PROBE_BW_DOWN: - if (bbr_check_time_to_probe_bw(bbr, cstat, ts)) { + if (bbr_is_time_to_probe_bw(bbr, cstat, ts)) { return; } - if (bbr_check_time_to_cruise(bbr, cstat, ts)) { + if (bbr_is_time_to_cruise(bbr, cstat, ts)) { bbr_start_probe_bw_cruise(bbr); } break; case NGTCP2_BBR_STATE_PROBE_BW_CRUISE: - if (bbr_check_time_to_probe_bw(bbr, cstat, ts)) { + if (bbr_is_time_to_probe_bw(bbr, cstat, ts)) { return; } @@ -756,13 +752,12 @@ static void bbr_update_probe_bw_cycle_phase(ngtcp2_cc_bbr *bbr, case NGTCP2_BBR_STATE_PROBE_BW_REFILL: if (bbr->round_start) { bbr->bw_probe_samples = 1; - bbr_start_probe_bw_up(bbr, cstat, ts); + bbr_start_probe_bw_up(bbr, cstat); } break; case NGTCP2_BBR_STATE_PROBE_BW_UP: - if (bbr_has_elapsed_in_phase(bbr, bbr->min_rtt, ts) && - cstat->bytes_in_flight > bbr_inflight(bbr, cstat, bbr->max_bw, 125)) { + if (bbr_is_time_to_go_down(bbr, cstat)) { bbr_start_probe_bw_down(bbr, ts); } @@ -772,15 +767,26 @@ static void bbr_update_probe_bw_cycle_phase(ngtcp2_cc_bbr *bbr, } } -static int bbr_check_time_to_cruise(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, - ngtcp2_tstamp ts) { +static int bbr_is_time_to_cruise(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { (void)ts; if (cstat->bytes_in_flight > bbr_inflight_with_headroom(bbr, cstat)) { return 0; } - if (cstat->bytes_in_flight <= bbr_inflight(bbr, cstat, bbr->max_bw, 100)) { + if (cstat->bytes_in_flight <= bbr_inflight(bbr, cstat, 100)) { + return 1; + } + + return 0; +} + +static int bbr_is_time_to_go_down(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat) { + if (bbr->rst->is_cwnd_limited && cstat->cwnd >= bbr->inflight_hi) { + bbr_reset_full_bw(bbr); + bbr->full_bw = cstat->delivery_rate_sec; + } else if (bbr->full_bw_now) { return 1; } @@ -801,13 +807,13 @@ static uint64_t bbr_inflight_with_headroom(ngtcp2_cc_bbr *bbr, return UINT64_MAX; } - headroom = ngtcp2_max(cstat->max_tx_udp_payload_size, - bbr->inflight_hi * NGTCP2_BBR_HEADROOM_NUMER / - NGTCP2_BBR_HEADROOM_DENOM); + headroom = ngtcp2_max_uint64(cstat->max_tx_udp_payload_size, + bbr->inflight_hi * NGTCP2_BBR_HEADROOM_NUMER / + NGTCP2_BBR_HEADROOM_DENOM); mpcwnd = min_pipe_cwnd(cstat->max_tx_udp_payload_size); if (bbr->inflight_hi > headroom) { - return ngtcp2_max(bbr->inflight_hi - headroom, mpcwnd); + return ngtcp2_max_uint64(bbr->inflight_hi - headroom, mpcwnd); } return mpcwnd; @@ -818,8 +824,8 @@ static void bbr_raise_inflight_hi_slope(ngtcp2_cc_bbr *bbr, uint64_t growth_this_round = cstat->max_tx_udp_payload_size << bbr->bw_probe_up_rounds; - bbr->bw_probe_up_rounds = ngtcp2_min(bbr->bw_probe_up_rounds + 1, 30); - bbr->probe_up_cnt = ngtcp2_max(cstat->cwnd / growth_this_round, 1) * + bbr->bw_probe_up_rounds = ngtcp2_min_size(bbr->bw_probe_up_rounds + 1, 30); + bbr->probe_up_cnt = ngtcp2_max_uint64(cstat->cwnd / growth_this_round, 1) * cstat->max_tx_udp_payload_size; } @@ -860,8 +866,7 @@ static void bbr_adapt_upper_bounds(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, } if (!bbr_check_inflight_too_high(bbr, cstat, ts)) { - /* bbr->bw_hi never be updated */ - if (bbr->inflight_hi == UINT64_MAX /* || bbr->bw_hi == UINT64_MAX */) { + if (bbr->inflight_hi == UINT64_MAX) { return; } @@ -869,19 +874,14 @@ static void bbr_adapt_upper_bounds(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, bbr->inflight_hi = bbr->rst->rs.tx_in_flight; } - if (cstat->delivery_rate_sec > bbr->bw_hi) { - bbr->bw_hi = cstat->delivery_rate_sec; - } - if (bbr->state == NGTCP2_BBR_STATE_PROBE_BW_UP) { bbr_probe_inflight_hi_upward(bbr, cstat, ack); } } } -static int bbr_check_time_to_probe_bw(ngtcp2_cc_bbr *bbr, - ngtcp2_conn_stat *cstat, - ngtcp2_tstamp ts) { +static int bbr_is_time_to_probe_bw(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { if (bbr_has_elapsed_in_phase(bbr, bbr->bw_probe_wait, ts) || bbr_is_reno_coexistence_probe_time(bbr, cstat)) { bbr_start_probe_bw_refill(bbr); @@ -902,22 +902,22 @@ static void bbr_pick_probe_wait(ngtcp2_cc_bbr *bbr) { bbr->rand(&rand, 1, &bbr->rand_ctx); bbr->bw_probe_wait = - 2 * NGTCP2_SECONDS + (ngtcp2_tstamp)(NGTCP2_SECONDS * rand / 255); + 2 * NGTCP2_SECONDS + (ngtcp2_tstamp)(NGTCP2_SECONDS * rand / 255); } static int bbr_is_reno_coexistence_probe_time(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat) { uint64_t reno_rounds = - bbr_target_inflight(bbr, cstat) / cstat->max_tx_udp_payload_size; + bbr_target_inflight(bbr, cstat) / cstat->max_tx_udp_payload_size; - return bbr->rounds_since_bw_probe >= ngtcp2_min(reno_rounds, 63); + return bbr->rounds_since_bw_probe >= ngtcp2_min_uint64(reno_rounds, 63); } static uint64_t bbr_target_inflight(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat) { - uint64_t bdp = bbr_inflight(bbr, cstat, bbr->bw, 100); + uint64_t bdp = bbr_bdp_multiple(bbr, bbr->cwnd_gain_h); - return ngtcp2_min(bdp, cstat->cwnd); + return ngtcp2_min_uint64(bdp, cstat->cwnd); } static int bbr_check_inflight_too_high(ngtcp2_cc_bbr *bbr, @@ -946,9 +946,9 @@ static void bbr_handle_inflight_too_high(ngtcp2_cc_bbr *bbr, bbr->bw_probe_samples = 0; if (!rs->is_app_limited) { - bbr->inflight_hi = ngtcp2_max( - rs->tx_in_flight, bbr_target_inflight(bbr, cstat) * - NGTCP2_BBR_BETA_NUMER / NGTCP2_BBR_BETA_DENOM); + bbr->inflight_hi = ngtcp2_max_uint64( + rs->tx_in_flight, bbr_target_inflight(bbr, cstat) * + NGTCP2_BBR_BETA_NUMER / NGTCP2_BBR_BETA_DENOM); } if (bbr->state == NGTCP2_BBR_STATE_PROBE_BW_UP) { @@ -956,10 +956,21 @@ static void bbr_handle_inflight_too_high(ngtcp2_cc_bbr *bbr, } } +static void bbr_note_loss(ngtcp2_cc_bbr *bbr) { + if (bbr->loss_in_round) { + return; + } + + bbr->loss_in_round = 1; + bbr->loss_round_delivered = bbr->rst->delivered; +} + static void bbr_handle_lost_packet(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, const ngtcp2_cc_pkt *pkt, ngtcp2_tstamp ts) { ngtcp2_rs rs = {0}; + bbr_note_loss(bbr); + if (!bbr->bw_probe_samples) { return; } @@ -1007,7 +1018,7 @@ static void bbr_update_min_rtt(ngtcp2_cc_bbr *bbr, const ngtcp2_cc_ack *ack, int min_rtt_expired; bbr->probe_rtt_expired = - ts > bbr->probe_rtt_min_stamp + NGTCP2_BBR_PROBE_RTT_INTERVAL; + ts > bbr->probe_rtt_min_stamp + NGTCP2_BBR_PROBE_RTT_INTERVAL; if (ack->rtt != UINT64_MAX && (ack->rtt < bbr->probe_rtt_min_delay || bbr->probe_rtt_expired)) { @@ -1106,7 +1117,7 @@ static void bbr_mark_connection_app_limited(ngtcp2_cc_bbr *bbr, static void bbr_exit_probe_rtt(ngtcp2_cc_bbr *bbr, ngtcp2_tstamp ts) { bbr_reset_lower_bounds(bbr); - if (bbr->filled_pipe) { + if (bbr->full_bw_reached) { bbr_start_probe_bw_down(bbr, ts); bbr_start_probe_bw_cruise(bbr); } else { @@ -1131,15 +1142,14 @@ static void bbr_handle_restart_from_idle(ngtcp2_cc_bbr *bbr, } } -static uint64_t bbr_bdp_multiple(ngtcp2_cc_bbr *bbr, uint64_t bw, - uint64_t gain_h) { +static uint64_t bbr_bdp_multiple(ngtcp2_cc_bbr *bbr, uint64_t gain_h) { uint64_t bdp; if (bbr->min_rtt == UINT64_MAX) { return bbr->initial_cwnd; } - bdp = bw * bbr->min_rtt / NGTCP2_SECONDS; + bdp = ngtcp2_max_uint64(bbr->bw * bbr->min_rtt / NGTCP2_SECONDS, 1); return (uint64_t)(bdp * gain_h / 100); } @@ -1153,9 +1163,9 @@ static uint64_t bbr_quantization_budget(ngtcp2_cc_bbr *bbr, uint64_t inflight) { bbr_update_offload_budget(bbr, cstat); - inflight = ngtcp2_max(inflight, bbr->offload_budget); + inflight = ngtcp2_max_uint64(inflight, bbr->offload_budget); inflight = - ngtcp2_max(inflight, min_pipe_cwnd(cstat->max_tx_udp_payload_size)); + ngtcp2_max_uint64(inflight, min_pipe_cwnd(cstat->max_tx_udp_payload_size)); if (bbr->state == NGTCP2_BBR_STATE_PROBE_BW_UP) { inflight += 2 * cstat->max_tx_udp_payload_size; @@ -1165,8 +1175,8 @@ static uint64_t bbr_quantization_budget(ngtcp2_cc_bbr *bbr, } static uint64_t bbr_inflight(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, - uint64_t bw, uint64_t gain_h) { - uint64_t inflight = bbr_bdp_multiple(bbr, bw, gain_h); + uint64_t gain_h) { + uint64_t inflight = bbr_bdp_multiple(bbr, gain_h); return bbr_quantization_budget(bbr, cstat, inflight); } @@ -1178,8 +1188,7 @@ static void bbr_update_max_inflight(ngtcp2_cc_bbr *bbr, /* Not documented */ /* bbr_update_aggregation_budget(bbr); */ - inflight = - bbr_bdp_multiple(bbr, bbr->bw, bbr->cwnd_gain_h) + bbr->extra_acked; + inflight = bbr_bdp_multiple(bbr, bbr->cwnd_gain_h) + bbr->extra_acked; bbr->max_inflight = bbr_quantization_budget(bbr, cstat, inflight); } @@ -1192,44 +1201,26 @@ static void bbr_advance_max_bw_filter(ngtcp2_cc_bbr *bbr) { ++bbr->cycle_count; } -static void bbr_modulate_cwnd_for_recovery(ngtcp2_cc_bbr *bbr, - ngtcp2_conn_stat *cstat, - const ngtcp2_cc_ack *ack) { - if (ack->bytes_lost > 0) { - if (cstat->cwnd > ack->bytes_lost) { - cstat->cwnd -= ack->bytes_lost; - cstat->cwnd = ngtcp2_max(cstat->cwnd, 2 * cstat->max_tx_udp_payload_size); - } else { - cstat->cwnd = 2 * cstat->max_tx_udp_payload_size; - } - } - - if (bbr->packet_conservation) { - cstat->cwnd = - ngtcp2_max(cstat->cwnd, cstat->bytes_in_flight + ack->bytes_delivered); - } -} - static void bbr_save_cwnd(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat) { if (!bbr->in_loss_recovery && bbr->state != NGTCP2_BBR_STATE_PROBE_RTT) { bbr->prior_cwnd = cstat->cwnd; return; } - bbr->prior_cwnd = ngtcp2_max(bbr->prior_cwnd, cstat->cwnd); + bbr->prior_cwnd = ngtcp2_max_uint64(bbr->prior_cwnd, cstat->cwnd); } static void bbr_restore_cwnd(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat) { - cstat->cwnd = ngtcp2_max(cstat->cwnd, bbr->prior_cwnd); + cstat->cwnd = ngtcp2_max_uint64(cstat->cwnd, bbr->prior_cwnd); } static uint64_t bbr_probe_rtt_cwnd(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat) { uint64_t probe_rtt_cwnd = - bbr_bdp_multiple(bbr, bbr->bw, NGTCP2_BBR_PROBE_RTT_CWND_GAIN_H); + bbr_bdp_multiple(bbr, NGTCP2_BBR_PROBE_RTT_CWND_GAIN_H); uint64_t mpcwnd = min_pipe_cwnd(cstat->max_tx_udp_payload_size); - return ngtcp2_max(probe_rtt_cwnd, mpcwnd); + return ngtcp2_max_uint64(probe_rtt_cwnd, mpcwnd); } static void bbr_bound_cwnd_for_probe_rtt(ngtcp2_cc_bbr *bbr, @@ -1239,7 +1230,7 @@ static void bbr_bound_cwnd_for_probe_rtt(ngtcp2_cc_bbr *bbr, if (bbr->state == NGTCP2_BBR_STATE_PROBE_RTT) { probe_rtt_cwnd = bbr_probe_rtt_cwnd(bbr, cstat); - cstat->cwnd = ngtcp2_min(cstat->cwnd, probe_rtt_cwnd); + cstat->cwnd = ngtcp2_min_uint64(cstat->cwnd, probe_rtt_cwnd); } } @@ -1248,21 +1239,18 @@ static void bbr_set_cwnd(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, uint64_t mpcwnd; bbr_update_max_inflight(bbr, cstat); - bbr_modulate_cwnd_for_recovery(bbr, cstat, ack); - - if (!bbr->packet_conservation) { - if (bbr->filled_pipe) { - cstat->cwnd += ack->bytes_delivered; - cstat->cwnd = ngtcp2_min(cstat->cwnd, bbr->max_inflight); - } else if (cstat->cwnd < bbr->max_inflight || - bbr->rst->delivered < bbr->initial_cwnd) { - cstat->cwnd += ack->bytes_delivered; - } - mpcwnd = min_pipe_cwnd(cstat->max_tx_udp_payload_size); - cstat->cwnd = ngtcp2_max(cstat->cwnd, mpcwnd); + if (bbr->full_bw_reached) { + cstat->cwnd += ack->bytes_delivered; + cstat->cwnd = ngtcp2_min_uint64(cstat->cwnd, bbr->max_inflight); + } else if (cstat->cwnd < bbr->max_inflight || + bbr->rst->delivered < bbr->initial_cwnd) { + cstat->cwnd += ack->bytes_delivered; } + mpcwnd = min_pipe_cwnd(cstat->max_tx_udp_payload_size); + cstat->cwnd = ngtcp2_max_uint64(cstat->cwnd, mpcwnd); + bbr_bound_cwnd_for_probe_rtt(bbr, cstat); bbr_bound_cwnd_for_model(bbr, cstat); } @@ -1280,30 +1268,23 @@ static void bbr_bound_cwnd_for_model(ngtcp2_cc_bbr *bbr, cap = bbr_inflight_with_headroom(bbr, cstat); } - cap = ngtcp2_min(cap, bbr->inflight_lo); - cap = ngtcp2_max(cap, mpcwnd); + cap = ngtcp2_min_uint64(cap, bbr->inflight_lo); + cap = ngtcp2_max_uint64(cap, mpcwnd); - cstat->cwnd = ngtcp2_min(cstat->cwnd, cap); + cstat->cwnd = ngtcp2_min_uint64(cstat->cwnd, cap); } static void bbr_set_send_quantum(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat) { - size_t floor, send_quantum; + size_t send_quantum = 64 * 1024; (void)bbr; - if (cstat->pacing_interval > (NGTCP2_SECONDS * 8 * 10 / 12) >> 20) { - floor = cstat->max_tx_udp_payload_size; - } else { - floor = 2 * cstat->max_tx_udp_payload_size; - } - if (cstat->pacing_interval) { - send_quantum = (size_t)(NGTCP2_MILLISECONDS / cstat->pacing_interval); - send_quantum = ngtcp2_min(send_quantum, 64 * 1024); - } else { - send_quantum = 64 * 1024; + send_quantum = ngtcp2_min_size( + send_quantum, (size_t)(NGTCP2_MILLISECONDS / cstat->pacing_interval)); } - cstat->send_quantum = ngtcp2_max(send_quantum, floor); + cstat->send_quantum = + ngtcp2_max_size(send_quantum, 2 * cstat->max_tx_udp_payload_size); } static int in_congestion_recovery(const ngtcp2_conn_stat *cstat, @@ -1313,16 +1294,12 @@ static int in_congestion_recovery(const ngtcp2_conn_stat *cstat, } static void bbr_handle_recovery(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, - const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts) { + const ngtcp2_cc_ack *ack) { if (bbr->in_loss_recovery) { - if (ts - cstat->congestion_recovery_start_ts >= cstat->smoothed_rtt) { - bbr->packet_conservation = 0; - } - if (ack->largest_pkt_sent_ts != UINT64_MAX && !in_congestion_recovery(cstat, ack->largest_pkt_sent_ts)) { bbr->in_loss_recovery = 0; - bbr->packet_conservation = 0; + bbr->round_count_at_recovery = UINT64_MAX; bbr_restore_cwnd(bbr, cstat); } @@ -1331,18 +1308,15 @@ static void bbr_handle_recovery(ngtcp2_cc_bbr *bbr, ngtcp2_conn_stat *cstat, if (bbr->congestion_recovery_start_ts != UINT64_MAX) { bbr->in_loss_recovery = 1; + bbr->round_count_at_recovery = + bbr->round_start ? bbr->round_count : bbr->round_count + 1; bbr_save_cwnd(bbr, cstat); cstat->cwnd = - cstat->bytes_in_flight + - ngtcp2_max(ack->bytes_delivered, cstat->max_tx_udp_payload_size); + cstat->bytes_in_flight + + ngtcp2_max_uint64(ack->bytes_delivered, cstat->max_tx_udp_payload_size); cstat->congestion_recovery_start_ts = bbr->congestion_recovery_start_ts; bbr->congestion_recovery_start_ts = UINT64_MAX; - bbr->packet_conservation = 1; - bbr->congestion_recovery_next_round_delivered = bbr->rst->delivered; - bbr->prior_inflight_hi = bbr->inflight_hi; - bbr->prior_inflight_lo = bbr->inflight_lo; - bbr->prior_bw_lo = bbr->bw_lo; } } @@ -1350,12 +1324,18 @@ static void bbr_cc_on_pkt_lost(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, const ngtcp2_cc_pkt *pkt, ngtcp2_tstamp ts) { ngtcp2_cc_bbr *bbr = ngtcp2_struct_of(cc, ngtcp2_cc_bbr, cc); + if (bbr->state == NGTCP2_BBR_STATE_STARTUP) { + return; + } + bbr_update_on_loss(bbr, cstat, pkt, ts); } static void bbr_cc_congestion_event(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, - ngtcp2_tstamp sent_ts, ngtcp2_tstamp ts) { + ngtcp2_tstamp sent_ts, uint64_t bytes_lost, + ngtcp2_tstamp ts) { ngtcp2_cc_bbr *bbr = ngtcp2_struct_of(cc, ngtcp2_cc_bbr, cc); + (void)bytes_lost; if (bbr->in_loss_recovery || bbr->congestion_recovery_start_ts != UINT64_MAX || @@ -1377,13 +1357,8 @@ static void bbr_cc_on_spurious_congestion(ngtcp2_cc *cc, if (bbr->in_loss_recovery) { bbr->in_loss_recovery = 0; - bbr->packet_conservation = 0; + bbr->round_count_at_recovery = UINT64_MAX; bbr_restore_cwnd(bbr, cstat); - bbr->full_bw_count = 0; - bbr->loss_in_round = 0; - bbr->inflight_lo = ngtcp2_max(bbr->inflight_lo, bbr->prior_inflight_lo); - bbr->inflight_hi = ngtcp2_max(bbr->inflight_hi, bbr->prior_inflight_hi); - bbr->bw_lo = ngtcp2_max(bbr->bw_lo, bbr->prior_bw_lo); } } @@ -1396,19 +1371,19 @@ static void bbr_cc_on_persistent_congestion(ngtcp2_cc *cc, cstat->congestion_recovery_start_ts = UINT64_MAX; bbr->congestion_recovery_start_ts = UINT64_MAX; bbr->in_loss_recovery = 0; - bbr->packet_conservation = 0; + bbr->round_count_at_recovery = UINT64_MAX; bbr_save_cwnd(bbr, cstat); cstat->cwnd = cstat->bytes_in_flight + cstat->max_tx_udp_payload_size; - cstat->cwnd = - ngtcp2_max(cstat->cwnd, min_pipe_cwnd(cstat->max_tx_udp_payload_size)); + cstat->cwnd = ngtcp2_max_uint64( + cstat->cwnd, min_pipe_cwnd(cstat->max_tx_udp_payload_size)); } static void bbr_cc_on_ack_recv(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts) { ngtcp2_cc_bbr *bbr = ngtcp2_struct_of(cc, ngtcp2_cc_bbr, cc); - bbr_handle_recovery(bbr, cstat, ack, ts); + bbr_handle_recovery(bbr, cstat, ack); bbr_update_on_ack(bbr, cstat, ack, ts); } diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_bbr.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_bbr.h index 0017be35010e66..f266ec5d71e4e4 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_bbr.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_bbr.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -95,9 +95,10 @@ typedef struct ngtcp2_cc_bbr { uint64_t round_count; /* Full pipe */ - int filled_pipe; uint64_t full_bw; size_t full_bw_count; + int full_bw_reached; + int full_bw_now; /* Pacing rate */ uint64_t pacing_gain_h; @@ -123,19 +124,13 @@ typedef struct ngtcp2_cc_bbr { size_t bw_probe_up_rounds; uint64_t bw_probe_up_acks; uint64_t inflight_hi; - uint64_t bw_hi; int probe_rtt_expired; ngtcp2_duration probe_rtt_min_delay; ngtcp2_tstamp probe_rtt_min_stamp; int in_loss_recovery; - int packet_conservation; + uint64_t round_count_at_recovery; uint64_t max_inflight; ngtcp2_tstamp congestion_recovery_start_ts; - uint64_t congestion_recovery_next_round_delivered; - - uint64_t prior_inflight_lo; - uint64_t prior_inflight_hi; - uint64_t prior_bw_lo; } ngtcp2_cc_bbr; void ngtcp2_cc_bbr_init(ngtcp2_cc_bbr *bbr, ngtcp2_log *log, @@ -143,4 +138,4 @@ void ngtcp2_cc_bbr_init(ngtcp2_cc_bbr *bbr, ngtcp2_log *log, ngtcp2_tstamp initial_ts, ngtcp2_rand rand, const ngtcp2_rand_ctx *rand_ctx); -#endif /* NGTCP2_BBR_H */ +#endif /* !defined(NGTCP2_BBR_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_buf.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_buf.h index 85b5f4ddf0464a..e87adb119916ca 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_buf.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_buf.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -105,4 +105,4 @@ int ngtcp2_buf_chain_new(ngtcp2_buf_chain **pbufchain, size_t len, */ void ngtcp2_buf_chain_del(ngtcp2_buf_chain *bufchain, const ngtcp2_mem *mem); -#endif /* NGTCP2_BUF_H */ +#endif /* !defined(NGTCP2_BUF_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_cc.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_cc.c index 9ad37fbdb6395a..1ff59f315c5b66 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_cc.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_cc.c @@ -32,16 +32,13 @@ #include "ngtcp2_mem.h" #include "ngtcp2_rcvry.h" #include "ngtcp2_conn_stat.h" +#include "ngtcp2_rst.h" #include "ngtcp2_unreachable.h" -/* NGTCP2_CC_DELIVERY_RATE_SEC_FILTERLEN is the window length of - delivery rate filter driven by ACK clocking. */ -#define NGTCP2_CC_DELIVERY_RATE_SEC_FILTERLEN 10 - uint64_t ngtcp2_cc_compute_initcwnd(size_t max_udp_payload_size) { uint64_t n = 2 * max_udp_payload_size; - n = ngtcp2_max(n, 14720); - return ngtcp2_min(10 * max_udp_payload_size, n); + n = ngtcp2_max_uint64(n, 14720); + return ngtcp2_min_uint64(10 * max_udp_payload_size, n); } ngtcp2_cc_pkt *ngtcp2_cc_pkt_init(ngtcp2_cc_pkt *pkt, int64_t pkt_num, @@ -59,13 +56,7 @@ ngtcp2_cc_pkt *ngtcp2_cc_pkt_init(ngtcp2_cc_pkt *pkt, int64_t pkt_num, return pkt; } -static void reno_cc_reset(ngtcp2_cc_reno *reno) { - ngtcp2_window_filter_init(&reno->delivery_rate_sec_filter, - NGTCP2_CC_DELIVERY_RATE_SEC_FILTERLEN); - reno->ack_count = 0; - reno->target_cwnd = 0; - reno->pending_add = 0; -} +static void reno_cc_reset(ngtcp2_cc_reno *reno) { reno->pending_add = 0; } void ngtcp2_cc_reno_init(ngtcp2_cc_reno *reno, ngtcp2_log *log) { memset(reno, 0, sizeof(*reno)); @@ -74,8 +65,7 @@ void ngtcp2_cc_reno_init(ngtcp2_cc_reno *reno, ngtcp2_log *log) { reno->cc.on_pkt_acked = ngtcp2_cc_reno_cc_on_pkt_acked; reno->cc.congestion_event = ngtcp2_cc_reno_cc_congestion_event; reno->cc.on_persistent_congestion = - ngtcp2_cc_reno_cc_on_persistent_congestion; - reno->cc.on_ack_recv = ngtcp2_cc_reno_cc_on_ack_recv; + ngtcp2_cc_reno_cc_on_persistent_congestion; reno->cc.reset = ngtcp2_cc_reno_cc_reset; reno_cc_reset(reno); @@ -94,11 +84,7 @@ void ngtcp2_cc_reno_cc_on_pkt_acked(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, uint64_t m; (void)ts; - if (in_congestion_recovery(cstat, pkt->sent_ts)) { - return; - } - - if (reno->target_cwnd && reno->target_cwnd < cstat->cwnd) { + if (in_congestion_recovery(cstat, pkt->sent_ts) || pkt->is_app_limited) { return; } @@ -118,9 +104,10 @@ void ngtcp2_cc_reno_cc_on_pkt_acked(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, void ngtcp2_cc_reno_cc_congestion_event(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, ngtcp2_tstamp sent_ts, - ngtcp2_tstamp ts) { + uint64_t bytes_lost, ngtcp2_tstamp ts) { ngtcp2_cc_reno *reno = ngtcp2_struct_of(cc, ngtcp2_cc_reno, cc); uint64_t min_cwnd; + (void)bytes_lost; if (in_congestion_recovery(cstat, sent_ts)) { return; @@ -129,7 +116,7 @@ void ngtcp2_cc_reno_cc_congestion_event(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, cstat->congestion_recovery_start_ts = ts; cstat->cwnd >>= NGTCP2_LOSS_REDUCTION_FACTOR_BITS; min_cwnd = 2 * cstat->max_tx_udp_payload_size; - cstat->cwnd = ngtcp2_max(cstat->cwnd, min_cwnd); + cstat->cwnd = ngtcp2_max_uint64(cstat->cwnd, min_cwnd); cstat->ssthresh = cstat->cwnd; reno->pending_add = 0; @@ -149,35 +136,6 @@ void ngtcp2_cc_reno_cc_on_persistent_congestion(ngtcp2_cc *cc, cstat->congestion_recovery_start_ts = UINT64_MAX; } -void ngtcp2_cc_reno_cc_on_ack_recv(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, - const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts) { - ngtcp2_cc_reno *reno = ngtcp2_struct_of(cc, ngtcp2_cc_reno, cc); - uint64_t target_cwnd, initcwnd; - uint64_t max_delivery_rate_sec; - (void)ack; - (void)ts; - - ++reno->ack_count; - - ngtcp2_window_filter_update(&reno->delivery_rate_sec_filter, - cstat->delivery_rate_sec, reno->ack_count); - - max_delivery_rate_sec = - ngtcp2_window_filter_get_best(&reno->delivery_rate_sec_filter); - - if (cstat->min_rtt != UINT64_MAX && max_delivery_rate_sec) { - target_cwnd = max_delivery_rate_sec * cstat->smoothed_rtt / NGTCP2_SECONDS; - initcwnd = ngtcp2_cc_compute_initcwnd(cstat->max_tx_udp_payload_size); - reno->target_cwnd = ngtcp2_max(initcwnd, target_cwnd) * 289 / 100; - - ngtcp2_log_info(reno->cc.log, NGTCP2_LOG_EVENT_CCA, - "target_cwnd=%" PRIu64 " max_delivery_rate_sec=%" PRIu64 - " smoothed_rtt=%" PRIu64, - reno->target_cwnd, max_delivery_rate_sec, - cstat->smoothed_rtt); - } -} - void ngtcp2_cc_reno_cc_reset(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts) { ngtcp2_cc_reno *reno = ngtcp2_struct_of(cc, ngtcp2_cc_reno, cc); @@ -187,45 +145,49 @@ void ngtcp2_cc_reno_cc_reset(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, reno_cc_reset(reno); } +static void cubic_vars_reset(ngtcp2_cubic_vars *v) { + v->cwnd_prior = 0; + v->w_max = 0; + v->k = 0; + v->epoch_start = UINT64_MAX; + v->w_est = 0; + + v->state = NGTCP2_CUBIC_STATE_INITIAL; + v->app_limited_start_ts = UINT64_MAX; + v->app_limited_duration = 0; + v->pending_bytes_delivered = 0; + v->pending_est_bytes_delivered = 0; +} + static void cubic_cc_reset(ngtcp2_cc_cubic *cubic) { - ngtcp2_window_filter_init(&cubic->delivery_rate_sec_filter, - NGTCP2_CC_DELIVERY_RATE_SEC_FILTERLEN); - cubic->ack_count = 0; - cubic->target_cwnd = 0; - cubic->w_last_max = 0; - cubic->w_tcp = 0; - cubic->origin_point = 0; - cubic->epoch_start = UINT64_MAX; - cubic->k = 0; - - cubic->prior.cwnd = 0; - cubic->prior.ssthresh = 0; - cubic->prior.w_last_max = 0; - cubic->prior.w_tcp = 0; - cubic->prior.origin_point = 0; - cubic->prior.epoch_start = UINT64_MAX; - cubic->prior.k = 0; - - cubic->rtt_sample_count = 0; - cubic->current_round_min_rtt = UINT64_MAX; - cubic->last_round_min_rtt = UINT64_MAX; - cubic->window_end = -1; + cubic_vars_reset(&cubic->current); + cubic_vars_reset(&cubic->undo.v); + cubic->undo.cwnd = 0; + cubic->undo.ssthresh = 0; + + cubic->hs.current_round_min_rtt = UINT64_MAX; + cubic->hs.last_round_min_rtt = UINT64_MAX; + cubic->hs.curr_rtt = UINT64_MAX; + cubic->hs.rtt_sample_count = 0; + cubic->hs.css_baseline_min_rtt = UINT64_MAX; + cubic->hs.css_round = 0; + + cubic->next_round_delivered = 0; } -void ngtcp2_cc_cubic_init(ngtcp2_cc_cubic *cubic, ngtcp2_log *log) { +void ngtcp2_cc_cubic_init(ngtcp2_cc_cubic *cubic, ngtcp2_log *log, + ngtcp2_rst *rst) { memset(cubic, 0, sizeof(*cubic)); cubic->cc.log = log; - cubic->cc.on_pkt_acked = ngtcp2_cc_cubic_cc_on_pkt_acked; + cubic->cc.on_ack_recv = ngtcp2_cc_cubic_cc_on_ack_recv; cubic->cc.congestion_event = ngtcp2_cc_cubic_cc_congestion_event; cubic->cc.on_spurious_congestion = ngtcp2_cc_cubic_cc_on_spurious_congestion; cubic->cc.on_persistent_congestion = - ngtcp2_cc_cubic_cc_on_persistent_congestion; - cubic->cc.on_ack_recv = ngtcp2_cc_cubic_cc_on_ack_recv; - cubic->cc.on_pkt_sent = ngtcp2_cc_cubic_cc_on_pkt_sent; - cubic->cc.new_rtt_sample = ngtcp2_cc_cubic_cc_new_rtt_sample; + ngtcp2_cc_cubic_cc_on_persistent_congestion; cubic->cc.reset = ngtcp2_cc_cubic_cc_reset; - cubic->cc.event = ngtcp2_cc_cubic_cc_event; + + cubic->rst = rst; cubic_cc_reset(cubic); } @@ -254,191 +216,250 @@ uint64_t ngtcp2_cbrt(uint64_t n) { return y; } -/* HyStart++ constants */ -#define NGTCP2_HS_MIN_SSTHRESH 16 +/* RFC 9406 HyStart++ constants */ +#define NGTCP2_HS_MIN_RTT_THRESH (4 * NGTCP2_MILLISECONDS) +#define NGTCP2_HS_MAX_RTT_THRESH (16 * NGTCP2_MILLISECONDS) +#define NGTCP2_HS_MIN_RTT_DIVISOR 8 #define NGTCP2_HS_N_RTT_SAMPLE 8 -#define NGTCP2_HS_MIN_ETA (4 * NGTCP2_MILLISECONDS) -#define NGTCP2_HS_MAX_ETA (16 * NGTCP2_MILLISECONDS) +#define NGTCP2_HS_CSS_GROWTH_DIVISOR 4 +#define NGTCP2_HS_CSS_ROUNDS 5 + +static uint64_t cubic_cc_compute_w_cubic(ngtcp2_cc_cubic *cubic, + const ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + ngtcp2_duration t = ts - cubic->current.epoch_start; + uint64_t delta; + uint64_t tx = (t << 10) / NGTCP2_SECONDS; + uint64_t kx = (cubic->current.k << 10) / NGTCP2_SECONDS; + uint64_t time_delta; + + if (tx < kx) { + return UINT64_MAX; + } + + time_delta = tx - kx; + + delta = cstat->max_tx_udp_payload_size * + ((((time_delta * time_delta) >> 10) * time_delta) >> 10) * 4 / 10; -void ngtcp2_cc_cubic_cc_on_pkt_acked(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, - const ngtcp2_cc_pkt *pkt, - ngtcp2_tstamp ts) { + return cubic->current.w_max + (delta >> 10); +} + +void ngtcp2_cc_cubic_cc_on_ack_recv(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, + ngtcp2_tstamp ts) { ngtcp2_cc_cubic *cubic = ngtcp2_struct_of(cc, ngtcp2_cc_cubic, cc); - ngtcp2_duration t, eta; - uint64_t target, cwnd_thres; - uint64_t tx, kx, time_delta, delta; - uint64_t add, tcp_add; - uint64_t m; + uint64_t w_cubic, w_cubic_next, target, m; + ngtcp2_duration rtt_thresh; + int round_start; - if (pkt->pktns_id == NGTCP2_PKTNS_ID_APPLICATION && cubic->window_end != -1 && - cubic->window_end <= pkt->pkt_num) { - cubic->window_end = -1; + if (in_congestion_recovery(cstat, ack->largest_pkt_sent_ts)) { + return; } - if (in_congestion_recovery(cstat, pkt->sent_ts)) { + if (cubic->current.state == NGTCP2_CUBIC_STATE_CONGESTION_AVOIDANCE) { + if (cubic->rst->rs.is_app_limited && !cubic->rst->is_cwnd_limited) { + if (cubic->current.app_limited_start_ts == UINT64_MAX) { + cubic->current.app_limited_start_ts = ts; + } + + return; + } + + if (cubic->current.app_limited_start_ts != UINT64_MAX) { + cubic->current.app_limited_duration += + ts - cubic->current.app_limited_start_ts; + cubic->current.app_limited_start_ts = UINT64_MAX; + } + } else if (cubic->rst->rs.is_app_limited && !cubic->rst->is_cwnd_limited) { return; } + round_start = ack->pkt_delivered >= cubic->next_round_delivered; + if (round_start) { + cubic->next_round_delivered = cubic->rst->delivered; + + cubic->rst->is_cwnd_limited = 0; + } + if (cstat->cwnd < cstat->ssthresh) { /* slow-start */ - if (cubic->target_cwnd == 0 || cubic->target_cwnd > cstat->cwnd) { - cstat->cwnd += pkt->pktlen; + if (cubic->hs.css_round) { + cstat->cwnd += ack->bytes_delivered / NGTCP2_HS_CSS_GROWTH_DIVISOR; + } else { + cstat->cwnd += ack->bytes_delivered; } ngtcp2_log_info(cubic->cc.log, NGTCP2_LOG_EVENT_CCA, - "pkn=%" PRId64 " acked, slow start cwnd=%" PRIu64, - pkt->pkt_num, cstat->cwnd); + "%" PRIu64 " bytes acked, slow start cwnd=%" PRIu64, + ack->bytes_delivered, cstat->cwnd); + + if (round_start) { + cubic->hs.last_round_min_rtt = cubic->hs.current_round_min_rtt; + cubic->hs.current_round_min_rtt = UINT64_MAX; + cubic->hs.rtt_sample_count = 0; - if (cubic->last_round_min_rtt != UINT64_MAX && - cubic->current_round_min_rtt != UINT64_MAX && - cstat->cwnd >= - NGTCP2_HS_MIN_SSTHRESH * cstat->max_tx_udp_payload_size && - cubic->rtt_sample_count >= NGTCP2_HS_N_RTT_SAMPLE) { - eta = cubic->last_round_min_rtt / 8; - - if (eta < NGTCP2_HS_MIN_ETA) { - eta = NGTCP2_HS_MIN_ETA; - } else if (eta > NGTCP2_HS_MAX_ETA) { - eta = NGTCP2_HS_MAX_ETA; + if (cubic->hs.css_round) { + ++cubic->hs.css_round; } + } + + cubic->hs.current_round_min_rtt = + ngtcp2_min_uint64(cubic->hs.current_round_min_rtt, ack->rtt); + ++cubic->hs.rtt_sample_count; - if (cubic->current_round_min_rtt >= cubic->last_round_min_rtt + eta) { + if (cubic->hs.css_round) { + if (cubic->hs.current_round_min_rtt < cubic->hs.css_baseline_min_rtt) { + cubic->hs.css_baseline_min_rtt = UINT64_MAX; + cubic->hs.css_round = 0; + return; + } + + if (cubic->hs.css_round >= NGTCP2_HS_CSS_ROUNDS) { ngtcp2_log_info(cubic->cc.log, NGTCP2_LOG_EVENT_CCA, "HyStart++ exit slow start"); - cubic->w_last_max = cstat->cwnd; cstat->ssthresh = cstat->cwnd; } - } - - return; - } - /* congestion avoidance */ - - if (cubic->epoch_start == UINT64_MAX) { - cubic->epoch_start = ts; - if (cstat->cwnd < cubic->w_last_max) { - cubic->k = ngtcp2_cbrt((cubic->w_last_max - cstat->cwnd) * 10 / 4 / - cstat->max_tx_udp_payload_size); - cubic->origin_point = cubic->w_last_max; - } else { - cubic->k = 0; - cubic->origin_point = cstat->cwnd; + return; } - cubic->w_tcp = cstat->cwnd; - - ngtcp2_log_info(cubic->cc.log, NGTCP2_LOG_EVENT_CCA, - "cubic-ca epoch_start=%" PRIu64 " k=%" PRIu64 - " origin_point=%" PRIu64, - cubic->epoch_start, cubic->k, cubic->origin_point); + if (cubic->hs.rtt_sample_count >= NGTCP2_HS_N_RTT_SAMPLE && + cubic->hs.current_round_min_rtt != UINT64_MAX && + cubic->hs.last_round_min_rtt != UINT64_MAX) { + rtt_thresh = + ngtcp2_max_uint64(NGTCP2_HS_MIN_RTT_THRESH, + ngtcp2_min_uint64(cubic->hs.last_round_min_rtt / + NGTCP2_HS_MIN_RTT_DIVISOR, + NGTCP2_HS_MAX_RTT_THRESH)); + + if (cubic->hs.current_round_min_rtt >= + cubic->hs.last_round_min_rtt + rtt_thresh) { + cubic->hs.css_baseline_min_rtt = cubic->hs.current_round_min_rtt; + cubic->hs.css_round = 1; + } + } - cubic->pending_add = 0; - cubic->pending_w_add = 0; + return; } - t = ts - cubic->epoch_start; - - tx = (t << 10) / NGTCP2_SECONDS; - kx = (cubic->k << 10); + /* congestion avoidance */ - if (tx > kx) { - time_delta = tx - kx; - } else { - time_delta = kx - tx; + switch (cubic->current.state) { + case NGTCP2_CUBIC_STATE_INITIAL: + m = cstat->max_tx_udp_payload_size * ack->bytes_delivered + + cubic->current.pending_bytes_delivered; + cstat->cwnd += m / cstat->cwnd; + cubic->current.pending_bytes_delivered = m % cstat->cwnd; + return; + case NGTCP2_CUBIC_STATE_RECOVERY: + cubic->current.state = NGTCP2_CUBIC_STATE_CONGESTION_AVOIDANCE; + cubic->current.epoch_start = ts; + break; + default: + break; } - delta = cstat->max_tx_udp_payload_size * - ((((time_delta * time_delta) >> 10) * time_delta) >> 10) * 4 / 10; - delta >>= 10; - - if (tx > kx) { - target = cubic->origin_point + delta; - } else { - target = cubic->origin_point - delta; - } + w_cubic = cubic_cc_compute_w_cubic(cubic, cstat, + ts - cubic->current.app_limited_duration); + w_cubic_next = cubic_cc_compute_w_cubic( + cubic, cstat, + ts - cubic->current.app_limited_duration + cstat->smoothed_rtt); - cwnd_thres = - (target * (((t + cstat->smoothed_rtt) << 10) / NGTCP2_SECONDS)) >> 10; - if (cwnd_thres < cstat->cwnd) { + if (w_cubic_next == UINT64_MAX || w_cubic_next < cstat->cwnd) { target = cstat->cwnd; - } else if (2 * cwnd_thres > 3 * cstat->cwnd) { + } else if (2 * w_cubic_next > 3 * cstat->cwnd) { target = cstat->cwnd * 3 / 2; } else { - target = cwnd_thres; + target = w_cubic_next; } - if (target > cstat->cwnd) { - m = cubic->pending_add + - cstat->max_tx_udp_payload_size * (target - cstat->cwnd); - add = m / cstat->cwnd; - cubic->pending_add = m % cstat->cwnd; - } else { - m = cubic->pending_add + cstat->max_tx_udp_payload_size; - add = m / (100 * cstat->cwnd); - cubic->pending_add = m % (100 * cstat->cwnd); - } - - m = cubic->pending_w_add + cstat->max_tx_udp_payload_size * pkt->pktlen; - - cubic->w_tcp += m / cstat->cwnd; - cubic->pending_w_add = m % cstat->cwnd; + m = ack->bytes_delivered * cstat->max_tx_udp_payload_size + + cubic->current.pending_est_bytes_delivered; + cubic->current.pending_est_bytes_delivered = m % cstat->cwnd; - if (cubic->w_tcp > cstat->cwnd) { - tcp_add = cstat->max_tx_udp_payload_size * (cubic->w_tcp - cstat->cwnd) / - cstat->cwnd; - if (tcp_add > add) { - add = tcp_add; - } + if (cubic->current.w_est < cubic->current.cwnd_prior) { + cubic->current.w_est += m * 9 / 17 / cstat->cwnd; + } else { + cubic->current.w_est += m / cstat->cwnd; } - if (cubic->target_cwnd == 0 || cubic->target_cwnd > cstat->cwnd) { - cstat->cwnd += add; + if (w_cubic == UINT64_MAX || cubic->current.w_est > w_cubic) { + cstat->cwnd = cubic->current.w_est; + } else { + m = (target - cstat->cwnd) * cstat->max_tx_udp_payload_size + + cubic->current.pending_bytes_delivered; + cstat->cwnd += m / cstat->cwnd; + cubic->current.pending_bytes_delivered = m % cstat->cwnd; } ngtcp2_log_info(cubic->cc.log, NGTCP2_LOG_EVENT_CCA, - "pkn=%" PRId64 " acked, cubic-ca cwnd=%" PRIu64 " t=%" PRIu64 - " k=%" PRIi64 " time_delta=%" PRIu64 " delta=%" PRIu64 - " target=%" PRIu64 " w_tcp=%" PRIu64, - pkt->pkt_num, cstat->cwnd, t, cubic->k, time_delta >> 4, - delta, target, cubic->w_tcp); + "%" PRIu64 " bytes acked, cubic-ca cwnd=%" PRIu64 + " k=%" PRIi64 " target=%" PRIu64 " w_est=%" PRIu64, + ack->bytes_delivered, cstat->cwnd, cubic->current.k, target, + cubic->current.w_est); } void ngtcp2_cc_cubic_cc_congestion_event(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, ngtcp2_tstamp sent_ts, + uint64_t bytes_lost, ngtcp2_tstamp ts) { ngtcp2_cc_cubic *cubic = ngtcp2_struct_of(cc, ngtcp2_cc_cubic, cc); - uint64_t min_cwnd; + uint64_t flight_size; if (in_congestion_recovery(cstat, sent_ts)) { return; } - if (cubic->prior.cwnd < cstat->cwnd) { - cubic->prior.cwnd = cstat->cwnd; - cubic->prior.ssthresh = cstat->ssthresh; - cubic->prior.w_last_max = cubic->w_last_max; - cubic->prior.w_tcp = cubic->w_tcp; - cubic->prior.origin_point = cubic->origin_point; - cubic->prior.epoch_start = cubic->epoch_start; - cubic->prior.k = cubic->k; + if (cubic->undo.cwnd < cstat->cwnd) { + cubic->undo.v = cubic->current; + cubic->undo.cwnd = cstat->cwnd; + cubic->undo.ssthresh = cstat->ssthresh; } cstat->congestion_recovery_start_ts = ts; - cubic->epoch_start = UINT64_MAX; - if (cstat->cwnd < cubic->w_last_max) { - cubic->w_last_max = cstat->cwnd * 17 / 10 / 2; + cubic->current.state = NGTCP2_CUBIC_STATE_RECOVERY; + cubic->current.epoch_start = UINT64_MAX; + cubic->current.app_limited_start_ts = UINT64_MAX; + cubic->current.app_limited_duration = 0; + cubic->current.pending_bytes_delivered = 0; + cubic->current.pending_est_bytes_delivered = 0; + + if (cstat->cwnd < cubic->current.w_max) { + cubic->current.w_max = cstat->cwnd * 17 / 20; } else { - cubic->w_last_max = cstat->cwnd; + cubic->current.w_max = cstat->cwnd; } - min_cwnd = 2 * cstat->max_tx_udp_payload_size; cstat->ssthresh = cstat->cwnd * 7 / 10; - cstat->ssthresh = ngtcp2_max(cstat->ssthresh, min_cwnd); + + if (cubic->rst->rs.delivered * 2 < cstat->cwnd) { + flight_size = cstat->bytes_in_flight + bytes_lost; + cstat->ssthresh = ngtcp2_min_uint64( + cstat->ssthresh, + ngtcp2_max_uint64(cubic->rst->rs.delivered, flight_size) * 7 / 10); + } + + cstat->ssthresh = + ngtcp2_max_uint64(cstat->ssthresh, 2 * cstat->max_tx_udp_payload_size); + + cubic->current.cwnd_prior = cstat->cwnd; cstat->cwnd = cstat->ssthresh; + cubic->current.w_est = cstat->cwnd; + + if (cstat->cwnd < cubic->current.w_max) { + cubic->current.k = + ngtcp2_cbrt(((cubic->current.w_max - cstat->cwnd) << 10) * 10 / 4 / + cstat->max_tx_udp_payload_size) * + NGTCP2_SECONDS; + cubic->current.k >>= 10; + } else { + cubic->current.k = 0; + } + ngtcp2_log_info(cubic->cc.log, NGTCP2_LOG_EVENT_CCA, "reduce cwnd because of packet loss cwnd=%" PRIu64, cstat->cwnd); @@ -450,101 +471,34 @@ void ngtcp2_cc_cubic_cc_on_spurious_congestion(ngtcp2_cc *cc, ngtcp2_cc_cubic *cubic = ngtcp2_struct_of(cc, ngtcp2_cc_cubic, cc); (void)ts; - if (cstat->cwnd >= cubic->prior.cwnd) { - return; - } - - cstat->congestion_recovery_start_ts = UINT64_MAX; - - cstat->cwnd = cubic->prior.cwnd; - cstat->ssthresh = cubic->prior.ssthresh; - cubic->w_last_max = cubic->prior.w_last_max; - cubic->w_tcp = cubic->prior.w_tcp; - cubic->origin_point = cubic->prior.origin_point; - cubic->epoch_start = cubic->prior.epoch_start; - cubic->k = cubic->prior.k; - - cubic->prior.cwnd = 0; - cubic->prior.ssthresh = 0; - cubic->prior.w_last_max = 0; - cubic->prior.w_tcp = 0; - cubic->prior.origin_point = 0; - cubic->prior.epoch_start = UINT64_MAX; - cubic->prior.k = 0; - - ngtcp2_log_info(cubic->cc.log, NGTCP2_LOG_EVENT_CCA, - "spurious congestion is detected and congestion state is " - "restored cwnd=%" PRIu64, - cstat->cwnd); -} - -void ngtcp2_cc_cubic_cc_on_persistent_congestion(ngtcp2_cc *cc, - ngtcp2_conn_stat *cstat, - ngtcp2_tstamp ts) { - (void)cc; - (void)ts; - - cstat->cwnd = 2 * cstat->max_tx_udp_payload_size; cstat->congestion_recovery_start_ts = UINT64_MAX; -} - -void ngtcp2_cc_cubic_cc_on_ack_recv(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, - const ngtcp2_cc_ack *ack, - ngtcp2_tstamp ts) { - ngtcp2_cc_cubic *cubic = ngtcp2_struct_of(cc, ngtcp2_cc_cubic, cc); - uint64_t target_cwnd, initcwnd; - uint64_t max_delivery_rate_sec; - (void)ack; - (void)ts; - - ++cubic->ack_count; - - ngtcp2_window_filter_update(&cubic->delivery_rate_sec_filter, - cstat->delivery_rate_sec, cubic->ack_count); - max_delivery_rate_sec = - ngtcp2_window_filter_get_best(&cubic->delivery_rate_sec_filter); - - if (cstat->min_rtt != UINT64_MAX && max_delivery_rate_sec) { - target_cwnd = max_delivery_rate_sec * cstat->smoothed_rtt / NGTCP2_SECONDS; - initcwnd = ngtcp2_cc_compute_initcwnd(cstat->max_tx_udp_payload_size); - cubic->target_cwnd = ngtcp2_max(initcwnd, target_cwnd) * 289 / 100; + if (cstat->cwnd < cubic->undo.cwnd) { + cubic->current = cubic->undo.v; + cstat->cwnd = cubic->undo.cwnd; + cstat->ssthresh = cubic->undo.ssthresh; ngtcp2_log_info(cubic->cc.log, NGTCP2_LOG_EVENT_CCA, - "target_cwnd=%" PRIu64 " max_delivery_rate_sec=%" PRIu64 - " smoothed_rtt=%" PRIu64, - cubic->target_cwnd, max_delivery_rate_sec, - cstat->smoothed_rtt); + "spurious congestion is detected and congestion state is " + "restored cwnd=%" PRIu64, + cstat->cwnd); } -} - -void ngtcp2_cc_cubic_cc_on_pkt_sent(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, - const ngtcp2_cc_pkt *pkt) { - ngtcp2_cc_cubic *cubic = ngtcp2_struct_of(cc, ngtcp2_cc_cubic, cc); - (void)cstat; - if (pkt->pktns_id != NGTCP2_PKTNS_ID_APPLICATION || cubic->window_end != -1) { - return; - } - - cubic->window_end = pkt->pkt_num; - cubic->last_round_min_rtt = cubic->current_round_min_rtt; - cubic->current_round_min_rtt = UINT64_MAX; - cubic->rtt_sample_count = 0; + cubic_vars_reset(&cubic->undo.v); + cubic->undo.cwnd = 0; + cubic->undo.ssthresh = 0; } -void ngtcp2_cc_cubic_cc_new_rtt_sample(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, - ngtcp2_tstamp ts) { +void ngtcp2_cc_cubic_cc_on_persistent_congestion(ngtcp2_cc *cc, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { ngtcp2_cc_cubic *cubic = ngtcp2_struct_of(cc, ngtcp2_cc_cubic, cc); (void)ts; - if (cubic->window_end == -1) { - return; - } + cubic_cc_reset(cubic); - cubic->current_round_min_rtt = - ngtcp2_min(cubic->current_round_min_rtt, cstat->latest_rtt); - ++cubic->rtt_sample_count; + cstat->cwnd = 2 * cstat->max_tx_udp_payload_size; + cstat->congestion_recovery_start_ts = UINT64_MAX; } void ngtcp2_cc_cubic_cc_reset(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, @@ -555,23 +509,3 @@ void ngtcp2_cc_cubic_cc_reset(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, cubic_cc_reset(cubic); } - -void ngtcp2_cc_cubic_cc_event(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, - ngtcp2_cc_event_type event, ngtcp2_tstamp ts) { - ngtcp2_cc_cubic *cubic = ngtcp2_struct_of(cc, ngtcp2_cc_cubic, cc); - ngtcp2_tstamp last_ts; - - if (event != NGTCP2_CC_EVENT_TYPE_TX_START || - cubic->epoch_start == UINT64_MAX) { - return; - } - - last_ts = cstat->last_tx_pkt_ts[NGTCP2_PKTNS_ID_APPLICATION]; - if (last_ts == UINT64_MAX || last_ts <= cubic->epoch_start) { - return; - } - - assert(ts >= last_ts); - - cubic->epoch_start += ts - last_ts; -} diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_cc.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_cc.h index 524bcdb7e4bf86..e3c363a51bb85a 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_cc.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_cc.h @@ -27,18 +27,18 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include #include "ngtcp2_pktns_id.h" -#include "ngtcp2_window_filter.h" #define NGTCP2_LOSS_REDUCTION_FACTOR_BITS 1 #define NGTCP2_PERSISTENT_CONGESTION_THRESHOLD 3 typedef struct ngtcp2_log ngtcp2_log; typedef struct ngtcp2_conn_stat ngtcp2_conn_stat; +typedef struct ngtcp2_rst ngtcp2_rst; /** * @struct @@ -144,10 +144,12 @@ typedef void (*ngtcp2_cc_on_pkt_lost)(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, * * :type:`ngtcp2_cc_congestion_event` is a callback function which is * called when congestion event happens (e.g., when packet is lost). + * |bytes_lost| is the number of bytes lost in this congestion event. */ typedef void (*ngtcp2_cc_congestion_event)(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, ngtcp2_tstamp sent_ts, + uint64_t bytes_lost, ngtcp2_tstamp ts); /** @@ -305,9 +307,6 @@ ngtcp2_cc_pkt *ngtcp2_cc_pkt_init(ngtcp2_cc_pkt *pkt, int64_t pkt_num, /* ngtcp2_cc_reno is the RENO congestion controller. */ typedef struct ngtcp2_cc_reno { ngtcp2_cc cc; - ngtcp2_window_filter delivery_rate_sec_filter; - uint64_t ack_count; - uint64_t target_cwnd; uint64_t pending_add; } ngtcp2_cc_reno; @@ -318,59 +317,81 @@ void ngtcp2_cc_reno_cc_on_pkt_acked(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, void ngtcp2_cc_reno_cc_congestion_event(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, ngtcp2_tstamp sent_ts, - ngtcp2_tstamp ts); + uint64_t bytes_lost, ngtcp2_tstamp ts); void ngtcp2_cc_reno_cc_on_persistent_congestion(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts); -void ngtcp2_cc_reno_cc_on_ack_recv(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, - const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts); - void ngtcp2_cc_reno_cc_reset(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts); +typedef enum ngtcp2_cubic_state { + /* NGTCP2_CUBIC_STATE_INITIAL is the state where CUBIC is in slow + start phase, or congestion avoidance phase before congestion + events occur. */ + NGTCP2_CUBIC_STATE_INITIAL, + /* NGTCP2_CUBIC_STATE_RECOVERY is the state that a connection is in + recovery period. */ + NGTCP2_CUBIC_STATE_RECOVERY, + /* NGTCP2_CUBIC_STATE_CONGESTION_AVOIDANCE is the state where CUBIC + is in congestion avoidance phase after recovery period ends. */ + NGTCP2_CUBIC_STATE_CONGESTION_AVOIDANCE, +} ngtcp2_cubic_state; + +typedef struct ngtcp2_cubic_vars { + uint64_t cwnd_prior; + uint64_t w_max; + ngtcp2_duration k; + ngtcp2_tstamp epoch_start; + uint64_t w_est; + + ngtcp2_cubic_state state; + /* app_limited_start_ts is the timestamp where app limited period + started. */ + ngtcp2_tstamp app_limited_start_ts; + /* app_limited_duration is the cumulative duration where a + connection is under app limited when ACK is received. */ + ngtcp2_duration app_limited_duration; + uint64_t pending_bytes_delivered; + uint64_t pending_est_bytes_delivered; +} ngtcp2_cubic_vars; + /* ngtcp2_cc_cubic is CUBIC congestion controller. */ typedef struct ngtcp2_cc_cubic { ngtcp2_cc cc; - ngtcp2_window_filter delivery_rate_sec_filter; - uint64_t ack_count; - uint64_t target_cwnd; - uint64_t w_last_max; - uint64_t w_tcp; - uint64_t origin_point; - ngtcp2_tstamp epoch_start; - uint64_t k; - /* prior stores the congestion state when a congestion event occurs + ngtcp2_rst *rst; + /* current is a set of variables that are currently in effect. */ + ngtcp2_cubic_vars current; + /* undo stores the congestion state when a congestion event occurs in order to restore the state when it turns out that the event is spurious. */ struct { + ngtcp2_cubic_vars v; uint64_t cwnd; uint64_t ssthresh; - uint64_t w_last_max; - uint64_t w_tcp; - uint64_t origin_point; - ngtcp2_tstamp epoch_start; - uint64_t k; - } prior; + } undo; /* HyStart++ variables */ - size_t rtt_sample_count; - uint64_t current_round_min_rtt; - uint64_t last_round_min_rtt; - int64_t window_end; - uint64_t pending_add; - uint64_t pending_w_add; + struct { + ngtcp2_duration current_round_min_rtt; + ngtcp2_duration last_round_min_rtt; + ngtcp2_duration curr_rtt; + size_t rtt_sample_count; + ngtcp2_duration css_baseline_min_rtt; + size_t css_round; + } hs; + uint64_t next_round_delivered; } ngtcp2_cc_cubic; -void ngtcp2_cc_cubic_init(ngtcp2_cc_cubic *cc, ngtcp2_log *log); +void ngtcp2_cc_cubic_init(ngtcp2_cc_cubic *cc, ngtcp2_log *log, + ngtcp2_rst *rst); -void ngtcp2_cc_cubic_cc_on_pkt_acked(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, - const ngtcp2_cc_pkt *pkt, - ngtcp2_tstamp ts); +void ngtcp2_cc_cubic_cc_on_ack_recv(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts); void ngtcp2_cc_cubic_cc_congestion_event(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, ngtcp2_tstamp sent_ts, - ngtcp2_tstamp ts); + uint64_t bytes_lost, ngtcp2_tstamp ts); void ngtcp2_cc_cubic_cc_on_spurious_congestion(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, @@ -380,21 +401,9 @@ void ngtcp2_cc_cubic_cc_on_persistent_congestion(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts); -void ngtcp2_cc_cubic_cc_on_ack_recv(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, - const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts); - -void ngtcp2_cc_cubic_cc_on_pkt_sent(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, - const ngtcp2_cc_pkt *pkt); - -void ngtcp2_cc_cubic_cc_new_rtt_sample(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, - ngtcp2_tstamp ts); - void ngtcp2_cc_cubic_cc_reset(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts); -void ngtcp2_cc_cubic_cc_event(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, - ngtcp2_cc_event_type event, ngtcp2_tstamp ts); - uint64_t ngtcp2_cbrt(uint64_t n); -#endif /* NGTCP2_CC_H */ +#endif /* !defined(NGTCP2_CC_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_cid.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_cid.c index f3b92b569ec928..181850cfcbc87a 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_cid.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_cid.c @@ -36,6 +36,7 @@ void ngtcp2_cid_init(ngtcp2_cid *cid, const uint8_t *data, size_t datalen) { assert(datalen <= NGTCP2_MAX_CIDLEN); cid->datalen = datalen; + if (datalen) { ngtcp2_cpymem(cid->data, data, datalen); } @@ -74,12 +75,14 @@ void ngtcp2_dcid_init(ngtcp2_dcid *dcid, uint64_t seq, const ngtcp2_cid *cid, const uint8_t *token) { dcid->seq = seq; dcid->cid = *cid; + if (token) { memcpy(dcid->token, token, NGTCP2_STATELESS_RESET_TOKENLEN); dcid->flags = NGTCP2_DCID_FLAG_TOKEN_PRESENT; } else { dcid->flags = NGTCP2_DCID_FLAG_NONE; } + ngtcp2_path_storage_zero(&dcid->ps); dcid->retired_ts = UINT64_MAX; dcid->bound_ts = UINT64_MAX; @@ -115,6 +118,7 @@ void ngtcp2_dcid_copy(ngtcp2_dcid *dest, const ngtcp2_dcid *src) { void ngtcp2_dcid_copy_cid_token(ngtcp2_dcid *dest, const ngtcp2_dcid *src) { dest->seq = src->seq; dest->cid = src->cid; + if (src->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) { dest->flags |= NGTCP2_DCID_FLAG_TOKEN_PRESENT; memcpy(dest->token, src->token, NGTCP2_STATELESS_RESET_TOKENLEN); @@ -123,15 +127,14 @@ void ngtcp2_dcid_copy_cid_token(ngtcp2_dcid *dest, const ngtcp2_dcid *src) { } } -int ngtcp2_dcid_verify_uniqueness(ngtcp2_dcid *dcid, uint64_t seq, +int ngtcp2_dcid_verify_uniqueness(const ngtcp2_dcid *dcid, uint64_t seq, const ngtcp2_cid *cid, const uint8_t *token) { if (dcid->seq == seq) { return ngtcp2_cid_eq(&dcid->cid, cid) && - (dcid->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) && - memcmp(dcid->token, token, - NGTCP2_STATELESS_RESET_TOKENLEN) == 0 - ? 0 - : NGTCP2_ERR_PROTO; + (dcid->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) && + memcmp(dcid->token, token, NGTCP2_STATELESS_RESET_TOKENLEN) == 0 + ? 0 + : NGTCP2_ERR_PROTO; } return !ngtcp2_cid_eq(&dcid->cid, cid) ? 0 : NGTCP2_ERR_PROTO; @@ -140,8 +143,7 @@ int ngtcp2_dcid_verify_uniqueness(ngtcp2_dcid *dcid, uint64_t seq, int ngtcp2_dcid_verify_stateless_reset_token(const ngtcp2_dcid *dcid, const uint8_t *token) { return (dcid->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) && - ngtcp2_cmemeq(dcid->token, token, - NGTCP2_STATELESS_RESET_TOKENLEN) - ? 0 - : NGTCP2_ERR_INVALID_ARGUMENT; + ngtcp2_cmemeq(dcid->token, token, NGTCP2_STATELESS_RESET_TOKENLEN) + ? 0 + : NGTCP2_ERR_INVALID_ARGUMENT; } diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_cid.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_cid.h index 0b37441178c72a..6372ef113d6454 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_cid.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_cid.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -37,20 +37,20 @@ /* NGTCP2_SCID_FLAG_NONE indicates that no flag is set. */ #define NGTCP2_SCID_FLAG_NONE 0x00u /* NGTCP2_SCID_FLAG_USED indicates that a local endpoint observed that - a remote endpoint uses a particular Connection ID. */ + a remote endpoint uses this particular Connection ID. */ #define NGTCP2_SCID_FLAG_USED 0x01u -/* NGTCP2_SCID_FLAG_RETIRED indicates that a particular Connection ID - is retired. */ +/* NGTCP2_SCID_FLAG_RETIRED indicates that this particular Connection + ID is retired. */ #define NGTCP2_SCID_FLAG_RETIRED 0x02u typedef struct ngtcp2_scid { ngtcp2_pq_entry pe; - /* seq is the sequence number associated to the CID. */ + /* seq is the sequence number associated to the Connection ID. */ uint64_t seq; /* cid is a connection ID */ ngtcp2_cid cid; - /* retired_ts is the timestamp when peer tells that this CID is - retired. */ + /* retired_ts is the timestamp when a remote endpoint tells that + this Connection ID is retired. */ ngtcp2_tstamp retired_ts; /* flags is the bitwise OR of zero or more of NGTCP2_SCID_FLAG_*. */ uint8_t flags; @@ -66,33 +66,33 @@ typedef struct ngtcp2_scid { #define NGTCP2_DCID_FLAG_TOKEN_PRESENT 0x02u typedef struct ngtcp2_dcid { - /* seq is the sequence number associated to the CID. */ + /* seq is the sequence number associated to the Connection ID. */ uint64_t seq; - /* cid is a connection ID */ + /* cid is a Connection ID */ ngtcp2_cid cid; /* path is a path which cid is bound to. The addresses are zero length if cid has not been bound to a particular path yet. */ ngtcp2_path_storage ps; - /* retired_ts is the timestamp when peer tells that this CID is + /* retired_ts is the timestamp when this Connection ID is retired. */ ngtcp2_tstamp retired_ts; - /* bound_ts is the timestamp when this connection ID is bound to a - particular path. It is only assigned when a connection ID is - used just for sending PATH_RESPONSE and is not zero-length. */ + /* bound_ts is the timestamp when this Connection ID is bound to a + particular path. It is only assigned when a Connection ID is + used just for sending PATH_RESPONSE, and is not zero-length. */ ngtcp2_tstamp bound_ts; /* bytes_sent is the number of bytes sent to an associated path. */ uint64_t bytes_sent; /* bytes_recv is the number of bytes received from an associated path. */ uint64_t bytes_recv; - /* max_udp_payload_size is the maximum size of UDP payload that is - allowed to send to this path. */ + /* max_udp_payload_size is the maximum size of UDP datagram payload + that is allowed to be sent to this path. */ size_t max_udp_payload_size; /* flags is bitwise OR of zero or more of NGTCP2_DCID_FLAG_*. */ uint8_t flags; - /* token is a stateless reset token associated to this CID. - Actually, the stateless reset token is tied to the connection, - not to the particular connection ID. */ + /* token is a stateless reset token received along with this + Connection ID. The stateless reset token is tied to the + connection, not to the particular Connection ID. */ uint8_t token[NGTCP2_STATELESS_RESET_TOKENLEN]; } ngtcp2_dcid; @@ -106,7 +106,7 @@ void ngtcp2_cid_zero(ngtcp2_cid *cid); int ngtcp2_cid_less(const ngtcp2_cid *lhs, const ngtcp2_cid *rhs); /* - * ngtcp2_cid_empty returns nonzero if |cid| includes empty connection + * ngtcp2_cid_empty returns nonzero if |cid| includes empty Connection * ID. */ int ngtcp2_cid_empty(const ngtcp2_cid *cid); @@ -123,7 +123,7 @@ void ngtcp2_scid_copy(ngtcp2_scid *dest, const ngtcp2_scid *src); /* * ngtcp2_dcid_init initializes |dcid| with the given parameters. If - * |token| is NULL, the function fills dcid->token it with 0. |token| + * |token| is NULL, the function fills dcid->token with 0. |token| * must be NGTCP2_STATELESS_RESET_TOKENLEN bytes long. */ void ngtcp2_dcid_init(ngtcp2_dcid *dcid, uint64_t seq, const ngtcp2_cid *cid, @@ -131,14 +131,14 @@ void ngtcp2_dcid_init(ngtcp2_dcid *dcid, uint64_t seq, const ngtcp2_cid *cid, /* * ngtcp2_dcid_set_token sets |token| to |dcid|. |token| must not be - * NULL and must be NGTCP2_STATELESS_RESET_TOKENLEN bytes long. + * NULL, and must be NGTCP2_STATELESS_RESET_TOKENLEN bytes long. */ void ngtcp2_dcid_set_token(ngtcp2_dcid *dcid, const uint8_t *token); /* * ngtcp2_dcid_set_path sets |path| to |dcid|. It sets - * max_udp_payload_size to the minimum UDP payload size supported - * by the IP protocol version. + * max_udp_payload_size to the minimum UDP datagram payload size + * supported by the IP protocol version. */ void ngtcp2_dcid_set_path(ngtcp2_dcid *dcid, const ngtcp2_path *path); @@ -149,7 +149,7 @@ void ngtcp2_dcid_copy(ngtcp2_dcid *dest, const ngtcp2_dcid *src); /* * ngtcp2_dcid_copy_cid_token behaves like ngtcp2_dcid_copy, but it - * only copies cid, seq, and path. + * only copies cid, seq, and token. */ void ngtcp2_dcid_copy_cid_token(ngtcp2_dcid *dest, const ngtcp2_dcid *src); @@ -157,14 +157,15 @@ void ngtcp2_dcid_copy_cid_token(ngtcp2_dcid *dest, const ngtcp2_dcid *src); * ngtcp2_dcid_verify_uniqueness verifies uniqueness of (|seq|, |cid|, * |token|) tuple against |dcid|. */ -int ngtcp2_dcid_verify_uniqueness(ngtcp2_dcid *dcid, uint64_t seq, +int ngtcp2_dcid_verify_uniqueness(const ngtcp2_dcid *dcid, uint64_t seq, const ngtcp2_cid *cid, const uint8_t *token); /* * ngtcp2_dcid_verify_stateless_reset_token verifies stateless reset - * token |token| against the one included in |dcid|. This function - * returns 0 if the verification succeeds, or one of the following - * negative error codes: + * token |token| against the one included in |dcid|. Tokens are + * compared in constant time. This function returns 0 if the + * verification succeeds, or one of the following negative error + * codes: * * NGTCP2_ERR_INVALID_ARGUMENT * Tokens do not match; or |dcid| does not contain a token. @@ -172,4 +173,4 @@ int ngtcp2_dcid_verify_uniqueness(ngtcp2_dcid *dcid, uint64_t seq, int ngtcp2_dcid_verify_stateless_reset_token(const ngtcp2_dcid *dcid, const uint8_t *token); -#endif /* NGTCP2_CID_H */ +#endif /* !defined(NGTCP2_CID_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_conn.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_conn.c index c8caf47ea76232..8b60efabe126d0 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_conn.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_conn.c @@ -37,7 +37,8 @@ #include "ngtcp2_rcvry.h" #include "ngtcp2_unreachable.h" #include "ngtcp2_net.h" -#include "ngtcp2_conversion.h" +#include "ngtcp2_transport_params.h" +#include "ngtcp2_settings.h" #include "ngtcp2_tstamp.h" #include "ngtcp2_frame_chain.h" @@ -50,8 +51,12 @@ /* NGTCP2_MIN_COALESCED_PAYLOADLEN is the minimum length of QUIC packet payload that should be coalesced to a long packet. */ #define NGTCP2_MIN_COALESCED_PAYLOADLEN 128 +/* NGTCP2_MAX_ACK_PER_PKT is the maximum number of ACK frame per an + incoming QUIC packet to process. ACK frames that exceed this limit + are not processed. */ +#define NGTCP2_MAX_ACK_PER_PKT 1 -ngtcp2_objalloc_def(strm, ngtcp2_strm, oplent); +ngtcp2_objalloc_def(strm, ngtcp2_strm, oplent) /* * conn_local_stream returns nonzero if |stream_id| indicates that it @@ -323,8 +328,8 @@ static int conn_call_select_preferred_addr(ngtcp2_conn *conn, assert(conn->remote.transport_params->preferred_addr_present); rv = conn->callbacks.select_preferred_addr( - conn, dest, &conn->remote.transport_params->preferred_addr, - conn->user_data); + conn, dest, &conn->remote.transport_params->preferred_addr, + conn->user_data); if (rv != 0) { return NGTCP2_ERR_CALLBACK_FAILURE; } @@ -377,7 +382,7 @@ static int conn_call_extend_max_stream_data(ngtcp2_conn *conn, } rv = conn->callbacks.extend_max_stream_data( - conn, stream_id, datalen, conn->user_data, strm->stream_user_data); + conn, stream_id, datalen, conn->user_data, strm->stream_user_data); if (rv != 0) { return NGTCP2_ERR_CALLBACK_FAILURE; } @@ -395,9 +400,9 @@ static int conn_call_dcid_status(ngtcp2_conn *conn, } rv = conn->callbacks.dcid_status( - conn, type, dcid->seq, &dcid->cid, - (dcid->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) ? dcid->token : NULL, - conn->user_data); + conn, type, dcid->seq, &dcid->cid, + (dcid->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) ? dcid->token : NULL, + conn->user_data); if (rv != 0) { return NGTCP2_ERR_CALLBACK_FAILURE; } @@ -413,7 +418,7 @@ static int conn_call_activate_dcid(ngtcp2_conn *conn, const ngtcp2_dcid *dcid) { static int conn_call_deactivate_dcid(ngtcp2_conn *conn, const ngtcp2_dcid *dcid) { return conn_call_dcid_status( - conn, NGTCP2_CONNECTION_ID_STATUS_TYPE_DEACTIVATE, dcid); + conn, NGTCP2_CONNECTION_ID_STATUS_TYPE_DEACTIVATE, dcid); } static int conn_call_stream_stop_sending(ngtcp2_conn *conn, int64_t stream_id, @@ -587,8 +592,8 @@ static int conn_call_recv_datagram(ngtcp2_conn *conn, flags |= NGTCP2_DATAGRAM_FLAG_0RTT; } - rv = conn->callbacks.recv_datagram(conn, flags, data, datalen, - conn->user_data); + rv = + conn->callbacks.recv_datagram(conn, flags, data, datalen, conn->user_data); if (rv != 0) { return NGTCP2_ERR_CALLBACK_FAILURE; } @@ -607,8 +612,8 @@ conn_call_update_key(ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, assert(conn->callbacks.update_key); rv = conn->callbacks.update_key( - conn, rx_secret, tx_secret, rx_aead_ctx, rx_iv, tx_aead_ctx, tx_iv, - current_rx_secret, current_tx_secret, secretlen, conn->user_data); + conn, rx_secret, tx_secret, rx_aead_ctx, rx_iv, tx_aead_ctx, tx_iv, + current_rx_secret, current_tx_secret, secretlen, conn->user_data); if (rv != 0) { return NGTCP2_ERR_CALLBACK_FAILURE; } @@ -623,7 +628,7 @@ static int conn_call_version_negotiation(ngtcp2_conn *conn, uint32_t version, assert(conn->callbacks.version_negotiation); rv = - conn->callbacks.version_negotiation(conn, version, dcid, conn->user_data); + conn->callbacks.version_negotiation(conn, version, dcid, conn->user_data); if (rv != 0) { return NGTCP2_ERR_CALLBACK_FAILURE; } @@ -663,13 +668,11 @@ static int conn_call_recv_tx_key(ngtcp2_conn *conn, return 0; } -static int pktns_init(ngtcp2_pktns *pktns, ngtcp2_pktns_id pktns_id, - ngtcp2_rst *rst, ngtcp2_cc *cc, int64_t initial_pkt_num, - ngtcp2_log *log, ngtcp2_qlog *qlog, - ngtcp2_objalloc *rtb_entry_objalloc, - ngtcp2_objalloc *frc_objalloc, const ngtcp2_mem *mem) { - int rv; - +static void pktns_init(ngtcp2_pktns *pktns, ngtcp2_pktns_id pktns_id, + ngtcp2_rst *rst, ngtcp2_cc *cc, int64_t initial_pkt_num, + ngtcp2_log *log, ngtcp2_qlog *qlog, + ngtcp2_objalloc *rtb_entry_objalloc, + ngtcp2_objalloc *frc_objalloc, const ngtcp2_mem *mem) { memset(pktns, 0, sizeof(*pktns)); ngtcp2_gaptr_init(&pktns->rx.pngap, mem); @@ -678,25 +681,15 @@ static int pktns_init(ngtcp2_pktns *pktns, ngtcp2_pktns_id pktns_id, pktns->tx.non_ack_pkt_start_ts = UINT64_MAX; pktns->rx.max_pkt_num = -1; pktns->rx.max_ack_eliciting_pkt_num = -1; + pktns->id = pktns_id; - rv = ngtcp2_acktr_init(&pktns->acktr, log, mem); - if (rv != 0) { - goto fail_acktr_init; - } + ngtcp2_acktr_init(&pktns->acktr, log, mem); ngtcp2_strm_init(&pktns->crypto.strm, 0, NGTCP2_STRM_FLAG_NONE, 0, 0, NULL, frc_objalloc, mem); - ngtcp2_rtb_init(&pktns->rtb, pktns_id, &pktns->crypto.strm, rst, cc, - initial_pkt_num, log, qlog, rtb_entry_objalloc, frc_objalloc, - mem); - - return 0; - -fail_acktr_init: - ngtcp2_gaptr_free(&pktns->rx.pngap); - - return rv; + ngtcp2_rtb_init(&pktns->rtb, rst, cc, initial_pkt_num, log, qlog, + rtb_entry_objalloc, frc_objalloc, mem); } static int pktns_new(ngtcp2_pktns **ppktns, ngtcp2_pktns_id pktns_id, @@ -704,20 +697,15 @@ static int pktns_new(ngtcp2_pktns **ppktns, ngtcp2_pktns_id pktns_id, ngtcp2_log *log, ngtcp2_qlog *qlog, ngtcp2_objalloc *rtb_entry_objalloc, ngtcp2_objalloc *frc_objalloc, const ngtcp2_mem *mem) { - int rv; - *ppktns = ngtcp2_mem_malloc(mem, sizeof(ngtcp2_pktns)); if (*ppktns == NULL) { return NGTCP2_ERR_NOMEM; } - rv = pktns_init(*ppktns, pktns_id, rst, cc, initial_pkt_num, log, qlog, - rtb_entry_objalloc, frc_objalloc, mem); - if (rv != 0) { - ngtcp2_mem_free(mem, *ppktns); - } + pktns_init(*ppktns, pktns_id, rst, cc, initial_pkt_num, log, qlog, + rtb_entry_objalloc, frc_objalloc, mem); - return rv; + return 0; } static int cycle_less(const ngtcp2_pq_entry *lhs, const ngtcp2_pq_entry *rhs) { @@ -783,6 +771,8 @@ static int cid_less(const ngtcp2_ksl_key *lhs, const ngtcp2_ksl_key *rhs) { return ngtcp2_cid_less(lhs, rhs); } +ngtcp2_ksl_search_def(cid_less, cid_less) + static int retired_ts_less(const ngtcp2_pq_entry *lhs, const ngtcp2_pq_entry *rhs) { const ngtcp2_scid *a = ngtcp2_struct_of(lhs, ngtcp2_scid, pe); @@ -803,8 +793,9 @@ static void conn_reset_conn_stat_cc(ngtcp2_conn *conn, cstat->first_rtt_sample_ts = UINT64_MAX; cstat->pto_count = 0; cstat->loss_detection_timer = UINT64_MAX; - cstat->cwnd = - ngtcp2_cc_compute_initcwnd(conn->local.settings.max_tx_udp_payload_size); + cstat->max_tx_udp_payload_size = + ngtcp2_conn_get_path_max_tx_udp_payload_size(conn); + cstat->cwnd = ngtcp2_cc_compute_initcwnd(cstat->max_tx_udp_payload_size); cstat->ssthresh = UINT64_MAX; cstat->congestion_recovery_start_ts = UINT64_MAX; cstat->bytes_in_flight = 0; @@ -847,7 +838,7 @@ static void delete_scid(ngtcp2_ksl *scids, const ngtcp2_mem *mem) { static ngtcp2_duration compute_pto(ngtcp2_duration smoothed_rtt, ngtcp2_duration rttvar, ngtcp2_duration max_ack_delay) { - ngtcp2_duration var = ngtcp2_max(4 * rttvar, NGTCP2_GRANULARITY); + ngtcp2_duration var = ngtcp2_max_uint64(4 * rttvar, NGTCP2_GRANULARITY); return smoothed_rtt + var + max_ack_delay; } @@ -859,7 +850,7 @@ static ngtcp2_duration conn_compute_initial_pto(ngtcp2_conn *conn, ngtcp2_duration initial_rtt = conn->local.settings.initial_rtt; ngtcp2_duration max_ack_delay; - if (pktns->rtb.pktns_id == NGTCP2_PKTNS_ID_APPLICATION && + if (pktns->id == NGTCP2_PKTNS_ID_APPLICATION && conn->remote.transport_params) { max_ack_delay = conn->remote.transport_params->max_ack_delay; } else { @@ -876,7 +867,7 @@ static ngtcp2_duration conn_compute_pto(ngtcp2_conn *conn, ngtcp2_conn_stat *cstat = &conn->cstat; ngtcp2_duration max_ack_delay; - if (pktns->rtb.pktns_id == NGTCP2_PKTNS_ID_APPLICATION && + if (pktns->id == NGTCP2_PKTNS_ID_APPLICATION && conn->remote.transport_params) { max_ack_delay = conn->remote.transport_params->max_ack_delay; } else { @@ -898,7 +889,7 @@ static ngtcp2_duration conn_compute_pv_timeout_pto(ngtcp2_conn *conn, ngtcp2_duration pto) { ngtcp2_duration initial_pto = conn_compute_initial_pto(conn, &conn->pktns); - return 3 * ngtcp2_max(pto, initial_pto); + return 3 * ngtcp2_max_uint64(pto, initial_pto); } /* @@ -1007,28 +998,15 @@ static void conn_reset_ecn_validation_state(ngtcp2_conn *conn) { static uint8_t server_default_available_versions[] = {0, 0, 0, 1}; /* - * available_versions_new allocates new buffer, and writes |versions| - * of length |versionslen| in network byte order, suitable for sending - * in available_versions field of version_information QUIC transport - * parameter. The pointer to the allocated buffer is assigned to - * |*pbuf|. - * - * This function returns 0 if it succeeds, or one of the negative - * error codes: - * - * NGTCP2_ERR_NOMEM - * Out of memory. + * available_versions_init writes |versions| of length |versionslen| + * in network byte order to the buffer pointed by |buf|, suitable for + * sending in available_versions field of version_information QUIC + * transport parameter. This function returns the pointer to the one + * beyond the last byte written. */ -static int available_versions_new(uint8_t **pbuf, const uint32_t *versions, - size_t versionslen, const ngtcp2_mem *mem) { +static void *available_versions_init(void *buf, const uint32_t *versions, + size_t versionslen) { size_t i; - uint8_t *buf = ngtcp2_mem_malloc(mem, sizeof(uint32_t) * versionslen); - - if (buf == NULL) { - return NGTCP2_ERR_NOMEM; - } - - *pbuf = buf; for (i = 0; i < versionslen; ++i) { buf = ngtcp2_put_uint32be(buf, versions[i]); @@ -1055,6 +1033,16 @@ conn_set_local_transport_params(ngtcp2_conn *conn, p->version_info_present = 1; } +static size_t buflen_align(size_t buflen) { + return (buflen + 0x7) & (size_t)~0x7; +} + +static void *buf_align(void *buf) { + return (void *)((uintptr_t)((uint8_t *)buf + 0x7) & (uintptr_t)~0x7); +} + +static void *buf_advance(void *buf, size_t n) { return (uint8_t *)buf + n; } + static int conn_new(ngtcp2_conn **pconn, const ngtcp2_cid *dcid, const ngtcp2_cid *scid, const ngtcp2_path *path, uint32_t client_chosen_version, int callbacks_version, @@ -1065,16 +1053,20 @@ static int conn_new(ngtcp2_conn **pconn, const ngtcp2_cid *dcid, const ngtcp2_mem *mem, void *user_data, int server) { int rv; ngtcp2_scid *scident; - uint8_t *buf; + void *buf, *tokenbuf; + size_t buflen; uint8_t fixed_bit_byte; size_t i; uint32_t *preferred_versions; + ngtcp2_settings settingsbuf; ngtcp2_transport_params paramsbuf; (void)callbacks_version; (void)settings_version; + settings = + ngtcp2_settings_convert_to_latest(&settingsbuf, settings_version, settings); params = ngtcp2_transport_params_convert_to_latest( - ¶msbuf, transport_params_version, params); + ¶msbuf, transport_params_version, params); assert(settings->max_window <= NGTCP2_MAX_VARINT); assert(settings->max_stream_window <= NGTCP2_MAX_VARINT); @@ -1111,21 +1103,59 @@ static int conn_new(ngtcp2_conn **pconn, const ngtcp2_cid *dcid, assert(callbacks->get_path_challenge_data); assert(!server || !ngtcp2_is_reserved_version(client_chosen_version)); + for (i = 0; i < settings->pmtud_probeslen; ++i) { + assert(settings->pmtud_probes[i] > NGTCP2_MAX_UDP_PAYLOAD_SIZE); + } + if (mem == NULL) { mem = ngtcp2_mem_default(); } - *pconn = ngtcp2_mem_calloc(mem, 1, sizeof(ngtcp2_conn)); - if (*pconn == NULL) { - rv = NGTCP2_ERR_NOMEM; - goto fail_conn; + buflen = sizeof(ngtcp2_conn); + if (settings->qlog_write) { + buflen = buflen_align(buflen); + buflen += NGTCP2_QLOG_BUFLEN; + } + + if (settings->pmtud_probeslen) { + buflen = buflen_align(buflen); + buflen += sizeof(settings->pmtud_probes[0]) * settings->pmtud_probeslen; + } + + if (settings->preferred_versionslen) { + buflen = buflen_align(buflen); + buflen += + sizeof(settings->preferred_versions[0]) * settings->preferred_versionslen; + } + + if (settings->available_versionslen) { + buflen = buflen_align(buflen); + buflen += + sizeof(settings->available_versions[0]) * settings->available_versionslen; + } else if (server) { + if (settings->preferred_versionslen) { + buflen = buflen_align(buflen); + buflen += sizeof(settings->preferred_versions[0]) * + settings->preferred_versionslen; + } + } else if (!ngtcp2_is_reserved_version(client_chosen_version)) { + buflen = buflen_align(buflen); + buflen += sizeof(client_chosen_version); + } + + buf = ngtcp2_mem_calloc(mem, 1, buflen); + if (buf == NULL) { + return NGTCP2_ERR_NOMEM; } + *pconn = buf; + buf = buf_advance(buf, sizeof(ngtcp2_conn)); + (*pconn)->server = server; - ngtcp2_objalloc_frame_chain_init(&(*pconn)->frc_objalloc, 64, mem); - ngtcp2_objalloc_rtb_entry_init(&(*pconn)->rtb_entry_objalloc, 64, mem); - ngtcp2_objalloc_strm_init(&(*pconn)->strm_objalloc, 64, mem); + ngtcp2_objalloc_frame_chain_init(&(*pconn)->frc_objalloc, 16, mem); + ngtcp2_objalloc_rtb_entry_init(&(*pconn)->rtb_entry_objalloc, 16, mem); + ngtcp2_objalloc_strm_init(&(*pconn)->strm_objalloc, 16, mem); ngtcp2_static_ringbuf_dcid_bound_init(&(*pconn)->dcid.bound); @@ -1135,7 +1165,8 @@ static int conn_new(ngtcp2_conn **pconn, const ngtcp2_cid *dcid, ngtcp2_gaptr_init(&(*pconn)->dcid.seqgap, mem); - ngtcp2_ksl_init(&(*pconn)->scid.set, cid_less, sizeof(ngtcp2_cid), mem); + ngtcp2_ksl_init(&(*pconn)->scid.set, cid_less, ksl_cid_less_search, + sizeof(ngtcp2_cid), mem); ngtcp2_pq_init(&(*pconn)->scid.used, retired_ts_less, mem); @@ -1143,9 +1174,9 @@ static int conn_new(ngtcp2_conn **pconn, const ngtcp2_cid *dcid, ngtcp2_pq_init(&(*pconn)->tx.strmq, cycle_less, mem); - ngtcp2_idtr_init(&(*pconn)->remote.bidi.idtr, !server, mem); + ngtcp2_idtr_init(&(*pconn)->remote.bidi.idtr, mem); - ngtcp2_idtr_init(&(*pconn)->remote.uni.idtr, !server, mem); + ngtcp2_idtr_init(&(*pconn)->remote.uni.idtr, mem); ngtcp2_static_ringbuf_path_challenge_init(&(*pconn)->rx.path_challenge); @@ -1154,36 +1185,46 @@ static int conn_new(ngtcp2_conn **pconn, const ngtcp2_cid *dcid, ngtcp2_qlog_init(&(*pconn)->qlog, settings->qlog_write, settings->initial_ts, user_data); if ((*pconn)->qlog.write) { - buf = ngtcp2_mem_malloc(mem, NGTCP2_QLOG_BUFLEN); - if (buf == NULL) { - rv = NGTCP2_ERR_NOMEM; - goto fail_qlog_buf; - } + buf = buf_align(buf); ngtcp2_buf_init(&(*pconn)->qlog.buf, buf, NGTCP2_QLOG_BUFLEN); + buf = buf_advance(buf, NGTCP2_QLOG_BUFLEN); } (*pconn)->local.settings = *settings; if (settings->tokenlen) { - buf = ngtcp2_mem_malloc(mem, settings->tokenlen); - if (buf == NULL) { + tokenbuf = ngtcp2_mem_malloc(mem, settings->tokenlen); + if (tokenbuf == NULL) { rv = NGTCP2_ERR_NOMEM; goto fail_token; } - memcpy(buf, settings->token, settings->tokenlen); - (*pconn)->local.settings.token = buf; + memcpy(tokenbuf, settings->token, settings->tokenlen); + (*pconn)->local.settings.token = tokenbuf; } else { (*pconn)->local.settings.token = NULL; } + if (settings->pmtud_probeslen) { + (*pconn)->local.settings.pmtud_probes = buf_align(buf); + buf = ngtcp2_cpymem( + (uint16_t *)(*pconn)->local.settings.pmtud_probes, settings->pmtud_probes, + sizeof(settings->pmtud_probes[0]) * settings->pmtud_probeslen); + } + if (!(*pconn)->local.settings.original_version) { (*pconn)->local.settings.original_version = client_chosen_version; } + ngtcp2_dcid_init(&(*pconn)->dcid.current, 0, dcid, NULL); + ngtcp2_dcid_set_path(&(*pconn)->dcid.current, path); + + rv = ngtcp2_gaptr_push(&(*pconn)->dcid.seqgap, 0, 1); + if (rv != 0) { + goto fail_seqgap_push; + } + conn_reset_conn_stat(*pconn, &(*pconn)->cstat); (*pconn)->cstat.initial_rtt = settings->initial_rtt; - (*pconn)->cstat.max_tx_udp_payload_size = - (*pconn)->local.settings.max_tx_udp_payload_size; ngtcp2_rst_init(&(*pconn)->rst); @@ -1195,7 +1236,7 @@ static int conn_new(ngtcp2_conn **pconn, const ngtcp2_cid *dcid, break; case NGTCP2_CC_ALGO_CUBIC: - ngtcp2_cc_cubic_init(&(*pconn)->cubic, &(*pconn)->log); + ngtcp2_cc_cubic_init(&(*pconn)->cubic, &(*pconn)->log, &(*pconn)->rst); break; case NGTCP2_CC_ALGO_BBR: @@ -1224,13 +1265,10 @@ static int conn_new(ngtcp2_conn **pconn, const ngtcp2_cid *dcid, goto fail_hs_pktns_init; } - rv = pktns_init(&(*pconn)->pktns, NGTCP2_PKTNS_ID_APPLICATION, &(*pconn)->rst, - &(*pconn)->cc, settings->initial_pkt_num, &(*pconn)->log, - &(*pconn)->qlog, &(*pconn)->rtb_entry_objalloc, - &(*pconn)->frc_objalloc, mem); - if (rv != 0) { - goto fail_pktns_init; - } + pktns_init(&(*pconn)->pktns, NGTCP2_PKTNS_ID_APPLICATION, &(*pconn)->rst, + &(*pconn)->cc, settings->initial_pkt_num, &(*pconn)->log, + &(*pconn)->qlog, &(*pconn)->rtb_entry_objalloc, + &(*pconn)->frc_objalloc, mem); scident = ngtcp2_mem_malloc(mem, sizeof(*scident)); if (scident == NULL) { @@ -1249,14 +1287,6 @@ static int conn_new(ngtcp2_conn **pconn, const ngtcp2_cid *dcid, scident = NULL; - ngtcp2_dcid_init(&(*pconn)->dcid.current, 0, dcid, NULL); - ngtcp2_dcid_set_path(&(*pconn)->dcid.current, path); - - rv = ngtcp2_gaptr_push(&(*pconn)->dcid.seqgap, 0, 1); - if (rv != 0) { - goto fail_seqgap_push; - } - if (settings->preferred_versionslen) { if (!server && !ngtcp2_is_reserved_version(client_chosen_version)) { for (i = 0; i < settings->preferred_versionslen; ++i) { @@ -1268,12 +1298,9 @@ static int conn_new(ngtcp2_conn **pconn, const ngtcp2_cid *dcid, assert(i < settings->preferred_versionslen); } - preferred_versions = ngtcp2_mem_malloc( - mem, sizeof(uint32_t) * settings->preferred_versionslen); - if (preferred_versions == NULL) { - rv = NGTCP2_ERR_NOMEM; - goto fail_preferred_versions; - } + preferred_versions = buf_align(buf); + buf = buf_advance(preferred_versions, sizeof(preferred_versions[0]) * + settings->preferred_versionslen); for (i = 0; i < settings->preferred_versionslen; ++i) { assert(ngtcp2_is_supported_version(settings->preferred_versions[i])); @@ -1304,39 +1331,33 @@ static int conn_new(ngtcp2_conn **pconn, const ngtcp2_cid *dcid, ngtcp2_is_supported_version(settings->available_versions[i])); } - rv = available_versions_new(&buf, settings->available_versions, - settings->available_versionslen, mem); - if (rv != 0) { - goto fail_available_versions; - } - - (*pconn)->vneg.available_versions = buf; + (*pconn)->vneg.available_versions = buf_align(buf); (*pconn)->vneg.available_versionslen = - sizeof(uint32_t) * settings->available_versionslen; + sizeof(uint32_t) * settings->available_versionslen; + + buf = available_versions_init((*pconn)->vneg.available_versions, + settings->available_versions, + settings->available_versionslen); } else if (server) { if (settings->preferred_versionslen) { - rv = available_versions_new(&buf, settings->preferred_versions, - settings->preferred_versionslen, mem); - if (rv != 0) { - goto fail_available_versions; - } - - (*pconn)->vneg.available_versions = buf; + (*pconn)->vneg.available_versions = buf_align(buf); (*pconn)->vneg.available_versionslen = - sizeof(uint32_t) * settings->preferred_versionslen; + sizeof(uint32_t) * settings->preferred_versionslen; + + buf = available_versions_init((*pconn)->vneg.available_versions, + settings->preferred_versions, + settings->preferred_versionslen); } else { (*pconn)->vneg.available_versions = server_default_available_versions; (*pconn)->vneg.available_versionslen = - sizeof(server_default_available_versions); + sizeof(server_default_available_versions); } - } else if (!server && !ngtcp2_is_reserved_version(client_chosen_version)) { - rv = available_versions_new(&buf, &client_chosen_version, 1, mem); - if (rv != 0) { - goto fail_available_versions; - } - - (*pconn)->vneg.available_versions = buf; + } else if (!ngtcp2_is_reserved_version(client_chosen_version)) { + (*pconn)->vneg.available_versions = buf_align(buf); (*pconn)->vneg.available_versionslen = sizeof(uint32_t); + + buf = available_versions_init((*pconn)->vneg.available_versions, + &client_chosen_version, 1); } (*pconn)->local.settings.available_versions = NULL; @@ -1367,54 +1388,39 @@ static int conn_new(ngtcp2_conn **pconn, const ngtcp2_cid *dcid, conn_reset_ecn_validation_state(*pconn); - ngtcp2_qlog_start( - &(*pconn)->qlog, - server ? ((*pconn)->local.transport_params.retry_scid_present - ? &(*pconn)->local.transport_params.retry_scid - : &(*pconn)->local.transport_params.original_dcid) - : dcid, - server); + ngtcp2_qlog_start(&(*pconn)->qlog, + server + ? ((*pconn)->local.transport_params.retry_scid_present + ? &(*pconn)->local.transport_params.retry_scid + : &(*pconn)->local.transport_params.original_dcid) + : dcid, + server); return 0; -fail_available_versions: - ngtcp2_mem_free(mem, (*pconn)->vneg.preferred_versions); -fail_preferred_versions: -fail_seqgap_push: fail_scid_set_insert: ngtcp2_mem_free(mem, scident); fail_scident: - pktns_free(&(*pconn)->pktns, mem); -fail_pktns_init: pktns_del((*pconn)->hs_pktns, mem); fail_hs_pktns_init: pktns_del((*pconn)->in_pktns, mem); fail_in_pktns_init: + ngtcp2_gaptr_free(&(*pconn)->dcid.seqgap); +fail_seqgap_push: ngtcp2_mem_free(mem, (uint8_t *)(*pconn)->local.settings.token); fail_token: - ngtcp2_mem_free(mem, (*pconn)->qlog.buf.begin); -fail_qlog_buf: - ngtcp2_idtr_free(&(*pconn)->remote.uni.idtr); - ngtcp2_idtr_free(&(*pconn)->remote.bidi.idtr); - ngtcp2_map_free(&(*pconn)->strms); - delete_scid(&(*pconn)->scid.set, mem); - ngtcp2_ksl_free(&(*pconn)->scid.set); - ngtcp2_gaptr_free(&(*pconn)->dcid.seqgap); - ngtcp2_objalloc_free(&(*pconn)->strm_objalloc); - ngtcp2_objalloc_free(&(*pconn)->rtb_entry_objalloc); - ngtcp2_objalloc_free(&(*pconn)->frc_objalloc); ngtcp2_mem_free(mem, *pconn); -fail_conn: + return rv; } int ngtcp2_conn_client_new_versioned( - ngtcp2_conn **pconn, const ngtcp2_cid *dcid, const ngtcp2_cid *scid, - const ngtcp2_path *path, uint32_t client_chosen_version, - int callbacks_version, const ngtcp2_callbacks *callbacks, - int settings_version, const ngtcp2_settings *settings, - int transport_params_version, const ngtcp2_transport_params *params, - const ngtcp2_mem *mem, void *user_data) { + ngtcp2_conn **pconn, const ngtcp2_cid *dcid, const ngtcp2_cid *scid, + const ngtcp2_path *path, uint32_t client_chosen_version, + int callbacks_version, const ngtcp2_callbacks *callbacks, + int settings_version, const ngtcp2_settings *settings, + int transport_params_version, const ngtcp2_transport_params *params, + const ngtcp2_mem *mem, void *user_data) { int rv; rv = conn_new(pconn, dcid, scid, path, client_chosen_version, @@ -1438,12 +1444,12 @@ int ngtcp2_conn_client_new_versioned( } int ngtcp2_conn_server_new_versioned( - ngtcp2_conn **pconn, const ngtcp2_cid *dcid, const ngtcp2_cid *scid, - const ngtcp2_path *path, uint32_t client_chosen_version, - int callbacks_version, const ngtcp2_callbacks *callbacks, - int settings_version, const ngtcp2_settings *settings, - int transport_params_version, const ngtcp2_transport_params *params, - const ngtcp2_mem *mem, void *user_data) { + ngtcp2_conn **pconn, const ngtcp2_cid *dcid, const ngtcp2_cid *scid, + const ngtcp2_path *path, uint32_t client_chosen_version, + int callbacks_version, const ngtcp2_callbacks *callbacks, + int settings_version, const ngtcp2_settings *settings, + int transport_params_version, const ngtcp2_transport_params *params, + const ngtcp2_mem *mem, void *user_data) { int rv; rv = conn_new(pconn, dcid, scid, path, client_chosen_version, @@ -1471,8 +1477,8 @@ int ngtcp2_conn_server_new_versioned( * credits are considered. */ static uint64_t conn_fc_credits(ngtcp2_conn *conn, ngtcp2_strm *strm) { - return ngtcp2_min(strm->tx.max_offset - strm->tx.offset, - conn->tx.max_offset - conn->tx.offset); + return ngtcp2_min_uint64(strm->tx.max_offset - strm->tx.offset, + conn->tx.max_offset - conn->tx.offset); } /* @@ -1483,7 +1489,7 @@ static uint64_t conn_fc_credits(ngtcp2_conn *conn, ngtcp2_strm *strm) { static uint64_t conn_enforce_flow_control(ngtcp2_conn *conn, ngtcp2_strm *strm, uint64_t len) { uint64_t fc_credits = conn_fc_credits(conn, strm); - return ngtcp2_min(len, fc_credits); + return ngtcp2_min_uint64(len, fc_credits); } static int delete_strms_each(void *data, void *ptr) { @@ -1525,15 +1531,15 @@ void ngtcp2_conn_del(ngtcp2_conn *conn) { if (conn->crypto.key_update.old_rx_ckm) { conn_call_delete_crypto_aead_ctx( - conn, &conn->crypto.key_update.old_rx_ckm->aead_ctx); + conn, &conn->crypto.key_update.old_rx_ckm->aead_ctx); } if (conn->crypto.key_update.new_rx_ckm) { conn_call_delete_crypto_aead_ctx( - conn, &conn->crypto.key_update.new_rx_ckm->aead_ctx); + conn, &conn->crypto.key_update.new_rx_ckm->aead_ctx); } if (conn->crypto.key_update.new_tx_ckm) { conn_call_delete_crypto_aead_ctx( - conn, &conn->crypto.key_update.new_tx_ckm->aead_ctx); + conn, &conn->crypto.key_update.new_tx_ckm->aead_ctx); } if (conn->pktns.crypto.rx.ckm) { @@ -1551,26 +1557,26 @@ void ngtcp2_conn_del(ngtcp2_conn *conn) { if (conn->hs_pktns) { if (conn->hs_pktns->crypto.rx.ckm) { conn_call_delete_crypto_aead_ctx( - conn, &conn->hs_pktns->crypto.rx.ckm->aead_ctx); + conn, &conn->hs_pktns->crypto.rx.ckm->aead_ctx); } conn_call_delete_crypto_cipher_ctx(conn, &conn->hs_pktns->crypto.rx.hp_ctx); if (conn->hs_pktns->crypto.tx.ckm) { conn_call_delete_crypto_aead_ctx( - conn, &conn->hs_pktns->crypto.tx.ckm->aead_ctx); + conn, &conn->hs_pktns->crypto.tx.ckm->aead_ctx); } conn_call_delete_crypto_cipher_ctx(conn, &conn->hs_pktns->crypto.tx.hp_ctx); } if (conn->in_pktns) { if (conn->in_pktns->crypto.rx.ckm) { conn_call_delete_crypto_aead_ctx( - conn, &conn->in_pktns->crypto.rx.ckm->aead_ctx); + conn, &conn->in_pktns->crypto.rx.ckm->aead_ctx); } conn_call_delete_crypto_cipher_ctx(conn, &conn->in_pktns->crypto.rx.hp_ctx); if (conn->in_pktns->crypto.tx.ckm) { conn_call_delete_crypto_aead_ctx( - conn, &conn->in_pktns->crypto.tx.ckm->aead_ctx); + conn, &conn->in_pktns->crypto.tx.ckm->aead_ctx); } conn_call_delete_crypto_cipher_ctx(conn, &conn->in_pktns->crypto.tx.hp_ctx); } @@ -1582,11 +1588,6 @@ void ngtcp2_conn_del(ngtcp2_conn *conn) { conn_vneg_crypto_free(conn); - ngtcp2_mem_free(conn->mem, conn->vneg.preferred_versions); - if (conn->vneg.available_versions != server_default_available_versions) { - ngtcp2_mem_free(conn->mem, conn->vneg.available_versions); - } - ngtcp2_mem_free(conn->mem, conn->crypto.decrypt_buf.base); ngtcp2_mem_free(conn->mem, conn->crypto.decrypt_hp_buf.base); ngtcp2_mem_free(conn->mem, (uint8_t *)conn->local.settings.token); @@ -1600,8 +1601,6 @@ void ngtcp2_conn_del(ngtcp2_conn *conn) { pktns_del(conn->hs_pktns, conn->mem); pktns_del(conn->in_pktns, conn->mem); - ngtcp2_mem_free(conn->mem, conn->qlog.buf.begin); - ngtcp2_pmtud_del(conn->pmtud); ngtcp2_pv_del(conn->pv); @@ -1611,7 +1610,7 @@ void ngtcp2_conn_del(ngtcp2_conn *conn) { ngtcp2_idtr_free(&conn->remote.bidi.idtr); ngtcp2_mem_free(conn->mem, conn->tx.ack); ngtcp2_pq_free(&conn->tx.strmq); - ngtcp2_map_each_free(&conn->strms, delete_strms_each, (void *)conn); + ngtcp2_map_each(&conn->strms, delete_strms_each, (void *)conn); ngtcp2_map_free(&conn->strms); ngtcp2_pq_free(&conn->scid.used); @@ -1665,8 +1664,9 @@ static int conn_ensure_ack_ranges(ngtcp2_conn *conn, size_t n) { * ACK. */ static ngtcp2_duration conn_compute_ack_delay(ngtcp2_conn *conn) { - return ngtcp2_min(conn->local.transport_params.max_ack_delay, - conn->cstat.smoothed_rtt / 8); + return ngtcp2_min_uint64( + conn->local.transport_params.max_ack_delay, + ngtcp2_max_uint64(conn->cstat.smoothed_rtt / 8, NGTCP2_NANOSECONDS)); } int ngtcp2_conn_create_ack_frame(ngtcp2_conn *conn, ngtcp2_frame **pfr, @@ -1702,8 +1702,8 @@ int ngtcp2_conn_create_ack_frame(ngtcp2_conn *conn, ngtcp2_frame **pfr, if (conn->tx.ack == NULL) { conn->tx.ack = ngtcp2_mem_malloc( - conn->mem, - sizeof(ngtcp2_ack) + sizeof(ngtcp2_ack_range) * initial_max_ack_ranges); + conn->mem, + sizeof(ngtcp2_ack) + sizeof(ngtcp2_ack_range) * initial_max_ack_ranges); if (conn->tx.ack == NULL) { return NGTCP2_ERR_NOMEM; } @@ -1844,8 +1844,9 @@ static int conn_ppe_write_frame(ngtcp2_conn *conn, ngtcp2_ppe *ppe, * NGTCP2_ERR_NOMEM * Out of memory */ -static int conn_on_pkt_sent(ngtcp2_conn *conn, ngtcp2_rtb *rtb, +static int conn_on_pkt_sent(ngtcp2_conn *conn, ngtcp2_pktns *pktns, ngtcp2_rtb_entry *ent) { + ngtcp2_rtb *rtb = &pktns->rtb; int rv; /* This function implements OnPacketSent, but it handles only @@ -1856,7 +1857,7 @@ static int conn_on_pkt_sent(ngtcp2_conn *conn, ngtcp2_rtb *rtb, } if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) { - conn->cstat.last_tx_pkt_ts[rtb->pktns_id] = ent->ts; + conn->cstat.last_tx_pkt_ts[pktns->id] = ent->ts; } ngtcp2_conn_set_loss_detection_timer(conn, ent->ts); @@ -1898,8 +1899,8 @@ static size_t pktns_select_pkt_numlen(ngtcp2_pktns *pktns) { */ static uint64_t conn_get_cwnd(ngtcp2_conn *conn) { return conn->pv && (conn->pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) - ? ngtcp2_cc_compute_initcwnd(conn->cstat.max_tx_udp_payload_size) - : conn->cstat.cwnd; + ? ngtcp2_cc_compute_initcwnd(conn->cstat.max_tx_udp_payload_size) + : conn->cstat.cwnd; } /* @@ -1947,7 +1948,7 @@ static uint64_t conn_retry_early_payloadlen(ngtcp2_conn *conn) { /* Take the min because in conn_should_pad_pkt we take max in order to deal with unbreakable DATAGRAM. */ - return ngtcp2_min(len, NGTCP2_MIN_COALESCED_PAYLOADLEN); + return ngtcp2_min_uint64(len, NGTCP2_MIN_COALESCED_PAYLOADLEN); } return 0; @@ -2021,12 +2022,13 @@ static int conn_should_pad_pkt(ngtcp2_conn *conn, uint8_t type, size_t left, return 0; } - if (conn->hs_pktns->crypto.tx.ckm && - (conn->hs_pktns->rtb.probe_pkt_left || - !ngtcp2_strm_streamfrq_empty(&conn->hs_pktns->crypto.strm) || - !ngtcp2_acktr_empty(&conn->hs_pktns->acktr))) { - /* If we have something to send in Handshake packet, then add - PADDING in Handshake packet. */ + if ((conn->hs_pktns->crypto.tx.ckm && + (conn->hs_pktns->rtb.probe_pkt_left || + !ngtcp2_strm_streamfrq_empty(&conn->hs_pktns->crypto.strm) || + !ngtcp2_acktr_empty(&conn->hs_pktns->acktr))) || + conn->pktns.crypto.tx.ckm) { + /* If we have something to send in Handshake or 1RTT packet, + then add PADDING in that packet. */ min_payloadlen = NGTCP2_MIN_COALESCED_PAYLOADLEN; } else { return 1; @@ -2044,7 +2046,7 @@ static int conn_should_pad_pkt(ngtcp2_conn *conn, uint8_t type, size_t left, PADDING in that packet. Take maximum in case that write_datalen includes DATAGRAM which cannot be split. */ min_payloadlen = - ngtcp2_max(write_datalen, NGTCP2_MIN_COALESCED_PAYLOADLEN); + ngtcp2_max_uint64(write_datalen, NGTCP2_MIN_COALESCED_PAYLOADLEN); } else { return 1; } @@ -2067,8 +2069,8 @@ static int conn_should_pad_pkt(ngtcp2_conn *conn, uint8_t type, size_t left, return left < /* TODO Assuming that pkt_num is encoded in 1 byte. */ NGTCP2_MIN_LONG_HEADERLEN + conn->dcid.current.cid.datalen + - conn->oscid.datalen + NGTCP2_PKT_LENGTHLEN - 1 + min_payloadlen + - NGTCP2_MAX_AEAD_OVERHEAD; + conn->oscid.datalen + NGTCP2_PKT_LENGTHLEN - 1 + min_payloadlen + + NGTCP2_MAX_AEAD_OVERHEAD; } static void conn_restart_timer_on_write(ngtcp2_conn *conn, ngtcp2_tstamp ts) { @@ -2189,15 +2191,18 @@ static uint8_t conn_pkt_flags_long(ngtcp2_conn *conn) { static uint8_t conn_pkt_flags_short(ngtcp2_conn *conn) { return (uint8_t)(conn_pkt_flags(conn) | ((conn->pktns.crypto.tx.ckm->flags & NGTCP2_CRYPTO_KM_FLAG_KEY_PHASE_ONE) - ? NGTCP2_PKT_FLAG_KEY_PHASE - : NGTCP2_PKT_FLAG_NONE)); + ? NGTCP2_PKT_FLAG_KEY_PHASE + : NGTCP2_PKT_FLAG_NONE)); } +static size_t conn_min_pktlen(ngtcp2_conn *conn); + /* * conn_write_handshake_pkt writes handshake packet in the buffer - * pointed by |dest| whose length is |destlen|. |type| specifies long - * packet type. It should be either NGTCP2_PKT_INITIAL or - * NGTCP2_PKT_HANDSHAKE_PKT. + * pointed by |dest| whose length is |destlen|. |dgram_offset| is the + * offset in UDP datagram payload where this QUIC packet is positioned + * at. |type| specifies long packet type. It should be either + * NGTCP2_PKT_INITIAL or NGTCP2_PKT_HANDSHAKE_PKT. * * |write_datalen| is the minimum length of application data ready to * send in subsequent 0RTT packet. @@ -2212,8 +2217,9 @@ static uint8_t conn_pkt_flags_short(ngtcp2_conn *conn) { */ static ngtcp2_ssize conn_write_handshake_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, uint8_t *dest, - size_t destlen, uint8_t type, uint8_t flags, - uint64_t write_datalen, ngtcp2_tstamp ts) { + size_t destlen, size_t dgram_offset, uint8_t type, + uint8_t flags, uint64_t write_datalen, + ngtcp2_tstamp ts) { int rv; ngtcp2_ppe ppe; ngtcp2_pkt_hd hd; @@ -2228,6 +2234,7 @@ conn_write_handshake_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, uint8_t *dest, uint16_t rtb_entry_flags = NGTCP2_RTB_ENTRY_FLAG_NONE; int require_padding = (flags & NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING) != 0; int pkt_empty = 1; + int min_padded = 0; int padded = 0; int hd_logged = 0; uint64_t crypto_offset; @@ -2271,10 +2278,9 @@ conn_write_handshake_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, uint8_t *dest, cc.encrypt = conn->callbacks.encrypt; cc.hp_mask = conn->callbacks.hp_mask; - ngtcp2_pkt_hd_init(&hd, conn_pkt_flags_long(conn), type, - &conn->dcid.current.cid, &conn->oscid, - pktns->tx.last_pkt_num + 1, pktns_select_pkt_numlen(pktns), - version, 0); + ngtcp2_pkt_hd_init( + &hd, conn_pkt_flags_long(conn), type, &conn->dcid.current.cid, &conn->oscid, + pktns->tx.last_pkt_num + 1, pktns_select_pkt_numlen(pktns), version, 0); if (!conn->server && type == NGTCP2_PKT_INITIAL && conn->local.settings.tokenlen) { @@ -2282,7 +2288,7 @@ conn_write_handshake_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, uint8_t *dest, hd.tokenlen = conn->local.settings.tokenlen; } - ngtcp2_ppe_init(&ppe, dest, destlen, &cc); + ngtcp2_ppe_init(&ppe, dest, destlen, dgram_offset, &cc); rv = ngtcp2_ppe_encode_hd(&ppe, &hd); if (rv != 0) { @@ -2395,7 +2401,7 @@ conn_write_handshake_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, uint8_t *dest, assert(rv == NGTCP2_ERR_NOBUF); } else { rtb_entry_flags |= - NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING | NGTCP2_RTB_ENTRY_FLAG_PROBE; + NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING | NGTCP2_RTB_ENTRY_FLAG_PROBE; pkt_empty = 0; } } @@ -2412,6 +2418,7 @@ conn_write_handshake_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, uint8_t *dest, } else { rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING; pktns->tx.non_ack_pkt_start_ts = UINT64_MAX; + pkt_empty = 0; } } else if (pktns->tx.non_ack_pkt_start_ts == UINT64_MAX) { pktns->tx.non_ack_pkt_start_ts = ts; @@ -2429,20 +2436,24 @@ conn_write_handshake_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, uint8_t *dest, /* If we cannot write another packet, then we need to add padding to Initial here. */ if (conn_should_pad_pkt( - conn, type, ngtcp2_ppe_left(&ppe), write_datalen, - (rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) != 0, - require_padding)) { + conn, type, ngtcp2_ppe_left(&ppe), write_datalen, + (rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) != 0, + require_padding)) { lfr.type = NGTCP2_FRAME_PADDING; - lfr.padding.len = ngtcp2_ppe_padding(&ppe); + lfr.padding.len = ngtcp2_ppe_dgram_padding(&ppe); } else if (pkt_empty) { return 0; } else { lfr.type = NGTCP2_FRAME_PADDING; - lfr.padding.len = ngtcp2_ppe_padding_hp_sample(&ppe); + lfr.padding.len = ngtcp2_ppe_padding_size(&ppe, conn_min_pktlen(conn)); + min_padded = 1; } if (lfr.padding.len) { - padded = 1; + if (!min_padded || + (rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING)) { + padded = 1; + } ngtcp2_log_tx_fr(&conn->log, &hd, &lfr); ngtcp2_qlog_write_frame(&conn->qlog, &lfr); } @@ -2461,16 +2472,16 @@ conn_write_handshake_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, uint8_t *dest, conn_handle_tx_ecn(conn, pi, &rtb_entry_flags, pktns, &hd, ts); } - rv = ngtcp2_rtb_entry_objalloc_new(&rtbent, &hd, frq, ts, (size_t)spktlen, - rtb_entry_flags, - &conn->rtb_entry_objalloc); + rv = + ngtcp2_rtb_entry_objalloc_new(&rtbent, &hd, frq, ts, (size_t)spktlen, + rtb_entry_flags, &conn->rtb_entry_objalloc); if (rv != 0) { assert(ngtcp2_err_is_fatal(rv)); ngtcp2_frame_chain_list_objalloc_del(frq, &conn->frc_objalloc, conn->mem); return rv; } - rv = conn_on_pkt_sent(conn, &pktns->rtb, rtbent); + rv = conn_on_pkt_sent(conn, pktns, rtbent); if (rv != 0) { ngtcp2_rtb_entry_objalloc_del(rtbent, &conn->rtb_entry_objalloc, &conn->frc_objalloc, conn->mem); @@ -2565,8 +2576,8 @@ static ngtcp2_ssize conn_write_ack_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, } spktlen = ngtcp2_conn_write_single_frame_pkt( - conn, pi, dest, destlen, type, NGTCP2_WRITE_PKT_FLAG_NONE, - &conn->dcid.current.cid, ackfr, NGTCP2_RTB_ENTRY_FLAG_NONE, NULL, ts); + conn, pi, dest, destlen, type, NGTCP2_WRITE_PKT_FLAG_NONE, + &conn->dcid.current.cid, ackfr, NGTCP2_RTB_ENTRY_FLAG_NONE, NULL, ts); if (spktlen <= 0) { return spktlen; @@ -2588,8 +2599,8 @@ static void conn_discard_pktns(ngtcp2_conn *conn, ngtcp2_pktns **ppktns, conn->cstat.bytes_in_flight -= bytes_in_flight; conn->cstat.pto_count = 0; - conn->cstat.last_tx_pkt_ts[pktns->rtb.pktns_id] = UINT64_MAX; - conn->cstat.loss_time[pktns->rtb.pktns_id] = UINT64_MAX; + conn->cstat.last_tx_pkt_ts[pktns->id] = UINT64_MAX; + conn->cstat.loss_time[pktns->id] = UINT64_MAX; conn_call_delete_crypto_aead_ctx(conn, &pktns->crypto.rx.ckm->aead_ctx); conn_call_delete_crypto_cipher_ctx(conn, &pktns->crypto.rx.hp_ctx); @@ -2602,11 +2613,7 @@ static void conn_discard_pktns(ngtcp2_conn *conn, ngtcp2_pktns **ppktns, ngtcp2_conn_set_loss_detection_timer(conn, ts); } -/* - * conn_discard_initial_state discards state for Initial packet number - * space. - */ -static void conn_discard_initial_state(ngtcp2_conn *conn, ngtcp2_tstamp ts) { +void ngtcp2_conn_discard_initial_state(ngtcp2_conn *conn, ngtcp2_tstamp ts) { if (!conn->in_pktns) { return; } @@ -2622,11 +2629,7 @@ static void conn_discard_initial_state(ngtcp2_conn *conn, ngtcp2_tstamp ts) { memset(&conn->vneg.tx, 0, sizeof(conn->vneg.tx)); } -/* - * conn_discard_handshake_state discards state for Handshake packet - * number space. - */ -static void conn_discard_handshake_state(ngtcp2_conn *conn, ngtcp2_tstamp ts) { +void ngtcp2_conn_discard_handshake_state(ngtcp2_conn *conn, ngtcp2_tstamp ts) { if (!conn->hs_pktns) { return; } @@ -2672,7 +2675,7 @@ static ngtcp2_ssize conn_write_handshake_ack_pkts(ngtcp2_conn *conn, to send ACK is give server RTT measurement early. */ if (conn->server && conn->in_pktns) { nwrite = - conn_write_ack_pkt(conn, pi, dest, destlen, NGTCP2_PKT_INITIAL, ts); + conn_write_ack_pkt(conn, pi, dest, destlen, NGTCP2_PKT_INITIAL, ts); if (nwrite < 0) { assert(nwrite != NGTCP2_ERR_NOBUF); return nwrite; @@ -2685,7 +2688,7 @@ static ngtcp2_ssize conn_write_handshake_ack_pkts(ngtcp2_conn *conn, if (conn->hs_pktns->crypto.tx.ckm) { nwrite = - conn_write_ack_pkt(conn, pi, dest, destlen, NGTCP2_PKT_HANDSHAKE, ts); + conn_write_ack_pkt(conn, pi, dest, destlen, NGTCP2_PKT_HANDSHAKE, ts); if (nwrite < 0) { assert(nwrite != NGTCP2_ERR_NOBUF); return nwrite; @@ -2694,7 +2697,7 @@ static ngtcp2_ssize conn_write_handshake_ack_pkts(ngtcp2_conn *conn, res += nwrite; if (!conn->server && nwrite) { - conn_discard_initial_state(conn, ts); + ngtcp2_conn_discard_initial_state(conn, ts); } } @@ -2725,9 +2728,9 @@ static ngtcp2_ssize conn_write_client_initial(ngtcp2_conn *conn, return rv; } - return conn_write_handshake_pkt(conn, pi, dest, destlen, NGTCP2_PKT_INITIAL, - NGTCP2_WRITE_PKT_FLAG_NONE, early_datalen, - ts); + return conn_write_handshake_pkt( + conn, pi, dest, destlen, 0, NGTCP2_PKT_INITIAL, NGTCP2_WRITE_PKT_FLAG_NONE, + early_datalen, ts); } /* @@ -2807,11 +2810,11 @@ static ngtcp2_ssize conn_write_handshake_pkts(ngtcp2_conn *conn, conn->hs_pktns->rtb.probe_pkt_left)) { /* Discard Initial state here so that Handshake packet is not padded. */ - conn_discard_initial_state(conn, ts); + ngtcp2_conn_discard_initial_state(conn, ts); } else if (conn->in_pktns) { nwrite = - conn_write_handshake_pkt(conn, pi, dest, destlen, NGTCP2_PKT_INITIAL, - NGTCP2_WRITE_PKT_FLAG_NONE, write_datalen, ts); + conn_write_handshake_pkt(conn, pi, dest, destlen, 0, NGTCP2_PKT_INITIAL, + NGTCP2_WRITE_PKT_FLAG_NONE, write_datalen, ts); if (nwrite < 0) { assert(nwrite != NGTCP2_ERR_NOBUF); return nwrite; @@ -2823,11 +2826,11 @@ static ngtcp2_ssize conn_write_handshake_pkts(ngtcp2_conn *conn, !ngtcp2_strm_streamfrq_empty(&conn->in_pktns->crypto.strm))) { if (cstat->loss_detection_timer != UINT64_MAX && conn_server_tx_left(conn, &conn->dcid.current) < - NGTCP2_MAX_UDP_PAYLOAD_SIZE) { + NGTCP2_MAX_UDP_PAYLOAD_SIZE) { ngtcp2_log_info( - &conn->log, NGTCP2_LOG_EVENT_LDC, - "loss detection timer canceled due to amplification limit"); - cstat->loss_detection_timer = UINT64_MAX; + &conn->log, NGTCP2_LOG_EVENT_LDC, + "loss detection timer canceled due to amplification limit"); + ngtcp2_conn_cancel_loss_detection_timer(conn); } return 0; @@ -2837,10 +2840,10 @@ static ngtcp2_ssize conn_write_handshake_pkts(ngtcp2_conn *conn, dest += nwrite; destlen -= (size_t)nwrite; - if (destlen) { - /* We might have already added padding to Initial, but in that - case, we should have destlen == 0 and no Handshake packet - will be written. */ + /* If initial packet size is at least + NGTCP2_MAX_UDP_PAYLOAD_SIZE, no extra padding is needed in a + subsequent packet. */ + if (nwrite < NGTCP2_MAX_UDP_PAYLOAD_SIZE) { if (conn->server) { it = ngtcp2_rtb_head(&conn->in_pktns->rtb); if (!ngtcp2_ksl_it_end(&it)) { @@ -2856,8 +2859,9 @@ static ngtcp2_ssize conn_write_handshake_pkts(ngtcp2_conn *conn, } } - nwrite = conn_write_handshake_pkt( - conn, pi, dest, destlen, NGTCP2_PKT_HANDSHAKE, wflags, write_datalen, ts); + nwrite = + conn_write_handshake_pkt(conn, pi, dest, destlen, (size_t)res, + NGTCP2_PKT_HANDSHAKE, wflags, write_datalen, ts); if (nwrite < 0) { assert(nwrite != NGTCP2_ERR_NOBUF); return nwrite; @@ -2869,7 +2873,7 @@ static ngtcp2_ssize conn_write_handshake_pkts(ngtcp2_conn *conn, /* We don't need to send further Initial packet if we have Handshake key and sent something with it. So discard initial state here. */ - conn_discard_initial_state(conn, ts); + ngtcp2_conn_discard_initial_state(conn, ts); } return res; @@ -2905,7 +2909,7 @@ static int conn_should_send_max_stream_data(ngtcp2_conn *conn, uint64_t inc = strm->rx.unsent_max_offset - strm->rx.max_offset; (void)conn; - return strm->rx.window < 2 * inc; + return strm->rx.window < 4 * inc; } /* @@ -2915,7 +2919,7 @@ static int conn_should_send_max_stream_data(ngtcp2_conn *conn, static int conn_should_send_max_data(ngtcp2_conn *conn) { uint64_t inc = conn->rx.unsent_max_offset - conn->rx.max_offset; - return conn->rx.window < 2 * inc; + return conn->rx.window < 4 * inc; } /* @@ -2948,9 +2952,9 @@ static size_t conn_required_num_new_connection_id(ngtcp2_conn *conn) { n = conn->remote.transport_params->active_connection_id_limit + conn->scid.num_retired; - n = ngtcp2_min(NGTCP2_MAX_SCID_POOL_SIZE, n) - len; + n = ngtcp2_min_uint64(NGTCP2_MAX_SCID_POOL_SIZE, n) - len; - return (size_t)ngtcp2_min(lim, n); + return (size_t)ngtcp2_min_uint64(lim, n); } /* @@ -3093,11 +3097,12 @@ static int conn_remove_retired_connection_id(ngtcp2_conn *conn, } /* - * conn_min_short_pktlen returns the minimum length of Short packet - * this endpoint sends. + * conn_min_pktlen returns the minimum length of packet this endpoint + * sends. It may underestimate the length because this does not take + * into account header protection sample. */ -static size_t conn_min_short_pktlen(ngtcp2_conn *conn) { - return conn->dcid.current.cid.datalen + NGTCP2_MIN_PKT_EXPANDLEN; +static size_t conn_min_pktlen(ngtcp2_conn *conn) { + return conn->oscid.datalen + NGTCP2_MIN_PKT_EXPANDLEN; } /* @@ -3155,8 +3160,10 @@ static void conn_reset_ppe_pending(ngtcp2_conn *conn) { /* * conn_write_pkt writes a protected packet in the buffer pointed by - * |dest| whose length if |destlen|. |type| specifies the type of - * packet. It can be NGTCP2_PKT_1RTT or NGTCP2_PKT_0RTT. + * |dest| whose length if |destlen|. |dgram_offset| is the offset in + * UDP datagram payload where this QUIC packet is positioned at. + * |type| specifies the type of packet. It can be NGTCP2_PKT_1RTT or + * NGTCP2_PKT_0RTT. * * This function can send new stream data. In order to send stream * data, specify the underlying stream and parameters to @@ -3183,8 +3190,9 @@ static void conn_reset_ppe_pending(ngtcp2_conn *conn) { */ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, - ngtcp2_vmsg *vmsg, uint8_t type, - uint8_t flags, ngtcp2_tstamp ts) { + size_t dgram_offset, ngtcp2_vmsg *vmsg, + uint8_t type, uint8_t flags, + ngtcp2_tstamp ts) { int rv = 0; ngtcp2_crypto_cc *cc = &conn->pkt.cc; ngtcp2_ppe *ppe = &conn->pkt.ppe; @@ -3211,7 +3219,8 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, int require_padding = (flags & NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING) != 0; int write_more = (flags & NGTCP2_WRITE_PKT_FLAG_MORE) != 0; int ppe_pending = (conn->flags & NGTCP2_CONN_FLAG_PPE_PENDING) != 0; - size_t min_pktlen = conn_min_short_pktlen(conn); + size_t min_pktlen = conn_min_pktlen(conn); + int min_padded = 0; int padded = 0; ngtcp2_cc_pkt cc_pkt; uint64_t crypto_offset; @@ -3311,7 +3320,7 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, if (conn->local.settings.max_window && conn->tx.last_max_data_ts != UINT64_MAX && ts - conn->tx.last_max_data_ts < - NGTCP2_FLOW_WINDOW_RTT_FACTOR * cstat->smoothed_rtt && + NGTCP2_FLOW_WINDOW_RTT_FACTOR * cstat->smoothed_rtt && conn->local.settings.max_window > conn->rx.window) { target_max_data = NGTCP2_FLOW_WINDOW_SCALING_FACTOR * conn->rx.window; if (target_max_data > conn->local.settings.max_window) { @@ -3336,7 +3345,7 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, pktns->tx.frq = nfrc; conn->rx.max_offset = conn->rx.unsent_max_offset = - nfrc->fr.max_data.max_data; + nfrc->fr.max_data.max_data; } if (stream_blocked && conn_should_send_max_data(conn)) { @@ -3369,7 +3378,7 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, pktns->tx.last_pkt_num + 1, pktns_select_pkt_numlen(pktns), version, 0); - ngtcp2_ppe_init(ppe, dest, destlen, cc); + ngtcp2_ppe_init(ppe, dest, destlen, dgram_offset, cc); rv = ngtcp2_ppe_encode_hd(ppe, hd); if (rv != 0) { @@ -3407,8 +3416,8 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, } rv = ngtcp2_conn_create_ack_frame( - conn, &ackfr, pktns, type, ts, conn_compute_ack_delay(conn), - conn->local.transport_params.ack_delay_exponent); + conn, &ackfr, pktns, type, ts, conn_compute_ack_delay(conn), + conn->local.transport_params.ack_delay_exponent); if (rv != 0) { assert(ngtcp2_err_is_fatal(rv)); return rv; @@ -3422,10 +3431,9 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, ngtcp2_acktr_commit_ack(&pktns->acktr); ngtcp2_acktr_add_ack(&pktns->acktr, hd->pkt_num, ackfr->ack.largest_ack); - if (type == NGTCP2_PKT_1RTT) { - conn_handle_unconfirmed_key_update_from_remote( - conn, ackfr->ack.largest_ack, ts); - } + assert(NGTCP2_PKT_1RTT == type); + conn_handle_unconfirmed_key_update_from_remote( + conn, ackfr->ack.largest_ack, ts); pkt_empty = 0; } } @@ -3443,7 +3451,7 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, switch ((*pfrc)->fr.type) { case NGTCP2_FRAME_RESET_STREAM: strm = - ngtcp2_conn_find_stream(conn, (*pfrc)->fr.reset_stream.stream_id); + ngtcp2_conn_find_stream(conn, (*pfrc)->fr.reset_stream.stream_id); if (strm == NULL || !ngtcp2_strm_require_retransmit_reset_stream(strm)) { frc = *pfrc; @@ -3454,7 +3462,7 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, break; case NGTCP2_FRAME_STOP_SENDING: strm = - ngtcp2_conn_find_stream(conn, (*pfrc)->fr.stop_sending.stream_id); + ngtcp2_conn_find_stream(conn, (*pfrc)->fr.stop_sending.stream_id); if (strm == NULL || !ngtcp2_strm_require_retransmit_stop_sending(strm)) { frc = *pfrc; @@ -3484,10 +3492,10 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, } break; case NGTCP2_FRAME_MAX_STREAM_DATA: - strm = ngtcp2_conn_find_stream(conn, - (*pfrc)->fr.max_stream_data.stream_id); + strm = + ngtcp2_conn_find_stream(conn, (*pfrc)->fr.max_stream_data.stream_id); if (strm == NULL || !ngtcp2_strm_require_retransmit_max_stream_data( - strm, &(*pfrc)->fr.max_stream_data)) { + strm, &(*pfrc)->fr.max_stream_data)) { frc = *pfrc; *pfrc = (*pfrc)->next; ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); @@ -3504,9 +3512,9 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, break; case NGTCP2_FRAME_STREAM_DATA_BLOCKED: strm = ngtcp2_conn_find_stream( - conn, (*pfrc)->fr.stream_data_blocked.stream_id); + conn, (*pfrc)->fr.stream_data_blocked.stream_id); if (strm == NULL || !ngtcp2_strm_require_retransmit_stream_data_blocked( - strm, &(*pfrc)->fr.stream_data_blocked)) { + strm, &(*pfrc)->fr.stream_data_blocked)) { frc = *pfrc; *pfrc = (*pfrc)->next; ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); @@ -3543,7 +3551,7 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, left = ngtcp2_ppe_left(ppe); crypto_offset = - ngtcp2_strm_streamfrq_unacked_offset(&pktns->crypto.strm); + ngtcp2_strm_streamfrq_unacked_offset(&pktns->crypto.strm); if (crypto_offset == (uint64_t)-1) { ngtcp2_strm_streamfrq_clear(&pktns->crypto.strm); break; @@ -3593,14 +3601,14 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, nfrc->fr.type = NGTCP2_FRAME_RESET_STREAM; nfrc->fr.reset_stream.stream_id = strm->stream_id; nfrc->fr.reset_stream.app_error_code = - strm->tx.reset_stream_app_error_code; + strm->tx.reset_stream_app_error_code; nfrc->fr.reset_stream.final_size = strm->tx.offset; *pfrc = nfrc; strm->flags &= ~NGTCP2_STRM_FLAG_SEND_RESET_STREAM; rv = - conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &nfrc->fr); + conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &nfrc->fr); if (rv != 0) { assert(NGTCP2_ERR_NOBUF == rv); @@ -3620,8 +3628,8 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, strm->flags &= ~NGTCP2_STRM_FLAG_SEND_STOP_SENDING; } else { rv = conn_call_stream_stop_sending( - conn, strm->stream_id, strm->tx.stop_sending_app_error_code, - strm->stream_user_data); + conn, strm->stream_id, strm->tx.stop_sending_app_error_code, + strm->stream_user_data); if (rv != 0) { assert(ngtcp2_err_is_fatal(rv)); @@ -3636,13 +3644,13 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, nfrc->fr.type = NGTCP2_FRAME_STOP_SENDING; nfrc->fr.stop_sending.stream_id = strm->stream_id; nfrc->fr.stop_sending.app_error_code = - strm->tx.stop_sending_app_error_code; + strm->tx.stop_sending_app_error_code; *pfrc = nfrc; strm->flags &= ~NGTCP2_STRM_FLAG_SEND_STOP_SENDING; - rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, - &nfrc->fr); + rv = + conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &nfrc->fr); if (rv != 0) { assert(NGTCP2_ERR_NOBUF == rv); @@ -3672,7 +3680,7 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, strm->tx.last_blocked_offset = strm->tx.max_offset; rv = - conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &nfrc->fr); + conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &nfrc->fr); if (rv != 0) { assert(NGTCP2_ERR_NOBUF == rv); @@ -3698,10 +3706,10 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, if (conn->local.settings.max_stream_window && strm->tx.last_max_stream_data_ts != UINT64_MAX && ts - strm->tx.last_max_stream_data_ts < - NGTCP2_FLOW_WINDOW_RTT_FACTOR * cstat->smoothed_rtt && + NGTCP2_FLOW_WINDOW_RTT_FACTOR * cstat->smoothed_rtt && conn->local.settings.max_stream_window > strm->rx.window) { target_max_data = - NGTCP2_FLOW_WINDOW_SCALING_FACTOR * strm->rx.window; + NGTCP2_FLOW_WINDOW_SCALING_FACTOR * strm->rx.window; if (target_max_data > conn->local.settings.max_stream_window) { target_max_data = conn->local.settings.max_stream_window; } @@ -3721,14 +3729,14 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, nfrc->fr.type = NGTCP2_FRAME_MAX_STREAM_DATA; nfrc->fr.max_stream_data.stream_id = strm->stream_id; nfrc->fr.max_stream_data.max_stream_data = - strm->rx.unsent_max_offset + delta; + strm->rx.unsent_max_offset + delta; *pfrc = nfrc; strm->rx.max_offset = strm->rx.unsent_max_offset = - nfrc->fr.max_stream_data.max_stream_data; + nfrc->fr.max_stream_data.max_stream_data; rv = - conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &nfrc->fr); + conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &nfrc->fr); if (rv != 0) { assert(NGTCP2_ERR_NOBUF == rv); break; @@ -3806,7 +3814,7 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, if (*pfrc == NULL && conn->remote.bidi.unsent_max_streams > conn->remote.bidi.max_streams) { rv = conn_call_extend_max_remote_streams_bidi( - conn, conn->remote.bidi.unsent_max_streams); + conn, conn->remote.bidi.unsent_max_streams); if (rv != 0) { assert(ngtcp2_err_is_fatal(rv)); return rv; @@ -3838,7 +3846,7 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, if (*pfrc == NULL && conn->remote.uni.unsent_max_streams > conn->remote.uni.max_streams) { rv = conn_call_extend_max_remote_streams_uni( - conn, conn->remote.uni.unsent_max_streams); + conn, conn->remote.uni.unsent_max_streams); if (rv != 0) { assert(ngtcp2_err_is_fatal(rv)); return rv; @@ -3901,8 +3909,8 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, if (*pfrc == NULL && send_stream && (ndatalen = ngtcp2_pkt_stream_max_datalen( - vmsg->stream.strm->stream_id, vmsg->stream.strm->tx.offset, ndatalen, - left)) != (size_t)-1 && + vmsg->stream.strm->stream_id, vmsg->stream.strm->tx.offset, ndatalen, + left)) != (size_t)-1 && (ndatalen || datalen == 0)) { datacnt = ngtcp2_vec_copy_at_most(data, NGTCP2_MAX_STREAM_DATACNT, vmsg->stream.data, vmsg->stream.datacnt, @@ -3912,7 +3920,7 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, assert((datacnt == 0 && datalen == 0) || (datacnt && datalen)); rv = ngtcp2_frame_chain_stream_datacnt_objalloc_new( - &nfrc, datacnt, &conn->frc_objalloc, conn->mem); + &nfrc, datacnt, &conn->frc_objalloc, conn->mem); if (rv != 0) { assert(ngtcp2_err_is_fatal(rv)); return rv; @@ -3944,6 +3952,7 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, vmsg->stream.strm->tx.offset += ndatalen; conn->tx.offset += ndatalen; + vmsg->stream.strm->flags |= NGTCP2_STRM_FLAG_ANY_SENT; if (fin) { ngtcp2_strm_shutdown(vmsg->stream.strm, NGTCP2_STRM_FLAG_SHUT_WR); @@ -4095,7 +4104,7 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, } keep_alive_expired = - type == NGTCP2_PKT_1RTT && conn_keep_alive_expired(conn, ts); + type == NGTCP2_PKT_1RTT && conn_keep_alive_expired(conn, ts); if (conn->pktns.rtb.probe_pkt_left == 0 && !keep_alive_expired && !require_padding) { @@ -4154,8 +4163,11 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING; if (conn->pktns.rtb.probe_pkt_left) { rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_PROBE; + } else { + rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING; } pktns->tx.non_ack_pkt_start_ts = UINT64_MAX; + pkt_empty = 0; } } else if (pktns->tx.non_ack_pkt_start_ts == UINT64_MAX) { pktns->tx.non_ack_pkt_start_ts = ts; @@ -4166,19 +4178,19 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, /* TODO Push STREAM frame back to ngtcp2_strm if there is an error before ngtcp2_rtb_entry is safely created and added. */ - if (require_padding || - /* Making full sized packet will help GSO a bit */ - ngtcp2_ppe_left(ppe) < 10) { - lfr.padding.len = ngtcp2_ppe_padding(ppe); - } else if (type == NGTCP2_PKT_1RTT) { - lfr.padding.len = ngtcp2_ppe_padding_size(ppe, min_pktlen); + if (require_padding) { + lfr.padding.len = ngtcp2_ppe_dgram_padding(ppe); } else { - lfr.padding.len = ngtcp2_ppe_padding_hp_sample(ppe); + lfr.padding.len = ngtcp2_ppe_padding_size(ppe, min_pktlen); + min_padded = 1; } if (lfr.padding.len) { + if (!min_padded || + (rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING)) { + padded = 1; + } lfr.type = NGTCP2_FRAME_PADDING; - padded = 1; ngtcp2_log_tx_fr(&conn->log, hd, &lfr); ngtcp2_qlog_write_frame(&conn->qlog, &lfr); } @@ -4200,11 +4212,11 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, conn_handle_tx_ecn(conn, pi, &rtb_entry_flags, pktns, hd, ts); } - rv = ngtcp2_rtb_entry_objalloc_new(&ent, hd, NULL, ts, (size_t)nwrite, - rtb_entry_flags, - &conn->rtb_entry_objalloc); + rv = + ngtcp2_rtb_entry_objalloc_new(&ent, hd, NULL, ts, (size_t)nwrite, + rtb_entry_flags, &conn->rtb_entry_objalloc); if (rv != 0) { - assert(ngtcp2_err_is_fatal((int)nwrite)); + assert(ngtcp2_err_is_fatal(rv)); return rv; } @@ -4220,7 +4232,7 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, ts); } - rv = conn_on_pkt_sent(conn, &pktns->rtb, ent); + rv = conn_on_pkt_sent(conn, pktns, ent); if (rv != 0) { assert(ngtcp2_err_is_fatal(rv)); ngtcp2_rtb_entry_objalloc_del(ent, &conn->rtb_entry_objalloc, @@ -4231,10 +4243,10 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, if (rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) { if (conn->cc.on_pkt_sent) { conn->cc.on_pkt_sent( - &conn->cc, &conn->cstat, - ngtcp2_cc_pkt_init(&cc_pkt, hd->pkt_num, (size_t)nwrite, - NGTCP2_PKTNS_ID_APPLICATION, ts, ent->rst.lost, - ent->rst.tx_in_flight, ent->rst.is_app_limited)); + &conn->cc, &conn->cstat, + ngtcp2_cc_pkt_init(&cc_pkt, hd->pkt_num, (size_t)nwrite, + NGTCP2_PKTNS_ID_APPLICATION, ts, ent->rst.lost, + ent->rst.tx_in_flight, ent->rst.is_app_limited)); } if (conn->flags & NGTCP2_CONN_FLAG_RESTART_IDLE_TIMER_ON_WRITE) { @@ -4269,9 +4281,9 @@ static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, } ngtcp2_ssize ngtcp2_conn_write_single_frame_pkt( - ngtcp2_conn *conn, ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, - uint8_t type, uint8_t flags, const ngtcp2_cid *dcid, ngtcp2_frame *fr, - uint16_t rtb_entry_flags, const ngtcp2_path *path, ngtcp2_tstamp ts) { + ngtcp2_conn *conn, ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, + uint8_t type, uint8_t flags, const ngtcp2_cid *dcid, ngtcp2_frame *fr, + uint16_t rtb_entry_flags, const ngtcp2_path *path, ngtcp2_tstamp ts) { int rv; ngtcp2_ppe ppe; ngtcp2_pkt_hd hd; @@ -4332,7 +4344,7 @@ ngtcp2_ssize ngtcp2_conn_write_single_frame_pkt( pktns->tx.last_pkt_num + 1, pktns_select_pkt_numlen(pktns), version, 0); - ngtcp2_ppe_init(&ppe, dest, destlen, &cc); + ngtcp2_ppe_init(&ppe, dest, destlen, 0, &cc); rv = ngtcp2_ppe_encode_hd(&ppe, &hd); if (rv != 0) { @@ -4354,25 +4366,22 @@ ngtcp2_ssize ngtcp2_conn_write_single_frame_pkt( } lfr.type = NGTCP2_FRAME_PADDING; - if (flags & NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING) { - lfr.padding.len = ngtcp2_ppe_padding(&ppe); + if (flags & NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING_FULL) { + lfr.padding.len = ngtcp2_ppe_dgram_padding_size(&ppe, destlen); + } else if (flags & NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING) { + lfr.padding.len = ngtcp2_ppe_dgram_padding(&ppe); } else { switch (fr->type) { case NGTCP2_FRAME_PATH_CHALLENGE: case NGTCP2_FRAME_PATH_RESPONSE: if (!conn->server || destlen >= NGTCP2_MAX_UDP_PAYLOAD_SIZE) { - lfr.padding.len = ngtcp2_ppe_padding(&ppe); + lfr.padding.len = ngtcp2_ppe_dgram_padding(&ppe); } else { lfr.padding.len = 0; } break; default: - if (type == NGTCP2_PKT_1RTT) { - lfr.padding.len = - ngtcp2_ppe_padding_size(&ppe, conn_min_short_pktlen(conn)); - } else { - lfr.padding.len = ngtcp2_ppe_padding_hp_sample(&ppe); - } + lfr.padding.len = ngtcp2_ppe_padding_size(&ppe, conn_min_pktlen(conn)); } } if (lfr.padding.len) { @@ -4402,6 +4411,12 @@ ngtcp2_ssize ngtcp2_conn_write_single_frame_pkt( conn_handle_unconfirmed_key_update_from_remote(conn, fr->ack.largest_ack, ts); } + + if (!(flags & (NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING_FULL | + NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING))) { + padded = 0; + } + break; } @@ -4412,14 +4427,14 @@ ngtcp2_ssize ngtcp2_conn_write_single_frame_pkt( conn_handle_tx_ecn(conn, pi, &rtb_entry_flags, pktns, &hd, ts); } - rv = ngtcp2_rtb_entry_objalloc_new(&rtbent, &hd, NULL, ts, (size_t)nwrite, - rtb_entry_flags, - &conn->rtb_entry_objalloc); + rv = + ngtcp2_rtb_entry_objalloc_new(&rtbent, &hd, NULL, ts, (size_t)nwrite, + rtb_entry_flags, &conn->rtb_entry_objalloc); if (rv != 0) { return rv; } - rv = conn_on_pkt_sent(conn, &pktns->rtb, rtbent); + rv = conn_on_pkt_sent(conn, pktns, rtbent); if (rv != 0) { ngtcp2_rtb_entry_objalloc_del(rtbent, &conn->rtb_entry_objalloc, &conn->frc_objalloc, conn->mem); @@ -4522,6 +4537,10 @@ static int conn_retire_dcid_seq(ngtcp2_conn *conn, uint64_t seq) { ngtcp2_frame_chain *nfrc; int rv; + if (ngtcp2_conn_check_retired_dcid_tracked(conn, seq)) { + return 0; + } + rv = ngtcp2_conn_track_retired_dcid_seq(conn, seq); if (rv != 0) { return rv; @@ -4658,13 +4677,15 @@ static int conn_start_pmtud(ngtcp2_conn *conn) { assert(conn->remote.transport_params->max_udp_payload_size >= NGTCP2_MAX_UDP_PAYLOAD_SIZE); - hard_max_udp_payload_size = (size_t)ngtcp2_min( - conn->remote.transport_params->max_udp_payload_size, - (uint64_t)conn->local.settings.max_tx_udp_payload_size); + hard_max_udp_payload_size = (size_t)ngtcp2_min_uint64( + conn->remote.transport_params->max_udp_payload_size, + (uint64_t)conn->local.settings.max_tx_udp_payload_size); - rv = ngtcp2_pmtud_new(&conn->pmtud, conn->dcid.current.max_udp_payload_size, - hard_max_udp_payload_size, - conn->pktns.tx.last_pkt_num + 1, conn->mem); + rv = + ngtcp2_pmtud_new(&conn->pmtud, conn->dcid.current.max_udp_payload_size, + hard_max_udp_payload_size, conn->pktns.tx.last_pkt_num + 1, + conn->local.settings.pmtud_probes, + conn->local.settings.pmtud_probeslen, conn->mem); if (rv != 0) { return rv; } @@ -4715,12 +4736,11 @@ static ngtcp2_ssize conn_write_pmtud_probe(ngtcp2_conn *conn, lfr.type = NGTCP2_FRAME_PING; nwrite = ngtcp2_conn_write_single_frame_pkt( - conn, pi, dest, probelen, NGTCP2_PKT_1RTT, - NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING, &conn->dcid.current.cid, &lfr, - NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING | - NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING | - NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE, - NULL, ts); + conn, pi, dest, probelen, NGTCP2_PKT_1RTT, + NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING_FULL, &conn->dcid.current.cid, &lfr, + NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING | NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING | + NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE, + NULL, ts); if (nwrite < 0) { return nwrite; } @@ -4813,19 +4833,19 @@ static size_t conn_shape_udp_payload(ngtcp2_conn *conn, const ngtcp2_dcid *dcid, assert(conn->remote.transport_params->max_udp_payload_size >= NGTCP2_MAX_UDP_PAYLOAD_SIZE); - payloadlen = - (size_t)ngtcp2_min((uint64_t)payloadlen, - conn->remote.transport_params->max_udp_payload_size); + payloadlen = (size_t)ngtcp2_min_uint64( + (uint64_t)payloadlen, + conn->remote.transport_params->max_udp_payload_size); } payloadlen = - ngtcp2_min(payloadlen, conn->local.settings.max_tx_udp_payload_size); + ngtcp2_min_size(payloadlen, conn->local.settings.max_tx_udp_payload_size); if (conn->local.settings.no_tx_udp_payload_size_shaping) { return payloadlen; } - return ngtcp2_min(payloadlen, dcid->max_udp_payload_size); + return ngtcp2_min_size(payloadlen, dcid->max_udp_payload_size); } static void conn_reset_congestion_state(ngtcp2_conn *conn, ngtcp2_tstamp ts); @@ -4920,15 +4940,15 @@ static ngtcp2_ssize conn_write_path_challenge(ngtcp2_conn *conn, initial_pto = conn_compute_initial_pto(conn, &conn->pktns); timeout = conn_compute_pto(conn, &conn->pktns); - timeout = ngtcp2_max(timeout, initial_pto); + timeout = ngtcp2_max_uint64(timeout, initial_pto); expiry = ts + timeout * (1ULL << pv->round); - destlen = ngtcp2_min(destlen, NGTCP2_MAX_UDP_PAYLOAD_SIZE); + destlen = ngtcp2_min_size(destlen, NGTCP2_MAX_UDP_PAYLOAD_SIZE); if (conn->server) { if (!(pv->dcid.flags & NGTCP2_DCID_FLAG_PATH_VALIDATED)) { tx_left = conn_server_tx_left(conn, &pv->dcid); - destlen = (size_t)ngtcp2_min((uint64_t)destlen, tx_left); + destlen = (size_t)ngtcp2_min_uint64((uint64_t)destlen, tx_left); if (destlen == 0) { return 0; } @@ -4946,10 +4966,10 @@ static ngtcp2_ssize conn_write_path_challenge(ngtcp2_conn *conn, ngtcp2_pv_add_entry(pv, lfr.path_challenge.data, expiry, flags, ts); nwrite = ngtcp2_conn_write_single_frame_pkt( - conn, pi, dest, destlen, NGTCP2_PKT_1RTT, NGTCP2_WRITE_PKT_FLAG_NONE, - &pv->dcid.cid, &lfr, - NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING | NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING, - &pv->dcid.ps.path, ts); + conn, pi, dest, destlen, NGTCP2_PKT_1RTT, NGTCP2_WRITE_PKT_FLAG_NONE, + &pv->dcid.cid, &lfr, + NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING | NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING, + &pv->dcid.ps.path, ts); if (nwrite <= 0) { return nwrite; } @@ -5039,11 +5059,11 @@ static ngtcp2_ssize conn_write_path_response(ngtcp2_conn *conn, } } - destlen = ngtcp2_min(destlen, NGTCP2_MAX_UDP_PAYLOAD_SIZE); + destlen = ngtcp2_min_size(destlen, NGTCP2_MAX_UDP_PAYLOAD_SIZE); if (conn->server && !(dcid->flags & NGTCP2_DCID_FLAG_PATH_VALIDATED)) { tx_left = conn_server_tx_left(conn, dcid); - destlen = (size_t)ngtcp2_min((uint64_t)destlen, tx_left); + destlen = (size_t)ngtcp2_min_uint64((uint64_t)destlen, tx_left); if (destlen == 0) { return 0; } @@ -5053,9 +5073,8 @@ static ngtcp2_ssize conn_write_path_response(ngtcp2_conn *conn, memcpy(lfr.path_response.data, pcent->data, sizeof(lfr.path_response.data)); nwrite = ngtcp2_conn_write_single_frame_pkt( - conn, pi, dest, destlen, NGTCP2_PKT_1RTT, NGTCP2_WRITE_PKT_FLAG_NONE, - &dcid->cid, &lfr, NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING, &pcent->ps.path, - ts); + conn, pi, dest, destlen, NGTCP2_PKT_1RTT, NGTCP2_WRITE_PKT_FLAG_NONE, + &dcid->cid, &lfr, NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING, &pcent->ps.path, ts); if (nwrite <= 0) { return nwrite; } @@ -5077,10 +5096,10 @@ ngtcp2_ssize ngtcp2_conn_write_pkt_versioned(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, ngtcp2_tstamp ts) { return ngtcp2_conn_writev_stream_versioned( - conn, path, pkt_info_version, pi, dest, destlen, - /* pdatalen = */ NULL, NGTCP2_WRITE_STREAM_FLAG_NONE, - /* stream_id = */ -1, - /* datav = */ NULL, /* datavcnt = */ 0, ts); + conn, path, pkt_info_version, pi, dest, destlen, + /* pdatalen = */ NULL, NGTCP2_WRITE_STREAM_FLAG_NONE, + /* stream_id = */ -1, + /* datav = */ NULL, /* datavcnt = */ 0, ts); } /* @@ -5225,8 +5244,8 @@ static int conn_on_retry(ngtcp2_conn *conn, const ngtcp2_pkt_hd *hd, retry.odcid = conn->dcid.current.cid; rv = ngtcp2_pkt_verify_retry_tag( - conn->client_chosen_version, &retry, pkt, pktlen, conn->callbacks.encrypt, - &conn->crypto.retry_aead, &conn->crypto.retry_aead_ctx); + conn->client_chosen_version, &retry, pkt, pktlen, conn->callbacks.encrypt, + &conn->crypto.retry_aead, &conn->crypto.retry_aead_ctx); if (rv != 0) { ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, "unable to verify Retry packet integrity"); @@ -5303,15 +5322,14 @@ int ngtcp2_conn_detect_lost_pkt(ngtcp2_conn *conn, ngtcp2_pktns *pktns, * conn_recv_ack processes received ACK frame |fr|. |pkt_ts| is the * timestamp when packet is received. |ts| should be the current * time. Usually they are the same, but for buffered packets, - * |pkt_ts| would be earlier than |ts|. + * |pkt_ts| would be earlier than |ts|. This function needs to be + * called after |fr| is validated by ngtcp2_pkt_validate_ack. * * This function returns 0 if it succeeds, or one of the following * negative error codes: * * NGTCP2_ERR_NOMEM * Out of memory - * NGTCP2_ERR_ACK_FRAME - * ACK frame is malformed. * NGTCP2_ERR_PROTO * |fr| acknowledges a packet this endpoint has not sent. * NGTCP2_ERR_CALLBACK_FAILURE @@ -5319,7 +5337,6 @@ int ngtcp2_conn_detect_lost_pkt(ngtcp2_conn *conn, ngtcp2_pktns *pktns, */ static int conn_recv_ack(ngtcp2_conn *conn, ngtcp2_pktns *pktns, ngtcp2_ack *fr, ngtcp2_tstamp pkt_ts, ngtcp2_tstamp ts) { - int rv; ngtcp2_ssize num_acked; ngtcp2_conn_stat *cstat = &conn->cstat; @@ -5327,15 +5344,10 @@ static int conn_recv_ack(ngtcp2_conn *conn, ngtcp2_pktns *pktns, ngtcp2_ack *fr, return NGTCP2_ERR_PROTO; } - rv = ngtcp2_pkt_validate_ack(fr, conn->local.settings.initial_pkt_num); - if (rv != 0) { - return rv; - } - ngtcp2_acktr_recv_ack(&pktns->acktr, fr); - num_acked = ngtcp2_rtb_recv_ack(&pktns->rtb, fr, &conn->cstat, conn, pktns, - pkt_ts, ts); + num_acked = + ngtcp2_rtb_recv_ack(&pktns->rtb, fr, &conn->cstat, conn, pktns, pkt_ts, ts); if (num_acked < 0) { assert(ngtcp2_err_is_fatal((int)num_acked)); return (int)num_acked; @@ -5351,7 +5363,7 @@ static int conn_recv_ack(ngtcp2_conn *conn, ngtcp2_pktns *pktns, ngtcp2_ack *fr, (conn->server || (conn->flags & NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED))) { /* Reset PTO count but no less than 2 to avoid frequent probe packet transmission. */ - cstat->pto_count = ngtcp2_min(cstat->pto_count, 2); + cstat->pto_count = ngtcp2_min_size(cstat->pto_count, 2); } ngtcp2_conn_set_loss_detection_timer(conn, ts); @@ -5366,7 +5378,7 @@ static int conn_recv_ack(ngtcp2_conn *conn, ngtcp2_pktns *pktns, ngtcp2_ack *fr, static void assign_recved_ack_delay_unscaled(ngtcp2_ack *fr, uint64_t ack_delay_exponent) { fr->ack_delay_unscaled = - fr->ack_delay * (1ULL << ack_delay_exponent) * NGTCP2_MICROSECONDS; + fr->ack_delay * (1ULL << ack_delay_exponent) * NGTCP2_MICROSECONDS; } /* @@ -5467,7 +5479,7 @@ static int conn_recv_max_stream_data(ngtcp2_conn *conn, * conn_recv_max_data processes received MAX_DATA frame |fr|. */ static void conn_recv_max_data(ngtcp2_conn *conn, const ngtcp2_max_data *fr) { - conn->tx.max_offset = ngtcp2_max(conn->tx.max_offset, fr->max_data); + conn->tx.max_offset = ngtcp2_max_uint64(conn->tx.max_offset, fr->max_data); } /* @@ -5496,7 +5508,7 @@ static int conn_buffer_pkt(ngtcp2_conn *conn, ngtcp2_pktns *pktns, } rv = - ngtcp2_pkt_chain_new(&pc, path, pi, pkt, pktlen, dgramlen, ts, conn->mem); + ngtcp2_pkt_chain_new(&pc, path, pi, pkt, pktlen, dgramlen, ts, conn->mem); if (rv != 0) { return rv; } @@ -5695,8 +5707,8 @@ conn_emit_pending_crypto_data(ngtcp2_conn *conn, offset = rx_offset; rx_offset += datalen; - rv = conn_call_recv_crypto_data(conn, encryption_level, offset, data, - datalen); + rv = + conn_call_recv_crypto_data(conn, encryption_level, offset, data, datalen); if (rv != 0) { return rv; } @@ -5735,7 +5747,7 @@ static int conn_recv_connection_close(ngtcp2_conn *conn, } } - ccerr->reasonlen = ngtcp2_min(fr->reasonlen, NGTCP2_CCERR_MAX_REASONLEN); + ccerr->reasonlen = ngtcp2_min_size(fr->reasonlen, NGTCP2_CCERR_MAX_REASONLEN); ngtcp2_cpymem((uint8_t *)ccerr->reason, fr->reason, ccerr->reasonlen); return 0; @@ -5907,9 +5919,9 @@ static int pktns_commit_recv_pkt_num(ngtcp2_pktns *pktns, int64_t pkt_num, if (pktns->rx.max_ack_eliciting_pkt_num != -1) { if (pkt_num < pktns->rx.max_ack_eliciting_pkt_num) { ngtcp2_acktr_immediate_ack(&pktns->acktr); - } else if (pkt_num > pktns->rx.max_ack_eliciting_pkt_num) { + } else if (pkt_num != pktns->rx.max_ack_eliciting_pkt_num + 1) { r = ngtcp2_gaptr_get_first_gap_after( - &pktns->rx.pngap, (uint64_t)pktns->rx.max_ack_eliciting_pkt_num); + &pktns->rx.pngap, (uint64_t)pktns->rx.max_ack_eliciting_pkt_num); if (r.begin < (uint64_t)pkt_num) { ngtcp2_acktr_immediate_ack(&pktns->acktr); @@ -5977,7 +5989,7 @@ static int vneg_available_versions_includes(const uint8_t *available_versions, } for (i = 0; i < available_versionslen; i += sizeof(uint32_t)) { - available_versions = ngtcp2_get_uint32(&v, available_versions); + available_versions = ngtcp2_get_uint32be(&v, available_versions); if (version == v) { return 1; @@ -6097,6 +6109,7 @@ conn_recv_handshake_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, ngtcp2_strm *crypto; ngtcp2_encryption_level encryption_level; int invalid_reserved_bits = 0; + size_t num_ack_processed = 0; if (pktlen == 0) { return 0; @@ -6116,8 +6129,8 @@ conn_recv_handshake_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, "buffering 1RTT packet len=%zu", pktlen); - rv = conn_buffer_pkt(conn, &conn->pktns, path, pi, pkt, pktlen, dgramlen, - ts); + rv = + conn_buffer_pkt(conn, &conn->pktns, path, pi, pkt, pktlen, dgramlen, ts); if (rv != 0) { assert(ngtcp2_err_is_fatal(rv)); return rv; @@ -6158,8 +6171,8 @@ conn_recv_handshake_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, "packet was ignored because of mismatched SCID"); return NGTCP2_ERR_DISCARD_PKT; } - rv = conn_on_version_negotiation(conn, &hd, pkt + hdpktlen, - pktlen - hdpktlen); + rv = + conn_on_version_negotiation(conn, &hd, pkt + hdpktlen, pktlen - hdpktlen); if (rv != 0) { if (ngtcp2_err_is_fatal(rv)) { return rv; @@ -6250,7 +6263,7 @@ conn_recv_handshake_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, ngtcp2_ssize nread2; /* TODO Avoid to parse header twice. */ nread2 = - conn_recv_pkt(conn, path, pi, pkt, pktlen, dgramlen, pkt_ts, ts); + conn_recv_pkt(conn, path, pi, pkt, pktlen, dgramlen, pkt_ts, ts); if (nread2 < 0) { return nread2; } @@ -6275,8 +6288,8 @@ conn_recv_handshake_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, case NGTCP2_PKT_INITIAL: if (!conn->in_pktns) { ngtcp2_log_info( - &conn->log, NGTCP2_LOG_EVENT_PKT, - "Initial packet is discarded because keys have been discarded"); + &conn->log, NGTCP2_LOG_EVENT_PKT, + "Initial packet is discarded because keys have been discarded"); return (ngtcp2_ssize)pktlen; } @@ -6285,10 +6298,10 @@ conn_recv_handshake_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, if (conn->server) { if (dgramlen < NGTCP2_MAX_UDP_PAYLOAD_SIZE) { ngtcp2_log_info( - &conn->log, NGTCP2_LOG_EVENT_PKT, - "Initial packet was ignored because it is included in UDP datagram " - "less than %zu bytes: %zu bytes", - NGTCP2_MAX_UDP_PAYLOAD_SIZE, dgramlen); + &conn->log, NGTCP2_LOG_EVENT_PKT, + "Initial packet was ignored because it is included in UDP datagram " + "less than %zu bytes: %zu bytes", + NGTCP2_MAX_UDP_PAYLOAD_SIZE, dgramlen); return NGTCP2_ERR_DISCARD_PKT; } if (conn->local.settings.tokenlen) { @@ -6329,10 +6342,9 @@ conn_recv_handshake_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, /* Install new Initial keys using QUIC version = hd.version */ rv = conn_call_version_negotiation( - conn, hd.version, - (conn->flags & NGTCP2_CONN_FLAG_RECV_RETRY) - ? &conn->dcid.current.cid - : &conn->rcid); + conn, hd.version, + (conn->flags & NGTCP2_CONN_FLAG_RECV_RETRY) ? &conn->dcid.current.cid + : &conn->rcid); if (rv != 0) { return rv; } @@ -6364,8 +6376,8 @@ conn_recv_handshake_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, if (!conn->hs_pktns->crypto.rx.ckm) { if (conn->server) { ngtcp2_log_info( - &conn->log, NGTCP2_LOG_EVENT_PKT, - "Handshake packet at this point is unexpected and discarded"); + &conn->log, NGTCP2_LOG_EVENT_PKT, + "Handshake packet at this point is unexpected and discarded"); return (ngtcp2_ssize)pktlen; } ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, @@ -6420,8 +6432,8 @@ conn_recv_handshake_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, payload = pkt + hdpktlen; payloadlen = hd.len - hd.pkt_numlen; - hd.pkt_num = ngtcp2_pkt_adjust_pkt_num(pktns->rx.max_pkt_num, hd.pkt_num, - hd.pkt_numlen); + hd.pkt_num = + ngtcp2_pkt_adjust_pkt_num(pktns->rx.max_pkt_num, hd.pkt_num, hd.pkt_numlen); if (hd.pkt_num > NGTCP2_MAX_PKT_NUM) { ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, "pkn=%" PRId64 " is greater than maximum pkn", hd.pkt_num); @@ -6542,6 +6554,13 @@ conn_recv_handshake_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, case NGTCP2_FRAME_ACK_ECN: fr->ack.ack_delay = 0; fr->ack.ack_delay_unscaled = 0; + + rv = + ngtcp2_pkt_validate_ack(&fr->ack, conn->local.settings.initial_pkt_num); + if (rv != 0) { + return rv; + } + break; } @@ -6550,6 +6569,9 @@ conn_recv_handshake_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, switch (fr->type) { case NGTCP2_FRAME_ACK: case NGTCP2_FRAME_ACK_ECN: + if (num_ack_processed >= NGTCP2_MAX_ACK_PER_PKT) { + break; + } if (!conn->server && hd.type == NGTCP2_PKT_HANDSHAKE) { conn->flags |= NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED; } @@ -6557,6 +6579,7 @@ conn_recv_handshake_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, if (rv != 0) { return rv; } + ++num_ack_processed; break; case NGTCP2_FRAME_PADDING: break; @@ -6609,8 +6632,8 @@ conn_recv_handshake_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, /* Initial and Handshake are always acknowledged without delay. No need to call ngtcp2_acktr_immediate_ack(). */ - rv = ngtcp2_conn_sched_ack(conn, &pktns->acktr, hd.pkt_num, require_ack, - pkt_ts); + rv = + ngtcp2_conn_sched_ack(conn, &pktns->acktr, hd.pkt_num, require_ack, pkt_ts); if (rv != 0) { return rv; } @@ -6661,7 +6684,7 @@ static ngtcp2_ssize conn_recv_handshake_cpkt(ngtcp2_conn *conn, while (pktlen) { nread = - conn_recv_handshake_pkt(conn, path, pi, pkt, pktlen, dgramlen, ts, ts); + conn_recv_handshake_pkt(conn, path, pi, pkt, pktlen, dgramlen, ts, ts); if (nread < 0) { if (ngtcp2_err_is_fatal((int)nread)) { return nread; @@ -6673,7 +6696,7 @@ static ngtcp2_ssize conn_recv_handshake_cpkt(ngtcp2_conn *conn, if ((pkt[0] & NGTCP2_HEADER_FORM_BIT) && pktlen > 4) { /* Not a Version Negotiation packet */ - ngtcp2_get_uint32(&version, &pkt[1]); + ngtcp2_get_uint32be(&version, &pkt[1]); if (ngtcp2_pkt_get_type_long(version, pkt[0]) == NGTCP2_PKT_INITIAL) { if (conn->server) { if (is_unrecoverable_error((int)nread)) { @@ -6738,14 +6761,14 @@ int ngtcp2_conn_init_stream(ngtcp2_conn *conn, ngtcp2_strm *strm, if (bidi_stream(stream_id)) { if (local_stream) { max_rx_offset = - conn->local.transport_params.initial_max_stream_data_bidi_local; + conn->local.transport_params.initial_max_stream_data_bidi_local; max_tx_offset = - conn->remote.transport_params->initial_max_stream_data_bidi_remote; + conn->remote.transport_params->initial_max_stream_data_bidi_remote; } else { max_rx_offset = - conn->local.transport_params.initial_max_stream_data_bidi_remote; + conn->local.transport_params.initial_max_stream_data_bidi_remote; max_tx_offset = - conn->remote.transport_params->initial_max_stream_data_bidi_local; + conn->remote.transport_params->initial_max_stream_data_bidi_local; } } else if (local_stream) { max_rx_offset = 0; @@ -6759,8 +6782,8 @@ int ngtcp2_conn_init_stream(ngtcp2_conn *conn, ngtcp2_strm *strm, max_tx_offset, stream_user_data, &conn->frc_objalloc, conn->mem); - rv = ngtcp2_map_insert(&conn->strms, (ngtcp2_map_key_type)strm->stream_id, - strm); + rv = + ngtcp2_map_insert(&conn->strms, (ngtcp2_map_key_type)strm->stream_id, strm); if (rv != 0) { assert(rv != NGTCP2_ERR_INVALID_ARGUMENT); goto fail; @@ -6833,6 +6856,12 @@ static int conn_emit_pending_stream_data(ngtcp2_conn *conn, ngtcp2_strm *strm, return rv; } + /* ngtcp2_conn_shutdown_stream_read from a callback will free + strm->rx.rob. */ + if (!strm->rx.rob) { + return 0; + } + ngtcp2_rob_pop(strm->rx.rob, rx_offset - datalen, datalen); } } @@ -6901,7 +6930,8 @@ static int conn_recv_crypto(ngtcp2_conn *conn, return 0; } - crypto->rx.last_offset = ngtcp2_max(crypto->rx.last_offset, fr_end_offset); + crypto->rx.last_offset = + ngtcp2_max_uint64(crypto->rx.last_offset, fr_end_offset); /* TODO Before dispatching incoming data to TLS stack, make sure that previous data in previous encryption level has been @@ -6917,14 +6947,14 @@ static int conn_recv_crypto(ngtcp2_conn *conn, rx_offset += datalen; ngtcp2_strm_update_rx_offset(crypto, rx_offset); - rv = conn_call_recv_crypto_data(conn, encryption_level, offset, data, - datalen); + rv = + conn_call_recv_crypto_data(conn, encryption_level, offset, data, datalen); if (rv != 0) { return rv; } - rv = conn_emit_pending_crypto_data(conn, encryption_level, crypto, - rx_offset); + rv = + conn_emit_pending_crypto_data(conn, encryption_level, crypto, rx_offset); if (rv != 0) { return rv; } @@ -7100,7 +7130,8 @@ static int conn_recv_stream(ngtcp2_conn *conn, const ngtcp2_stream *fr) { return NGTCP2_ERR_FINAL_SIZE; } - strm->rx.last_offset = ngtcp2_max(strm->rx.last_offset, fr_end_offset); + strm->rx.last_offset = + ngtcp2_max_uint64(strm->rx.last_offset, fr_end_offset); if (fr_end_offset <= rx_offset) { return 0; @@ -7220,9 +7251,9 @@ handle_max_remote_streams_extension(uint64_t *punsent_max_remote_streams, size_t n) { if ( #if SIZE_MAX > UINT32_MAX - NGTCP2_MAX_STREAMS < n || + NGTCP2_MAX_STREAMS < n || #endif /* SIZE_MAX > UINT32_MAX */ - *punsent_max_remote_streams > (uint64_t)(NGTCP2_MAX_STREAMS - n)) { + *punsent_max_remote_streams > (uint64_t)(NGTCP2_MAX_STREAMS - n)) { *punsent_max_remote_streams = NGTCP2_MAX_STREAMS; } else { *punsent_max_remote_streams += n; @@ -7360,7 +7391,7 @@ static int conn_recv_reset_stream(ngtcp2_conn *conn, which are not passed to application. */ if (!(strm->flags & NGTCP2_STRM_FLAG_STOP_SENDING)) { ngtcp2_conn_extend_max_offset(conn, strm->rx.last_offset - - ngtcp2_strm_rx_offset(strm)); + ngtcp2_strm_rx_offset(strm)); } conn->rx.offset += datalen; @@ -7368,7 +7399,7 @@ static int conn_recv_reset_stream(ngtcp2_conn *conn, strm->rx.last_offset = fr->final_size; strm->flags |= - NGTCP2_STRM_FLAG_SHUT_RD | NGTCP2_STRM_FLAG_RESET_STREAM_RECVED; + NGTCP2_STRM_FLAG_SHUT_RD | NGTCP2_STRM_FLAG_RESET_STREAM_RECVED; ngtcp2_strm_set_app_error_code(strm, fr->app_error_code); @@ -7471,7 +7502,7 @@ static int conn_recv_stop_sending(ngtcp2_conn *conn, } strm->flags |= - NGTCP2_STRM_FLAG_SHUT_WR | NGTCP2_STRM_FLAG_STOP_SENDING_RECVED; + NGTCP2_STRM_FLAG_SHUT_WR | NGTCP2_STRM_FLAG_STOP_SENDING_RECVED; ngtcp2_strm_streamfrq_clear(strm); @@ -7487,7 +7518,7 @@ static int check_stateless_reset(const ngtcp2_dcid *dcid, const ngtcp2_pkt_stateless_reset *sr) { return ngtcp2_path_eq(&dcid->ps.path, path) && ngtcp2_dcid_verify_stateless_reset_token( - dcid, sr->stateless_reset_token) == 0; + dcid, sr->stateless_reset_token) == 0; } /* @@ -7576,7 +7607,7 @@ static int conn_recv_max_streams(ngtcp2_conn *conn, return NGTCP2_ERR_FRAME_ENCODING; } - n = ngtcp2_min(fr->max_streams, NGTCP2_MAX_STREAMS); + n = ngtcp2_min_uint64(fr->max_streams, NGTCP2_MAX_STREAMS); if (fr->type == NGTCP2_FRAME_MAX_STREAMS_BIDI) { if (conn->local.bidi.max_streams < n) { @@ -8221,7 +8252,7 @@ static int conn_recv_handshake_done(ngtcp2_conn *conn, ngtcp2_tstamp ts) { conn->pktns.rtb.persistent_congestion_start_ts = ts; - conn_discard_handshake_state(conn, ts); + ngtcp2_conn_discard_handshake_state(conn, ts); assert(conn->remote.transport_params); @@ -8323,9 +8354,9 @@ static int conn_prepare_key_update(ngtcp2_conn *conn, ngtcp2_tstamp ts) { new_tx_ckm = conn->crypto.key_update.new_tx_ckm; rv = conn_call_update_key( - conn, new_rx_ckm->secret.base, new_tx_ckm->secret.base, &rx_aead_ctx, - new_rx_ckm->iv.base, &tx_aead_ctx, new_tx_ckm->iv.base, - rx_ckm->secret.base, tx_ckm->secret.base, secretlen); + conn, new_rx_ckm->secret.base, new_tx_ckm->secret.base, &rx_aead_ctx, + new_rx_ckm->iv.base, &tx_aead_ctx, new_tx_ckm->iv.base, rx_ckm->secret.base, + tx_ckm->secret.base, secretlen); if (rv != 0) { return rv; } @@ -8340,7 +8371,7 @@ static int conn_prepare_key_update(ngtcp2_conn *conn, ngtcp2_tstamp ts) { if (conn->crypto.key_update.old_rx_ckm) { conn_call_delete_crypto_aead_ctx( - conn, &conn->crypto.key_update.old_rx_ckm->aead_ctx); + conn, &conn->crypto.key_update.old_rx_ckm->aead_ctx); ngtcp2_crypto_km_del(conn->crypto.key_update.old_rx_ckm, conn->mem); conn->crypto.key_update.old_rx_ckm = NULL; } @@ -8412,7 +8443,6 @@ static int conn_recv_non_probing_pkt_on_new_path(ngtcp2_conn *conn, size_t dgramlen, int new_cid_used, ngtcp2_tstamp ts) { - ngtcp2_dcid dcid, *bound_dcid, *last; ngtcp2_pv *pv; int rv; @@ -8448,9 +8478,9 @@ static int conn_recv_non_probing_pkt_on_new_path(ngtcp2_conn *conn, } remote_addr_cmp = - ngtcp2_addr_compare(&conn->dcid.current.ps.path.remote, &path->remote); + ngtcp2_addr_compare(&conn->dcid.current.ps.path.remote, &path->remote); local_addr_eq = - ngtcp2_addr_eq(&conn->dcid.current.ps.path.local, &path->local); + ngtcp2_addr_eq(&conn->dcid.current.ps.path.local, &path->local); /* * When to change DCID? RFC 9002 section 9.5 says: @@ -8480,8 +8510,8 @@ static int conn_recv_non_probing_pkt_on_new_path(ngtcp2_conn *conn, bound_dcid = ngtcp2_ringbuf_get(&conn->dcid.bound.rb, i); if (ngtcp2_path_eq(&bound_dcid->ps.path, path)) { ngtcp2_log_info( - &conn->log, NGTCP2_LOG_EVENT_CON, - "Found DCID which has already been bound to the new path"); + &conn->log, NGTCP2_LOG_EVENT_CON, + "Found DCID which has already been bound to the new path"); ngtcp2_dcid_copy(&dcid, bound_dcid); if (i == 0) { @@ -8550,21 +8580,21 @@ static int conn_recv_non_probing_pkt_on_new_path(ngtcp2_conn *conn, pv->fallback_pto = pto; } + ngtcp2_dcid_copy(&conn->dcid.current, &dcid); + if (!local_addr_eq || (remote_addr_cmp & (NGTCP2_ADDR_COMPARE_FLAG_ADDR | NGTCP2_ADDR_COMPARE_FLAG_FAMILY))) { conn_reset_congestion_state(conn, ts); } - ngtcp2_dcid_copy(&conn->dcid.current, &dcid); - conn_reset_ecn_validation_state(conn); ngtcp2_conn_stop_pmtud(conn); if (conn->pv) { ngtcp2_log_info( - &conn->log, NGTCP2_LOG_EVENT_PTV, - "path migration is aborted because new migration has started"); + &conn->log, NGTCP2_LOG_EVENT_PTV, + "path migration is aborted because new migration has started"); rv = conn_abort_pv(conn, ts); if (rv != 0) { return rv; @@ -8658,6 +8688,7 @@ conn_recv_delayed_handshake_pkt(ngtcp2_conn *conn, const ngtcp2_pkt_info *pi, int rv; int require_ack = 0; ngtcp2_pktns *pktns; + size_t num_ack_processed = 0; assert(hd->type == NGTCP2_PKT_HANDSHAKE); @@ -8684,6 +8715,13 @@ conn_recv_delayed_handshake_pkt(ngtcp2_conn *conn, const ngtcp2_pkt_info *pi, case NGTCP2_FRAME_ACK_ECN: fr->ack.ack_delay = 0; fr->ack.ack_delay_unscaled = 0; + + rv = + ngtcp2_pkt_validate_ack(&fr->ack, conn->local.settings.initial_pkt_num); + if (rv != 0) { + return rv; + } + break; } @@ -8692,6 +8730,9 @@ conn_recv_delayed_handshake_pkt(ngtcp2_conn *conn, const ngtcp2_pkt_info *pi, switch (fr->type) { case NGTCP2_FRAME_ACK: case NGTCP2_FRAME_ACK_ECN: + if (num_ack_processed >= NGTCP2_MAX_ACK_PER_PKT) { + break; + } if (!conn->server) { conn->flags |= NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED; } @@ -8699,6 +8740,7 @@ conn_recv_delayed_handshake_pkt(ngtcp2_conn *conn, const ngtcp2_pkt_info *pi, if (rv != 0) { return rv; } + ++num_ack_processed; break; case NGTCP2_FRAME_PADDING: break; @@ -8762,7 +8804,7 @@ conn_allow_path_change_under_disable_active_migration(ngtcp2_conn *conn, (NAT rebinding). */ if (ngtcp2_addr_eq(&conn->dcid.current.ps.path.local, &path->local)) { remote_addr_cmp = - ngtcp2_addr_compare(&conn->dcid.current.ps.path.remote, &path->remote); + ngtcp2_addr_compare(&conn->dcid.current.ps.path.remote, &path->remote); return (remote_addr_cmp | NGTCP2_ADDR_COMPARE_FLAG_PORT) == NGTCP2_ADDR_COMPARE_FLAG_PORT; @@ -8860,6 +8902,7 @@ static ngtcp2_ssize conn_recv_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, int recv_ncid = 0; int new_cid_used = 0; int path_challenge_recved = 0; + size_t num_ack_processed = 0; if (conn->server && conn->local.transport_params.disable_active_migration && !ngtcp2_path_eq(&conn->dcid.current.ps.path, path) && @@ -8992,8 +9035,8 @@ static ngtcp2_ssize conn_recv_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, payload = pkt + hdpktlen; payloadlen = pktlen - hdpktlen; - hd.pkt_num = ngtcp2_pkt_adjust_pkt_num(pktns->rx.max_pkt_num, hd.pkt_num, - hd.pkt_numlen); + hd.pkt_num = + ngtcp2_pkt_adjust_pkt_num(pktns->rx.max_pkt_num, hd.pkt_num, hd.pkt_numlen); if (hd.pkt_num > NGTCP2_MAX_PKT_NUM) { ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, "pkn=%" PRId64 " is greater than maximum pkn", hd.pkt_num); @@ -9057,7 +9100,7 @@ static ngtcp2_ssize conn_recv_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, if (hd.type == NGTCP2_PKT_1RTT && ++conn->crypto.decryption_failure_count >= - pktns->crypto.ctx.max_decryption_failure) { + pktns->crypto.ctx.max_decryption_failure) { return NGTCP2_ERR_AEAD_LIMIT_REACHED; } @@ -9163,7 +9206,14 @@ static ngtcp2_ssize conn_recv_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, } assert(conn->remote.transport_params); assign_recved_ack_delay_unscaled( - &fr->ack, conn->remote.transport_params->ack_delay_exponent); + &fr->ack, conn->remote.transport_params->ack_delay_exponent); + + rv = + ngtcp2_pkt_validate_ack(&fr->ack, conn->local.settings.initial_pkt_num); + if (rv != 0) { + return rv; + } + break; } @@ -9210,6 +9260,9 @@ static ngtcp2_ssize conn_recv_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, switch (fr->type) { case NGTCP2_FRAME_ACK: case NGTCP2_FRAME_ACK_ECN: + if (num_ack_processed >= NGTCP2_MAX_ACK_PER_PKT) { + break; + } if (!conn->server) { conn->flags |= NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED; } @@ -9218,6 +9271,7 @@ static ngtcp2_ssize conn_recv_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, return rv; } non_probing_pkt = 1; + ++num_ack_processed; break; case NGTCP2_FRAME_STREAM: rv = conn_recv_stream(conn, &fr->stream); @@ -9429,8 +9483,8 @@ static ngtcp2_ssize conn_recv_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, ngtcp2_acktr_immediate_ack(&pktns->acktr); } - rv = ngtcp2_conn_sched_ack(conn, &pktns->acktr, hd.pkt_num, require_ack, - pkt_ts); + rv = + ngtcp2_conn_sched_ack(conn, &pktns->acktr, hd.pkt_num, require_ack, pkt_ts); if (rv != 0) { return rv; } @@ -9602,8 +9656,8 @@ static int conn_handshake_completed(ngtcp2_conn *conn) { } } if (conn->local.uni.max_streams > 0) { - rv = conn_call_extend_max_local_streams_uni(conn, - conn->local.uni.max_streams); + rv = + conn_call_extend_max_local_streams_uni(conn, conn->local.uni.max_streams); if (rv != 0) { return rv; } @@ -9822,7 +9876,7 @@ static ngtcp2_ssize conn_read_handshake(ngtcp2_conn *conn, } if (conn->hs_pktns->rx.max_pkt_num != -1) { - conn_discard_initial_state(conn, ts); + ngtcp2_conn_discard_initial_state(conn, ts); } if (!conn_is_tls_handshake_completed(conn)) { @@ -9888,7 +9942,7 @@ static ngtcp2_ssize conn_read_handshake(ngtcp2_conn *conn, return rv; } - conn_discard_handshake_state(conn, ts); + ngtcp2_conn_discard_handshake_state(conn, ts); rv = conn_enqueue_handshake_done(conn); if (rv != 0) { @@ -9936,7 +9990,7 @@ int ngtcp2_conn_read_pkt_versioned(ngtcp2_conn *conn, const ngtcp2_path *path, pktlen); if (pktlen == 0) { - return NGTCP2_ERR_INVALID_ARGUMENT; + return 0; } /* client does not expect a packet from unknown path. */ @@ -10035,13 +10089,12 @@ static int conn_check_pkt_num_exhausted(ngtcp2_conn *conn) { * conn_retransmit_retry_early retransmits 0RTT packet after Retry is * received from server. */ -static ngtcp2_ssize conn_retransmit_retry_early(ngtcp2_conn *conn, - ngtcp2_pkt_info *pi, - uint8_t *dest, size_t destlen, - uint8_t flags, - ngtcp2_tstamp ts) { - return conn_write_pkt(conn, pi, dest, destlen, NULL, NGTCP2_PKT_0RTT, flags, - ts); +static ngtcp2_ssize +conn_retransmit_retry_early(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, + uint8_t *dest, size_t destlen, size_t dgram_offset, + uint8_t flags, ngtcp2_tstamp ts) { + return conn_write_pkt(conn, pi, dest, destlen, dgram_offset, NULL, + NGTCP2_PKT_0RTT, flags, ts); } /* @@ -10067,21 +10120,21 @@ static int conn_validate_early_transport_params_limits(ngtcp2_conn *conn) { assert(params); if (conn->early.transport_params.active_connection_id_limit > - params->active_connection_id_limit || + params->active_connection_id_limit || conn->early.transport_params.initial_max_data > - params->initial_max_data || + params->initial_max_data || conn->early.transport_params.initial_max_stream_data_bidi_local > - params->initial_max_stream_data_bidi_local || + params->initial_max_stream_data_bidi_local || conn->early.transport_params.initial_max_stream_data_bidi_remote > - params->initial_max_stream_data_bidi_remote || + params->initial_max_stream_data_bidi_remote || conn->early.transport_params.initial_max_stream_data_uni > - params->initial_max_stream_data_uni || + params->initial_max_stream_data_uni || conn->early.transport_params.initial_max_streams_bidi > - params->initial_max_streams_bidi || + params->initial_max_streams_bidi || conn->early.transport_params.initial_max_streams_uni > - params->initial_max_streams_uni || + params->initial_max_streams_uni || conn->early.transport_params.max_datagram_frame_size > - params->max_datagram_frame_size) { + params->max_datagram_frame_size) { return NGTCP2_ERR_PROTO; } @@ -10131,14 +10184,14 @@ static ngtcp2_ssize conn_write_handshake(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, if (!(conn->flags & NGTCP2_CONN_FLAG_RECV_RETRY)) { nwrite = - conn_write_client_initial(conn, pi, dest, destlen, write_datalen, ts); + conn_write_client_initial(conn, pi, dest, destlen, write_datalen, ts); if (nwrite <= 0) { return nwrite; } } else { - nwrite = conn_write_handshake_pkt( - conn, pi, dest, destlen, NGTCP2_PKT_INITIAL, - NGTCP2_WRITE_PKT_FLAG_NONE, write_datalen, ts); + nwrite = + conn_write_handshake_pkt(conn, pi, dest, destlen, 0, NGTCP2_PKT_INITIAL, + NGTCP2_WRITE_PKT_FLAG_NONE, write_datalen, ts); if (nwrite < 0) { return nwrite; } @@ -10146,10 +10199,10 @@ static ngtcp2_ssize conn_write_handshake(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, if (pending_early_datalen) { early_spktlen = conn_retransmit_retry_early( - conn, pi, dest + nwrite, destlen - (size_t)nwrite, - nwrite ? NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING - : NGTCP2_WRITE_PKT_FLAG_NONE, - ts); + conn, pi, dest + nwrite, destlen - (size_t)nwrite, (size_t)nwrite, + nwrite ? NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING + : NGTCP2_WRITE_PKT_FLAG_NONE, + ts); if (early_spktlen < 0) { assert(ngtcp2_err_is_fatal((int)early_spktlen)); @@ -10163,6 +10216,8 @@ static ngtcp2_ssize conn_write_handshake(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, return res; case NGTCP2_CS_CLIENT_WAIT_HANDSHAKE: + pending_early_datalen = 0; + if (!conn_handshake_probe_left(conn) && conn_cwnd_is_zero(conn)) { destlen = 0; } else { @@ -10174,7 +10229,7 @@ static ngtcp2_ssize conn_write_handshake(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, } nwrite = - conn_write_handshake_pkts(conn, pi, dest, destlen, write_datalen, ts); + conn_write_handshake_pkts(conn, pi, dest, destlen, write_datalen, ts); if (nwrite < 0) { return nwrite; } @@ -10185,9 +10240,10 @@ static ngtcp2_ssize conn_write_handshake(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, } if (!conn_is_tls_handshake_completed(conn)) { - if (!(conn->flags & NGTCP2_CONN_FLAG_EARLY_DATA_REJECTED)) { - nwrite = conn_retransmit_retry_early(conn, pi, dest, destlen, - NGTCP2_WRITE_PKT_FLAG_NONE, ts); + if (pending_early_datalen && + !(conn->flags & NGTCP2_CONN_FLAG_EARLY_DATA_REJECTED)) { + nwrite = conn_retransmit_retry_early( + conn, pi, dest, destlen, (size_t)res, NGTCP2_WRITE_PKT_FLAG_NONE, ts); if (nwrite < 0) { return nwrite; } @@ -10250,8 +10306,8 @@ static ngtcp2_ssize conn_write_handshake(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, assert(conn->dcid.current.seq == 0); assert(!(conn->dcid.current.flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT)); ngtcp2_dcid_set_token( - &conn->dcid.current, - conn->remote.transport_params->stateless_reset_token); + &conn->dcid.current, + conn->remote.transport_params->stateless_reset_token); } rv = conn_call_activate_dcid(conn, &conn->dcid.current); @@ -10271,7 +10327,7 @@ static ngtcp2_ssize conn_write_handshake(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, return res; case NGTCP2_CS_SERVER_INITIAL: nwrite = - conn_write_handshake_pkts(conn, pi, dest, destlen, write_datalen, ts); + conn_write_handshake_pkts(conn, pi, dest, destlen, write_datalen, ts); if (nwrite < 0) { return nwrite; } @@ -10284,7 +10340,7 @@ static ngtcp2_ssize conn_write_handshake(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, case NGTCP2_CS_SERVER_WAIT_HANDSHAKE: if (conn_handshake_probe_left(conn) || !conn_cwnd_is_zero(conn)) { nwrite = - conn_write_handshake_pkts(conn, pi, dest, destlen, write_datalen, ts); + conn_write_handshake_pkts(conn, pi, dest, destlen, write_datalen, ts); if (nwrite < 0) { return nwrite; } @@ -10363,8 +10419,8 @@ static ngtcp2_ssize conn_client_write_handshake(ngtcp2_conn *conn, datalen = ngtcp2_vec_len(vmsg->stream.data, vmsg->stream.datacnt); send_stream = conn_retry_early_payloadlen(conn) == 0; if (send_stream) { - write_datalen = ngtcp2_min(datalen + NGTCP2_STREAM_OVERHEAD, - NGTCP2_MIN_COALESCED_PAYLOADLEN); + write_datalen = ngtcp2_min_uint64(datalen + NGTCP2_STREAM_OVERHEAD, + NGTCP2_MIN_COALESCED_PAYLOADLEN); if (vmsg->stream.flags & NGTCP2_WRITE_STREAM_FLAG_MORE) { wflags |= NGTCP2_WRITE_PKT_FLAG_MORE; @@ -10428,8 +10484,8 @@ static ngtcp2_ssize conn_client_write_handshake(ngtcp2_conn *conn, return spktlen; } - early_spktlen = conn_write_pkt(conn, pi, dest, destlen, vmsg, NGTCP2_PKT_0RTT, - wflags, ts); + early_spktlen = conn_write_pkt(conn, pi, dest, destlen, (size_t)spktlen, vmsg, + NGTCP2_PKT_0RTT, wflags, ts); if (early_spktlen < 0) { switch (early_spktlen) { case NGTCP2_ERR_STREAM_DATA_BLOCKED: @@ -10520,10 +10576,10 @@ int ngtcp2_accept(ngtcp2_pkt_hd *dest, const uint8_t *pkt, size_t pktlen) { } int ngtcp2_conn_install_initial_key( - ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *rx_aead_ctx, - const uint8_t *rx_iv, const ngtcp2_crypto_cipher_ctx *rx_hp_ctx, - const ngtcp2_crypto_aead_ctx *tx_aead_ctx, const uint8_t *tx_iv, - const ngtcp2_crypto_cipher_ctx *tx_hp_ctx, size_t ivlen) { + ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *rx_aead_ctx, + const uint8_t *rx_iv, const ngtcp2_crypto_cipher_ctx *rx_hp_ctx, + const ngtcp2_crypto_aead_ctx *tx_aead_ctx, const uint8_t *tx_iv, + const ngtcp2_crypto_cipher_ctx *tx_hp_ctx, size_t ivlen) { ngtcp2_pktns *pktns = conn->in_pktns; int rv; @@ -10571,11 +10627,11 @@ int ngtcp2_conn_install_initial_key( } int ngtcp2_conn_install_vneg_initial_key( - ngtcp2_conn *conn, uint32_t version, - const ngtcp2_crypto_aead_ctx *rx_aead_ctx, const uint8_t *rx_iv, - const ngtcp2_crypto_cipher_ctx *rx_hp_ctx, - const ngtcp2_crypto_aead_ctx *tx_aead_ctx, const uint8_t *tx_iv, - const ngtcp2_crypto_cipher_ctx *tx_hp_ctx, size_t ivlen) { + ngtcp2_conn *conn, uint32_t version, + const ngtcp2_crypto_aead_ctx *rx_aead_ctx, const uint8_t *rx_iv, + const ngtcp2_crypto_cipher_ctx *rx_hp_ctx, + const ngtcp2_crypto_aead_ctx *tx_aead_ctx, const uint8_t *tx_iv, + const ngtcp2_crypto_cipher_ctx *tx_hp_ctx, size_t ivlen) { int rv; assert(ivlen >= 8); @@ -10622,8 +10678,8 @@ int ngtcp2_conn_install_vneg_initial_key( } int ngtcp2_conn_install_rx_handshake_key( - ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *aead_ctx, - const uint8_t *iv, size_t ivlen, const ngtcp2_crypto_cipher_ctx *hp_ctx) { + ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *aead_ctx, const uint8_t *iv, + size_t ivlen, const ngtcp2_crypto_cipher_ctx *hp_ctx) { ngtcp2_pktns *pktns = conn->hs_pktns; int rv; @@ -10654,8 +10710,8 @@ int ngtcp2_conn_install_rx_handshake_key( } int ngtcp2_conn_install_tx_handshake_key( - ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *aead_ctx, - const uint8_t *iv, size_t ivlen, const ngtcp2_crypto_cipher_ctx *hp_ctx) { + ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *aead_ctx, const uint8_t *iv, + size_t ivlen, const ngtcp2_crypto_cipher_ctx *hp_ctx) { ngtcp2_pktns *pktns = conn->hs_pktns; int rv; @@ -10905,7 +10961,7 @@ ngtcp2_tstamp ngtcp2_conn_loss_detection_expiry(ngtcp2_conn *conn) { } ngtcp2_tstamp ngtcp2_conn_internal_expiry(ngtcp2_conn *conn) { - ngtcp2_tstamp res = UINT64_MAX, t; + ngtcp2_tstamp res = UINT64_MAX; ngtcp2_duration pto = conn_compute_pto(conn, &conn->pktns); ngtcp2_scid *scid; ngtcp2_dcid *dcid; @@ -10916,21 +10972,19 @@ ngtcp2_tstamp ngtcp2_conn_internal_expiry(ngtcp2_conn *conn) { } if (conn->pmtud) { - res = ngtcp2_min(res, conn->pmtud->expiry); + res = ngtcp2_min_uint64(res, conn->pmtud->expiry); } if (!ngtcp2_pq_empty(&conn->scid.used)) { scid = ngtcp2_struct_of(ngtcp2_pq_top(&conn->scid.used), ngtcp2_scid, pe); if (scid->retired_ts != UINT64_MAX) { - t = scid->retired_ts + pto; - res = ngtcp2_min(res, t); + res = ngtcp2_min_uint64(res, scid->retired_ts + pto); } } if (ngtcp2_ringbuf_len(&conn->dcid.retired.rb)) { dcid = ngtcp2_ringbuf_get(&conn->dcid.retired.rb, 0); - t = dcid->retired_ts + pto; - res = ngtcp2_min(res, t); + res = ngtcp2_min_uint64(res, dcid->retired_ts + pto); } if (conn->dcid.current.cid.datalen) { @@ -10941,15 +10995,13 @@ ngtcp2_tstamp ngtcp2_conn_internal_expiry(ngtcp2_conn *conn) { assert(dcid->cid.datalen); assert(dcid->bound_ts != UINT64_MAX); - t = dcid->bound_ts + 3 * pto; - res = ngtcp2_min(res, t); + res = ngtcp2_min_uint64(res, dcid->bound_ts + 3 * pto); } } if (conn->server && conn->early.ckm && conn->early.discard_started_ts != UINT64_MAX) { - t = conn->early.discard_started_ts + 3 * pto; - res = ngtcp2_min(res, t); + res = ngtcp2_min_uint64(res, conn->early.discard_started_ts + 3 * pto); } return res; @@ -10969,7 +11021,7 @@ static ngtcp2_tstamp conn_handshake_expiry(ngtcp2_conn *conn) { if (conn_is_tls_handshake_completed(conn) || conn->local.settings.handshake_timeout == UINT64_MAX || conn->local.settings.initial_ts >= - UINT64_MAX - conn->local.settings.handshake_timeout) { + UINT64_MAX - conn->local.settings.handshake_timeout) { return UINT64_MAX; } @@ -10978,20 +11030,14 @@ static ngtcp2_tstamp conn_handshake_expiry(ngtcp2_conn *conn) { } ngtcp2_tstamp ngtcp2_conn_get_expiry(ngtcp2_conn *conn) { - ngtcp2_tstamp t1 = ngtcp2_conn_loss_detection_expiry(conn); - ngtcp2_tstamp t2 = ngtcp2_conn_ack_delay_expiry(conn); - ngtcp2_tstamp t3 = ngtcp2_conn_internal_expiry(conn); - ngtcp2_tstamp t4 = ngtcp2_conn_lost_pkt_expiry(conn); - ngtcp2_tstamp t5 = conn_keep_alive_expiry(conn); - ngtcp2_tstamp t6 = conn_handshake_expiry(conn); - ngtcp2_tstamp t7 = ngtcp2_conn_get_idle_expiry(conn); - ngtcp2_tstamp res = ngtcp2_min(t1, t2); - res = ngtcp2_min(res, t3); - res = ngtcp2_min(res, t4); - res = ngtcp2_min(res, t5); - res = ngtcp2_min(res, t6); - res = ngtcp2_min(res, t7); - return ngtcp2_min(res, conn->tx.pacing.next_ts); + ngtcp2_tstamp res = ngtcp2_min_uint64(ngtcp2_conn_loss_detection_expiry(conn), + ngtcp2_conn_ack_delay_expiry(conn)); + res = ngtcp2_min_uint64(res, ngtcp2_conn_internal_expiry(conn)); + res = ngtcp2_min_uint64(res, ngtcp2_conn_lost_pkt_expiry(conn)); + res = ngtcp2_min_uint64(res, conn_keep_alive_expiry(conn)); + res = ngtcp2_min_uint64(res, conn_handshake_expiry(conn)); + res = ngtcp2_min_uint64(res, ngtcp2_conn_get_idle_expiry(conn)); + return ngtcp2_min_uint64(res, conn->tx.pacing.next_ts); } int ngtcp2_conn_handle_expiry(ngtcp2_conn *conn, ngtcp2_tstamp ts) { @@ -11089,7 +11135,7 @@ ngtcp2_tstamp ngtcp2_conn_lost_pkt_expiry(ngtcp2_conn *conn) { ts = ngtcp2_rtb_lost_pkt_ts(&conn->in_pktns->rtb); if (ts != UINT64_MAX) { ts += conn_compute_pto(conn, conn->in_pktns); - res = ngtcp2_min(res, ts); + res = ngtcp2_min_uint64(res, ts); } } @@ -11097,14 +11143,14 @@ ngtcp2_tstamp ngtcp2_conn_lost_pkt_expiry(ngtcp2_conn *conn) { ts = ngtcp2_rtb_lost_pkt_ts(&conn->hs_pktns->rtb); if (ts != UINT64_MAX) { ts += conn_compute_pto(conn, conn->hs_pktns); - res = ngtcp2_min(res, ts); + res = ngtcp2_min_uint64(res, ts); } } ts = ngtcp2_rtb_lost_pkt_ts(&conn->pktns.rtb); if (ts != UINT64_MAX) { ts += conn_compute_pto(conn, &conn->pktns); - res = ngtcp2_min(res, ts); + res = ngtcp2_min_uint64(res, ts); } return res; @@ -11153,7 +11199,7 @@ static uint32_t select_preferred_version(const uint32_t *preferred_versions, } for (j = 0, p = available_versions; j < available_versionslen; j += sizeof(uint32_t)) { - p = ngtcp2_get_uint32(&v, p); + p = ngtcp2_get_uint32be(&v, p); if (preferred_versions[i] == v) { return v; @@ -11266,13 +11312,13 @@ ngtcp2_conn_server_negotiate_version(ngtcp2_conn *conn, assert(conn->client_chosen_version == version_info->chosen_version); return select_preferred_version( - conn->vneg.preferred_versions, conn->vneg.preferred_versionslen, - version_info->chosen_version, version_info->available_versions, - version_info->available_versionslen, version_info->chosen_version); + conn->vneg.preferred_versions, conn->vneg.preferred_versionslen, + version_info->chosen_version, version_info->available_versions, + version_info->available_versionslen, version_info->chosen_version); } int ngtcp2_conn_set_remote_transport_params( - ngtcp2_conn *conn, const ngtcp2_transport_params *params) { + ngtcp2_conn *conn, const ngtcp2_transport_params *params) { int rv; /* We expect this function is called once per QUIC connection, but @@ -11315,9 +11361,9 @@ int ngtcp2_conn_set_remote_transport_params( if (params->version_info_present) { if (!vneg_available_versions_includes( - params->version_info.available_versions, - params->version_info.available_versionslen, - params->version_info.chosen_version)) { + params->version_info.available_versions, + params->version_info.available_versionslen, + params->version_info.chosen_version)) { return NGTCP2_ERR_TRANSPORT_PARAM; } @@ -11326,7 +11372,7 @@ int ngtcp2_conn_set_remote_transport_params( } conn->negotiated_version = - ngtcp2_conn_server_negotiate_version(conn, ¶ms->version_info); + ngtcp2_conn_server_negotiate_version(conn, ¶ms->version_info); if (conn->negotiated_version != conn->client_chosen_version) { rv = conn_call_version_negotiation(conn, conn->negotiated_version, &conn->rcid); @@ -11339,7 +11385,7 @@ int ngtcp2_conn_set_remote_transport_params( } conn->local.transport_params.version_info.chosen_version = - conn->negotiated_version; + conn->negotiated_version; ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, "the negotiated version is 0x%08x", @@ -11372,7 +11418,7 @@ int ngtcp2_conn_set_remote_transport_params( assert(!conn->remote.pending_transport_params); rv = ngtcp2_transport_params_copy_new( - &conn->remote.pending_transport_params, params, conn->mem); + &conn->remote.pending_transport_params, params, conn->mem); if (rv != 0) { return rv; } @@ -11424,9 +11470,9 @@ ngtcp2_ssize ngtcp2_conn_encode_0rtt_transport_params(ngtcp2_conn *conn, params.initial_max_streams_bidi = src->initial_max_streams_bidi; params.initial_max_streams_uni = src->initial_max_streams_uni; params.initial_max_stream_data_bidi_local = - src->initial_max_stream_data_bidi_local; + src->initial_max_stream_data_bidi_local; params.initial_max_stream_data_bidi_remote = - src->initial_max_stream_data_bidi_remote; + src->initial_max_stream_data_bidi_remote; params.initial_max_stream_data_uni = src->initial_max_stream_data_uni; params.initial_max_data = src->initial_max_data; params.active_connection_id_limit = src->active_connection_id_limit; @@ -11456,7 +11502,7 @@ int ngtcp2_conn_decode_and_set_0rtt_transport_params(ngtcp2_conn *conn, } int ngtcp2_conn_set_0rtt_remote_transport_params( - ngtcp2_conn *conn, const ngtcp2_transport_params *params) { + ngtcp2_conn *conn, const ngtcp2_transport_params *params) { ngtcp2_transport_params *p; assert(!conn->server); @@ -11475,41 +11521,41 @@ int ngtcp2_conn_set_0rtt_remote_transport_params( p->initial_max_streams_bidi = params->initial_max_streams_bidi; p->initial_max_streams_uni = params->initial_max_streams_uni; p->initial_max_stream_data_bidi_local = - params->initial_max_stream_data_bidi_local; + params->initial_max_stream_data_bidi_local; p->initial_max_stream_data_bidi_remote = - params->initial_max_stream_data_bidi_remote; + params->initial_max_stream_data_bidi_remote; p->initial_max_stream_data_uni = params->initial_max_stream_data_uni; p->initial_max_data = params->initial_max_data; /* we might hit garbage, then set the sane default. */ p->active_connection_id_limit = - ngtcp2_max(NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT, - params->active_connection_id_limit); + ngtcp2_max_uint64(NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT, + params->active_connection_id_limit); p->max_datagram_frame_size = params->max_datagram_frame_size; /* we might hit garbage, then set the sane default. */ if (params->max_udp_payload_size) { - p->max_udp_payload_size = - ngtcp2_max(NGTCP2_MAX_UDP_PAYLOAD_SIZE, params->max_udp_payload_size); + p->max_udp_payload_size = ngtcp2_max_uint64(NGTCP2_MAX_UDP_PAYLOAD_SIZE, + params->max_udp_payload_size); } /* These parameters are treated specially. If server accepts early data, it must not set values for these parameters that are smaller than these remembered values. */ conn->early.transport_params.initial_max_streams_bidi = - params->initial_max_streams_bidi; + params->initial_max_streams_bidi; conn->early.transport_params.initial_max_streams_uni = - params->initial_max_streams_uni; + params->initial_max_streams_uni; conn->early.transport_params.initial_max_stream_data_bidi_local = - params->initial_max_stream_data_bidi_local; + params->initial_max_stream_data_bidi_local; conn->early.transport_params.initial_max_stream_data_bidi_remote = - params->initial_max_stream_data_bidi_remote; + params->initial_max_stream_data_bidi_remote; conn->early.transport_params.initial_max_stream_data_uni = - params->initial_max_stream_data_uni; + params->initial_max_stream_data_uni; conn->early.transport_params.initial_max_data = params->initial_max_data; conn->early.transport_params.active_connection_id_limit = - params->active_connection_id_limit; + params->active_connection_id_limit; conn->early.transport_params.max_datagram_frame_size = - params->max_datagram_frame_size; + params->max_datagram_frame_size; conn_sync_stream_id_limit(conn); @@ -11522,12 +11568,12 @@ int ngtcp2_conn_set_0rtt_remote_transport_params( } int ngtcp2_conn_set_local_transport_params_versioned( - ngtcp2_conn *conn, int transport_params_version, - const ngtcp2_transport_params *params) { + ngtcp2_conn *conn, int transport_params_version, + const ngtcp2_transport_params *params) { ngtcp2_transport_params paramsbuf; params = ngtcp2_transport_params_convert_to_latest( - ¶msbuf, transport_params_version, params); + ¶msbuf, transport_params_version, params); assert(conn->server); assert(params->active_connection_id_limit >= @@ -11576,7 +11622,7 @@ int ngtcp2_conn_commit_local_transport_params(ngtcp2_conn *conn) { } conn->rx.window = conn->rx.unsent_max_offset = conn->rx.max_offset = - params->initial_max_data; + params->initial_max_data; conn->remote.bidi.unsent_max_streams = params->initial_max_streams_bidi; conn->remote.bidi.max_streams = params->initial_max_streams_bidi; conn->remote.uni.unsent_max_streams = params->initial_max_streams_uni; @@ -11662,10 +11708,10 @@ ngtcp2_strm *ngtcp2_conn_find_stream(ngtcp2_conn *conn, int64_t stream_id) { } ngtcp2_ssize ngtcp2_conn_write_stream_versioned( - ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, - ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, ngtcp2_ssize *pdatalen, - uint32_t flags, int64_t stream_id, const uint8_t *data, size_t datalen, - ngtcp2_tstamp ts) { + ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, ngtcp2_ssize *pdatalen, + uint32_t flags, int64_t stream_id, const uint8_t *data, size_t datalen, + ngtcp2_tstamp ts) { ngtcp2_vec datav; datav.len = datalen; @@ -11693,9 +11739,10 @@ static ngtcp2_ssize conn_write_vmsg_wrapper(ngtcp2_conn *conn, if (cstat->bytes_in_flight >= cstat->cwnd) { conn->rst.is_cwnd_limited = 1; - } - - if (nwrite == 0 && cstat->bytes_in_flight < cstat->cwnd) { + } else if ((cstat->cwnd >= cstat->ssthresh || + cstat->bytes_in_flight * 2 < cstat->cwnd) && + nwrite == 0 && conn_pacing_pkt_tx_allowed(conn, ts) && + (conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED)) { conn->rst.app_limited = conn->rst.delivered + cstat->bytes_in_flight; if (conn->rst.app_limited == 0) { @@ -11707,10 +11754,10 @@ static ngtcp2_ssize conn_write_vmsg_wrapper(ngtcp2_conn *conn, } ngtcp2_ssize ngtcp2_conn_writev_stream_versioned( - ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, - ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, ngtcp2_ssize *pdatalen, - uint32_t flags, int64_t stream_id, const ngtcp2_vec *datav, size_t datavcnt, - ngtcp2_tstamp ts) { + ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, ngtcp2_ssize *pdatalen, + uint32_t flags, int64_t stream_id, const ngtcp2_vec *datav, size_t datavcnt, + ngtcp2_tstamp ts) { ngtcp2_vmsg vmsg, *pvmsg; ngtcp2_strm *strm; int64_t datalen; @@ -11734,19 +11781,24 @@ ngtcp2_ssize ngtcp2_conn_writev_stream_versioned( return NGTCP2_ERR_INVALID_ARGUMENT; } - if ((uint64_t)datalen > NGTCP2_MAX_VARINT - strm->tx.offset || - (uint64_t)datalen > NGTCP2_MAX_VARINT - conn->tx.offset) { - return NGTCP2_ERR_INVALID_ARGUMENT; - } + if (datalen == 0 && !(flags & NGTCP2_WRITE_STREAM_FLAG_FIN) && + (strm->flags & NGTCP2_STRM_FLAG_ANY_SENT)) { + pvmsg = NULL; + } else { + if ((uint64_t)datalen > NGTCP2_MAX_VARINT - strm->tx.offset || + (uint64_t)datalen > NGTCP2_MAX_VARINT - conn->tx.offset) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } - vmsg.type = NGTCP2_VMSG_TYPE_STREAM; - vmsg.stream.strm = strm; - vmsg.stream.flags = flags; - vmsg.stream.data = datav; - vmsg.stream.datacnt = datavcnt; - vmsg.stream.pdatalen = pdatalen; + vmsg.type = NGTCP2_VMSG_TYPE_STREAM; + vmsg.stream.strm = strm; + vmsg.stream.flags = flags; + vmsg.stream.data = datav; + vmsg.stream.datacnt = datavcnt; + vmsg.stream.pdatalen = pdatalen; - pvmsg = &vmsg; + pvmsg = &vmsg; + } } else { pvmsg = NULL; } @@ -11756,10 +11808,10 @@ ngtcp2_ssize ngtcp2_conn_writev_stream_versioned( } ngtcp2_ssize ngtcp2_conn_write_datagram_versioned( - ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, - ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, int *paccepted, - uint32_t flags, uint64_t dgram_id, const uint8_t *data, size_t datalen, - ngtcp2_tstamp ts) { + ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, int *paccepted, + uint32_t flags, uint64_t dgram_id, const uint8_t *data, size_t datalen, + ngtcp2_tstamp ts) { ngtcp2_vec datav; datav.len = datalen; @@ -11771,10 +11823,10 @@ ngtcp2_ssize ngtcp2_conn_write_datagram_versioned( } ngtcp2_ssize ngtcp2_conn_writev_datagram_versioned( - ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, - ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, int *paccepted, - uint32_t flags, uint64_t dgram_id, const ngtcp2_vec *datav, size_t datavcnt, - ngtcp2_tstamp ts) { + ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, int *paccepted, + uint32_t flags, uint64_t dgram_id, const ngtcp2_vec *datav, size_t datavcnt, + ngtcp2_tstamp ts) { ngtcp2_vmsg vmsg; int64_t datalen; @@ -11837,7 +11889,7 @@ ngtcp2_ssize ngtcp2_conn_write_vmsg(ngtcp2_conn *conn, ngtcp2_path *path, } origlen = destlen = - conn_shape_udp_payload(conn, &conn->dcid.current, destlen); + conn_shape_udp_payload(conn, &conn->dcid.current, destlen); if (!ppe_pending && pi) { pi->ecn = NGTCP2_ECN_NOT_ECT; @@ -11867,10 +11919,9 @@ ngtcp2_ssize ngtcp2_conn_write_vmsg(ngtcp2_conn *conn, ngtcp2_path *path, assert(dest[0] & NGTCP2_HEADER_FORM_BIT); assert(conn->negotiated_version); - if (ngtcp2_pkt_get_type_long(conn->negotiated_version, dest[0]) == - NGTCP2_PKT_INITIAL) { - /* We have added padding already, but in that case, there is no - space left to write 1RTT packet. */ + if (nwrite < NGTCP2_MAX_UDP_PAYLOAD_SIZE && + ngtcp2_pkt_get_type_long(conn->negotiated_version, dest[0]) == + NGTCP2_PKT_INITIAL) { wflags |= NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING; } @@ -11890,7 +11941,7 @@ ngtcp2_ssize ngtcp2_conn_write_vmsg(ngtcp2_conn *conn, ngtcp2_path *path, return 0; } - origlen = (size_t)ngtcp2_min((uint64_t)origlen, server_tx_left); + origlen = (size_t)ngtcp2_min_uint64((uint64_t)origlen, server_tx_left); } return conn_write_handshake_ack_pkts(conn, pi, dest, origlen, ts); @@ -11902,15 +11953,15 @@ ngtcp2_ssize ngtcp2_conn_write_vmsg(ngtcp2_conn *conn, ngtcp2_path *path, if (server_tx_left == 0) { if (cstat->loss_detection_timer != UINT64_MAX) { ngtcp2_log_info( - &conn->log, NGTCP2_LOG_EVENT_LDC, - "loss detection timer canceled due to amplification limit"); - cstat->loss_detection_timer = UINT64_MAX; + &conn->log, NGTCP2_LOG_EVENT_LDC, + "loss detection timer canceled due to amplification limit"); + ngtcp2_conn_cancel_loss_detection_timer(conn); } return 0; } - destlen = (size_t)ngtcp2_min((uint64_t)destlen, server_tx_left); + destlen = (size_t)ngtcp2_min_uint64((uint64_t)destlen, server_tx_left); } if (conn->in_pktns) { @@ -11931,14 +11982,12 @@ ngtcp2_ssize ngtcp2_conn_write_vmsg(ngtcp2_conn *conn, ngtcp2_path *path, dest += nwrite; destlen -= (size_t)nwrite; - if (conn->in_pktns && nwrite > 0) { + if (res < NGTCP2_MAX_UDP_PAYLOAD_SIZE && conn->in_pktns && nwrite > 0) { it = ngtcp2_rtb_head(&conn->in_pktns->rtb); if (!ngtcp2_ksl_it_end(&it)) { rtbent = ngtcp2_ksl_it_get(&it); if (rtbent->hd.pkt_num != prev_in_pkt_num && (rtbent->flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING)) { - /* We have added padding already, but in that case, there - is no space left to write 1RTT packet. */ wflags |= NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING; } } @@ -11959,7 +12008,7 @@ ngtcp2_ssize ngtcp2_conn_write_vmsg(ngtcp2_conn *conn, ngtcp2_path *path, return 0; } - origlen = (size_t)ngtcp2_min((uint64_t)origlen, server_tx_left); + origlen = (size_t)ngtcp2_min_uint64((uint64_t)origlen, server_tx_left); } return conn_write_ack_pkt(conn, pi, dest, origlen, NGTCP2_PKT_1RTT, ts); @@ -12004,12 +12053,12 @@ ngtcp2_ssize ngtcp2_conn_write_vmsg(ngtcp2_conn *conn, ngtcp2_path *path, } /* dest and destlen have already been adjusted in ppe in the first run. They are adjusted for probe packet later. */ - nwrite = conn_write_pkt(conn, pi, dest, destlen, vmsg, NGTCP2_PKT_1RTT, - wflags, ts); + nwrite = conn_write_pkt(conn, pi, dest, destlen, (size_t)res, vmsg, + NGTCP2_PKT_1RTT, wflags, ts); goto fin; } else { conn->pkt.require_padding = - (wflags & NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING); + (wflags & NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING); if (conn->state == NGTCP2_CS_POST_HANDSHAKE) { rv = conn_prepare_key_update(conn, ts); @@ -12023,14 +12072,14 @@ ngtcp2_ssize ngtcp2_conn_write_vmsg(ngtcp2_conn *conn, ngtcp2_path *path, } else { if (res == 0) { nwrite = - conn_write_path_response(conn, path, pi, dest, origdestlen, ts); + conn_write_path_response(conn, path, pi, dest, origdestlen, ts); if (nwrite) { goto fin; } if (conn->pv) { nwrite = - conn_write_path_challenge(conn, path, pi, dest, origdestlen, ts); + conn_write_path_challenge(conn, path, pi, dest, origdestlen, ts); if (nwrite) { goto fin; } @@ -12051,15 +12100,15 @@ ngtcp2_ssize ngtcp2_conn_write_vmsg(ngtcp2_conn *conn, ngtcp2_path *path, if (conn->server && !(conn->dcid.current.flags & NGTCP2_DCID_FLAG_PATH_VALIDATED)) { server_tx_left = conn_server_tx_left(conn, &conn->dcid.current); - origlen = (size_t)ngtcp2_min((uint64_t)origlen, server_tx_left); - destlen = (size_t)ngtcp2_min((uint64_t)destlen, server_tx_left); + origlen = (size_t)ngtcp2_min_uint64((uint64_t)origlen, server_tx_left); + destlen = (size_t)ngtcp2_min_uint64((uint64_t)destlen, server_tx_left); if (server_tx_left == 0 && conn->cstat.loss_detection_timer != UINT64_MAX) { ngtcp2_log_info( - &conn->log, NGTCP2_LOG_EVENT_LDC, - "loss detection timer canceled due to amplification limit"); - conn->cstat.loss_detection_timer = UINT64_MAX; + &conn->log, NGTCP2_LOG_EVENT_LDC, + "loss detection timer canceled due to amplification limit"); + ngtcp2_conn_cancel_loss_detection_timer(conn); } } } @@ -12100,14 +12149,14 @@ ngtcp2_ssize ngtcp2_conn_write_vmsg(ngtcp2_conn *conn, ngtcp2_path *path, "transmit probe pkt left=%zu", conn->pktns.rtb.probe_pkt_left); - nwrite = conn_write_pkt(conn, pi, dest, destlen, vmsg, NGTCP2_PKT_1RTT, - wflags, ts); + nwrite = conn_write_pkt(conn, pi, dest, destlen, (size_t)res, vmsg, + NGTCP2_PKT_1RTT, wflags, ts); goto fin; } - nwrite = conn_write_pkt(conn, pi, dest, destlen, vmsg, NGTCP2_PKT_1RTT, - wflags, ts); + nwrite = conn_write_pkt(conn, pi, dest, destlen, (size_t)res, vmsg, + NGTCP2_PKT_1RTT, wflags, ts); if (nwrite) { assert(nwrite != NGTCP2_ERR_NOBUF); goto fin; @@ -12162,9 +12211,8 @@ conn_write_connection_close(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, pkt_type != NGTCP2_PKT_INITIAL) { if (in_pktns && conn->server) { nwrite = ngtcp2_conn_write_single_frame_pkt( - conn, pi, dest, destlen, NGTCP2_PKT_INITIAL, - NGTCP2_WRITE_PKT_FLAG_NONE, &conn->dcid.current.cid, &fr, - NGTCP2_RTB_ENTRY_FLAG_NONE, NULL, ts); + conn, pi, dest, destlen, NGTCP2_PKT_INITIAL, NGTCP2_WRITE_PKT_FLAG_NONE, + &conn->dcid.current.cid, &fr, NGTCP2_RTB_ENTRY_FLAG_NONE, NULL, ts); if (nwrite < 0) { return nwrite; } @@ -12177,9 +12225,9 @@ conn_write_connection_close(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, if (pkt_type != NGTCP2_PKT_HANDSHAKE && hs_pktns && hs_pktns->crypto.tx.ckm) { nwrite = ngtcp2_conn_write_single_frame_pkt( - conn, pi, dest, destlen, NGTCP2_PKT_HANDSHAKE, - NGTCP2_WRITE_PKT_FLAG_NONE, &conn->dcid.current.cid, &fr, - NGTCP2_RTB_ENTRY_FLAG_NONE, NULL, ts); + conn, pi, dest, destlen, NGTCP2_PKT_HANDSHAKE, + NGTCP2_WRITE_PKT_FLAG_NONE, &conn->dcid.current.cid, &fr, + NGTCP2_RTB_ENTRY_FLAG_NONE, NULL, ts); if (nwrite < 0) { return nwrite; } @@ -12195,8 +12243,8 @@ conn_write_connection_close(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, } nwrite = ngtcp2_conn_write_single_frame_pkt( - conn, pi, dest, destlen, pkt_type, flags, &conn->dcid.current.cid, &fr, - NGTCP2_RTB_ENTRY_FLAG_NONE, NULL, ts); + conn, pi, dest, destlen, pkt_type, flags, &conn->dcid.current.cid, &fr, + NGTCP2_RTB_ENTRY_FLAG_NONE, NULL, ts); if (nwrite < 0) { return nwrite; @@ -12212,9 +12260,9 @@ conn_write_connection_close(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, } ngtcp2_ssize ngtcp2_conn_write_connection_close_pkt( - ngtcp2_conn *conn, ngtcp2_path *path, ngtcp2_pkt_info *pi, uint8_t *dest, - size_t destlen, uint64_t error_code, const uint8_t *reason, - size_t reasonlen, ngtcp2_tstamp ts) { + ngtcp2_conn *conn, ngtcp2_path *path, ngtcp2_pkt_info *pi, uint8_t *dest, + size_t destlen, uint64_t error_code, const uint8_t *reason, size_t reasonlen, + ngtcp2_tstamp ts) { ngtcp2_pktns *in_pktns = conn->in_pktns; ngtcp2_pktns *hs_pktns = conn->hs_pktns; uint8_t pkt_type; @@ -12247,7 +12295,7 @@ ngtcp2_ssize ngtcp2_conn_write_connection_close_pkt( if (conn->server) { server_tx_left = conn_server_tx_left(conn, &conn->dcid.current); - destlen = (size_t)ngtcp2_min((uint64_t)destlen, server_tx_left); + destlen = (size_t)ngtcp2_min_uint64((uint64_t)destlen, server_tx_left); } if (conn->state == NGTCP2_CS_POST_HANDSHAKE || @@ -12275,9 +12323,9 @@ ngtcp2_ssize ngtcp2_conn_write_connection_close_pkt( } ngtcp2_ssize ngtcp2_conn_write_application_close_pkt( - ngtcp2_conn *conn, ngtcp2_path *path, ngtcp2_pkt_info *pi, uint8_t *dest, - size_t destlen, uint64_t app_error_code, const uint8_t *reason, - size_t reasonlen, ngtcp2_tstamp ts) { + ngtcp2_conn *conn, ngtcp2_path *path, ngtcp2_pkt_info *pi, uint8_t *dest, + size_t destlen, uint64_t app_error_code, const uint8_t *reason, + size_t reasonlen, ngtcp2_tstamp ts) { ngtcp2_ssize nwrite; ngtcp2_ssize res = 0; ngtcp2_frame fr; @@ -12309,15 +12357,14 @@ ngtcp2_ssize ngtcp2_conn_write_application_close_pkt( if (conn->server) { server_tx_left = conn_server_tx_left(conn, &conn->dcid.current); - destlen = (size_t)ngtcp2_min((uint64_t)destlen, server_tx_left); + destlen = (size_t)ngtcp2_min_uint64((uint64_t)destlen, server_tx_left); } if (!(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED)) { - nwrite = conn_write_connection_close(conn, pi, dest, destlen, - conn->hs_pktns->crypto.tx.ckm - ? NGTCP2_PKT_HANDSHAKE - : NGTCP2_PKT_INITIAL, - NGTCP2_APPLICATION_ERROR, NULL, 0, ts); + nwrite = conn_write_connection_close( + conn, pi, dest, destlen, + conn->hs_pktns->crypto.tx.ckm ? NGTCP2_PKT_HANDSHAKE : NGTCP2_PKT_INITIAL, + NGTCP2_APPLICATION_ERROR, NULL, 0, ts); if (nwrite < 0) { return nwrite; } @@ -12326,12 +12373,9 @@ ngtcp2_ssize ngtcp2_conn_write_application_close_pkt( destlen -= (size_t)nwrite; } - if (conn->state != NGTCP2_CS_POST_HANDSHAKE) { - assert(res); - - if (!conn->server || !conn->pktns.crypto.tx.ckm) { - return res; - } + if (conn->state != NGTCP2_CS_POST_HANDSHAKE && + (!conn->server || !conn->pktns.crypto.tx.ckm)) { + return res; } assert(conn->pktns.crypto.tx.ckm); @@ -12343,8 +12387,8 @@ ngtcp2_ssize ngtcp2_conn_write_application_close_pkt( fr.connection_close.reason = (uint8_t *)reason; nwrite = ngtcp2_conn_write_single_frame_pkt( - conn, pi, dest, destlen, NGTCP2_PKT_1RTT, NGTCP2_WRITE_PKT_FLAG_NONE, - &conn->dcid.current.cid, &fr, NGTCP2_RTB_ENTRY_FLAG_NONE, NULL, ts); + conn, pi, dest, destlen, NGTCP2_PKT_1RTT, NGTCP2_WRITE_PKT_FLAG_NONE, + &conn->dcid.current.cid, &fr, NGTCP2_RTB_ENTRY_FLAG_NONE, NULL, ts); if (nwrite < 0) { return nwrite; @@ -12392,12 +12436,22 @@ void ngtcp2_ccerr_set_liberr(ngtcp2_ccerr *ccerr, int liberr, ccerr_init(ccerr, NGTCP2_CCERR_TYPE_IDLE_CLOSE, NGTCP2_NO_ERROR, reason, reasonlen); + return; + case NGTCP2_ERR_DROP_CONN: + ccerr_init(ccerr, NGTCP2_CCERR_TYPE_DROP_CONN, NGTCP2_NO_ERROR, reason, + reasonlen); + + return; + case NGTCP2_ERR_RETRY: + ccerr_init(ccerr, NGTCP2_CCERR_TYPE_RETRY, NGTCP2_NO_ERROR, reason, + reasonlen); + return; }; ngtcp2_ccerr_set_transport_error( - ccerr, ngtcp2_err_infer_quic_transport_error_code(liberr), reason, - reasonlen); + ccerr, ngtcp2_err_infer_quic_transport_error_code(liberr), reason, + reasonlen); } void ngtcp2_ccerr_set_tls_alert(ngtcp2_ccerr *ccerr, uint8_t tls_alert, @@ -12415,9 +12469,9 @@ void ngtcp2_ccerr_set_application_error(ngtcp2_ccerr *ccerr, } ngtcp2_ssize ngtcp2_conn_write_connection_close_versioned( - ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, - ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, - const ngtcp2_ccerr *ccerr, ngtcp2_tstamp ts) { + ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, const ngtcp2_ccerr *ccerr, + ngtcp2_tstamp ts) { (void)pkt_info_version; conn_update_timestamp(conn, ts); @@ -12425,12 +12479,12 @@ ngtcp2_ssize ngtcp2_conn_write_connection_close_versioned( switch (ccerr->type) { case NGTCP2_CCERR_TYPE_TRANSPORT: return ngtcp2_conn_write_connection_close_pkt( - conn, path, pi, dest, destlen, ccerr->error_code, ccerr->reason, - ccerr->reasonlen, ts); + conn, path, pi, dest, destlen, ccerr->error_code, ccerr->reason, + ccerr->reasonlen, ts); case NGTCP2_CCERR_TYPE_APPLICATION: return ngtcp2_conn_write_application_close_pkt( - conn, path, pi, dest, destlen, ccerr->error_code, ccerr->reason, - ccerr->reasonlen, ts); + conn, path, pi, dest, destlen, ccerr->error_code, ccerr->reason, + ccerr->reasonlen, ts); default: return 0; } @@ -12471,7 +12525,7 @@ int ngtcp2_conn_close_stream(ngtcp2_conn *conn, ngtcp2_strm *strm) { int ngtcp2_conn_close_stream_if_shut_rdwr(ngtcp2_conn *conn, ngtcp2_strm *strm) { if ((strm->flags & NGTCP2_STRM_FLAG_SHUT_RDWR) == - NGTCP2_STRM_FLAG_SHUT_RDWR && + NGTCP2_STRM_FLAG_SHUT_RDWR && ((strm->flags & NGTCP2_STRM_FLAG_RESET_STREAM_RECVED) || ngtcp2_strm_rx_offset(strm) == strm->rx.last_offset) && (((strm->flags & NGTCP2_STRM_FLAG_RESET_STREAM) && @@ -12537,7 +12591,7 @@ static int conn_shutdown_stream_read(ngtcp2_conn *conn, ngtcp2_strm *strm, which are not passed to application. */ if (!(strm->flags & NGTCP2_STRM_FLAG_RESET_STREAM_RECVED)) { ngtcp2_conn_extend_max_offset(conn, strm->rx.last_offset - - ngtcp2_strm_rx_offset(strm)); + ngtcp2_strm_rx_offset(strm)); } strm->flags |= NGTCP2_STRM_FLAG_STOP_SENDING; @@ -12720,20 +12774,20 @@ static void conn_discard_early_data_state(ngtcp2_conn *conn) { ngtcp2_rtb_remove_early_data(&conn->pktns.rtb, &conn->cstat); - ngtcp2_map_each_free(&conn->strms, delete_strms_pq_each, conn); + ngtcp2_map_each(&conn->strms, delete_strms_pq_each, conn); ngtcp2_map_clear(&conn->strms); conn->tx.offset = 0; conn->tx.last_blocked_offset = UINT64_MAX; conn->rx.unsent_max_offset = conn->rx.max_offset = - conn->local.transport_params.initial_max_data; + conn->local.transport_params.initial_max_data; conn->remote.bidi.unsent_max_streams = conn->remote.bidi.max_streams = - conn->local.transport_params.initial_max_streams_bidi; + conn->local.transport_params.initial_max_streams_bidi; conn->remote.uni.unsent_max_streams = conn->remote.uni.max_streams = - conn->local.transport_params.initial_max_streams_uni; + conn->local.transport_params.initial_max_streams_uni; if (conn->server) { conn->local.bidi.next_stream_id = 1; @@ -12778,6 +12832,8 @@ int ngtcp2_conn_update_rtt(ngtcp2_conn *conn, ngtcp2_duration rtt, ngtcp2_duration ack_delay, ngtcp2_tstamp ts) { ngtcp2_conn_stat *cstat = &conn->cstat; + assert(rtt > 0); + if (cstat->min_rtt == UINT64_MAX) { cstat->latest_rtt = rtt; cstat->min_rtt = rtt; @@ -12788,43 +12844,43 @@ int ngtcp2_conn_update_rtt(ngtcp2_conn *conn, ngtcp2_duration rtt, if (conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED) { assert(conn->remote.transport_params); - ack_delay = - ngtcp2_min(ack_delay, conn->remote.transport_params->max_ack_delay); + ack_delay = ngtcp2_min_uint64( + ack_delay, conn->remote.transport_params->max_ack_delay); } else if (ack_delay > 0 && rtt >= cstat->min_rtt && rtt < cstat->min_rtt + ack_delay) { /* Ignore RTT sample if adjusting ack_delay causes the sample less than min_rtt before handshake confirmation. */ ngtcp2_log_info( - &conn->log, NGTCP2_LOG_EVENT_LDC, - "ignore rtt sample because ack_delay is too large latest_rtt=%" PRIu64 - " min_rtt=%" PRIu64 " ack_delay=%" PRIu64, - rtt / NGTCP2_MILLISECONDS, cstat->min_rtt / NGTCP2_MILLISECONDS, - ack_delay / NGTCP2_MILLISECONDS); + &conn->log, NGTCP2_LOG_EVENT_LDC, + "ignore rtt sample because ack_delay is too large latest_rtt=%" PRIu64 + " min_rtt=%" PRIu64 " ack_delay=%" PRIu64, + rtt / NGTCP2_MILLISECONDS, cstat->min_rtt / NGTCP2_MILLISECONDS, + ack_delay / NGTCP2_MILLISECONDS); return NGTCP2_ERR_INVALID_ARGUMENT; } cstat->latest_rtt = rtt; - cstat->min_rtt = ngtcp2_min(cstat->min_rtt, rtt); + cstat->min_rtt = ngtcp2_min_uint64(cstat->min_rtt, rtt); if (rtt >= cstat->min_rtt + ack_delay) { rtt -= ack_delay; } cstat->rttvar = (cstat->rttvar * 3 + (cstat->smoothed_rtt < rtt - ? rtt - cstat->smoothed_rtt - : cstat->smoothed_rtt - rtt)) / + ? rtt - cstat->smoothed_rtt + : cstat->smoothed_rtt - rtt)) / 4; cstat->smoothed_rtt = (cstat->smoothed_rtt * 7 + rtt) / 8; } ngtcp2_log_info( - &conn->log, NGTCP2_LOG_EVENT_LDC, - "latest_rtt=%" PRIu64 " min_rtt=%" PRIu64 " smoothed_rtt=%" PRIu64 - " rttvar=%" PRIu64 " ack_delay=%" PRIu64, - cstat->latest_rtt / NGTCP2_MILLISECONDS, - cstat->min_rtt / NGTCP2_MILLISECONDS, - cstat->smoothed_rtt / NGTCP2_MILLISECONDS, - cstat->rttvar / NGTCP2_MILLISECONDS, ack_delay / NGTCP2_MILLISECONDS); + &conn->log, NGTCP2_LOG_EVENT_LDC, + "latest_rtt=%" PRIu64 " min_rtt=%" PRIu64 " smoothed_rtt=%" PRIu64 + " rttvar=%" PRIu64 " ack_delay=%" PRIu64, + cstat->latest_rtt / NGTCP2_MILLISECONDS, + cstat->min_rtt / NGTCP2_MILLISECONDS, + cstat->smoothed_rtt / NGTCP2_MILLISECONDS, + cstat->rttvar / NGTCP2_MILLISECONDS, ack_delay / NGTCP2_MILLISECONDS); return 0; } @@ -12879,8 +12935,8 @@ static ngtcp2_tstamp conn_get_earliest_pto_expiry(ngtcp2_conn *conn, ngtcp2_conn_stat *cstat = &conn->cstat; ngtcp2_tstamp *times = cstat->last_tx_pkt_ts; ngtcp2_duration duration = - compute_pto(cstat->smoothed_rtt, cstat->rttvar, /* max_ack_delay = */ 0) * - (1ULL << cstat->pto_count); + compute_pto(cstat->smoothed_rtt, cstat->rttvar, /* max_ack_delay = */ 0) * + (1ULL << cstat->pto_count); for (i = NGTCP2_PKTNS_ID_INITIAL; i < NGTCP2_PKTNS_ID_MAX; ++i) { if (ns[i] == NULL || ns[i]->rtb.num_pto_eliciting == 0 || @@ -12939,8 +12995,7 @@ void ngtcp2_conn_set_loss_detection_timer(ngtcp2_conn *conn, ngtcp2_tstamp ts) { if (cstat->loss_detection_timer != UINT64_MAX) { ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_LDC, "loss detection timer canceled"); - cstat->loss_detection_timer = UINT64_MAX; - cstat->pto_count = 0; + ngtcp2_conn_cancel_loss_detection_timer(conn); } return; } @@ -12948,13 +13003,20 @@ void ngtcp2_conn_set_loss_detection_timer(ngtcp2_conn *conn, ngtcp2_tstamp ts) { cstat->loss_detection_timer = conn_get_earliest_pto_expiry(conn, ts); timeout = - cstat->loss_detection_timer > ts ? cstat->loss_detection_timer - ts : 0; + cstat->loss_detection_timer > ts ? cstat->loss_detection_timer - ts : 0; ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_LDC, "loss_detection_timer=%" PRIu64 " timeout=%" PRIu64, cstat->loss_detection_timer, timeout / NGTCP2_MILLISECONDS); } +void ngtcp2_conn_cancel_loss_detection_timer(ngtcp2_conn *conn) { + ngtcp2_conn_stat *cstat = &conn->cstat; + + cstat->loss_detection_timer = UINT64_MAX; + cstat->pto_count = 0; +} + int ngtcp2_conn_on_loss_detection_timer(ngtcp2_conn *conn, ngtcp2_tstamp ts) { ngtcp2_conn_stat *cstat = &conn->cstat; int rv; @@ -12966,8 +13028,7 @@ int ngtcp2_conn_on_loss_detection_timer(ngtcp2_conn *conn, ngtcp2_tstamp ts) { switch (conn->state) { case NGTCP2_CS_CLOSING: case NGTCP2_CS_DRAINING: - cstat->loss_detection_timer = UINT64_MAX; - cstat->pto_count = 0; + ngtcp2_conn_cancel_loss_detection_timer(conn); return 0; default: break; @@ -13042,7 +13103,8 @@ static int conn_buffer_crypto_data(ngtcp2_conn *conn, const uint8_t **pdata, } if (!*pbufchain) { - rv = ngtcp2_buf_chain_new(pbufchain, ngtcp2_max(1024, datalen), conn->mem); + rv = ngtcp2_buf_chain_new(pbufchain, ngtcp2_max_size(1024, datalen), + conn->mem); if (rv != 0) { return rv; } @@ -13125,7 +13187,7 @@ int ngtcp2_conn_submit_new_token(ngtcp2_conn *conn, const uint8_t *token, assert(tokenlen); rv = ngtcp2_frame_chain_new_token_objalloc_new( - &nfrc, token, tokenlen, &conn->frc_objalloc, conn->mem); + &nfrc, token, tokenlen, &conn->frc_objalloc, conn->mem); if (rv != 0) { return rv; } @@ -13208,7 +13270,7 @@ static void copy_dcid_to_cid_token(ngtcp2_cid_token *dest, dest->cid = src->cid; ngtcp2_path_storage_init2(&dest->ps, &src->ps.path); if ((dest->token_present = - (src->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) != 0)) { + (src->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) != 0)) { memcpy(dest->token, src->token, NGTCP2_STATELESS_RESET_TOKENLEN); } } @@ -13411,8 +13473,8 @@ uint64_t ngtcp2_conn_get_streams_bidi_left(ngtcp2_conn *conn) { uint64_t n = ngtcp2_ord_stream_id(conn->local.bidi.next_stream_id); return n > conn->local.bidi.max_streams - ? 0 - : conn->local.bidi.max_streams - n + 1; + ? 0 + : conn->local.bidi.max_streams - n + 1; } uint64_t ngtcp2_conn_get_streams_uni_left(ngtcp2_conn *conn) { @@ -13444,7 +13506,7 @@ ngtcp2_tstamp ngtcp2_conn_get_idle_expiry(ngtcp2_conn *conn) { conn->remote.transport_params->max_idle_timeout == 0 || (conn->local.transport_params.max_idle_timeout && conn->local.transport_params.max_idle_timeout < - conn->remote.transport_params->max_idle_timeout)) { + conn->remote.transport_params->max_idle_timeout)) { idle_timeout = conn->local.transport_params.max_idle_timeout; } else { idle_timeout = conn->remote.transport_params->max_idle_timeout; @@ -13455,10 +13517,10 @@ ngtcp2_tstamp ngtcp2_conn_get_idle_expiry(ngtcp2_conn *conn) { } trpto = 3 * conn_compute_pto(conn, conn_is_tls_handshake_completed(conn) - ? &conn->pktns - : conn->hs_pktns); + ? &conn->pktns + : conn->hs_pktns); - idle_timeout = ngtcp2_max(idle_timeout, trpto); + idle_timeout = ngtcp2_max_uint64(idle_timeout, trpto); if (conn->idle_ts >= UINT64_MAX - idle_timeout) { return UINT64_MAX; @@ -13469,8 +13531,8 @@ ngtcp2_tstamp ngtcp2_conn_get_idle_expiry(ngtcp2_conn *conn) { ngtcp2_duration ngtcp2_conn_get_pto(ngtcp2_conn *conn) { return conn_compute_pto(conn, conn_is_tls_handshake_completed(conn) - ? &conn->pktns - : conn->hs_pktns); + ? &conn->pktns + : conn->hs_pktns); } void ngtcp2_conn_set_initial_crypto_ctx(ngtcp2_conn *conn, @@ -13581,8 +13643,8 @@ void ngtcp2_conn_update_pkt_tx_time(ngtcp2_conn *conn, ngtcp2_tstamp ts) { /* 1.25 is the under-utilization avoidance factor described in https://datatracker.ietf.org/doc/html/rfc9002#section-7.7 */ pacing_interval = (conn->cstat.first_rtt_sample_ts == UINT64_MAX - ? NGTCP2_MILLISECONDS - : conn->cstat.smoothed_rtt) * + ? NGTCP2_MILLISECONDS + : conn->cstat.smoothed_rtt) * 100 / 125 / conn->cstat.cwnd; } @@ -13597,20 +13659,11 @@ size_t ngtcp2_conn_get_send_quantum(ngtcp2_conn *conn) { } int ngtcp2_conn_track_retired_dcid_seq(ngtcp2_conn *conn, uint64_t seq) { - size_t i; - if (conn->dcid.retire_unacked.len >= ngtcp2_arraylen(conn->dcid.retire_unacked.seqs)) { return NGTCP2_ERR_CONNECTION_ID_LIMIT; } - /* Make sure that we do not have a duplicate */ - for (i = 0; i < conn->dcid.retire_unacked.len; ++i) { - if (conn->dcid.retire_unacked.seqs[i] == seq) { - ngtcp2_unreachable(); - } - } - conn->dcid.retire_unacked.seqs[conn->dcid.retire_unacked.len++] = seq; return 0; @@ -13626,7 +13679,7 @@ void ngtcp2_conn_untrack_retired_dcid_seq(ngtcp2_conn *conn, uint64_t seq) { if (i != conn->dcid.retire_unacked.len - 1) { conn->dcid.retire_unacked.seqs[i] = - conn->dcid.retire_unacked.seqs[conn->dcid.retire_unacked.len - 1]; + conn->dcid.retire_unacked.seqs[conn->dcid.retire_unacked.len - 1]; } --conn->dcid.retire_unacked.len; @@ -13635,6 +13688,18 @@ void ngtcp2_conn_untrack_retired_dcid_seq(ngtcp2_conn *conn, uint64_t seq) { } } +int ngtcp2_conn_check_retired_dcid_tracked(ngtcp2_conn *conn, uint64_t seq) { + size_t i; + + for (i = 0; i < conn->dcid.retire_unacked.len; ++i) { + if (conn->dcid.retire_unacked.seqs[i] == seq) { + return 1; + } + } + + return 0; +} + size_t ngtcp2_conn_get_stream_loss_count(ngtcp2_conn *conn, int64_t stream_id) { ngtcp2_strm *strm = ngtcp2_conn_find_stream(conn, stream_id); @@ -13652,56 +13717,17 @@ void ngtcp2_path_challenge_entry_init(ngtcp2_path_challenge_entry *pcent, memcpy(pcent->data, data, sizeof(pcent->data)); } -void ngtcp2_settings_default_versioned(int settings_version, - ngtcp2_settings *settings) { - (void)settings_version; - - memset(settings, 0, sizeof(*settings)); - settings->cc_algo = NGTCP2_CC_ALGO_CUBIC; - settings->initial_rtt = NGTCP2_DEFAULT_INITIAL_RTT; - settings->ack_thresh = 2; - settings->max_tx_udp_payload_size = 1500 - 48; - settings->handshake_timeout = UINT64_MAX; -} - -void ngtcp2_transport_params_default_versioned( - int transport_params_version, ngtcp2_transport_params *params) { - size_t len; - - switch (transport_params_version) { - case NGTCP2_TRANSPORT_PARAMS_VERSION: - len = sizeof(*params); - - break; - default: - ngtcp2_unreachable(); - } - - memset(params, 0, len); - - switch (transport_params_version) { - case NGTCP2_TRANSPORT_PARAMS_VERSION: - params->max_udp_payload_size = NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE; - params->active_connection_id_limit = - NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT; - params->ack_delay_exponent = NGTCP2_DEFAULT_ACK_DELAY_EXPONENT; - params->max_ack_delay = NGTCP2_DEFAULT_MAX_ACK_DELAY; - - break; - } -} - /* The functions prefixed with ngtcp2_pkt_ are usually put inside ngtcp2_pkt.c. This function uses encryption construct and uses test data defined only in ngtcp2_conn_test.c, so it is written here. */ ngtcp2_ssize ngtcp2_pkt_write_connection_close( - uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid, - const ngtcp2_cid *scid, uint64_t error_code, const uint8_t *reason, - size_t reasonlen, ngtcp2_encrypt encrypt, const ngtcp2_crypto_aead *aead, - const ngtcp2_crypto_aead_ctx *aead_ctx, const uint8_t *iv, - ngtcp2_hp_mask hp_mask, const ngtcp2_crypto_cipher *hp, - const ngtcp2_crypto_cipher_ctx *hp_ctx) { + uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid, + const ngtcp2_cid *scid, uint64_t error_code, const uint8_t *reason, + size_t reasonlen, ngtcp2_encrypt encrypt, const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx, const uint8_t *iv, + ngtcp2_hp_mask hp_mask, const ngtcp2_crypto_cipher *hp, + const ngtcp2_crypto_cipher_ctx *hp_ctx) { ngtcp2_pkt_hd hd; ngtcp2_crypto_km ckm; ngtcp2_crypto_cc cc; @@ -13726,7 +13752,7 @@ ngtcp2_ssize ngtcp2_pkt_write_connection_close( cc.encrypt = encrypt; cc.hp_mask = hp_mask; - ngtcp2_ppe_init(&ppe, dest, destlen, &cc); + ngtcp2_ppe_init(&ppe, dest, destlen, 0, &cc); rv = ngtcp2_ppe_encode_hd(&ppe, &hd); if (rv != 0) { diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_conn.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_conn.h index 4ed67876bc3749..55073fcc828d73 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_conn.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_conn.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -65,9 +65,6 @@ typedef enum { NGTCP2_CS_DRAINING, } ngtcp2_conn_state; -/* NGTCP2_MAX_STREAMS is the maximum number of streams. */ -#define NGTCP2_MAX_STREAMS (1LL << 60) - /* NGTCP2_MAX_NUM_BUFFED_RX_PKTS is the maximum number of buffered reordered packets. */ #define NGTCP2_MAX_NUM_BUFFED_RX_PKTS 4 @@ -77,15 +74,6 @@ typedef enum { unreceived data. */ #define NGTCP2_MAX_REORDERED_CRYPTO_DATA 65536 -/* NGTCP2_MAX_RX_INITIAL_CRYPTO_DATA is the maximum offset of received - crypto stream in Initial packet. We set this hard limit here - because crypto stream is unbounded. */ -#define NGTCP2_MAX_RX_INITIAL_CRYPTO_DATA 65536 -/* NGTCP2_MAX_RX_HANDSHAKE_CRYPTO_DATA is the maximum offset of - received crypto stream in Handshake packet. We set this hard limit - here because crypto stream is unbounded. */ -#define NGTCP2_MAX_RX_HANDSHAKE_CRYPTO_DATA 65536 - /* NGTCP2_MAX_RETRIES is the number of Retry packet which client can accept. */ #define NGTCP2_MAX_RETRIES 3 @@ -121,12 +109,18 @@ typedef enum { /* NGTCP2_WRITE_PKT_FLAG_NONE indicates that no flag is set. */ #define NGTCP2_WRITE_PKT_FLAG_NONE 0x00u /* NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING indicates that packet other - than Initial packet should be padded. Initial packet might be - padded based on QUIC requirement regardless of this flag. */ + than Initial packet should be padded so that UDP datagram payload + is at least NGTCP2_MAX_UDP_PAYLOAD_SIZE bytes. Initial packet + might be padded based on QUIC requirement regardless of this + flag. */ #define NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING 0x01u /* NGTCP2_WRITE_PKT_FLAG_MORE indicates that more frames might come and it should be encoded into the current packet. */ #define NGTCP2_WRITE_PKT_FLAG_MORE 0x02u +/* NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING_FULL is just like + NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING, but it requests to add + padding to the full UDP datagram payload size. */ +#define NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING_FULL 0x04u /* * ngtcp2_max_frame is defined so that it covers the largest ACK @@ -319,6 +313,7 @@ typedef struct ngtcp2_pktns { ngtcp2_acktr acktr; ngtcp2_rtb rtb; + ngtcp2_pktns_id id; } ngtcp2_pktns; typedef enum ngtcp2_ecn_state { @@ -342,15 +337,15 @@ typedef struct ngtcp2_early_transport_params { } ngtcp2_early_transport_params; ngtcp2_static_ringbuf_def(dcid_bound, NGTCP2_MAX_BOUND_DCID_POOL_SIZE, - sizeof(ngtcp2_dcid)); + sizeof(ngtcp2_dcid)) ngtcp2_static_ringbuf_def(dcid_unused, NGTCP2_MAX_DCID_POOL_SIZE, - sizeof(ngtcp2_dcid)); + sizeof(ngtcp2_dcid)) ngtcp2_static_ringbuf_def(dcid_retired, NGTCP2_MAX_DCID_RETIRED_SIZE, - sizeof(ngtcp2_dcid)); + sizeof(ngtcp2_dcid)) ngtcp2_static_ringbuf_def(path_challenge, 4, - sizeof(ngtcp2_path_challenge_entry)); + sizeof(ngtcp2_path_challenge_entry)) -ngtcp2_objalloc_decl(strm, ngtcp2_strm, oplent); +ngtcp2_objalloc_decl(strm, ngtcp2_strm, oplent) struct ngtcp2_conn { ngtcp2_objalloc frc_objalloc; @@ -813,6 +808,8 @@ int ngtcp2_conn_update_rtt(ngtcp2_conn *conn, ngtcp2_duration rtt, void ngtcp2_conn_set_loss_detection_timer(ngtcp2_conn *conn, ngtcp2_tstamp ts); +void ngtcp2_conn_cancel_loss_detection_timer(ngtcp2_conn *conn); + int ngtcp2_conn_on_loss_detection_timer(ngtcp2_conn *conn, ngtcp2_tstamp ts); /* @@ -878,9 +875,9 @@ ngtcp2_ssize ngtcp2_conn_write_vmsg(ngtcp2_conn *conn, ngtcp2_path *path, * User-defined callback function failed. */ ngtcp2_ssize ngtcp2_conn_write_single_frame_pkt( - ngtcp2_conn *conn, ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, - uint8_t type, uint8_t flags, const ngtcp2_cid *dcid, ngtcp2_frame *fr, - uint16_t rtb_entry_flags, const ngtcp2_path *path, ngtcp2_tstamp ts); + ngtcp2_conn *conn, ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, + uint8_t type, uint8_t flags, const ngtcp2_cid *dcid, ngtcp2_frame *fr, + uint16_t rtb_entry_flags, const ngtcp2_path *path, ngtcp2_tstamp ts); /* * ngtcp2_conn_commit_local_transport_params commits the local @@ -973,6 +970,12 @@ int ngtcp2_conn_track_retired_dcid_seq(ngtcp2_conn *conn, uint64_t seq); */ void ngtcp2_conn_untrack_retired_dcid_seq(ngtcp2_conn *conn, uint64_t seq); +/* + * ngtcp2_conn_check_retired_dcid_tracked returns nonzero if |seq| has + * already been tracked. + */ +int ngtcp2_conn_check_retired_dcid_tracked(ngtcp2_conn *conn, uint64_t seq); + /* * ngtcp2_conn_server_negotiate_version negotiates QUIC version. It * is compatible version negotiation. It returns the negotiated QUIC @@ -1020,9 +1023,9 @@ ngtcp2_conn_server_negotiate_version(ngtcp2_conn *conn, * User callback failed */ ngtcp2_ssize ngtcp2_conn_write_connection_close_pkt( - ngtcp2_conn *conn, ngtcp2_path *path, ngtcp2_pkt_info *pi, uint8_t *dest, - size_t destlen, uint64_t error_code, const uint8_t *reason, - size_t reasonlen, ngtcp2_tstamp ts); + ngtcp2_conn *conn, ngtcp2_path *path, ngtcp2_pkt_info *pi, uint8_t *dest, + size_t destlen, uint64_t error_code, const uint8_t *reason, size_t reasonlen, + ngtcp2_tstamp ts); /** * @function @@ -1066,9 +1069,9 @@ ngtcp2_ssize ngtcp2_conn_write_connection_close_pkt( * User callback failed */ ngtcp2_ssize ngtcp2_conn_write_application_close_pkt( - ngtcp2_conn *conn, ngtcp2_path *path, ngtcp2_pkt_info *pi, uint8_t *dest, - size_t destlen, uint64_t app_error_code, const uint8_t *reason, - size_t reasonlen, ngtcp2_tstamp ts); + ngtcp2_conn *conn, ngtcp2_path *path, ngtcp2_pkt_info *pi, uint8_t *dest, + size_t destlen, uint64_t app_error_code, const uint8_t *reason, + size_t reasonlen, ngtcp2_tstamp ts); int ngtcp2_conn_start_pmtud(ngtcp2_conn *conn); @@ -1093,7 +1096,7 @@ void ngtcp2_conn_stop_pmtud(ngtcp2_conn *conn); * Out of memory. */ int ngtcp2_conn_set_remote_transport_params( - ngtcp2_conn *conn, const ngtcp2_transport_params *params); + ngtcp2_conn *conn, const ngtcp2_transport_params *params); /** * @function @@ -1132,7 +1135,7 @@ int ngtcp2_conn_set_remote_transport_params( * Out of memory. */ int ngtcp2_conn_set_0rtt_remote_transport_params( - ngtcp2_conn *conn, const ngtcp2_transport_params *params); + ngtcp2_conn *conn, const ngtcp2_transport_params *params); /* * ngtcp2_conn_create_ack_frame creates ACK frame, and assigns its @@ -1156,4 +1159,16 @@ int ngtcp2_conn_create_ack_frame(ngtcp2_conn *conn, ngtcp2_frame **pfr, ngtcp2_tstamp ts, ngtcp2_duration ack_delay, uint64_t ack_delay_exponent); -#endif /* NGTCP2_CONN_H */ +/* + * ngtcp2_conn_discard_initial_state discards state for Initial packet + * number space. + */ +void ngtcp2_conn_discard_initial_state(ngtcp2_conn *conn, ngtcp2_tstamp ts); + +/* + * ngtcp2_conn_discard_handshake_state discards state for Handshake + * packet number space. + */ +void ngtcp2_conn_discard_handshake_state(ngtcp2_conn *conn, ngtcp2_tstamp ts); + +#endif /* !defined(NGTCP2_CONN_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_conn_stat.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_conn_stat.h index 1a93867aab3cae..ad2b7329f48df2 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_conn_stat.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_conn_stat.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -105,8 +105,9 @@ typedef struct ngtcp2_conn_stat { uint64_t bytes_in_flight; /** * :member:`max_tx_udp_payload_size` is the maximum size of UDP - * datagram payload that this endpoint transmits. It is used by - * congestion controller to compute congestion window. + * datagram payload that this endpoint transmits to the current + * path. It is used by congestion controller to compute congestion + * window. */ size_t max_tx_udp_payload_size; /** @@ -129,4 +130,4 @@ typedef struct ngtcp2_conn_stat { size_t send_quantum; } ngtcp2_conn_stat; -#endif /* NGTCP2_CONN_STAT_H */ +#endif /* !defined(NGTCP2_CONN_STAT_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_conv.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_conv.c index 336721772b4e4c..6528011cc0edf4 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_conv.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_conv.c @@ -32,47 +32,37 @@ #include "ngtcp2_net.h" #include "ngtcp2_unreachable.h" -const uint8_t *ngtcp2_get_uint64(uint64_t *dest, const uint8_t *p) { - uint64_t n; - memcpy(&n, p, sizeof(n)); - *dest = ngtcp2_ntohl64(n); - return p + sizeof(n); -} - -const uint8_t *ngtcp2_get_uint48(uint64_t *dest, const uint8_t *p) { - uint64_t n = 0; - memcpy(((uint8_t *)&n) + 2, p, 6); - *dest = ngtcp2_ntohl64(n); - return p + 6; +const uint8_t *ngtcp2_get_uint64be(uint64_t *dest, const uint8_t *p) { + memcpy(dest, p, sizeof(*dest)); + *dest = ngtcp2_ntohl64(*dest); + return p + sizeof(*dest); } -const uint8_t *ngtcp2_get_uint32(uint32_t *dest, const uint8_t *p) { - uint32_t n; - memcpy(&n, p, sizeof(n)); - *dest = ngtcp2_ntohl(n); - return p + sizeof(n); +const uint8_t *ngtcp2_get_uint32be(uint32_t *dest, const uint8_t *p) { + memcpy(dest, p, sizeof(*dest)); + *dest = ngtcp2_ntohl(*dest); + return p + sizeof(*dest); } -const uint8_t *ngtcp2_get_uint24(uint32_t *dest, const uint8_t *p) { - uint32_t n = 0; - memcpy(((uint8_t *)&n) + 1, p, 3); - *dest = ngtcp2_ntohl(n); +const uint8_t *ngtcp2_get_uint24be(uint32_t *dest, const uint8_t *p) { + *dest = 0; + memcpy(((uint8_t *)dest) + 1, p, 3); + *dest = ngtcp2_ntohl(*dest); return p + 3; } -const uint8_t *ngtcp2_get_uint16(uint16_t *dest, const uint8_t *p) { - uint16_t n; - memcpy(&n, p, sizeof(n)); - *dest = ngtcp2_ntohs(n); - return p + sizeof(n); +const uint8_t *ngtcp2_get_uint16be(uint16_t *dest, const uint8_t *p) { + memcpy(dest, p, sizeof(*dest)); + *dest = ngtcp2_ntohs(*dest); + return p + sizeof(*dest); } -const uint8_t *ngtcp2_get_uint16be(uint16_t *dest, const uint8_t *p) { +const uint8_t *ngtcp2_get_uint16(uint16_t *dest, const uint8_t *p) { memcpy(dest, p, sizeof(*dest)); return p + sizeof(*dest); } -static uint64_t get_uvarint(size_t *plen, const uint8_t *p) { +static const uint8_t *get_uvarint(uint64_t *dest, const uint8_t *p) { union { uint8_t n8; uint16_t n16; @@ -80,42 +70,39 @@ static uint64_t get_uvarint(size_t *plen, const uint8_t *p) { uint64_t n64; } n; - *plen = (size_t)(1u << (*p >> 6)); - - switch (*plen) { + switch (*p >> 6) { + case 0: + *dest = *p++; + return p; case 1: - return *p; - case 2: memcpy(&n, p, 2); n.n8 &= 0x3f; - return ngtcp2_ntohs(n.n16); - case 4: + *dest = ngtcp2_ntohs(n.n16); + + return p + 2; + case 2: memcpy(&n, p, 4); n.n8 &= 0x3f; - return ngtcp2_ntohl(n.n32); - case 8: + *dest = ngtcp2_ntohl(n.n32); + + return p + 4; + case 3: memcpy(&n, p, 8); n.n8 &= 0x3f; - return ngtcp2_ntohl64(n.n64); + *dest = ngtcp2_ntohl64(n.n64); + + return p + 8; default: ngtcp2_unreachable(); } } const uint8_t *ngtcp2_get_uvarint(uint64_t *dest, const uint8_t *p) { - size_t len; - - *dest = get_uvarint(&len, p); - - return p + len; + return get_uvarint(dest, p); } const uint8_t *ngtcp2_get_varint(int64_t *dest, const uint8_t *p) { - size_t len; - - *dest = (int64_t)get_uvarint(&len, p); - - return p + len; + return get_uvarint((uint64_t *)dest, p); } int64_t ngtcp2_get_pkt_num(const uint8_t *p, size_t pkt_numlen) { @@ -126,13 +113,13 @@ int64_t ngtcp2_get_pkt_num(const uint8_t *p, size_t pkt_numlen) { case 1: return *p; case 2: - ngtcp2_get_uint16(&s, p); + ngtcp2_get_uint16be(&s, p); return (int64_t)s; case 3: - ngtcp2_get_uint24(&l, p); + ngtcp2_get_uint24be(&l, p); return (int64_t)l; case 4: - ngtcp2_get_uint32(&l, p); + ngtcp2_get_uint32be(&l, p); return (int64_t)l; default: ngtcp2_unreachable(); @@ -144,11 +131,6 @@ uint8_t *ngtcp2_put_uint64be(uint8_t *p, uint64_t n) { return ngtcp2_cpymem(p, (const uint8_t *)&n, sizeof(n)); } -uint8_t *ngtcp2_put_uint48be(uint8_t *p, uint64_t n) { - n = ngtcp2_htonl64(n); - return ngtcp2_cpymem(p, ((const uint8_t *)&n) + 2, 6); -} - uint8_t *ngtcp2_put_uint32be(uint8_t *p, uint32_t n) { n = ngtcp2_htonl(n); return ngtcp2_cpymem(p, (const uint8_t *)&n, sizeof(n)); @@ -207,14 +189,11 @@ uint8_t *ngtcp2_put_pkt_num(uint8_t *p, int64_t pkt_num, size_t len) { *p++ = (uint8_t)pkt_num; return p; case 2: - ngtcp2_put_uint16be(p, (uint16_t)pkt_num); - return p + 2; + return ngtcp2_put_uint16be(p, (uint16_t)pkt_num); case 3: - ngtcp2_put_uint24be(p, (uint32_t)pkt_num); - return p + 3; + return ngtcp2_put_uint24be(p, (uint32_t)pkt_num); case 4: - ngtcp2_put_uint32be(p, (uint32_t)pkt_num); - return p + 4; + return ngtcp2_put_uint32be(p, (uint32_t)pkt_num); default: ngtcp2_unreachable(); } @@ -238,54 +217,6 @@ size_t ngtcp2_put_uvarintlen(uint64_t n) { return 8; } -int64_t ngtcp2_nth_server_bidi_id(uint64_t n) { - if (n == 0) { - return 0; - } - - if ((NGTCP2_MAX_VARINT >> 2) < n - 1) { - return NGTCP2_MAX_SERVER_STREAM_ID_BIDI; - } - - return (int64_t)(((n - 1) << 2) | 0x01); -} - -int64_t ngtcp2_nth_client_bidi_id(uint64_t n) { - if (n == 0) { - return 0; - } - - if ((NGTCP2_MAX_VARINT >> 2) < n - 1) { - return NGTCP2_MAX_CLIENT_STREAM_ID_BIDI; - } - - return (int64_t)((n - 1) << 2); -} - -int64_t ngtcp2_nth_server_uni_id(uint64_t n) { - if (n == 0) { - return 0; - } - - if ((NGTCP2_MAX_VARINT >> 2) < n - 1) { - return NGTCP2_MAX_SERVER_STREAM_ID_UNI; - } - - return (int64_t)(((n - 1) << 2) | 0x03); -} - -int64_t ngtcp2_nth_client_uni_id(uint64_t n) { - if (n == 0) { - return 0; - } - - if ((NGTCP2_MAX_VARINT >> 2) < n - 1) { - return NGTCP2_MAX_CLIENT_STREAM_ID_UNI; - } - - return (int64_t)(((n - 1) << 2) | 0x02); -} - uint64_t ngtcp2_ord_stream_id(int64_t stream_id) { return (uint64_t)(stream_id >> 2) + 1; } diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_conv.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_conv.h index ef089a971a37f1..ad924683b8dc10 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_conv.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_conv.h @@ -27,51 +27,44 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include /* - * ngtcp2_get_uint64 reads 8 bytes from |p| as 64 bits unsigned + * ngtcp2_get_uint64be reads 8 bytes from |p| as 64 bits unsigned * integer encoded as network byte order, and stores it in the buffer * pointed by |dest| in host byte order. It returns |p| + 8. */ -const uint8_t *ngtcp2_get_uint64(uint64_t *dest, const uint8_t *p); +const uint8_t *ngtcp2_get_uint64be(uint64_t *dest, const uint8_t *p); /* - * ngtcp2_get_uint48 reads 6 bytes from |p| as 48 bits unsigned - * integer encoded as network byte order, and stores it in the buffer - * pointed by |dest| in host byte order. It returns |p| + 6. - */ -const uint8_t *ngtcp2_get_uint48(uint64_t *dest, const uint8_t *p); - -/* - * ngtcp2_get_uint32 reads 4 bytes from |p| as 32 bits unsigned + * ngtcp2_get_uint32be reads 4 bytes from |p| as 32 bits unsigned * integer encoded as network byte order, and stores it in the buffer * pointed by |dest| in host byte order. It returns |p| + 4. */ -const uint8_t *ngtcp2_get_uint32(uint32_t *dest, const uint8_t *p); +const uint8_t *ngtcp2_get_uint32be(uint32_t *dest, const uint8_t *p); /* - * ngtcp2_get_uint24 reads 3 bytes from |p| as 24 bits unsigned + * ngtcp2_get_uint24be reads 3 bytes from |p| as 24 bits unsigned * integer encoded as network byte order, and stores it in the buffer * pointed by |dest| in host byte order. It returns |p| + 3. */ -const uint8_t *ngtcp2_get_uint24(uint32_t *dest, const uint8_t *p); +const uint8_t *ngtcp2_get_uint24be(uint32_t *dest, const uint8_t *p); /* - * ngtcp2_get_uint16 reads 2 bytes from |p| as 16 bits unsigned + * ngtcp2_get_uint16be reads 2 bytes from |p| as 16 bits unsigned * integer encoded as network byte order, and stores it in the buffer * pointed by |dest| in host byte order. It returns |p| + 2. */ -const uint8_t *ngtcp2_get_uint16(uint16_t *dest, const uint8_t *p); +const uint8_t *ngtcp2_get_uint16be(uint16_t *dest, const uint8_t *p); /* - * ngtcp2_get_uint16be reads 2 bytes from |p| as 16 bits unsigned + * ngtcp2_get_uint16 reads 2 bytes from |p| as 16 bits unsigned * integer encoded as network byte order, and stores it in the buffer * pointed by |dest| as is. It returns |p| + 2. */ -const uint8_t *ngtcp2_get_uint16be(uint16_t *dest, const uint8_t *p); +const uint8_t *ngtcp2_get_uint16(uint16_t *dest, const uint8_t *p); /* * ngtcp2_get_uvarint reads variable-length unsigned integer from |p|, @@ -102,13 +95,6 @@ int64_t ngtcp2_get_pkt_num(const uint8_t *p, size_t pkt_numlen); */ uint8_t *ngtcp2_put_uint64be(uint8_t *p, uint64_t n); -/* - * ngtcp2_put_uint48be writes |n| in host byte order in |p| in network - * byte order. It writes only least significant 48 bits. It returns - * the one beyond of the last written position. - */ -uint8_t *ngtcp2_put_uint48be(uint8_t *p, uint64_t n); - /* * ngtcp2_put_uint32be writes |n| in host byte order in |p| in network * byte order. It returns the one beyond of the last written @@ -168,41 +154,9 @@ size_t ngtcp2_get_uvarintlen(const uint8_t *p); */ size_t ngtcp2_put_uvarintlen(uint64_t n); -/* - * ngtcp2_nth_server_bidi_id returns |n|-th server bidirectional - * stream ID. If |n| is 0, it returns 0. If the |n|-th stream ID is - * larger than NGTCP2_MAX_SERVER_STREAM_ID_BIDI, this function returns - * NGTCP2_MAX_SERVER_STREAM_ID_BIDI. - */ -int64_t ngtcp2_nth_server_bidi_id(uint64_t n); - -/* - * ngtcp2_nth_client_bidi_id returns |n|-th client bidirectional - * stream ID. If |n| is 0, it returns 0. If the |n|-th stream ID is - * larger than NGTCP2_MAX_CLIENT_STREAM_ID_BIDI, this function returns - * NGTCP2_MAX_CLIENT_STREAM_ID_BIDI. - */ -int64_t ngtcp2_nth_client_bidi_id(uint64_t n); - -/* - * ngtcp2_nth_server_uni_id returns |n|-th server unidirectional - * stream ID. If |n| is 0, it returns 0. If the |n|-th stream ID is - * larger than NGTCP2_MAX_SERVER_STREAM_ID_UNI, this function returns - * NGTCP2_MAX_SERVER_STREAM_ID_UNI. - */ -int64_t ngtcp2_nth_server_uni_id(uint64_t n); - -/* - * ngtcp2_nth_client_uni_id returns |n|-th client unidirectional - * stream ID. If |n| is 0, it returns 0. If the |n|-th stream ID is - * larger than NGTCP2_MAX_CLIENT_STREAM_ID_UNI, this function returns - * NGTCP2_MAX_CLIENT_STREAM_ID_UNI. - */ -int64_t ngtcp2_nth_client_uni_id(uint64_t n); - /* * ngtcp2_ord_stream_id returns the ordinal number of |stream_id|. */ uint64_t ngtcp2_ord_stream_id(int64_t stream_id); -#endif /* NGTCP2_CONV_H */ +#endif /* !defined(NGTCP2_CONV_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_conversion.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_conversion.c deleted file mode 100644 index eb85687a068449..00000000000000 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_conversion.c +++ /dev/null @@ -1,66 +0,0 @@ -/* - * ngtcp2 - * - * Copyright (c) 2023 ngtcp2 contributors - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#include "ngtcp2_conversion.h" - -#include -#include - -static void transport_params_copy(int transport_params_version, - ngtcp2_transport_params *dest, - const ngtcp2_transport_params *src) { - assert(transport_params_version != NGTCP2_TRANSPORT_PARAMS_VERSION); - - switch (transport_params_version) { - case NGTCP2_TRANSPORT_PARAMS_V1: - memcpy(dest, src, - offsetof(ngtcp2_transport_params, version_info_present) + - sizeof(src->version_info_present)); - - break; - } -} - -const ngtcp2_transport_params * -ngtcp2_transport_params_convert_to_latest(ngtcp2_transport_params *dest, - int transport_params_version, - const ngtcp2_transport_params *src) { - if (transport_params_version == NGTCP2_TRANSPORT_PARAMS_VERSION) { - return src; - } - - ngtcp2_transport_params_default(dest); - - transport_params_copy(transport_params_version, dest, src); - - return dest; -} - -void ngtcp2_transport_params_convert_to_old( - int transport_params_version, ngtcp2_transport_params *dest, - const ngtcp2_transport_params *src) { - assert(transport_params_version != NGTCP2_TRANSPORT_PARAMS_VERSION); - - transport_params_copy(transport_params_version, dest, src); -} diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_crypto.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_crypto.c index 0a3ecf6a2440cb..1f74e8c58397b1 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_crypto.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_crypto.c @@ -27,11 +27,7 @@ #include #include -#include "ngtcp2_str.h" -#include "ngtcp2_conv.h" -#include "ngtcp2_conn.h" #include "ngtcp2_net.h" -#include "ngtcp2_conversion.h" int ngtcp2_crypto_km_new(ngtcp2_crypto_km **pckm, const uint8_t *secret, size_t secretlen, @@ -46,9 +42,11 @@ int ngtcp2_crypto_km_new(ngtcp2_crypto_km **pckm, const uint8_t *secret, if (secretlen) { memcpy((*pckm)->secret.base, secret, secretlen); } + if (aead_ctx) { (*pckm)->aead_ctx = *aead_ctx; } + memcpy((*pckm)->iv.base, iv, ivlen); return 0; @@ -85,825 +83,30 @@ void ngtcp2_crypto_km_del(ngtcp2_crypto_km *ckm, const ngtcp2_mem *mem) { return; } + if (ckm->secret.len) { +#ifdef WIN32 + SecureZeroMemory(ckm->secret.base, ckm->secret.len); +#elif defined(HAVE_EXPLICIT_BZERO) + explicit_bzero(ckm->secret.base, ckm->secret.len); +#elif defined(HAVE_MEMSET_S) + memset_s(ckm->secret.base, ckm->secret.len, 0, ckm->secret.len); +#endif /* defined(HAVE_MEMSET_S) */ + } + ngtcp2_mem_free(mem, ckm); } void ngtcp2_crypto_create_nonce(uint8_t *dest, const uint8_t *iv, size_t ivlen, int64_t pkt_num) { - size_t i; uint64_t n; - assert(ivlen >= 8); + assert(ivlen >= sizeof(n)); memcpy(dest, iv, ivlen); - n = ngtcp2_htonl64((uint64_t)pkt_num); - - for (i = 0; i < 8; ++i) { - dest[ivlen - 8 + i] ^= ((uint8_t *)&n)[i]; - } -} - -/* - * varint_paramlen returns the length of a single transport parameter - * which has variable integer in its parameter. - */ -static size_t varint_paramlen(ngtcp2_transport_param_id id, uint64_t param) { - size_t valuelen = ngtcp2_put_uvarintlen(param); - return ngtcp2_put_uvarintlen(id) + ngtcp2_put_uvarintlen(valuelen) + valuelen; -} - -/* - * write_varint_param writes parameter |id| of the given |value| in - * varint encoding. It returns p + the number of bytes written. - */ -static uint8_t *write_varint_param(uint8_t *p, ngtcp2_transport_param_id id, - uint64_t value) { - p = ngtcp2_put_uvarint(p, id); - p = ngtcp2_put_uvarint(p, ngtcp2_put_uvarintlen(value)); - return ngtcp2_put_uvarint(p, value); -} - -/* - * zero_paramlen returns the length of a single transport parameter - * which has zero length value in its parameter. - */ -static size_t zero_paramlen(ngtcp2_transport_param_id id) { - return ngtcp2_put_uvarintlen(id) + 1; -} - -/* - * write_zero_param writes parameter |id| that has zero length value. - * It returns p + the number of bytes written. - */ -static uint8_t *write_zero_param(uint8_t *p, ngtcp2_transport_param_id id) { - p = ngtcp2_put_uvarint(p, id); - *p++ = 0; - - return p; -} - -/* - * cid_paramlen returns the length of a single transport parameter - * which has |cid| as value. - */ -static size_t cid_paramlen(ngtcp2_transport_param_id id, - const ngtcp2_cid *cid) { - return ngtcp2_put_uvarintlen(id) + ngtcp2_put_uvarintlen(cid->datalen) + - cid->datalen; -} - -/* - * write_cid_param writes parameter |id| of the given |cid|. It - * returns p + the number of bytes written. - */ -static uint8_t *write_cid_param(uint8_t *p, ngtcp2_transport_param_id id, - const ngtcp2_cid *cid) { - assert(cid->datalen == 0 || cid->datalen >= NGTCP2_MIN_CIDLEN); - assert(cid->datalen <= NGTCP2_MAX_CIDLEN); - - p = ngtcp2_put_uvarint(p, id); - p = ngtcp2_put_uvarint(p, cid->datalen); - if (cid->datalen) { - p = ngtcp2_cpymem(p, cid->data, cid->datalen); - } - return p; -} - -static const uint8_t empty_address[16]; - -ngtcp2_ssize ngtcp2_transport_params_encode_versioned( - uint8_t *dest, size_t destlen, int transport_params_version, - const ngtcp2_transport_params *params) { - uint8_t *p; - size_t len = 0; - /* For some reason, gcc 7.3.0 requires this initialization. */ - size_t preferred_addrlen = 0; - size_t version_infolen = 0; - const ngtcp2_sockaddr_in *sa_in; - const ngtcp2_sockaddr_in6 *sa_in6; - ngtcp2_transport_params paramsbuf; - - params = ngtcp2_transport_params_convert_to_latest( - ¶msbuf, transport_params_version, params); - - if (params->original_dcid_present) { - len += - cid_paramlen(NGTCP2_TRANSPORT_PARAM_ORIGINAL_DESTINATION_CONNECTION_ID, - ¶ms->original_dcid); - } - - if (params->stateless_reset_token_present) { - len += ngtcp2_put_uvarintlen(NGTCP2_TRANSPORT_PARAM_STATELESS_RESET_TOKEN) + - ngtcp2_put_uvarintlen(NGTCP2_STATELESS_RESET_TOKENLEN) + - NGTCP2_STATELESS_RESET_TOKENLEN; - } - - if (params->preferred_addr_present) { - assert(params->preferred_addr.cid.datalen >= NGTCP2_MIN_CIDLEN); - assert(params->preferred_addr.cid.datalen <= NGTCP2_MAX_CIDLEN); - preferred_addrlen = 4 /* ipv4Address */ + 2 /* ipv4Port */ + - 16 /* ipv6Address */ + 2 /* ipv6Port */ - + 1 + params->preferred_addr.cid.datalen /* CID */ + - NGTCP2_STATELESS_RESET_TOKENLEN; - len += ngtcp2_put_uvarintlen(NGTCP2_TRANSPORT_PARAM_PREFERRED_ADDRESS) + - ngtcp2_put_uvarintlen(preferred_addrlen) + preferred_addrlen; - } - if (params->retry_scid_present) { - len += cid_paramlen(NGTCP2_TRANSPORT_PARAM_RETRY_SOURCE_CONNECTION_ID, - ¶ms->retry_scid); - } - - if (params->initial_scid_present) { - len += cid_paramlen(NGTCP2_TRANSPORT_PARAM_INITIAL_SOURCE_CONNECTION_ID, - ¶ms->initial_scid); - } - - if (params->initial_max_stream_data_bidi_local) { - len += varint_paramlen( - NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_LOCAL, - params->initial_max_stream_data_bidi_local); - } - if (params->initial_max_stream_data_bidi_remote) { - len += varint_paramlen( - NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_REMOTE, - params->initial_max_stream_data_bidi_remote); - } - if (params->initial_max_stream_data_uni) { - len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_UNI, - params->initial_max_stream_data_uni); - } - if (params->initial_max_data) { - len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_DATA, - params->initial_max_data); - } - if (params->initial_max_streams_bidi) { - len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_BIDI, - params->initial_max_streams_bidi); - } - if (params->initial_max_streams_uni) { - len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_UNI, - params->initial_max_streams_uni); - } - if (params->max_udp_payload_size != - NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE) { - len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_MAX_UDP_PAYLOAD_SIZE, - params->max_udp_payload_size); - } - if (params->ack_delay_exponent != NGTCP2_DEFAULT_ACK_DELAY_EXPONENT) { - len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_ACK_DELAY_EXPONENT, - params->ack_delay_exponent); - } - if (params->disable_active_migration) { - len += zero_paramlen(NGTCP2_TRANSPORT_PARAM_DISABLE_ACTIVE_MIGRATION); - } - if (params->max_ack_delay != NGTCP2_DEFAULT_MAX_ACK_DELAY) { - len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_MAX_ACK_DELAY, - params->max_ack_delay / NGTCP2_MILLISECONDS); - } - if (params->max_idle_timeout) { - len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_MAX_IDLE_TIMEOUT, - params->max_idle_timeout / NGTCP2_MILLISECONDS); - } - if (params->active_connection_id_limit && - params->active_connection_id_limit != - NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT) { - len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_ACTIVE_CONNECTION_ID_LIMIT, - params->active_connection_id_limit); - } - if (params->max_datagram_frame_size) { - len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_MAX_DATAGRAM_FRAME_SIZE, - params->max_datagram_frame_size); - } - if (params->grease_quic_bit) { - len += zero_paramlen(NGTCP2_TRANSPORT_PARAM_GREASE_QUIC_BIT); - } - if (params->version_info_present) { - version_infolen = - sizeof(uint32_t) + params->version_info.available_versionslen; - len += ngtcp2_put_uvarintlen(NGTCP2_TRANSPORT_PARAM_VERSION_INFORMATION) + - ngtcp2_put_uvarintlen(version_infolen) + version_infolen; - } - - if (dest == NULL && destlen == 0) { - return (ngtcp2_ssize)len; - } - - if (destlen < len) { - return NGTCP2_ERR_NOBUF; - } - - p = dest; - - if (params->original_dcid_present) { - p = write_cid_param( - p, NGTCP2_TRANSPORT_PARAM_ORIGINAL_DESTINATION_CONNECTION_ID, - ¶ms->original_dcid); - } - - if (params->stateless_reset_token_present) { - p = ngtcp2_put_uvarint(p, NGTCP2_TRANSPORT_PARAM_STATELESS_RESET_TOKEN); - p = ngtcp2_put_uvarint(p, sizeof(params->stateless_reset_token)); - p = ngtcp2_cpymem(p, params->stateless_reset_token, - sizeof(params->stateless_reset_token)); - } - - if (params->preferred_addr_present) { - p = ngtcp2_put_uvarint(p, NGTCP2_TRANSPORT_PARAM_PREFERRED_ADDRESS); - p = ngtcp2_put_uvarint(p, preferred_addrlen); - - if (params->preferred_addr.ipv4_present) { - sa_in = ¶ms->preferred_addr.ipv4; - p = ngtcp2_cpymem(p, &sa_in->sin_addr, sizeof(sa_in->sin_addr)); - p = ngtcp2_put_uint16(p, sa_in->sin_port); - } else { - p = ngtcp2_cpymem(p, empty_address, sizeof(sa_in->sin_addr)); - p = ngtcp2_put_uint16(p, 0); - } - - if (params->preferred_addr.ipv6_present) { - sa_in6 = ¶ms->preferred_addr.ipv6; - p = ngtcp2_cpymem(p, &sa_in6->sin6_addr, sizeof(sa_in6->sin6_addr)); - p = ngtcp2_put_uint16(p, sa_in6->sin6_port); - } else { - p = ngtcp2_cpymem(p, empty_address, sizeof(sa_in6->sin6_addr)); - p = ngtcp2_put_uint16(p, 0); - } - - *p++ = (uint8_t)params->preferred_addr.cid.datalen; - if (params->preferred_addr.cid.datalen) { - p = ngtcp2_cpymem(p, params->preferred_addr.cid.data, - params->preferred_addr.cid.datalen); - } - p = ngtcp2_cpymem(p, params->preferred_addr.stateless_reset_token, - sizeof(params->preferred_addr.stateless_reset_token)); - } - - if (params->retry_scid_present) { - p = write_cid_param(p, NGTCP2_TRANSPORT_PARAM_RETRY_SOURCE_CONNECTION_ID, - ¶ms->retry_scid); - } - - if (params->initial_scid_present) { - p = write_cid_param(p, NGTCP2_TRANSPORT_PARAM_INITIAL_SOURCE_CONNECTION_ID, - ¶ms->initial_scid); - } - - if (params->initial_max_stream_data_bidi_local) { - p = write_varint_param( - p, NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_LOCAL, - params->initial_max_stream_data_bidi_local); - } - - if (params->initial_max_stream_data_bidi_remote) { - p = write_varint_param( - p, NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_REMOTE, - params->initial_max_stream_data_bidi_remote); - } - - if (params->initial_max_stream_data_uni) { - p = write_varint_param(p, - NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_UNI, - params->initial_max_stream_data_uni); - } - - if (params->initial_max_data) { - p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_DATA, - params->initial_max_data); - } - - if (params->initial_max_streams_bidi) { - p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_BIDI, - params->initial_max_streams_bidi); - } - - if (params->initial_max_streams_uni) { - p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_UNI, - params->initial_max_streams_uni); - } - - if (params->max_udp_payload_size != - NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE) { - p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_MAX_UDP_PAYLOAD_SIZE, - params->max_udp_payload_size); - } - - if (params->ack_delay_exponent != NGTCP2_DEFAULT_ACK_DELAY_EXPONENT) { - p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_ACK_DELAY_EXPONENT, - params->ack_delay_exponent); - } - - if (params->disable_active_migration) { - p = write_zero_param(p, NGTCP2_TRANSPORT_PARAM_DISABLE_ACTIVE_MIGRATION); - } - - if (params->max_ack_delay != NGTCP2_DEFAULT_MAX_ACK_DELAY) { - p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_MAX_ACK_DELAY, - params->max_ack_delay / NGTCP2_MILLISECONDS); - } - - if (params->max_idle_timeout) { - p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_MAX_IDLE_TIMEOUT, - params->max_idle_timeout / NGTCP2_MILLISECONDS); - } - - if (params->active_connection_id_limit && - params->active_connection_id_limit != - NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT) { - p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_ACTIVE_CONNECTION_ID_LIMIT, - params->active_connection_id_limit); - } - - if (params->max_datagram_frame_size) { - p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_MAX_DATAGRAM_FRAME_SIZE, - params->max_datagram_frame_size); - } - - if (params->grease_quic_bit) { - p = write_zero_param(p, NGTCP2_TRANSPORT_PARAM_GREASE_QUIC_BIT); - } - - if (params->version_info_present) { - p = ngtcp2_put_uvarint(p, NGTCP2_TRANSPORT_PARAM_VERSION_INFORMATION); - p = ngtcp2_put_uvarint(p, version_infolen); - p = ngtcp2_put_uint32be(p, params->version_info.chosen_version); - if (params->version_info.available_versionslen) { - p = ngtcp2_cpymem(p, params->version_info.available_versions, - params->version_info.available_versionslen); - } - } - - assert((size_t)(p - dest) == len); - - return (ngtcp2_ssize)len; -} - -/* - * decode_varint decodes a single varint from the buffer pointed by - * |*pp| of length |end - *pp|. If it decodes an integer - * successfully, it stores the integer in |*pdest|, increment |*pp| by - * the number of bytes read from |*pp|, and returns 0. Otherwise it - * returns -1. - */ -static int decode_varint(uint64_t *pdest, const uint8_t **pp, - const uint8_t *end) { - const uint8_t *p = *pp; - size_t len; - - if (p == end) { - return -1; - } - - len = ngtcp2_get_uvarintlen(p); - if ((uint64_t)(end - p) < len) { - return -1; - } - - *pp = ngtcp2_get_uvarint(pdest, p); - - return 0; -} - -/* - * decode_varint_param decodes length prefixed value from the buffer - * pointed by |*pp| of length |end - *pp|. The length and value are - * encoded in varint form. If it decodes a value successfully, it - * stores the value in |*pdest|, increment |*pp| by the number of - * bytes read from |*pp|, and returns 0. Otherwise it returns -1. - */ -static int decode_varint_param(uint64_t *pdest, const uint8_t **pp, - const uint8_t *end) { - const uint8_t *p = *pp; - uint64_t valuelen; - - if (decode_varint(&valuelen, &p, end) != 0) { - return -1; - } - - if (p == end) { - return -1; - } - - if ((uint64_t)(end - p) < valuelen) { - return -1; - } - - if (ngtcp2_get_uvarintlen(p) != valuelen) { - return -1; - } - - *pp = ngtcp2_get_uvarint(pdest, p); - - return 0; -} - -/* - * decode_zero_param decodes zero length value from the buffer pointed - * by |*pp| of length |end - *pp|. The length is encoded in varint - * form. If it decodes zero length value successfully, it increments - * |*pp| by 1, and returns 0. Otherwise it returns -1. - */ -static int decode_zero_param(const uint8_t **pp, const uint8_t *end) { - if (*pp == end || **pp != 0) { - return -1; - } - - ++*pp; - - return 0; -} - -/* - * decode_cid_param decodes length prefixed ngtcp2_cid from the buffer - * pointed by |*pp| of length |end - *pp|. The length is encoded in - * varint form. If it decodes a value successfully, it stores the - * value in |*pdest|, increment |*pp| by the number of read from - * |*pp|, and returns the number of bytes read. Otherwise it returns - * the one of the negative error code: - * - * NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM - * Could not decode Connection ID. - */ -static int decode_cid_param(ngtcp2_cid *pdest, const uint8_t **pp, - const uint8_t *end) { - const uint8_t *p = *pp; - uint64_t valuelen; - - if (decode_varint(&valuelen, &p, end) != 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - - if ((valuelen != 0 && valuelen < NGTCP2_MIN_CIDLEN) || - valuelen > NGTCP2_MAX_CIDLEN || (size_t)(end - p) < valuelen) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - - ngtcp2_cid_init(pdest, p, (size_t)valuelen); - - p += valuelen; - - *pp = p; - - return 0; -} - -int ngtcp2_transport_params_decode_versioned(int transport_params_version, - ngtcp2_transport_params *dest, - const uint8_t *data, - size_t datalen) { - const uint8_t *p, *end, *lend; - size_t len; - uint64_t param_type; - uint64_t valuelen; - int rv; - ngtcp2_sockaddr_in *sa_in; - ngtcp2_sockaddr_in6 *sa_in6; - uint32_t version; - ngtcp2_transport_params *params, paramsbuf; - if (transport_params_version == NGTCP2_TRANSPORT_PARAMS_VERSION) { - params = dest; - } else { - params = ¶msbuf; - } - - /* Set default values */ - memset(params, 0, sizeof(*params)); - params->original_dcid_present = 0; - params->initial_scid_present = 0; - params->initial_max_streams_bidi = 0; - params->initial_max_streams_uni = 0; - params->initial_max_stream_data_bidi_local = 0; - params->initial_max_stream_data_bidi_remote = 0; - params->initial_max_stream_data_uni = 0; - params->max_udp_payload_size = NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE; - params->ack_delay_exponent = NGTCP2_DEFAULT_ACK_DELAY_EXPONENT; - params->stateless_reset_token_present = 0; - params->preferred_addr_present = 0; - params->disable_active_migration = 0; - params->max_ack_delay = NGTCP2_DEFAULT_MAX_ACK_DELAY; - params->max_idle_timeout = 0; - params->active_connection_id_limit = - NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT; - params->retry_scid_present = 0; - params->max_datagram_frame_size = 0; - memset(¶ms->retry_scid, 0, sizeof(params->retry_scid)); - memset(¶ms->initial_scid, 0, sizeof(params->initial_scid)); - memset(¶ms->original_dcid, 0, sizeof(params->original_dcid)); - params->version_info_present = 0; - - p = data; - end = data + datalen; - - for (; (size_t)(end - p) >= 2;) { - if (decode_varint(¶m_type, &p, end) != 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - - switch (param_type) { - case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_LOCAL: - if (decode_varint_param(¶ms->initial_max_stream_data_bidi_local, &p, - end) != 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - break; - case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_REMOTE: - if (decode_varint_param(¶ms->initial_max_stream_data_bidi_remote, &p, - end) != 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - break; - case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_UNI: - if (decode_varint_param(¶ms->initial_max_stream_data_uni, &p, end) != - 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - break; - case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_DATA: - if (decode_varint_param(¶ms->initial_max_data, &p, end) != 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - break; - case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_BIDI: - if (decode_varint_param(¶ms->initial_max_streams_bidi, &p, end) != - 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - if (params->initial_max_streams_bidi > NGTCP2_MAX_STREAMS) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - break; - case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_UNI: - if (decode_varint_param(¶ms->initial_max_streams_uni, &p, end) != 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - if (params->initial_max_streams_uni > NGTCP2_MAX_STREAMS) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - break; - case NGTCP2_TRANSPORT_PARAM_MAX_IDLE_TIMEOUT: - if (decode_varint_param(¶ms->max_idle_timeout, &p, end) != 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - params->max_idle_timeout *= NGTCP2_MILLISECONDS; - break; - case NGTCP2_TRANSPORT_PARAM_MAX_UDP_PAYLOAD_SIZE: - if (decode_varint_param(¶ms->max_udp_payload_size, &p, end) != 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - break; - case NGTCP2_TRANSPORT_PARAM_STATELESS_RESET_TOKEN: - if (decode_varint(&valuelen, &p, end) != 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - if ((size_t)valuelen != sizeof(params->stateless_reset_token)) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - if ((size_t)(end - p) < sizeof(params->stateless_reset_token)) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - - p = ngtcp2_get_bytes(params->stateless_reset_token, p, - sizeof(params->stateless_reset_token)); - params->stateless_reset_token_present = 1; - - break; - case NGTCP2_TRANSPORT_PARAM_ACK_DELAY_EXPONENT: - if (decode_varint_param(¶ms->ack_delay_exponent, &p, end) != 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - if (params->ack_delay_exponent > 20) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - break; - case NGTCP2_TRANSPORT_PARAM_PREFERRED_ADDRESS: - if (decode_varint(&valuelen, &p, end) != 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - if ((size_t)(end - p) < valuelen) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - len = 4 /* ipv4Address */ + 2 /* ipv4Port */ + 16 /* ipv6Address */ + - 2 /* ipv6Port */ - + 1 /* cid length */ + NGTCP2_STATELESS_RESET_TOKENLEN; - if (valuelen < len) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - - sa_in = ¶ms->preferred_addr.ipv4; - - p = ngtcp2_get_bytes(&sa_in->sin_addr, p, sizeof(sa_in->sin_addr)); - p = ngtcp2_get_uint16be(&sa_in->sin_port, p); - - if (sa_in->sin_port || memcmp(empty_address, &sa_in->sin_addr, - sizeof(sa_in->sin_addr)) != 0) { - sa_in->sin_family = NGTCP2_AF_INET; - params->preferred_addr.ipv4_present = 1; - } - - sa_in6 = ¶ms->preferred_addr.ipv6; - - p = ngtcp2_get_bytes(&sa_in6->sin6_addr, p, sizeof(sa_in6->sin6_addr)); - p = ngtcp2_get_uint16be(&sa_in6->sin6_port, p); - - if (sa_in6->sin6_port || memcmp(empty_address, &sa_in6->sin6_addr, - sizeof(sa_in6->sin6_addr)) != 0) { - sa_in6->sin6_family = NGTCP2_AF_INET6; - params->preferred_addr.ipv6_present = 1; - } - - /* cid */ - params->preferred_addr.cid.datalen = *p++; - len += params->preferred_addr.cid.datalen; - if (valuelen != len || - params->preferred_addr.cid.datalen > NGTCP2_MAX_CIDLEN || - params->preferred_addr.cid.datalen < NGTCP2_MIN_CIDLEN) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - if (params->preferred_addr.cid.datalen) { - p = ngtcp2_get_bytes(params->preferred_addr.cid.data, p, - params->preferred_addr.cid.datalen); - } - - /* stateless reset token */ - p = ngtcp2_get_bytes( - params->preferred_addr.stateless_reset_token, p, - sizeof(params->preferred_addr.stateless_reset_token)); - params->preferred_addr_present = 1; - break; - case NGTCP2_TRANSPORT_PARAM_DISABLE_ACTIVE_MIGRATION: - if (decode_zero_param(&p, end) != 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - params->disable_active_migration = 1; - break; - case NGTCP2_TRANSPORT_PARAM_ORIGINAL_DESTINATION_CONNECTION_ID: - rv = decode_cid_param(¶ms->original_dcid, &p, end); - if (rv != 0) { - return rv; - } - params->original_dcid_present = 1; - break; - case NGTCP2_TRANSPORT_PARAM_RETRY_SOURCE_CONNECTION_ID: - rv = decode_cid_param(¶ms->retry_scid, &p, end); - if (rv != 0) { - return rv; - } - params->retry_scid_present = 1; - break; - case NGTCP2_TRANSPORT_PARAM_INITIAL_SOURCE_CONNECTION_ID: - rv = decode_cid_param(¶ms->initial_scid, &p, end); - if (rv != 0) { - return rv; - } - params->initial_scid_present = 1; - break; - case NGTCP2_TRANSPORT_PARAM_MAX_ACK_DELAY: - if (decode_varint_param(¶ms->max_ack_delay, &p, end) != 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - if (params->max_ack_delay >= 16384) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - params->max_ack_delay *= NGTCP2_MILLISECONDS; - break; - case NGTCP2_TRANSPORT_PARAM_ACTIVE_CONNECTION_ID_LIMIT: - if (decode_varint_param(¶ms->active_connection_id_limit, &p, end) != - 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - break; - case NGTCP2_TRANSPORT_PARAM_MAX_DATAGRAM_FRAME_SIZE: - if (decode_varint_param(¶ms->max_datagram_frame_size, &p, end) != 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - break; - case NGTCP2_TRANSPORT_PARAM_GREASE_QUIC_BIT: - if (decode_zero_param(&p, end) != 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - params->grease_quic_bit = 1; - break; - case NGTCP2_TRANSPORT_PARAM_VERSION_INFORMATION: - if (decode_varint(&valuelen, &p, end) != 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - if ((size_t)(end - p) < valuelen) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - if (valuelen < sizeof(uint32_t) || (valuelen & 0x3)) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - p = ngtcp2_get_uint32(¶ms->version_info.chosen_version, p); - if (params->version_info.chosen_version == 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - if (valuelen > sizeof(uint32_t)) { - params->version_info.available_versions = (uint8_t *)p; - params->version_info.available_versionslen = - (size_t)valuelen - sizeof(uint32_t); - - for (lend = p + (valuelen - sizeof(uint32_t)); p != lend;) { - p = ngtcp2_get_uint32(&version, p); - if (version == 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - } - } - params->version_info_present = 1; - break; - default: - /* Ignore unknown parameter */ - if (decode_varint(&valuelen, &p, end) != 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - if ((size_t)(end - p) < valuelen) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - p += valuelen; - break; - } - } - - if (end - p != 0) { - return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; - } - - if (transport_params_version != NGTCP2_TRANSPORT_PARAMS_VERSION) { - ngtcp2_transport_params_convert_to_old(transport_params_version, dest, - params); - } - - return 0; -} - -static int transport_params_copy_new(ngtcp2_transport_params **pdest, - const ngtcp2_transport_params *src, - const ngtcp2_mem *mem) { - size_t len = sizeof(**pdest); - ngtcp2_transport_params *dest; - uint8_t *p; - - if (src->version_info_present) { - len += src->version_info.available_versionslen; - } - - dest = ngtcp2_mem_malloc(mem, len); - if (dest == NULL) { - return NGTCP2_ERR_NOMEM; - } - - *dest = *src; - - if (src->version_info_present && src->version_info.available_versionslen) { - p = (uint8_t *)dest + sizeof(*dest); - memcpy(p, src->version_info.available_versions, - src->version_info.available_versionslen); - dest->version_info.available_versions = p; - } - - *pdest = dest; - - return 0; -} - -int ngtcp2_transport_params_decode_new(ngtcp2_transport_params **pparams, - const uint8_t *data, size_t datalen, - const ngtcp2_mem *mem) { - int rv; - ngtcp2_transport_params params; - - rv = ngtcp2_transport_params_decode(¶ms, data, datalen); - if (rv < 0) { - return rv; - } - - if (mem == NULL) { - mem = ngtcp2_mem_default(); - } - - return transport_params_copy_new(pparams, ¶ms, mem); -} - -void ngtcp2_transport_params_del(ngtcp2_transport_params *params, - const ngtcp2_mem *mem) { - if (params == NULL) { - return; - } - - if (mem == NULL) { - mem = ngtcp2_mem_default(); - } - - ngtcp2_mem_free(mem, params); -} - -int ngtcp2_transport_params_copy_new(ngtcp2_transport_params **pdest, - const ngtcp2_transport_params *src, - const ngtcp2_mem *mem) { - if (src == NULL) { - *pdest = NULL; - return 0; - } + dest += ivlen - sizeof(n); - return transport_params_copy_new(pdest, src, mem); + memcpy(&n, dest, sizeof(n)); + n ^= ngtcp2_htonl64((uint64_t)pkt_num); + memcpy(dest, &n, sizeof(n)); } diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_crypto.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_crypto.h index b78429bb38f582..ca6d494846f324 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_crypto.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_crypto.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -38,36 +38,9 @@ bytes. */ #define NGTCP2_INITIAL_AEAD_OVERHEAD 16 -/* NGTCP2_MAX_AEAD_OVERHEAD is expected maximum AEAD overhead. */ +/* NGTCP2_MAX_AEAD_OVERHEAD is the maximum AEAD overhead. */ #define NGTCP2_MAX_AEAD_OVERHEAD 16 -/* ngtcp2_transport_param_id is the registry of QUIC transport - parameter ID. */ -typedef uint64_t ngtcp2_transport_param_id; - -#define NGTCP2_TRANSPORT_PARAM_ORIGINAL_DESTINATION_CONNECTION_ID 0x00 -#define NGTCP2_TRANSPORT_PARAM_MAX_IDLE_TIMEOUT 0x01 -#define NGTCP2_TRANSPORT_PARAM_STATELESS_RESET_TOKEN 0x02 -#define NGTCP2_TRANSPORT_PARAM_MAX_UDP_PAYLOAD_SIZE 0x03 -#define NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_DATA 0x04 -#define NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_LOCAL 0x05 -#define NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_REMOTE 0x06 -#define NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_UNI 0x07 -#define NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_BIDI 0x08 -#define NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_UNI 0x09 -#define NGTCP2_TRANSPORT_PARAM_ACK_DELAY_EXPONENT 0x0a -#define NGTCP2_TRANSPORT_PARAM_MAX_ACK_DELAY 0x0b -#define NGTCP2_TRANSPORT_PARAM_DISABLE_ACTIVE_MIGRATION 0x0c -#define NGTCP2_TRANSPORT_PARAM_PREFERRED_ADDRESS 0x0d -#define NGTCP2_TRANSPORT_PARAM_ACTIVE_CONNECTION_ID_LIMIT 0x0e -#define NGTCP2_TRANSPORT_PARAM_INITIAL_SOURCE_CONNECTION_ID 0x0f -#define NGTCP2_TRANSPORT_PARAM_RETRY_SOURCE_CONNECTION_ID 0x10 -/* https://datatracker.ietf.org/doc/html/rfc9221 */ -#define NGTCP2_TRANSPORT_PARAM_MAX_DATAGRAM_FRAME_SIZE 0x20 -#define NGTCP2_TRANSPORT_PARAM_GREASE_QUIC_BIT 0x2ab2 -/* https://datatracker.ietf.org/doc/html/rfc9368 */ -#define NGTCP2_TRANSPORT_PARAM_VERSION_INFORMATION 0x11 - /* NGTCP2_CRYPTO_KM_FLAG_NONE indicates that no flag is set. */ #define NGTCP2_CRYPTO_KM_FLAG_NONE 0x00u /* NGTCP2_CRYPTO_KM_FLAG_KEY_PHASE_ONE is set if key phase bit is @@ -83,8 +56,7 @@ typedef struct ngtcp2_crypto_km { a packet. For decryption key, it is the lowest packet number of a packet which can be decrypted with this keying material. */ int64_t pkt_num; - /* use_count is the number of encryption applied with this key. - This field is only used for tx key. */ + /* use_count is the number of encryption applied with this key. */ uint64_t use_count; /* flags is the bitwise OR of zero or more of NGTCP2_CRYPTO_KM_FLAG_*. */ @@ -92,12 +64,12 @@ typedef struct ngtcp2_crypto_km { } ngtcp2_crypto_km; /* - * ngtcp2_crypto_km_new creates new ngtcp2_crypto_km object and + * ngtcp2_crypto_km_new creates new ngtcp2_crypto_km object, and * assigns its pointer to |*pckm|. The |secret| of length - * |secretlen|, the |key| of length |keylen| and the |iv| of length - * |ivlen| are copied to |*pckm|. If |secretlen| == 0, the function - * assumes no secret is given which is acceptable. The sole reason to - * store secret is update keys. Only 1RTT key can be updated. + * |secretlen|, |aead_ctx|, and the |iv| of length |ivlen| are copied + * to |*pckm|. If |secretlen| == 0, the function assumes no secret is + * given which is acceptable. The sole reason to store secret is + * update keys. Only 1RTT key can be updated. */ int ngtcp2_crypto_km_new(ngtcp2_crypto_km **pckm, const uint8_t *secret, size_t secretlen, @@ -107,7 +79,7 @@ int ngtcp2_crypto_km_new(ngtcp2_crypto_km **pckm, const uint8_t *secret, /* * ngtcp2_crypto_km_nocopy_new is similar to ngtcp2_crypto_km_new, but - * it does not copy secret, key and IV. + * it does not copy secret, aead context, and IV. */ int ngtcp2_crypto_km_nocopy_new(ngtcp2_crypto_km **pckm, size_t secretlen, size_t ivlen, const ngtcp2_mem *mem); @@ -127,21 +99,4 @@ typedef struct ngtcp2_crypto_cc { void ngtcp2_crypto_create_nonce(uint8_t *dest, const uint8_t *iv, size_t ivlen, int64_t pkt_num); -/* - * ngtcp2_transport_params_copy_new makes a copy of |src|, and assigns - * it to |*pdest|. If |src| is NULL, NULL is assigned to |*pdest|. - * - * Caller is responsible to call ngtcp2_transport_params_del to free - * the memory assigned to |*pdest|. - * - * This function returns 0 if it succeeds, or one of the following - * negative error codes: - * - * NGTCP2_ERR_NOMEM - * Out of memory. - */ -int ngtcp2_transport_params_copy_new(ngtcp2_transport_params **pdest, - const ngtcp2_transport_params *src, - const ngtcp2_mem *mem); - -#endif /* NGTCP2_CRYPTO_H */ +#endif /* !defined(NGTCP2_CRYPTO_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_err.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_err.h index 9229f5425a63cf..44527b11bdec47 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_err.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_err.h @@ -27,8 +27,8 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include -#endif /* NGTCP2_ERR_H */ +#endif /* !defined(NGTCP2_ERR_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_frame_chain.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_frame_chain.c index 41c2a6a755cc8a..6a8a22c3f0d010 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_frame_chain.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_frame_chain.c @@ -27,7 +27,7 @@ #include #include -ngtcp2_objalloc_def(frame_chain, ngtcp2_frame_chain, oplent); +ngtcp2_objalloc_def(frame_chain, ngtcp2_frame_chain, oplent) int ngtcp2_frame_chain_new(ngtcp2_frame_chain **pfrc, const ngtcp2_mem *mem) { *pfrc = ngtcp2_mem_malloc(mem, sizeof(ngtcp2_frame_chain)); @@ -68,14 +68,11 @@ int ngtcp2_frame_chain_stream_datacnt_objalloc_new(ngtcp2_frame_chain **pfrc, size_t datacnt, ngtcp2_objalloc *objalloc, const ngtcp2_mem *mem) { - size_t need, avail = sizeof(ngtcp2_frame) - sizeof(ngtcp2_stream); - - if (datacnt > 1) { - need = sizeof(ngtcp2_vec) * (datacnt - 1); - - if (need > avail) { - return ngtcp2_frame_chain_extralen_new(pfrc, need - avail, mem); - } + if (datacnt > NGTCP2_FRAME_CHAIN_STREAM_DATACNT_THRES) { + return ngtcp2_frame_chain_extralen_new(pfrc, + sizeof(ngtcp2_vec) * (datacnt - 1) - + NGTCP2_FRAME_CHAIN_STREAM_AVAIL, + mem); } return ngtcp2_frame_chain_objalloc_new(pfrc, objalloc); @@ -139,9 +136,7 @@ void ngtcp2_frame_chain_objalloc_del(ngtcp2_frame_chain *frc, switch (frc->fr.type) { case NGTCP2_FRAME_CRYPTO: case NGTCP2_FRAME_STREAM: - if (frc->fr.stream.datacnt && - sizeof(ngtcp2_vec) * (frc->fr.stream.datacnt - 1) > - sizeof(ngtcp2_frame) - sizeof(ngtcp2_stream)) { + if (frc->fr.stream.datacnt > NGTCP2_FRAME_CHAIN_STREAM_DATACNT_THRES) { ngtcp2_frame_chain_del(frc, mem); return; diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_frame_chain.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_frame_chain.h index 656fa5b799450e..e5b6779c0f03c2 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_frame_chain.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_frame_chain.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -74,7 +74,7 @@ struct ngtcp2_frame_chain { }; }; -ngtcp2_objalloc_decl(frame_chain, ngtcp2_frame_chain, oplent); +ngtcp2_objalloc_decl(frame_chain, ngtcp2_frame_chain, oplent) /* * ngtcp2_bind_frame_chains binds two frame chains |a| and |b| using @@ -121,12 +121,29 @@ int ngtcp2_frame_chain_objalloc_new(ngtcp2_frame_chain **pfrc, int ngtcp2_frame_chain_extralen_new(ngtcp2_frame_chain **pfrc, size_t extralen, const ngtcp2_mem *mem); +/* NGTCP2_FRAME_CHAIN_STREAM_AVAIL is the number of additional bytes + available after ngtcp2_stream when it is embedded in + ngtcp2_frame. */ +#define NGTCP2_FRAME_CHAIN_STREAM_AVAIL \ + (sizeof(ngtcp2_frame) - sizeof(ngtcp2_stream)) + +/* NGTCP2_FRAME_CHAIN_STREAM_DATACNT_THRES is the number of datacnt + that changes allocation method. If datacnt is more than this + value, ngtcp2_frame_chain is allocated without ngtcp2_objalloc. + Otherwise, it is allocated using ngtcp2_objalloc. */ +#define NGTCP2_FRAME_CHAIN_STREAM_DATACNT_THRES \ + (NGTCP2_FRAME_CHAIN_STREAM_AVAIL / sizeof(ngtcp2_vec) + 1) + /* * ngtcp2_frame_chain_stream_datacnt_objalloc_new works like * ngtcp2_frame_chain_new, but it allocates enough data to store * additional |datacnt| - 1 ngtcp2_vec object after ngtcp2_stream - * object. If no additional space is required, - * ngtcp2_frame_chain_objalloc_new is called internally. + * object. If no additional space is required, in other words, + * |datacnt| <= NGTCP2_FRAME_CHAIN_STREAM_DATACNT_THRES, + * ngtcp2_frame_chain_objalloc_new is called internally. Otherwise, + * ngtcp2_frame_chain_extralen_new is used and objalloc is not used. + * Therefore, it is important to call ngtcp2_frame_chain_objalloc_del + * without changing datacnt field. */ int ngtcp2_frame_chain_stream_datacnt_objalloc_new(ngtcp2_frame_chain **pfrc, size_t datacnt, @@ -168,4 +185,4 @@ void ngtcp2_frame_chain_list_objalloc_del(ngtcp2_frame_chain *frc, ngtcp2_objalloc *objalloc, const ngtcp2_mem *mem); -#endif /* NGTCP2_FRAME_CHAIN_H */ +#endif /* !defined(NGTCP2_FRAME_CHAIN_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_gaptr.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_gaptr.c index 87c23898e8207d..3bfa398480c382 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_gaptr.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_gaptr.c @@ -28,22 +28,16 @@ #include void ngtcp2_gaptr_init(ngtcp2_gaptr *gaptr, const ngtcp2_mem *mem) { - ngtcp2_ksl_init(&gaptr->gap, ngtcp2_ksl_range_compar, sizeof(ngtcp2_range), - mem); + ngtcp2_ksl_init(&gaptr->gap, ngtcp2_ksl_range_compar, ngtcp2_ksl_range_search, + sizeof(ngtcp2_range), mem); gaptr->mem = mem; } static int gaptr_gap_init(ngtcp2_gaptr *gaptr) { ngtcp2_range range = {0, UINT64_MAX}; - int rv; - - rv = ngtcp2_ksl_insert(&gaptr->gap, NULL, &range, NULL); - if (rv != 0) { - return rv; - } - return 0; + return ngtcp2_ksl_insert(&gaptr->gap, NULL, &range, NULL); } void ngtcp2_gaptr_free(ngtcp2_gaptr *gaptr) { @@ -66,8 +60,8 @@ int ngtcp2_gaptr_push(ngtcp2_gaptr *gaptr, uint64_t offset, uint64_t datalen) { } } - it = ngtcp2_ksl_lower_bound_compar(&gaptr->gap, &q, - ngtcp2_ksl_range_exclusive_compar); + it = ngtcp2_ksl_lower_bound_search(&gaptr->gap, &q, + ngtcp2_ksl_range_exclusive_search); for (; !ngtcp2_ksl_it_end(&it);) { k = *(ngtcp2_range *)ngtcp2_ksl_it_key(&it); @@ -80,7 +74,9 @@ int ngtcp2_gaptr_push(ngtcp2_gaptr *gaptr, uint64_t offset, uint64_t datalen) { ngtcp2_ksl_remove_hint(&gaptr->gap, &it, &it, &k); continue; } + ngtcp2_range_cut(&l, &r, &k, &m); + if (ngtcp2_range_len(&l)) { ngtcp2_ksl_update_key(&gaptr->gap, &k, &l); @@ -93,26 +89,26 @@ int ngtcp2_gaptr_push(ngtcp2_gaptr *gaptr, uint64_t offset, uint64_t datalen) { } else if (ngtcp2_range_len(&r)) { ngtcp2_ksl_update_key(&gaptr->gap, &k, &r); } + ngtcp2_ksl_it_next(&it); } + return 0; } -uint64_t ngtcp2_gaptr_first_gap_offset(ngtcp2_gaptr *gaptr) { +uint64_t ngtcp2_gaptr_first_gap_offset(const ngtcp2_gaptr *gaptr) { ngtcp2_ksl_it it; - ngtcp2_range r; if (ngtcp2_ksl_len(&gaptr->gap) == 0) { return 0; } it = ngtcp2_ksl_begin(&gaptr->gap); - r = *(ngtcp2_range *)ngtcp2_ksl_it_key(&it); - return r.begin; + return ((ngtcp2_range *)ngtcp2_ksl_it_key(&it))->begin; } -ngtcp2_range ngtcp2_gaptr_get_first_gap_after(ngtcp2_gaptr *gaptr, +ngtcp2_range ngtcp2_gaptr_get_first_gap_after(const ngtcp2_gaptr *gaptr, uint64_t offset) { ngtcp2_range q = {offset, offset + 1}; ngtcp2_ksl_it it; @@ -122,29 +118,27 @@ ngtcp2_range ngtcp2_gaptr_get_first_gap_after(ngtcp2_gaptr *gaptr, return r; } - it = ngtcp2_ksl_lower_bound_compar(&gaptr->gap, &q, - ngtcp2_ksl_range_exclusive_compar); + it = ngtcp2_ksl_lower_bound_search(&gaptr->gap, &q, + ngtcp2_ksl_range_exclusive_search); assert(!ngtcp2_ksl_it_end(&it)); return *(ngtcp2_range *)ngtcp2_ksl_it_key(&it); } -int ngtcp2_gaptr_is_pushed(ngtcp2_gaptr *gaptr, uint64_t offset, +int ngtcp2_gaptr_is_pushed(const ngtcp2_gaptr *gaptr, uint64_t offset, uint64_t datalen) { ngtcp2_range q = {offset, offset + datalen}; ngtcp2_ksl_it it; - ngtcp2_range k; ngtcp2_range m; if (ngtcp2_ksl_len(&gaptr->gap) == 0) { return 0; } - it = ngtcp2_ksl_lower_bound_compar(&gaptr->gap, &q, - ngtcp2_ksl_range_exclusive_compar); - k = *(ngtcp2_range *)ngtcp2_ksl_it_key(&it); - m = ngtcp2_range_intersect(&q, &k); + it = ngtcp2_ksl_lower_bound_search(&gaptr->gap, &q, + ngtcp2_ksl_range_exclusive_search); + m = ngtcp2_range_intersect(&q, (ngtcp2_range *)ngtcp2_ksl_it_key(&it)); return ngtcp2_range_len(&m) == 0; } diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_gaptr.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_gaptr.h index 0f100a81c4286c..3120676cf849d4 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_gaptr.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_gaptr.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -39,8 +39,9 @@ * ngtcp2_gaptr maintains the gap in the range [0, UINT64_MAX). */ typedef struct ngtcp2_gaptr { - /* gap maintains the range of offset which is not received - yet. Initially, its range is [0, UINT64_MAX). */ + /* gap maintains the range of offset which is not pushed + yet. Initially, its range is [0, UINT64_MAX). "gap" is the range + that is not pushed yet. */ ngtcp2_ksl gap; /* mem is custom memory allocator */ const ngtcp2_mem *mem; @@ -57,8 +58,7 @@ void ngtcp2_gaptr_init(ngtcp2_gaptr *gaptr, const ngtcp2_mem *mem); void ngtcp2_gaptr_free(ngtcp2_gaptr *gaptr); /* - * ngtcp2_gaptr_push adds new data of length |datalen| at the stream - * offset |offset|. + * ngtcp2_gaptr_push pushes the range [offset, offset + datalen). * * This function returns 0 if it succeeds, or one of the following * negative error codes: @@ -72,20 +72,20 @@ int ngtcp2_gaptr_push(ngtcp2_gaptr *gaptr, uint64_t offset, uint64_t datalen); * ngtcp2_gaptr_first_gap_offset returns the offset to the first gap. * If there is no gap, it returns UINT64_MAX. */ -uint64_t ngtcp2_gaptr_first_gap_offset(ngtcp2_gaptr *gaptr); +uint64_t ngtcp2_gaptr_first_gap_offset(const ngtcp2_gaptr *gaptr); /* * ngtcp2_gaptr_get_first_gap_after returns the first gap which - * overlaps or comes after |offset|. + * includes or comes after |offset|. */ -ngtcp2_range ngtcp2_gaptr_get_first_gap_after(ngtcp2_gaptr *gaptr, +ngtcp2_range ngtcp2_gaptr_get_first_gap_after(const ngtcp2_gaptr *gaptr, uint64_t offset); /* * ngtcp2_gaptr_is_pushed returns nonzero if range [offset, offset + * datalen) is completely pushed into this object. */ -int ngtcp2_gaptr_is_pushed(ngtcp2_gaptr *gaptr, uint64_t offset, +int ngtcp2_gaptr_is_pushed(const ngtcp2_gaptr *gaptr, uint64_t offset, uint64_t datalen); /* @@ -95,4 +95,4 @@ int ngtcp2_gaptr_is_pushed(ngtcp2_gaptr *gaptr, uint64_t offset, */ void ngtcp2_gaptr_drop_first_gap(ngtcp2_gaptr *gaptr); -#endif /* NGTCP2_GAPTR_H */ +#endif /* !defined(NGTCP2_GAPTR_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_idtr.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_idtr.c index d9880227690faf..2cf9d3cbfd1393 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_idtr.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_idtr.c @@ -26,10 +26,8 @@ #include -void ngtcp2_idtr_init(ngtcp2_idtr *idtr, int server, const ngtcp2_mem *mem) { +void ngtcp2_idtr_init(ngtcp2_idtr *idtr, const ngtcp2_mem *mem) { ngtcp2_gaptr_init(&idtr->gap, mem); - - idtr->server = server; } void ngtcp2_idtr_free(ngtcp2_idtr *idtr) { @@ -41,8 +39,7 @@ void ngtcp2_idtr_free(ngtcp2_idtr *idtr) { } /* - * id_from_stream_id translates |stream_id| to id space used by - * ngtcp2_idtr. + * id_from_stream_id translates |stream_id| to an internal ID. */ static uint64_t id_from_stream_id(int64_t stream_id) { return (uint64_t)(stream_id >> 2); @@ -51,9 +48,6 @@ static uint64_t id_from_stream_id(int64_t stream_id) { int ngtcp2_idtr_open(ngtcp2_idtr *idtr, int64_t stream_id) { uint64_t q; - assert((idtr->server && (stream_id % 2)) || - (!idtr->server && (stream_id % 2)) == 0); - q = id_from_stream_id(stream_id); if (ngtcp2_gaptr_is_pushed(&idtr->gap, q, 1)) { @@ -63,17 +57,10 @@ int ngtcp2_idtr_open(ngtcp2_idtr *idtr, int64_t stream_id) { return ngtcp2_gaptr_push(&idtr->gap, q, 1); } -int ngtcp2_idtr_is_open(ngtcp2_idtr *idtr, int64_t stream_id) { +int ngtcp2_idtr_is_open(const ngtcp2_idtr *idtr, int64_t stream_id) { uint64_t q; - assert((idtr->server && (stream_id % 2)) || - (!idtr->server && (stream_id % 2)) == 0); - q = id_from_stream_id(stream_id); return ngtcp2_gaptr_is_pushed(&idtr->gap, q, 1); } - -uint64_t ngtcp2_idtr_first_gap(ngtcp2_idtr *idtr) { - return ngtcp2_gaptr_first_gap_offset(&idtr->gap); -} diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_idtr.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_idtr.h index edb8c68c8db9b5..0671f5ed91a5cd 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_idtr.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_idtr.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -38,21 +38,17 @@ * ngtcp2_idtr tracks the usage of stream ID. */ typedef struct ngtcp2_idtr { - /* gap maintains the range of ID which is not used yet. Initially, - its range is [0, UINT64_MAX). */ + /* gap maintains the range of an internal ID which is not used yet. + Initially, its range is [0, UINT64_MAX). The internal ID and + stream ID are in the different number spaces. See + id_from_stream_id to convert a stream ID to an internal ID. */ ngtcp2_gaptr gap; - /* server is nonzero if this object records server initiated stream - ID. */ - int server; } ngtcp2_idtr; /* * ngtcp2_idtr_init initializes |idtr|. - * - * If this object records server initiated ID (even number), set - * |server| to nonzero. */ -void ngtcp2_idtr_init(ngtcp2_idtr *idtr, int server, const ngtcp2_mem *mem); +void ngtcp2_idtr_init(ngtcp2_idtr *idtr, const ngtcp2_mem *mem); /* * ngtcp2_idtr_free frees resources allocated for |idtr|. @@ -60,30 +56,21 @@ void ngtcp2_idtr_init(ngtcp2_idtr *idtr, int server, const ngtcp2_mem *mem); void ngtcp2_idtr_free(ngtcp2_idtr *idtr); /* - * ngtcp2_idtr_open claims that |stream_id| is in used. + * ngtcp2_idtr_open claims that |stream_id| is in use. * * It returns 0 if it succeeds, or one of the following negative error * codes: * * NGTCP2_ERR_STREAM_IN_USE - * ID has already been used. + * |stream_id| has already been used. * NGTCP2_ERR_NOMEM * Out of memory. */ int ngtcp2_idtr_open(ngtcp2_idtr *idtr, int64_t stream_id); /* - * ngtcp2_idtr_open tells whether ID |stream_id| is in used or not. - * - * It returns nonzero if |stream_id| is used. - */ -int ngtcp2_idtr_is_open(ngtcp2_idtr *idtr, int64_t stream_id); - -/* - * ngtcp2_idtr_first_gap returns the first id of first gap. If there - * is no gap, it returns UINT64_MAX. The returned id is an id space - * used in this object internally, and not stream ID. + * ngtcp2_idtr_open returns nonzero if |stream_id| is in use. */ -uint64_t ngtcp2_idtr_first_gap(ngtcp2_idtr *idtr); +int ngtcp2_idtr_is_open(const ngtcp2_idtr *idtr, int64_t stream_id); -#endif /* NGTCP2_IDTR_H */ +#endif /* !defined(NGTCP2_IDTR_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_ksl.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_ksl.c index 0ccc048b5b16b1..5e74f647241816 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_ksl.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_ksl.c @@ -35,11 +35,13 @@ static ngtcp2_ksl_blk null_blk = {{{NULL, NULL, 0, 0, {0}}}}; -ngtcp2_objalloc_def(ksl_blk, ngtcp2_ksl_blk, oplent); +ngtcp2_objalloc_def(ksl_blk, ngtcp2_ksl_blk, oplent) static size_t ksl_nodelen(size_t keylen) { - return (sizeof(ngtcp2_ksl_node) + keylen - sizeof(uint64_t) + 0xfu) & - ~(uintptr_t)0xfu; + assert(keylen >= sizeof(uint64_t)); + + return (sizeof(ngtcp2_ksl_node) + keylen - sizeof(uint64_t) + 0x7u) & + ~(uintptr_t)0x7u; } static size_t ksl_blklen(size_t nodelen) { @@ -55,20 +57,21 @@ static void ksl_node_set_key(ngtcp2_ksl *ksl, ngtcp2_ksl_node *node, memcpy(node->key, key, ksl->keylen); } -void ngtcp2_ksl_init(ngtcp2_ksl *ksl, ngtcp2_ksl_compar compar, size_t keylen, +void ngtcp2_ksl_init(ngtcp2_ksl *ksl, ngtcp2_ksl_compar compar, + ngtcp2_ksl_search search, size_t keylen, const ngtcp2_mem *mem) { size_t nodelen = ksl_nodelen(keylen); ngtcp2_objalloc_init(&ksl->blkalloc, - ((ksl_blklen(nodelen) + 0xfu) & ~(uintptr_t)0xfu) * 8, - mem); + (ksl_blklen(nodelen) + 0xfu) & ~(uintptr_t)0xfu, mem); ksl->head = NULL; ksl->front = ksl->back = NULL; ksl->compar = compar; + ksl->search = search; + ksl->n = 0; ksl->keylen = keylen; ksl->nodelen = nodelen; - ksl->n = 0; } static ngtcp2_ksl_blk *ksl_blk_objalloc_new(ngtcp2_ksl *ksl) { @@ -82,6 +85,7 @@ static void ksl_blk_objalloc_del(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk) { static int ksl_head_init(ngtcp2_ksl *ksl) { ngtcp2_ksl_blk *head = ksl_blk_objalloc_new(ksl); + if (!head) { return NGTCP2_ERR_NOMEM; } @@ -111,7 +115,7 @@ static void ksl_free_blk(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk) { ksl_blk_objalloc_del(ksl, blk); } -#endif /* NOMEMPOOL */ +#endif /* defined(NOMEMPOOL) */ void ngtcp2_ksl_free(ngtcp2_ksl *ksl) { if (!ksl || !ksl->head) { @@ -120,7 +124,7 @@ void ngtcp2_ksl_free(ngtcp2_ksl *ksl) { #ifdef NOMEMPOOL ksl_free_blk(ksl, ksl->head); -#endif /* NOMEMPOOL */ +#endif /* defined(NOMEMPOOL) */ ngtcp2_objalloc_free(&ksl->blkalloc); } @@ -143,21 +147,22 @@ static ngtcp2_ksl_blk *ksl_split_blk(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk) { rblk->next = blk->next; blk->next = rblk; + if (rblk->next) { rblk->next->prev = rblk; } else if (ksl->back == blk) { ksl->back = rblk; } + rblk->prev = blk; rblk->leaf = blk->leaf; rblk->n = blk->n / 2; + blk->n -= rblk->n; - memcpy(rblk->nodes, blk->nodes + ksl->nodelen * (blk->n - rblk->n), + memcpy(rblk->nodes, blk->nodes + ksl->nodelen * blk->n, ksl->nodelen * rblk->n); - blk->n -= rblk->n; - assert(blk->n >= NGTCP2_KSL_MIN_NBLK); assert(rblk->n >= NGTCP2_KSL_MIN_NBLK); @@ -173,7 +178,7 @@ static ngtcp2_ksl_blk *ksl_split_blk(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk) { * codes: * * NGTCP2_ERR_NOMEM - * Out of memory. + * Out of memory. */ static int ksl_split_node(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t i) { ngtcp2_ksl_node *node; @@ -207,7 +212,7 @@ static int ksl_split_node(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t i) { * codes: * * NGTCP2_ERR_NOMEM - * Out of memory. + * Out of memory. */ static int ksl_split_head(ngtcp2_ksl *ksl) { ngtcp2_ksl_blk *rblk = NULL, *lblk, *nhead = NULL; @@ -221,10 +226,12 @@ static int ksl_split_head(ngtcp2_ksl *ksl) { lblk = ksl->head; nhead = ksl_blk_objalloc_new(ksl); + if (nhead == NULL) { ksl_blk_objalloc_del(ksl, rblk); return NGTCP2_ERR_NOMEM; } + nhead->next = nhead->prev = NULL; nhead->n = 2; nhead->leaf = 0; @@ -243,9 +250,9 @@ static int ksl_split_head(ngtcp2_ksl *ksl) { } /* - * insert_node inserts a node whose key is |key| with the associated - * |data| at the index of |i|. This function assumes that the number - * of nodes contained by |blk| is strictly less than + * ksl_insert_node inserts a node whose key is |key| with the + * associated |data| at the index of |i|. This function assumes that + * the number of nodes contained by |blk| is strictly less than * NGTCP2_KSL_MAX_NBLK. */ static void ksl_insert_node(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t i, @@ -264,19 +271,6 @@ static void ksl_insert_node(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t i, ++blk->n; } -static size_t ksl_bsearch(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, - const ngtcp2_ksl_key *key, ngtcp2_ksl_compar compar) { - size_t i; - ngtcp2_ksl_node *node; - - for (i = 0, node = (ngtcp2_ksl_node *)(void *)blk->nodes; - i < blk->n && compar((ngtcp2_ksl_key *)node->key, key); - ++i, node = (ngtcp2_ksl_node *)(void *)((uint8_t *)node + ksl->nodelen)) - ; - - return i; -} - int ngtcp2_ksl_insert(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it, const ngtcp2_ksl_key *key, void *data) { ngtcp2_ksl_blk *blk; @@ -291,18 +285,17 @@ int ngtcp2_ksl_insert(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it, } } - blk = ksl->head; - - if (blk->n == NGTCP2_KSL_MAX_NBLK) { + if (ksl->head->n == NGTCP2_KSL_MAX_NBLK) { rv = ksl_split_head(ksl); if (rv != 0) { return rv; } - blk = ksl->head; } + blk = ksl->head; + for (;;) { - i = ksl_bsearch(ksl, blk, key, ksl->compar); + i = ksl->search(ksl, blk, key); if (blk->leaf) { if (i < blk->n && @@ -310,13 +303,17 @@ int ngtcp2_ksl_insert(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it, if (it) { *it = ngtcp2_ksl_end(ksl); } + return NGTCP2_ERR_INVALID_ARGUMENT; } + ksl_insert_node(ksl, blk, i, key, data); ++ksl->n; + if (it) { ngtcp2_ksl_it_init(it, ksl, blk, i); } + return 0; } @@ -329,16 +326,21 @@ int ngtcp2_ksl_insert(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it, if (rv != 0) { return rv; } + node = ngtcp2_ksl_nth_node(ksl, blk, blk->n - 1); } + ksl_node_set_key(ksl, node, key); blk = node->blk; } + ksl_insert_node(ksl, blk, blk->n, key, data); ++ksl->n; + if (it) { ngtcp2_ksl_it_init(it, ksl, blk, blk->n - 1); } + return 0; } @@ -349,8 +351,10 @@ int ngtcp2_ksl_insert(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it, if (rv != 0) { return rv; } + if (ksl->compar((ngtcp2_ksl_key *)node->key, key)) { node = ngtcp2_ksl_nth_node(ksl, blk, i + 1); + if (ksl->compar((ngtcp2_ksl_key *)node->key, key)) { ksl_node_set_key(ksl, node, key); } @@ -376,19 +380,22 @@ static void ksl_remove_node(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t i) { * ksl_merge_node merges 2 nodes which are the nodes at the index of * |i| and |i + 1|. * - * If |blk| is the direct descendant of head (root) block and the head - * block contains just 2 nodes, the merged block becomes head block, - * which decreases the height of |ksl| by 1. + * If |blk| is the head (root) block and it contains just 2 nodes + * before merging nodes, the merged block becomes head block, which + * decreases the height of |ksl| by 1. * * This function returns the pointer to the merged block. */ static ngtcp2_ksl_blk *ksl_merge_node(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t i) { + ngtcp2_ksl_node *lnode; ngtcp2_ksl_blk *lblk, *rblk; assert(i + 1 < blk->n); - lblk = ngtcp2_ksl_nth_node(ksl, blk, i)->blk; + lnode = ngtcp2_ksl_nth_node(ksl, blk, i); + + lblk = lnode->blk; rblk = ngtcp2_ksl_nth_node(ksl, blk, i + 1)->blk; assert(lblk->n + rblk->n < NGTCP2_KSL_MAX_NBLK); @@ -398,6 +405,7 @@ static ngtcp2_ksl_blk *ksl_merge_node(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, lblk->n += rblk->n; lblk->next = rblk->next; + if (lblk->next) { lblk->next->prev = lblk; } else if (ksl->back == rblk) { @@ -411,7 +419,7 @@ static ngtcp2_ksl_blk *ksl_merge_node(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, ksl->head = lblk; } else { ksl_remove_node(ksl, blk, i + 1); - ksl_node_set_key(ksl, ngtcp2_ksl_nth_node(ksl, blk, i), + ksl_node_set_key(ksl, lnode, ngtcp2_ksl_nth_node(ksl, lblk, lblk->n - 1)->key); } @@ -425,6 +433,7 @@ static ngtcp2_ksl_blk *ksl_merge_node(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, */ static void ksl_shift_left(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t i) { ngtcp2_ksl_node *lnode, *rnode; + ngtcp2_ksl_blk *lblk, *rblk; size_t n; assert(i > 0); @@ -432,35 +441,37 @@ static void ksl_shift_left(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t i) { lnode = ngtcp2_ksl_nth_node(ksl, blk, i - 1); rnode = ngtcp2_ksl_nth_node(ksl, blk, i); - assert(lnode->blk->n < NGTCP2_KSL_MAX_NBLK); - assert(rnode->blk->n > NGTCP2_KSL_MIN_NBLK); + lblk = lnode->blk; + rblk = rnode->blk; + + assert(lblk->n < NGTCP2_KSL_MAX_NBLK); + assert(rblk->n > NGTCP2_KSL_MIN_NBLK); - n = (lnode->blk->n + rnode->blk->n + 1) / 2 - lnode->blk->n; + n = (lblk->n + rblk->n + 1) / 2 - lblk->n; assert(n > 0); - assert(lnode->blk->n <= NGTCP2_KSL_MAX_NBLK - n); - assert(rnode->blk->n >= NGTCP2_KSL_MIN_NBLK + n); + assert(lblk->n <= NGTCP2_KSL_MAX_NBLK - n); + assert(rblk->n >= NGTCP2_KSL_MIN_NBLK + n); - memcpy(lnode->blk->nodes + ksl->nodelen * lnode->blk->n, rnode->blk->nodes, - ksl->nodelen * n); + memcpy(lblk->nodes + ksl->nodelen * lblk->n, rblk->nodes, ksl->nodelen * n); - lnode->blk->n += (uint32_t)n; - rnode->blk->n -= (uint32_t)n; + lblk->n += (uint32_t)n; + rblk->n -= (uint32_t)n; - ksl_node_set_key( - ksl, lnode, ngtcp2_ksl_nth_node(ksl, lnode->blk, lnode->blk->n - 1)->key); + ksl_node_set_key(ksl, lnode, + ngtcp2_ksl_nth_node(ksl, lblk, lblk->n - 1)->key); - memmove(rnode->blk->nodes, rnode->blk->nodes + ksl->nodelen * n, - ksl->nodelen * rnode->blk->n); + memmove(rblk->nodes, rblk->nodes + ksl->nodelen * n, ksl->nodelen * rblk->n); } /* * ksl_shift_right moves the last nodes in blk->nodes[i]->blk->nodes * to blk->nodes[i + 1]->blk->nodes in a manner that they have the - * same amount of nodes as much as possible.. + * same amount of nodes as much as possible. */ static void ksl_shift_right(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t i) { ngtcp2_ksl_node *lnode, *rnode; + ngtcp2_ksl_blk *lblk, *rblk; size_t n; assert(i < blk->n - 1); @@ -468,26 +479,27 @@ static void ksl_shift_right(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t i) { lnode = ngtcp2_ksl_nth_node(ksl, blk, i); rnode = ngtcp2_ksl_nth_node(ksl, blk, i + 1); - assert(lnode->blk->n > NGTCP2_KSL_MIN_NBLK); - assert(rnode->blk->n < NGTCP2_KSL_MAX_NBLK); + lblk = lnode->blk; + rblk = rnode->blk; - n = (lnode->blk->n + rnode->blk->n + 1) / 2 - rnode->blk->n; + assert(lblk->n > NGTCP2_KSL_MIN_NBLK); + assert(rblk->n < NGTCP2_KSL_MAX_NBLK); + + n = (lblk->n + rblk->n + 1) / 2 - rblk->n; assert(n > 0); - assert(lnode->blk->n >= NGTCP2_KSL_MIN_NBLK + n); - assert(rnode->blk->n <= NGTCP2_KSL_MAX_NBLK - n); + assert(lblk->n >= NGTCP2_KSL_MIN_NBLK + n); + assert(rblk->n <= NGTCP2_KSL_MAX_NBLK - n); - memmove(rnode->blk->nodes + ksl->nodelen * n, rnode->blk->nodes, - ksl->nodelen * rnode->blk->n); + memmove(rblk->nodes + ksl->nodelen * n, rblk->nodes, ksl->nodelen * rblk->n); - rnode->blk->n += (uint32_t)n; - lnode->blk->n -= (uint32_t)n; + rblk->n += (uint32_t)n; + lblk->n -= (uint32_t)n; - memcpy(rnode->blk->nodes, lnode->blk->nodes + ksl->nodelen * lnode->blk->n, - ksl->nodelen * n); + memcpy(rblk->nodes, lblk->nodes + ksl->nodelen * lblk->n, ksl->nodelen * n); - ksl_node_set_key( - ksl, lnode, ngtcp2_ksl_nth_node(ksl, lnode->blk, lnode->blk->n - 1)->key); + ksl_node_set_key(ksl, lnode, + ngtcp2_ksl_nth_node(ksl, lblk, lblk->n - 1)->key); } /* @@ -531,23 +543,24 @@ int ngtcp2_ksl_remove(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it, ngtcp2_ksl_node *node; size_t i; - if (!ksl->head) { + if (!blk) { return NGTCP2_ERR_INVALID_ARGUMENT; } if (!blk->leaf && blk->n == 2 && ngtcp2_ksl_nth_node(ksl, blk, 0)->blk->n == NGTCP2_KSL_MIN_NBLK && ngtcp2_ksl_nth_node(ksl, blk, 1)->blk->n == NGTCP2_KSL_MIN_NBLK) { - blk = ksl_merge_node(ksl, ksl->head, 0); + blk = ksl_merge_node(ksl, blk, 0); } for (;;) { - i = ksl_bsearch(ksl, blk, key, ksl->compar); + i = ksl->search(ksl, blk, key); if (i == blk->n) { if (it) { *it = ngtcp2_ksl_end(ksl); } + return NGTCP2_ERR_INVALID_ARGUMENT; } @@ -556,10 +569,13 @@ int ngtcp2_ksl_remove(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it, if (it) { *it = ngtcp2_ksl_end(ksl); } + return NGTCP2_ERR_INVALID_ARGUMENT; } + ksl_remove_node(ksl, blk, i); --ksl->n; + if (it) { if (blk->n == i && blk->next) { ngtcp2_ksl_it_init(it, ksl, blk->next, 0); @@ -567,6 +583,7 @@ int ngtcp2_ksl_remove(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it, ngtcp2_ksl_it_init(it, ksl, blk, i); } } + return 0; } @@ -583,6 +600,7 @@ int ngtcp2_ksl_remove(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it, ngtcp2_ksl_nth_node(ksl, blk, i + 1)->blk->n > NGTCP2_KSL_MIN_NBLK) { ksl_shift_left(ksl, blk, i + 1); blk = node->blk; + continue; } @@ -590,6 +608,7 @@ int ngtcp2_ksl_remove(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it, ngtcp2_ksl_nth_node(ksl, blk, i - 1)->blk->n > NGTCP2_KSL_MIN_NBLK) { ksl_shift_right(ksl, blk, i - 1); blk = node->blk; + continue; } @@ -604,50 +623,14 @@ int ngtcp2_ksl_remove(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it, } } -ngtcp2_ksl_it ngtcp2_ksl_lower_bound(ngtcp2_ksl *ksl, +ngtcp2_ksl_it ngtcp2_ksl_lower_bound(const ngtcp2_ksl *ksl, const ngtcp2_ksl_key *key) { - ngtcp2_ksl_blk *blk = ksl->head; - ngtcp2_ksl_it it; - size_t i; - - if (!blk) { - ngtcp2_ksl_it_init(&it, ksl, &null_blk, 0); - return it; - } - - for (;;) { - i = ksl_bsearch(ksl, blk, key, ksl->compar); - - if (blk->leaf) { - if (i == blk->n && blk->next) { - blk = blk->next; - i = 0; - } - ngtcp2_ksl_it_init(&it, ksl, blk, i); - return it; - } - - if (i == blk->n) { - /* This happens if descendant has smaller key. Fast forward to - find last node in this subtree. */ - for (; !blk->leaf; blk = ngtcp2_ksl_nth_node(ksl, blk, blk->n - 1)->blk) - ; - if (blk->next) { - blk = blk->next; - i = 0; - } else { - i = blk->n; - } - ngtcp2_ksl_it_init(&it, ksl, blk, i); - return it; - } - blk = ngtcp2_ksl_nth_node(ksl, blk, i)->blk; - } + return ngtcp2_ksl_lower_bound_search(ksl, key, ksl->search); } -ngtcp2_ksl_it ngtcp2_ksl_lower_bound_compar(ngtcp2_ksl *ksl, +ngtcp2_ksl_it ngtcp2_ksl_lower_bound_search(const ngtcp2_ksl *ksl, const ngtcp2_ksl_key *key, - ngtcp2_ksl_compar compar) { + ngtcp2_ksl_search search) { ngtcp2_ksl_blk *blk = ksl->head; ngtcp2_ksl_it it; size_t i; @@ -658,14 +641,16 @@ ngtcp2_ksl_it ngtcp2_ksl_lower_bound_compar(ngtcp2_ksl *ksl, } for (;;) { - i = ksl_bsearch(ksl, blk, key, compar); + i = search(ksl, blk, key); if (blk->leaf) { if (i == blk->n && blk->next) { blk = blk->next; i = 0; } + ngtcp2_ksl_it_init(&it, ksl, blk, i); + return it; } @@ -674,15 +659,19 @@ ngtcp2_ksl_it ngtcp2_ksl_lower_bound_compar(ngtcp2_ksl *ksl, find last node in this subtree. */ for (; !blk->leaf; blk = ngtcp2_ksl_nth_node(ksl, blk, blk->n - 1)->blk) ; + if (blk->next) { blk = blk->next; i = 0; } else { i = blk->n; } + ngtcp2_ksl_it_init(&it, ksl, blk, i); + return it; } + blk = ngtcp2_ksl_nth_node(ksl, blk, i)->blk; } } @@ -696,7 +685,7 @@ void ngtcp2_ksl_update_key(ngtcp2_ksl *ksl, const ngtcp2_ksl_key *old_key, assert(ksl->head); for (;;) { - i = ksl_bsearch(ksl, blk, old_key, ksl->compar); + i = ksl->search(ksl, blk, old_key); assert(i < blk->n); node = ngtcp2_ksl_nth_node(ksl, blk, i); @@ -704,6 +693,7 @@ void ngtcp2_ksl_update_key(ngtcp2_ksl *ksl, const ngtcp2_ksl_key *old_key, if (blk->leaf) { assert(key_equal(ksl->compar, (ngtcp2_ksl_key *)node->key, old_key)); ksl_node_set_key(ksl, node, new_key); + return; } @@ -716,7 +706,7 @@ void ngtcp2_ksl_update_key(ngtcp2_ksl *ksl, const ngtcp2_ksl_key *old_key, } } -size_t ngtcp2_ksl_len(ngtcp2_ksl *ksl) { return ksl->n; } +size_t ngtcp2_ksl_len(const ngtcp2_ksl *ksl) { return ksl->n; } void ngtcp2_ksl_clear(ngtcp2_ksl *ksl) { if (!ksl->head) { @@ -725,7 +715,7 @@ void ngtcp2_ksl_clear(ngtcp2_ksl *ksl) { #ifdef NOMEMPOOL ksl_free_blk(ksl, ksl->head); -#endif /* NOMEMPOOL */ +#endif /* defined(NOMEMPOOL) */ ksl->front = ksl->back = ksl->head = NULL; ksl->n = 0; @@ -734,7 +724,8 @@ void ngtcp2_ksl_clear(ngtcp2_ksl *ksl) { } #ifndef WIN32 -static void ksl_print(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t level) { +static void ksl_print(const ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, + size_t level) { size_t i; ngtcp2_ksl_node *node; @@ -745,7 +736,9 @@ static void ksl_print(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t level) { node = ngtcp2_ksl_nth_node(ksl, blk, i); fprintf(stderr, " %" PRId64, *(int64_t *)(void *)node->key); } + fprintf(stderr, "\n"); + return; } @@ -754,14 +747,14 @@ static void ksl_print(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t level) { } } -void ngtcp2_ksl_print(ngtcp2_ksl *ksl) { +void ngtcp2_ksl_print(const ngtcp2_ksl *ksl) { if (!ksl->head) { return; } ksl_print(ksl, ksl->head, 0); } -#endif /* !WIN32 */ +#endif /* !defined(WIN32) */ ngtcp2_ksl_it ngtcp2_ksl_begin(const ngtcp2_ksl *ksl) { ngtcp2_ksl_it it; @@ -815,9 +808,49 @@ int ngtcp2_ksl_range_compar(const ngtcp2_ksl_key *lhs, return a->begin < b->begin; } +ngtcp2_ksl_search_def(range, ngtcp2_ksl_range_compar) + +size_t ngtcp2_ksl_range_search(const ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, + const ngtcp2_ksl_key *key) { + return ksl_range_search(ksl, blk, key); +} + int ngtcp2_ksl_range_exclusive_compar(const ngtcp2_ksl_key *lhs, const ngtcp2_ksl_key *rhs) { const ngtcp2_range *a = lhs, *b = rhs; - return a->begin < b->begin && - !(ngtcp2_max(a->begin, b->begin) < ngtcp2_min(a->end, b->end)); + return a->begin < b->begin && !(ngtcp2_max_uint64(a->begin, b->begin) < + ngtcp2_min_uint64(a->end, b->end)); +} + +ngtcp2_ksl_search_def(range_exclusive, ngtcp2_ksl_range_exclusive_compar) + +size_t ngtcp2_ksl_range_exclusive_search(const ngtcp2_ksl *ksl, + ngtcp2_ksl_blk *blk, + const ngtcp2_ksl_key *key) { + return ksl_range_exclusive_search(ksl, blk, key); +} + +int ngtcp2_ksl_uint64_less(const ngtcp2_ksl_key *lhs, + const ngtcp2_ksl_key *rhs) { + return *(uint64_t *)lhs < *(uint64_t *)rhs; +} + +ngtcp2_ksl_search_def(uint64_less, ngtcp2_ksl_uint64_less) + +size_t ngtcp2_ksl_uint64_less_search(const ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, + const ngtcp2_ksl_key *key) { + return ksl_uint64_less_search(ksl, blk, key); +} + +int ngtcp2_ksl_int64_greater(const ngtcp2_ksl_key *lhs, + const ngtcp2_ksl_key *rhs) { + return *(int64_t *)lhs > *(int64_t *)rhs; +} + +ngtcp2_ksl_search_def(int64_greater, ngtcp2_ksl_int64_greater) + +size_t ngtcp2_ksl_int64_greater_search(const ngtcp2_ksl *ksl, + ngtcp2_ksl_blk *blk, + const ngtcp2_ksl_key *key) { + return ksl_int64_greater_search(ksl, blk, key); } diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_ksl.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_ksl.h index 7e08f15cdae6e8..de78bcb8070f53 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_ksl.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_ksl.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -35,16 +35,12 @@ #include "ngtcp2_objalloc.h" -/* - * Skip List using single key instead of range. - */ - #define NGTCP2_KSL_DEGR 16 /* NGTCP2_KSL_MAX_NBLK is the maximum number of nodes which a single block can contain. */ #define NGTCP2_KSL_MAX_NBLK (2 * NGTCP2_KSL_DEGR - 1) /* NGTCP2_KSL_MIN_NBLK is the minimum number of nodes which a single - block other than root must contains. */ + block other than root must contain. */ #define NGTCP2_KSL_MIN_NBLK (NGTCP2_KSL_DEGR - 1) /* @@ -85,7 +81,8 @@ struct ngtcp2_ksl_blk { struct { /* next points to the next block if leaf field is nonzero. */ ngtcp2_ksl_blk *next; - /* prev points to the previous block if leaf field is nonzero. */ + /* prev points to the previous block if leaf field is + nonzero. */ ngtcp2_ksl_blk *prev; /* n is the number of nodes this object contains in nodes. */ uint32_t n; @@ -106,7 +103,7 @@ struct ngtcp2_ksl_blk { }; }; -ngtcp2_objalloc_decl(ksl_blk, ngtcp2_ksl_blk, oplent); +ngtcp2_objalloc_decl(ksl_blk, ngtcp2_ksl_blk, oplent) /* * ngtcp2_ksl_compar is a function type which returns nonzero if key @@ -117,10 +114,37 @@ typedef int (*ngtcp2_ksl_compar)(const ngtcp2_ksl_key *lhs, typedef struct ngtcp2_ksl ngtcp2_ksl; +/* + * ngtcp2_ksl_search is a function to search for the first element in + * |blk|->nodes which is not ordered before |key|. It returns the + * index of such element. It returns |blk|->n if there is no such + * element. + */ +typedef size_t (*ngtcp2_ksl_search)(const ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, + const ngtcp2_ksl_key *key); + +/* + * ngtcp2_ksl_search_def is a macro to implement ngtcp2_ksl_search + * with COMPAR which is supposed to be ngtcp2_ksl_compar. + */ +#define ngtcp2_ksl_search_def(NAME, COMPAR) \ + static size_t ksl_##NAME##_search( \ + const ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, const ngtcp2_ksl_key *key) { \ + size_t i; \ + ngtcp2_ksl_node *node; \ + \ + for (i = 0, node = (ngtcp2_ksl_node *)(void *)blk->nodes; \ + i < blk->n && COMPAR((ngtcp2_ksl_key *)node->key, key); ++i, \ + node = (ngtcp2_ksl_node *)(void *)((uint8_t *)node + ksl->nodelen)) \ + ; \ + \ + return i; \ + } + typedef struct ngtcp2_ksl_it ngtcp2_ksl_it; /* - * ngtcp2_ksl_it is a forward iterator to iterate nodes. + * ngtcp2_ksl_it is a bidirectional iterator to iterate nodes. */ struct ngtcp2_ksl_it { const ngtcp2_ksl *ksl; @@ -140,6 +164,8 @@ struct ngtcp2_ksl { /* back points to the last leaf block. */ ngtcp2_ksl_blk *back; ngtcp2_ksl_compar compar; + ngtcp2_ksl_search search; + /* n is the number of elements stored. */ size_t n; /* keylen is the size of key */ size_t keylen; @@ -150,9 +176,12 @@ struct ngtcp2_ksl { /* * ngtcp2_ksl_init initializes |ksl|. |compar| specifies compare - * function. |keylen| is the length of key. + * function. |search| is a search function which must use |compar|. + * |keylen| is the length of key and must be at least + * sizeof(uint64_t). */ -void ngtcp2_ksl_init(ngtcp2_ksl *ksl, ngtcp2_ksl_compar compar, size_t keylen, +void ngtcp2_ksl_init(ngtcp2_ksl *ksl, ngtcp2_ksl_compar compar, + ngtcp2_ksl_search search, size_t keylen, const ngtcp2_mem *mem); /* @@ -165,15 +194,15 @@ void ngtcp2_ksl_free(ngtcp2_ksl *ksl); /* * ngtcp2_ksl_insert inserts |key| with its associated |data|. On * successful insertion, the iterator points to the inserted node is - * stored in |*it|. + * stored in |*it| if |it| is not NULL. * * This function returns 0 if it succeeds, or one of the following * negative error codes: * * NGTCP2_ERR_NOMEM - * Out of memory. + * Out of memory. * NGTCP2_ERR_INVALID_ARGUMENT - * |key| already exists. + * |key| already exists. */ int ngtcp2_ksl_insert(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it, const ngtcp2_ksl_key *key, void *data); @@ -184,13 +213,14 @@ int ngtcp2_ksl_insert(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it, * This function assigns the iterator to |*it|, which points to the * node which is located at the right next of the removed node if |it| * is not NULL. If |key| is not found, no deletion takes place and - * the return value of ngtcp2_ksl_end(ksl) is assigned to |*it|. + * the return value of ngtcp2_ksl_end(ksl) is assigned to |*it| if + * |it| is not NULL. * * This function returns 0 if it succeeds, or one of the following * negative error codes: * * NGTCP2_ERR_INVALID_ARGUMENT - * |key| does not exist. + * |key| does not exist. */ int ngtcp2_ksl_remove(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it, const ngtcp2_ksl_key *key); @@ -213,16 +243,16 @@ int ngtcp2_ksl_remove_hint(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it, * node, it returns the iterator which satisfies ngtcp2_ksl_it_end(it) * != 0. */ -ngtcp2_ksl_it ngtcp2_ksl_lower_bound(ngtcp2_ksl *ksl, +ngtcp2_ksl_it ngtcp2_ksl_lower_bound(const ngtcp2_ksl *ksl, const ngtcp2_ksl_key *key); /* - * ngtcp2_ksl_lower_bound_compar works like ngtcp2_ksl_lower_bound, - * but it takes custom function |compar| to do lower bound search. + * ngtcp2_ksl_lower_bound_search works like ngtcp2_ksl_lower_bound, + * but it takes custom function |search| to do lower bound search. */ -ngtcp2_ksl_it ngtcp2_ksl_lower_bound_compar(ngtcp2_ksl *ksl, +ngtcp2_ksl_it ngtcp2_ksl_lower_bound_search(const ngtcp2_ksl *ksl, const ngtcp2_ksl_key *key, - ngtcp2_ksl_compar compar); + ngtcp2_ksl_search search); /* * ngtcp2_ksl_update_key replaces the key of nodes which has |old_key| @@ -235,7 +265,8 @@ void ngtcp2_ksl_update_key(ngtcp2_ksl *ksl, const ngtcp2_ksl_key *old_key, /* * ngtcp2_ksl_begin returns the iterator which points to the first * node. If there is no node in |ksl|, it returns the iterator which - * satisfies ngtcp2_ksl_it_end(it) != 0. + * satisfies both ngtcp2_ksl_it_begin(it) != 0 and + * ngtcp2_ksl_it_end(it) != 0. */ ngtcp2_ksl_it ngtcp2_ksl_begin(const ngtcp2_ksl *ksl); @@ -243,14 +274,15 @@ ngtcp2_ksl_it ngtcp2_ksl_begin(const ngtcp2_ksl *ksl); * ngtcp2_ksl_end returns the iterator which points to the node * following the last node. The returned object satisfies * ngtcp2_ksl_it_end(). If there is no node in |ksl|, it returns the - * iterator which satisfies ngtcp2_ksl_it_begin(it) != 0. + * iterator which satisfies ngtcp2_ksl_it_begin(it) != 0 and + * ngtcp2_ksl_it_end(it) != 0. */ ngtcp2_ksl_it ngtcp2_ksl_end(const ngtcp2_ksl *ksl); /* * ngtcp2_ksl_len returns the number of elements stored in |ksl|. */ -size_t ngtcp2_ksl_len(ngtcp2_ksl *ksl); +size_t ngtcp2_ksl_len(const ngtcp2_ksl *ksl); /* * ngtcp2_ksl_clear removes all elements stored in |ksl|. @@ -269,8 +301,8 @@ void ngtcp2_ksl_clear(ngtcp2_ksl *ksl); * that the key is of type int64_t. This function should be used for * the debugging purpose only. */ -void ngtcp2_ksl_print(ngtcp2_ksl *ksl); -#endif /* !WIN32 */ +void ngtcp2_ksl_print(const ngtcp2_ksl *ksl); +#endif /* !defined(WIN32) */ /* * ngtcp2_ksl_it_init initializes |it|. @@ -293,8 +325,8 @@ void ngtcp2_ksl_it_init(ngtcp2_ksl_it *it, const ngtcp2_ksl *ksl, */ #define ngtcp2_ksl_it_next(IT) \ (++(IT)->i == (IT)->blk->n && (IT)->blk->next \ - ? ((IT)->blk = (IT)->blk->next, (IT)->i = 0) \ - : 0) + ? ((IT)->blk = (IT)->blk->next, (IT)->i = 0) \ + : 0) /* * ngtcp2_ksl_it_prev moves backward the iterator by one. It is @@ -304,16 +336,16 @@ void ngtcp2_ksl_it_init(ngtcp2_ksl_it *it, const ngtcp2_ksl *ksl, void ngtcp2_ksl_it_prev(ngtcp2_ksl_it *it); /* - * ngtcp2_ksl_it_end returns nonzero if |it| points to the beyond the - * last node. + * ngtcp2_ksl_it_end returns nonzero if |it| points to the one beyond + * the last node. */ #define ngtcp2_ksl_it_end(IT) \ ((IT)->blk->n == (IT)->i && (IT)->blk->next == NULL) /* * ngtcp2_ksl_it_begin returns nonzero if |it| points to the first - * node. |it| might satisfy both ngtcp2_ksl_it_begin(&it) and - * ngtcp2_ksl_it_end(&it) if the skip list has no node. + * node. |it| might satisfy both ngtcp2_ksl_it_begin(it) != 0 and + * ngtcp2_ksl_it_end(it) != 0 if the skip list has no node. */ int ngtcp2_ksl_it_begin(const ngtcp2_ksl_it *it); @@ -327,21 +359,67 @@ int ngtcp2_ksl_it_begin(const ngtcp2_ksl_it *it); /* * ngtcp2_ksl_range_compar is an implementation of ngtcp2_ksl_compar. - * lhs->ptr and rhs->ptr must point to ngtcp2_range object and the - * function returns nonzero if (const ngtcp2_range *)(lhs->ptr)->begin - * < (const ngtcp2_range *)(rhs->ptr)->begin. + * |lhs| and |rhs| must point to ngtcp2_range object, and the function + * returns nonzero if ((const ngtcp2_range *)lhs)->begin < ((const + * ngtcp2_range *)rhs)->begin. */ int ngtcp2_ksl_range_compar(const ngtcp2_ksl_key *lhs, const ngtcp2_ksl_key *rhs); +/* + * ngtcp2_ksl_range_search is an implementation of ngtcp2_ksl_search + * that uses ngtcp2_ksl_range_compar. + */ +size_t ngtcp2_ksl_range_search(const ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, + const ngtcp2_ksl_key *key); + /* * ngtcp2_ksl_range_exclusive_compar is an implementation of - * ngtcp2_ksl_compar. lhs->ptr and rhs->ptr must point to - * ngtcp2_range object and the function returns nonzero if (const - * ngtcp2_range *)(lhs->ptr)->begin < (const ngtcp2_range - * *)(rhs->ptr)->begin and the 2 ranges do not intersect. + * ngtcp2_ksl_compar. |lhs| and |rhs| must point to ngtcp2_range + * object, and the function returns nonzero if ((const ngtcp2_range + * *)lhs)->begin < ((const ngtcp2_range *)rhs)->begin, and the 2 + * ranges do not intersect. */ int ngtcp2_ksl_range_exclusive_compar(const ngtcp2_ksl_key *lhs, const ngtcp2_ksl_key *rhs); -#endif /* NGTCP2_KSL_H */ +/* + * ngtcp2_ksl_range_exclusive_search is an implementation of + * ngtcp2_ksl_search that uses ngtcp2_ksl_range_exclusive_compar. + */ +size_t ngtcp2_ksl_range_exclusive_search(const ngtcp2_ksl *ksl, + ngtcp2_ksl_blk *blk, + const ngtcp2_ksl_key *key); + +/* + * ngtcp2_ksl_uint64_less is an implementation of ngtcp2_ksl_compar. + * |lhs| and |rhs| must point to uint64_t objects, and the function + * returns nonzero if *(uint64_t *)|lhs| < *(uint64_t *)|rhs|. + */ +int ngtcp2_ksl_uint64_less(const ngtcp2_ksl_key *lhs, + const ngtcp2_ksl_key *rhs); + +/* + * ngtcp2_ksl_uint64_less_search is an implementation of + * ngtcp2_ksl_search that uses ngtcp2_ksl_uint64_less. + */ +size_t ngtcp2_ksl_uint64_less_search(const ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, + const ngtcp2_ksl_key *key); + +/* + * ngtcp2_ksl_int64_greater is an implementation of ngtcp2_ksl_compar. + * |lhs| and |rhs| must point to int64_t objects, and the function + * returns nonzero if *(int64_t *)|lhs| > *(int64_t *)|rhs|. + */ +int ngtcp2_ksl_int64_greater(const ngtcp2_ksl_key *lhs, + const ngtcp2_ksl_key *rhs); + +/* + * ngtcp2_ksl_int64_greater_search is an implementation of + * ngtcp2_ksl_search that uses ngtcp2_ksl_int64_greater. + */ +size_t ngtcp2_ksl_int64_greater_search(const ngtcp2_ksl *ksl, + ngtcp2_ksl_blk *blk, + const ngtcp2_ksl_key *key); + +#endif /* !defined(NGTCP2_KSL_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_log.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_log.c index 93922a29c319f4..fc4eb443517d40 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_log.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_log.c @@ -27,7 +27,7 @@ #include #ifdef HAVE_UNISTD_H # include -#endif +#endif /* defined(HAVE_UNISTD_H) */ #include #include @@ -99,11 +99,11 @@ void ngtcp2_log_init(ngtcp2_log *log, const ngtcp2_cid *scid, #define NGTCP2_LOG_FRM_HD_FIELDS(DIR) \ timestamp_cast(log->last_ts - log->ts), (const char *)log->scid, "frm", \ - (DIR), hd->pkt_num, strpkttype(hd) + (DIR), hd->pkt_num, strpkttype(hd) #define NGTCP2_LOG_PKT_HD_FIELDS(DIR) \ timestamp_cast(log->last_ts - log->ts), (const char *)log->scid, "pkt", \ - (DIR), hd->pkt_num, strpkttype(hd) + (DIR), hd->pkt_num, strpkttype(hd) #define NGTCP2_LOG_TP_HD_FIELDS \ timestamp_cast(log->last_ts - log->ts), (const char *)log->scid, "cry" @@ -223,12 +223,12 @@ static uint64_t timestamp_cast(uint64_t ns) { return ns / NGTCP2_MILLISECONDS; } static void log_fr_stream(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, const ngtcp2_stream *fr, const char *dir) { log->log_printf( - log->user_data, - (NGTCP2_LOG_PKT " STREAM(0x%02" PRIx64 ") id=0x%" PRIx64 - " fin=%d offset=%" PRIu64 " len=%" PRIu64 " uni=%d"), - NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type | fr->flags, fr->stream_id, - fr->fin, fr->offset, ngtcp2_vec_len(fr->data, fr->datacnt), - (fr->stream_id & 0x2) != 0); + log->user_data, + (NGTCP2_LOG_PKT " STREAM(0x%02" PRIx64 ") id=0x%" PRIx64 + " fin=%d offset=%" PRIu64 " len=%" PRIu64 " uni=%d"), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type | fr->flags, fr->stream_id, fr->fin, + fr->offset, ngtcp2_vec_len(fr->data, fr->datacnt), + (fr->stream_id & 0x2) != 0); } static void log_fr_ack(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, @@ -236,13 +236,12 @@ static void log_fr_ack(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, int64_t largest_ack, min_ack; size_t i; - log->log_printf(log->user_data, - (NGTCP2_LOG_PKT " ACK(0x%02" PRIx64 ") largest_ack=%" PRId64 - " ack_delay=%" PRIu64 "(%" PRIu64 - ") ack_range_count=%zu"), - NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->largest_ack, - fr->ack_delay_unscaled / NGTCP2_MILLISECONDS, fr->ack_delay, - fr->rangecnt); + log->log_printf( + log->user_data, + (NGTCP2_LOG_PKT " ACK(0x%02" PRIx64 ") largest_ack=%" PRId64 + " ack_delay=%" PRIu64 "(%" PRIu64 ") ack_range_count=%zu"), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->largest_ack, + fr->ack_delay_unscaled / NGTCP2_MILLISECONDS, fr->ack_delay, fr->rangecnt); largest_ack = fr->largest_ack; min_ack = fr->largest_ack - (int64_t)fr->first_ack_range; @@ -285,38 +284,37 @@ static void log_fr_reset_stream(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, const ngtcp2_reset_stream *fr, const char *dir) { log->log_printf( - log->user_data, - (NGTCP2_LOG_PKT " RESET_STREAM(0x%02" PRIx64 ") id=0x%" PRIx64 - " app_error_code=%s(0x%" PRIx64 ") final_size=%" PRIu64), - NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->stream_id, - strapperrorcode(fr->app_error_code), fr->app_error_code, fr->final_size); + log->user_data, + (NGTCP2_LOG_PKT " RESET_STREAM(0x%02" PRIx64 ") id=0x%" PRIx64 + " app_error_code=%s(0x%" PRIx64 ") final_size=%" PRIu64), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->stream_id, + strapperrorcode(fr->app_error_code), fr->app_error_code, fr->final_size); } static void log_fr_connection_close(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, const ngtcp2_connection_close *fr, const char *dir) { char reason[256]; - size_t reasonlen = ngtcp2_min(sizeof(reason) - 1, fr->reasonlen); + size_t reasonlen = ngtcp2_min_size(sizeof(reason) - 1, fr->reasonlen); - log->log_printf(log->user_data, - (NGTCP2_LOG_PKT " CONNECTION_CLOSE(0x%02" PRIx64 - ") error_code=%s(0x%" PRIx64 ") " - "frame_type=%" PRIx64 - " reason_len=%zu reason=[%s]"), - NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, - fr->type == NGTCP2_FRAME_CONNECTION_CLOSE - ? strerrorcode(fr->error_code) - : strapperrorcode(fr->error_code), - fr->error_code, fr->frame_type, fr->reasonlen, - ngtcp2_encode_printable_ascii(reason, fr->reason, reasonlen)); + log->log_printf( + log->user_data, + (NGTCP2_LOG_PKT " CONNECTION_CLOSE(0x%02" PRIx64 + ") error_code=%s(0x%" PRIx64 ") " + "frame_type=%" PRIx64 " reason_len=%zu reason=[%s]"), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, + fr->type == NGTCP2_FRAME_CONNECTION_CLOSE ? strerrorcode(fr->error_code) + : strapperrorcode(fr->error_code), + fr->error_code, fr->frame_type, fr->reasonlen, + ngtcp2_encode_printable_ascii(reason, fr->reason, reasonlen)); } static void log_fr_max_data(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, const ngtcp2_max_data *fr, const char *dir) { log->log_printf( - log->user_data, - (NGTCP2_LOG_PKT " MAX_DATA(0x%02" PRIx64 ") max_data=%" PRIu64), - NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->max_data); + log->user_data, + (NGTCP2_LOG_PKT " MAX_DATA(0x%02" PRIx64 ") max_data=%" PRIu64), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->max_data); } static void log_fr_max_stream_data(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, @@ -333,9 +331,9 @@ static void log_fr_max_stream_data(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, static void log_fr_max_streams(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, const ngtcp2_max_streams *fr, const char *dir) { log->log_printf( - log->user_data, - (NGTCP2_LOG_PKT " MAX_STREAMS(0x%02" PRIx64 ") max_streams=%" PRIu64), - NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->max_streams); + log->user_data, + (NGTCP2_LOG_PKT " MAX_STREAMS(0x%02" PRIx64 ") max_streams=%" PRIu64), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->max_streams); } static void log_fr_ping(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, @@ -348,9 +346,9 @@ static void log_fr_data_blocked(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, const ngtcp2_data_blocked *fr, const char *dir) { log->log_printf( - log->user_data, - (NGTCP2_LOG_PKT " DATA_BLOCKED(0x%02" PRIx64 ") offset=%" PRIu64), - NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->offset); + log->user_data, + (NGTCP2_LOG_PKT " DATA_BLOCKED(0x%02" PRIx64 ") offset=%" PRIu64), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->offset); } static void log_fr_stream_data_blocked(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, @@ -367,9 +365,9 @@ static void log_fr_streams_blocked(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, const ngtcp2_streams_blocked *fr, const char *dir) { log->log_printf( - log->user_data, - (NGTCP2_LOG_PKT " STREAMS_BLOCKED(0x%02" PRIx64 ") max_streams=%" PRIu64), - NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->max_streams); + log->user_data, + (NGTCP2_LOG_PKT " STREAMS_BLOCKED(0x%02" PRIx64 ") max_streams=%" PRIu64), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->max_streams); } static void log_fr_new_connection_id(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, @@ -379,15 +377,15 @@ static void log_fr_new_connection_id(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, uint8_t cid[sizeof(fr->cid.data) * 2 + 1]; log->log_printf( - log->user_data, - (NGTCP2_LOG_PKT " NEW_CONNECTION_ID(0x%02" PRIx64 ") seq=%" PRIu64 - " cid=0x%s retire_prior_to=%" PRIu64 - " stateless_reset_token=0x%s"), - NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->seq, - (const char *)ngtcp2_encode_hex(cid, fr->cid.data, fr->cid.datalen), - fr->retire_prior_to, - (const char *)ngtcp2_encode_hex(buf, fr->stateless_reset_token, - sizeof(fr->stateless_reset_token))); + log->user_data, + (NGTCP2_LOG_PKT " NEW_CONNECTION_ID(0x%02" PRIx64 ") seq=%" PRIu64 + " cid=0x%s retire_prior_to=%" PRIu64 + " stateless_reset_token=0x%s"), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->seq, + (const char *)ngtcp2_encode_hex(cid, fr->cid.data, fr->cid.datalen), + fr->retire_prior_to, + (const char *)ngtcp2_encode_hex(buf, fr->stateless_reset_token, + sizeof(fr->stateless_reset_token))); } static void log_fr_stop_sending(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, @@ -406,10 +404,10 @@ static void log_fr_path_challenge(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, uint8_t buf[sizeof(fr->data) * 2 + 1]; log->log_printf( - log->user_data, - (NGTCP2_LOG_PKT " PATH_CHALLENGE(0x%02" PRIx64 ") data=0x%s"), - NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, - (const char *)ngtcp2_encode_hex(buf, fr->data, sizeof(fr->data))); + log->user_data, + (NGTCP2_LOG_PKT " PATH_CHALLENGE(0x%02" PRIx64 ") data=0x%s"), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, + (const char *)ngtcp2_encode_hex(buf, fr->data, sizeof(fr->data))); } static void log_fr_path_response(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, @@ -418,19 +416,19 @@ static void log_fr_path_response(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, uint8_t buf[sizeof(fr->data) * 2 + 1]; log->log_printf( - log->user_data, - (NGTCP2_LOG_PKT " PATH_RESPONSE(0x%02" PRIx64 ") data=0x%s"), - NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, - (const char *)ngtcp2_encode_hex(buf, fr->data, sizeof(fr->data))); + log->user_data, + (NGTCP2_LOG_PKT " PATH_RESPONSE(0x%02" PRIx64 ") data=0x%s"), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, + (const char *)ngtcp2_encode_hex(buf, fr->data, sizeof(fr->data))); } static void log_fr_crypto(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, const ngtcp2_stream *fr, const char *dir) { - log->log_printf(log->user_data, - (NGTCP2_LOG_PKT " CRYPTO(0x%02" PRIx64 ") offset=%" PRIu64 - " len=%" PRIu64), - NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->offset, - ngtcp2_vec_len(fr->data, fr->datacnt)); + log->log_printf( + log->user_data, + (NGTCP2_LOG_PKT " CRYPTO(0x%02" PRIx64 ") offset=%" PRIu64 " len=%" PRIu64), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->offset, + ngtcp2_vec_len(fr->data, fr->datacnt)); } static void log_fr_new_token(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, @@ -448,9 +446,9 @@ static void log_fr_new_token(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, p = ngtcp2_encode_hex(buf, fr->token, fr->tokenlen); } log->log_printf( - log->user_data, - (NGTCP2_LOG_PKT " NEW_TOKEN(0x%02" PRIx64 ") token=0x%s len=%zu"), - NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, (const char *)p, fr->tokenlen); + log->user_data, + (NGTCP2_LOG_PKT " NEW_TOKEN(0x%02" PRIx64 ") token=0x%s len=%zu"), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, (const char *)p, fr->tokenlen); } static void log_fr_retire_connection_id(ngtcp2_log *log, @@ -458,9 +456,9 @@ static void log_fr_retire_connection_id(ngtcp2_log *log, const ngtcp2_retire_connection_id *fr, const char *dir) { log->log_printf( - log->user_data, - (NGTCP2_LOG_PKT " RETIRE_CONNECTION_ID(0x%02" PRIx64 ") seq=%" PRIu64), - NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->seq); + log->user_data, + (NGTCP2_LOG_PKT " RETIRE_CONNECTION_ID(0x%02" PRIx64 ") seq=%" PRIu64), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->seq); } static void log_fr_handshake_done(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, @@ -601,11 +599,11 @@ void ngtcp2_log_rx_sr(ngtcp2_log *log, const ngtcp2_pkt_stateless_reset *sr) { shd.type = NGTCP2_PKT_STATELESS_RESET; log->log_printf( - log->user_data, (NGTCP2_LOG_PKT " token=0x%s randlen=%zu"), - NGTCP2_LOG_PKT_HD_FIELDS("rx"), - (const char *)ngtcp2_encode_hex(buf, sr->stateless_reset_token, - sizeof(sr->stateless_reset_token)), - sr->randlen); + log->user_data, (NGTCP2_LOG_PKT " token=0x%s randlen=%zu"), + NGTCP2_LOG_PKT_HD_FIELDS("rx"), + (const char *)ngtcp2_encode_hex(buf, sr->stateless_reset_token, + sizeof(sr->stateless_reset_token)), + sr->randlen); } void ngtcp2_log_remote_tp(ngtcp2_log *log, @@ -625,10 +623,10 @@ void ngtcp2_log_remote_tp(ngtcp2_log *log, if (params->stateless_reset_token_present) { log->log_printf( - log->user_data, (NGTCP2_LOG_TP " stateless_reset_token=0x%s"), - NGTCP2_LOG_TP_HD_FIELDS, - (const char *)ngtcp2_encode_hex(token, params->stateless_reset_token, - sizeof(params->stateless_reset_token))); + log->user_data, (NGTCP2_LOG_TP " stateless_reset_token=0x%s"), + NGTCP2_LOG_TP_HD_FIELDS, + (const char *)ngtcp2_encode_hex(token, params->stateless_reset_token, + sizeof(params->stateless_reset_token))); } if (params->preferred_addr_present) { @@ -639,7 +637,7 @@ void ngtcp2_log_remote_tp(ngtcp2_log *log, (NGTCP2_LOG_TP " preferred_address.ipv4_addr=%s"), NGTCP2_LOG_TP_HD_FIELDS, (const char *)ngtcp2_encode_ipv4( - addr, (const uint8_t *)&sa_in->sin_addr)); + addr, (const uint8_t *)&sa_in->sin_addr)); log->log_printf(log->user_data, (NGTCP2_LOG_TP " preferred_address.ipv4_port=%u"), NGTCP2_LOG_TP_HD_FIELDS, ngtcp2_ntohs(sa_in->sin_port)); @@ -652,59 +650,59 @@ void ngtcp2_log_remote_tp(ngtcp2_log *log, (NGTCP2_LOG_TP " preferred_address.ipv6_addr=%s"), NGTCP2_LOG_TP_HD_FIELDS, (const char *)ngtcp2_encode_ipv6( - addr, (const uint8_t *)&sa_in6->sin6_addr)); + addr, (const uint8_t *)&sa_in6->sin6_addr)); log->log_printf(log->user_data, (NGTCP2_LOG_TP " preferred_address.ipv6_port=%u"), NGTCP2_LOG_TP_HD_FIELDS, ngtcp2_ntohs(sa_in6->sin6_port)); } log->log_printf( - log->user_data, (NGTCP2_LOG_TP " preferred_address.cid=0x%s"), - NGTCP2_LOG_TP_HD_FIELDS, - (const char *)ngtcp2_encode_hex(cid, params->preferred_addr.cid.data, - params->preferred_addr.cid.datalen)); + log->user_data, (NGTCP2_LOG_TP " preferred_address.cid=0x%s"), + NGTCP2_LOG_TP_HD_FIELDS, + (const char *)ngtcp2_encode_hex(cid, params->preferred_addr.cid.data, + params->preferred_addr.cid.datalen)); log->log_printf( - log->user_data, - (NGTCP2_LOG_TP " preferred_address.stateless_reset_token=0x%s"), - NGTCP2_LOG_TP_HD_FIELDS, - (const char *)ngtcp2_encode_hex( - token, params->preferred_addr.stateless_reset_token, - sizeof(params->preferred_addr.stateless_reset_token))); + log->user_data, + (NGTCP2_LOG_TP " preferred_address.stateless_reset_token=0x%s"), + NGTCP2_LOG_TP_HD_FIELDS, + (const char *)ngtcp2_encode_hex( + token, params->preferred_addr.stateless_reset_token, + sizeof(params->preferred_addr.stateless_reset_token))); } if (params->original_dcid_present) { log->log_printf( - log->user_data, - (NGTCP2_LOG_TP " original_destination_connection_id=0x%s"), - NGTCP2_LOG_TP_HD_FIELDS, - (const char *)ngtcp2_encode_hex(cid, params->original_dcid.data, - params->original_dcid.datalen)); + log->user_data, + (NGTCP2_LOG_TP " original_destination_connection_id=0x%s"), + NGTCP2_LOG_TP_HD_FIELDS, + (const char *)ngtcp2_encode_hex(cid, params->original_dcid.data, + params->original_dcid.datalen)); } if (params->retry_scid_present) { log->log_printf( - log->user_data, (NGTCP2_LOG_TP " retry_source_connection_id=0x%s"), - NGTCP2_LOG_TP_HD_FIELDS, - (const char *)ngtcp2_encode_hex(cid, params->retry_scid.data, - params->retry_scid.datalen)); + log->user_data, (NGTCP2_LOG_TP " retry_source_connection_id=0x%s"), + NGTCP2_LOG_TP_HD_FIELDS, + (const char *)ngtcp2_encode_hex(cid, params->retry_scid.data, + params->retry_scid.datalen)); } if (params->initial_scid_present) { log->log_printf( - log->user_data, (NGTCP2_LOG_TP " initial_source_connection_id=0x%s"), - NGTCP2_LOG_TP_HD_FIELDS, - (const char *)ngtcp2_encode_hex(cid, params->initial_scid.data, - params->initial_scid.datalen)); + log->user_data, (NGTCP2_LOG_TP " initial_source_connection_id=0x%s"), + NGTCP2_LOG_TP_HD_FIELDS, + (const char *)ngtcp2_encode_hex(cid, params->initial_scid.data, + params->initial_scid.datalen)); } log->log_printf( - log->user_data, - (NGTCP2_LOG_TP " initial_max_stream_data_bidi_local=%" PRIu64), - NGTCP2_LOG_TP_HD_FIELDS, params->initial_max_stream_data_bidi_local); + log->user_data, + (NGTCP2_LOG_TP " initial_max_stream_data_bidi_local=%" PRIu64), + NGTCP2_LOG_TP_HD_FIELDS, params->initial_max_stream_data_bidi_local); log->log_printf( - log->user_data, - (NGTCP2_LOG_TP " initial_max_stream_data_bidi_remote=%" PRIu64), - NGTCP2_LOG_TP_HD_FIELDS, params->initial_max_stream_data_bidi_remote); + log->user_data, + (NGTCP2_LOG_TP " initial_max_stream_data_bidi_remote=%" PRIu64), + NGTCP2_LOG_TP_HD_FIELDS, params->initial_max_stream_data_bidi_remote); log->log_printf(log->user_data, (NGTCP2_LOG_TP " initial_max_stream_data_uni=%" PRIu64), NGTCP2_LOG_TP_HD_FIELDS, params->initial_max_stream_data_uni); @@ -742,21 +740,21 @@ void ngtcp2_log_remote_tp(ngtcp2_log *log, if (params->version_info_present) { log->log_printf( - log->user_data, - (NGTCP2_LOG_TP " version_information.chosen_version=0x%08x"), - NGTCP2_LOG_TP_HD_FIELDS, params->version_info.chosen_version); + log->user_data, + (NGTCP2_LOG_TP " version_information.chosen_version=0x%08x"), + NGTCP2_LOG_TP_HD_FIELDS, params->version_info.chosen_version); assert(!(params->version_info.available_versionslen & 0x3)); for (i = 0, p = params->version_info.available_versions; i < params->version_info.available_versionslen; i += sizeof(uint32_t)) { - p = ngtcp2_get_uint32(&version, p); + p = ngtcp2_get_uint32be(&version, p); log->log_printf( - log->user_data, - (NGTCP2_LOG_TP " version_information.available_versions[%zu]=0x%08x"), - NGTCP2_LOG_TP_HD_FIELDS, i >> 2, version); + log->user_data, + (NGTCP2_LOG_TP " version_information.available_versions[%zu]=0x%08x"), + NGTCP2_LOG_TP_HD_FIELDS, i >> 2, version); } } } @@ -783,18 +781,18 @@ static void log_pkt_hd(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, if (hd->type == NGTCP2_PKT_1RTT) { ngtcp2_log_info( - log, NGTCP2_LOG_EVENT_PKT, "%s pkn=%" PRId64 " dcid=0x%s type=%s k=%d", - dir, hd->pkt_num, - (const char *)ngtcp2_encode_hex(dcid, hd->dcid.data, hd->dcid.datalen), - strpkttype(hd), (hd->flags & NGTCP2_PKT_FLAG_KEY_PHASE) != 0); + log, NGTCP2_LOG_EVENT_PKT, "%s pkn=%" PRId64 " dcid=0x%s type=%s k=%d", + dir, hd->pkt_num, + (const char *)ngtcp2_encode_hex(dcid, hd->dcid.data, hd->dcid.datalen), + strpkttype(hd), (hd->flags & NGTCP2_PKT_FLAG_KEY_PHASE) != 0); } else { ngtcp2_log_info( - log, NGTCP2_LOG_EVENT_PKT, - "%s pkn=%" PRId64 " dcid=0x%s scid=0x%s version=0x%08x type=%s len=%zu", - dir, hd->pkt_num, - (const char *)ngtcp2_encode_hex(dcid, hd->dcid.data, hd->dcid.datalen), - (const char *)ngtcp2_encode_hex(scid, hd->scid.data, hd->scid.datalen), - hd->version, strpkttype(hd), hd->len); + log, NGTCP2_LOG_EVENT_PKT, + "%s pkn=%" PRId64 " dcid=0x%s scid=0x%s version=0x%08x type=%s len=%zu", + dir, hd->pkt_num, + (const char *)ngtcp2_encode_hex(dcid, hd->dcid.data, hd->dcid.datalen), + (const char *)ngtcp2_encode_hex(scid, hd->scid.data, hd->scid.datalen), + hd->version, strpkttype(hd), hd->len); } } diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_log.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_log.h index 1280ce04d6385a..13fb81a72e1d51 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_log.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_log.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -129,4 +129,4 @@ void ngtcp2_log_tx_cancel(ngtcp2_log *log, const ngtcp2_pkt_hd *hd); void ngtcp2_log_info(ngtcp2_log *log, ngtcp2_log_event ev, const char *fmt, ...); -#endif /* NGTCP2_LOG_H */ +#endif /* !defined(NGTCP2_LOG_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_macro.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_macro.h index 28d3461bef9238..dfe5e0aed220f8 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_macro.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_macro.h @@ -27,17 +27,14 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include #include -#define ngtcp2_min(A, B) ((A) < (B) ? (A) : (B)) -#define ngtcp2_max(A, B) ((A) > (B) ? (A) : (B)) - #define ngtcp2_struct_of(ptr, type, member) \ - ((type *)(void *)((char *)(ptr)-offsetof(type, member))) + ((type *)(void *)((char *)(ptr) - offsetof(type, member))) /* ngtcp2_list_insert inserts |T| before |*PD|. The contract is that this is singly linked list, and the next element is pointed by next @@ -55,4 +52,30 @@ */ #define ngtcp2_arraylen(A) (sizeof(A) / sizeof(A[0])) -#endif /* NGTCP2_MACRO_H */ +#define ngtcp2_max_def(SUFFIX, T) \ + static inline T ngtcp2_max_##SUFFIX(T a, T b) { return a < b ? b : a; } + +ngtcp2_max_def(int8, int8_t) +ngtcp2_max_def(int16, int16_t) +ngtcp2_max_def(int32, int32_t) +ngtcp2_max_def(int64, int64_t) +ngtcp2_max_def(uint8, uint8_t) +ngtcp2_max_def(uint16, uint16_t) +ngtcp2_max_def(uint32, uint32_t) +ngtcp2_max_def(uint64, uint64_t) +ngtcp2_max_def(size, size_t) + +#define ngtcp2_min_def(SUFFIX, T) \ + static inline T ngtcp2_min_##SUFFIX(T a, T b) { return a < b ? a : b; } + +ngtcp2_min_def(int8, int8_t) +ngtcp2_min_def(int16, int16_t) +ngtcp2_min_def(int32, int32_t) +ngtcp2_min_def(int64, int64_t) +ngtcp2_min_def(uint8, uint8_t) +ngtcp2_min_def(uint16, uint16_t) +ngtcp2_min_def(uint32, uint32_t) +ngtcp2_min_def(uint64, uint64_t) +ngtcp2_min_def(size, size_t) + +#endif /* !defined(NGTCP2_MACRO_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_map.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_map.c index 33e9fcc018b5db..9eb102f16b32e2 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_map.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_map.c @@ -35,8 +35,7 @@ void ngtcp2_map_init(ngtcp2_map *map, const ngtcp2_mem *mem) { map->mem = mem; - map->tablelen = 0; - map->tablelenbits = 0; + map->hashbits = 0; map->table = NULL; map->size = 0; } @@ -49,33 +48,20 @@ void ngtcp2_map_free(ngtcp2_map *map) { ngtcp2_mem_free(map->mem, map->table); } -void ngtcp2_map_each_free(ngtcp2_map *map, int (*func)(void *data, void *ptr), - void *ptr) { - uint32_t i; - ngtcp2_map_bucket *bkt; - - for (i = 0; i < map->tablelen; ++i) { - bkt = &map->table[i]; - - if (bkt->data == NULL) { - continue; - } - - func(bkt->data, ptr); - } -} - -int ngtcp2_map_each(ngtcp2_map *map, int (*func)(void *data, void *ptr), +int ngtcp2_map_each(const ngtcp2_map *map, int (*func)(void *data, void *ptr), void *ptr) { int rv; - uint32_t i; + size_t i; ngtcp2_map_bucket *bkt; + size_t tablelen; if (map->size == 0) { return 0; } - for (i = 0; i < map->tablelen; ++i) { + tablelen = 1u << map->hashbits; + + for (i = 0; i < tablelen; ++i) { bkt = &map->table[i]; if (bkt->data == NULL) { @@ -91,82 +77,61 @@ int ngtcp2_map_each(ngtcp2_map *map, int (*func)(void *data, void *ptr), return 0; } -static uint32_t hash(ngtcp2_map_key_type key) { - return (uint32_t)((key * 11400714819323198485llu) >> 32); -} - -static size_t h2idx(uint32_t hash, uint32_t bits) { - return hash >> (32 - bits); -} - -static size_t distance(uint32_t tablelen, uint32_t tablelenbits, - ngtcp2_map_bucket *bkt, size_t idx) { - return (idx - h2idx(bkt->hash, tablelenbits)) & (tablelen - 1); +static size_t hash(ngtcp2_map_key_type key, size_t bits) { + return (size_t)((key * 11400714819323198485llu) >> (64 - bits)); } -static void map_bucket_swap(ngtcp2_map_bucket *bkt, uint32_t *phash, - ngtcp2_map_key_type *pkey, void **pdata) { - uint32_t h = bkt->hash; - ngtcp2_map_key_type key = bkt->key; - void *data = bkt->data; - - bkt->hash = *phash; - bkt->key = *pkey; - bkt->data = *pdata; +static void map_bucket_swap(ngtcp2_map_bucket *a, ngtcp2_map_bucket *b) { + ngtcp2_map_bucket c = *a; - *phash = h; - *pkey = key; - *pdata = data; -} - -static void map_bucket_set_data(ngtcp2_map_bucket *bkt, uint32_t hash, - ngtcp2_map_key_type key, void *data) { - bkt->hash = hash; - bkt->key = key; - bkt->data = data; + *a = *b; + *b = c; } #ifndef WIN32 -void ngtcp2_map_print_distance(ngtcp2_map *map) { - uint32_t i; +void ngtcp2_map_print_distance(const ngtcp2_map *map) { + size_t i; size_t idx; ngtcp2_map_bucket *bkt; + size_t tablelen; - for (i = 0; i < map->tablelen; ++i) { + if (map->size == 0) { + return; + } + + tablelen = 1u << map->hashbits; + + for (i = 0; i < tablelen; ++i) { bkt = &map->table[i]; if (bkt->data == NULL) { - fprintf(stderr, "@%u \n", i); + fprintf(stderr, "@%zu \n", i); continue; } - idx = h2idx(bkt->hash, map->tablelenbits); - fprintf(stderr, "@%u hash=%08x key=%" PRIu64 " base=%zu distance=%zu\n", i, - bkt->hash, bkt->key, idx, - distance(map->tablelen, map->tablelenbits, bkt, idx)); + idx = hash(bkt->key, map->hashbits); + fprintf(stderr, "@%zu hash=%zu key=%" PRIu64 " base=%zu distance=%u\n", i, + hash(bkt->key, map->hashbits), bkt->key, idx, bkt->psl); } } -#endif /* !WIN32 */ +#endif /* !defined(WIN32) */ -static int insert(ngtcp2_map_bucket *table, uint32_t tablelen, - uint32_t tablelenbits, uint32_t hash, ngtcp2_map_key_type key, - void *data) { - size_t idx = h2idx(hash, tablelenbits); - size_t d = 0, dd; - ngtcp2_map_bucket *bkt; +static int insert(ngtcp2_map_bucket *table, size_t hashbits, + ngtcp2_map_key_type key, void *data) { + size_t idx = hash(key, hashbits); + ngtcp2_map_bucket b = {0, key, data}, *bkt; + size_t mask = (1u << hashbits) - 1; for (;;) { bkt = &table[idx]; if (bkt->data == NULL) { - map_bucket_set_data(bkt, hash, key, data); + *bkt = b; return 0; } - dd = distance(tablelen, tablelenbits, bkt, idx); - if (d > dd) { - map_bucket_swap(bkt, &hash, &key, &data); - d = dd; + if (b.psl > bkt->psl) { + map_bucket_swap(bkt, &b); } else if (bkt->key == key) { /* TODO This check is just a waste after first swap or if this function is called from map_resize. That said, there is no @@ -175,41 +140,42 @@ static int insert(ngtcp2_map_bucket *table, uint32_t tablelen, return NGTCP2_ERR_INVALID_ARGUMENT; } - ++d; - idx = (idx + 1) & (tablelen - 1); + ++b.psl; + idx = (idx + 1) & mask; } } -/* new_tablelen must be power of 2 and new_tablelen == (1 << - new_tablelenbits) must hold. */ -static int map_resize(ngtcp2_map *map, uint32_t new_tablelen, - uint32_t new_tablelenbits) { - uint32_t i; +static int map_resize(ngtcp2_map *map, size_t new_hashbits) { + size_t i; ngtcp2_map_bucket *new_table; ngtcp2_map_bucket *bkt; + size_t tablelen; int rv; (void)rv; new_table = - ngtcp2_mem_calloc(map->mem, new_tablelen, sizeof(ngtcp2_map_bucket)); + ngtcp2_mem_calloc(map->mem, 1u << new_hashbits, sizeof(ngtcp2_map_bucket)); if (new_table == NULL) { return NGTCP2_ERR_NOMEM; } - for (i = 0; i < map->tablelen; ++i) { - bkt = &map->table[i]; - if (bkt->data == NULL) { - continue; - } - rv = insert(new_table, new_tablelen, new_tablelenbits, bkt->hash, bkt->key, - bkt->data); + if (map->size) { + tablelen = 1u << map->hashbits; - assert(0 == rv); + for (i = 0; i < tablelen; ++i) { + bkt = &map->table[i]; + if (bkt->data == NULL) { + continue; + } + + rv = insert(new_table, new_hashbits, bkt->key, bkt->data); + + assert(0 == rv); + } } ngtcp2_mem_free(map->mem, map->table); - map->tablelen = new_tablelen; - map->tablelenbits = new_tablelenbits; + map->hashbits = new_hashbits; map->table = new_table; return 0; @@ -221,48 +187,49 @@ int ngtcp2_map_insert(ngtcp2_map *map, ngtcp2_map_key_type key, void *data) { assert(data); /* Load factor is 0.75 */ - if ((map->size + 1) * 4 > map->tablelen * 3) { - if (map->tablelen) { - rv = map_resize(map, map->tablelen * 2, map->tablelenbits + 1); + /* Under the very initial condition, that is map->size == 0 and + map->hashbits == 0, 4 > 3 still holds nicely. */ + if ((map->size + 1) * 4 > (1u << map->hashbits) * 3) { + if (map->hashbits) { + rv = map_resize(map, map->hashbits + 1); if (rv != 0) { return rv; } } else { - rv = map_resize(map, 1 << NGTCP2_INITIAL_TABLE_LENBITS, - NGTCP2_INITIAL_TABLE_LENBITS); + rv = map_resize(map, NGTCP2_INITIAL_TABLE_LENBITS); if (rv != 0) { return rv; } } } - rv = insert(map->table, map->tablelen, map->tablelenbits, hash(key), key, - data); + rv = insert(map->table, map->hashbits, key, data); if (rv != 0) { return rv; } + ++map->size; + return 0; } -void *ngtcp2_map_find(ngtcp2_map *map, ngtcp2_map_key_type key) { - uint32_t h; +void *ngtcp2_map_find(const ngtcp2_map *map, ngtcp2_map_key_type key) { size_t idx; ngtcp2_map_bucket *bkt; - size_t d = 0; + size_t psl = 0; + size_t mask; if (map->size == 0) { return NULL; } - h = hash(key); - idx = h2idx(h, map->tablelenbits); + idx = hash(key, map->hashbits); + mask = (1u << map->hashbits) - 1; for (;;) { bkt = &map->table[idx]; - if (bkt->data == NULL || - d > distance(map->tablelen, map->tablelenbits, bkt, idx)) { + if (bkt->data == NULL || psl > bkt->psl) { return NULL; } @@ -270,50 +237,47 @@ void *ngtcp2_map_find(ngtcp2_map *map, ngtcp2_map_key_type key) { return bkt->data; } - ++d; - idx = (idx + 1) & (map->tablelen - 1); + ++psl; + idx = (idx + 1) & mask; } } int ngtcp2_map_remove(ngtcp2_map *map, ngtcp2_map_key_type key) { - uint32_t h; - size_t idx, didx; - ngtcp2_map_bucket *bkt; - size_t d = 0; + size_t idx; + ngtcp2_map_bucket *b, *bkt; + size_t psl = 0; + size_t mask; if (map->size == 0) { return NGTCP2_ERR_INVALID_ARGUMENT; } - h = hash(key); - idx = h2idx(h, map->tablelenbits); + idx = hash(key, map->hashbits); + mask = (1u << map->hashbits) - 1; for (;;) { bkt = &map->table[idx]; - if (bkt->data == NULL || - d > distance(map->tablelen, map->tablelenbits, bkt, idx)) { + if (bkt->data == NULL || psl > bkt->psl) { return NGTCP2_ERR_INVALID_ARGUMENT; } if (bkt->key == key) { - map_bucket_set_data(bkt, 0, 0, NULL); - - didx = idx; - idx = (idx + 1) & (map->tablelen - 1); + b = bkt; + idx = (idx + 1) & mask; for (;;) { bkt = &map->table[idx]; - if (bkt->data == NULL || - distance(map->tablelen, map->tablelenbits, bkt, idx) == 0) { + if (bkt->data == NULL || bkt->psl == 0) { + b->data = NULL; break; } - map->table[didx] = *bkt; - map_bucket_set_data(bkt, 0, 0, NULL); - didx = idx; + --bkt->psl; + *b = *bkt; + b = bkt; - idx = (idx + 1) & (map->tablelen - 1); + idx = (idx + 1) & mask; } --map->size; @@ -321,18 +285,18 @@ int ngtcp2_map_remove(ngtcp2_map *map, ngtcp2_map_key_type key) { return 0; } - ++d; - idx = (idx + 1) & (map->tablelen - 1); + ++psl; + idx = (idx + 1) & mask; } } void ngtcp2_map_clear(ngtcp2_map *map) { - if (map->tablelen == 0) { + if (map->size == 0) { return; } - memset(map->table, 0, sizeof(*map->table) * map->tablelen); + memset(map->table, 0, sizeof(*map->table) * (1u << map->hashbits)); map->size = 0; } -size_t ngtcp2_map_size(ngtcp2_map *map) { return map->size; } +size_t ngtcp2_map_size(const ngtcp2_map *map) { return map->size; } diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_map.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_map.h index d05b1657489e45..9d882fb20088d8 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_map.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_map.h @@ -28,7 +28,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -39,7 +39,7 @@ typedef uint64_t ngtcp2_map_key_type; typedef struct ngtcp2_map_bucket { - uint32_t hash; + uint32_t psl; ngtcp2_map_key_type key; void *data; } ngtcp2_map_bucket; @@ -48,33 +48,24 @@ typedef struct ngtcp2_map { ngtcp2_map_bucket *table; const ngtcp2_mem *mem; size_t size; - uint32_t tablelen; - uint32_t tablelenbits; + size_t hashbits; } ngtcp2_map; /* - * Initializes the map |map|. + * ngtcp2_map_init initializes the map |map|. */ void ngtcp2_map_init(ngtcp2_map *map, const ngtcp2_mem *mem); /* - * Deallocates any resources allocated for |map|. The stored entries - * are not freed by this function. Use ngtcp2_map_each_free() to free - * each entries. + * ngtcp2_map_free deallocates any resources allocated for |map|. The + * stored entries are not freed by this function. Use + * ngtcp2_map_each() to free each entry. */ void ngtcp2_map_free(ngtcp2_map *map); /* - * Deallocates each entries using |func| function and any resources - * allocated for |map|. The |func| function is responsible for freeing - * given the |data| object. The |ptr| will be passed to the |func| as - * send argument. The return value of the |func| will be ignored. - */ -void ngtcp2_map_each_free(ngtcp2_map *map, int (*func)(void *data, void *ptr), - void *ptr); - -/* - * Inserts the new |data| with the |key| to the map |map|. + * ngtcp2_map_insert inserts the new |data| with the |key| to the map + * |map|. * * This function returns 0 if it succeeds, or one of the following * negative error codes: @@ -82,57 +73,56 @@ void ngtcp2_map_each_free(ngtcp2_map *map, int (*func)(void *data, void *ptr), * NGTCP2_ERR_INVALID_ARGUMENT * The item associated by |key| already exists. * NGTCP2_ERR_NOMEM - * Out of memory + * Out of memory */ int ngtcp2_map_insert(ngtcp2_map *map, ngtcp2_map_key_type key, void *data); /* - * Returns the data associated by the key |key|. If there is no such - * data, this function returns NULL. + * ngtcp2_map_find returns the entry associated by the key |key|. If + * there is no such entry, this function returns NULL. */ -void *ngtcp2_map_find(ngtcp2_map *map, ngtcp2_map_key_type key); +void *ngtcp2_map_find(const ngtcp2_map *map, ngtcp2_map_key_type key); /* - * Removes the data associated by the key |key| from the |map|. The - * removed data is not freed by this function. + * ngtcp2_map_remove removes the entry associated by the key |key| + * from the |map|. The removed entry is not freed by this function. * * This function returns 0 if it succeeds, or one of the following * negative error codes: * * NGTCP2_ERR_INVALID_ARGUMENT - * The data associated by |key| does not exist. + * The entry associated by |key| does not exist. */ int ngtcp2_map_remove(ngtcp2_map *map, ngtcp2_map_key_type key); /* - * Removes all entries from |map|. + * ngtcp2_map_clear removes all entries from |map|. The removed entry + * is not freed by this function. */ void ngtcp2_map_clear(ngtcp2_map *map); /* - * Returns the number of items stored in the map |map|. + * ngtcp2_map_size returns the number of items stored in the map + * |map|. */ -size_t ngtcp2_map_size(ngtcp2_map *map); +size_t ngtcp2_map_size(const ngtcp2_map *map); /* - * Applies the function |func| to each data in the |map| with the - * optional user supplied pointer |ptr|. + * ngtcp2_map_each applies the function |func| to each entry in the + * |map| with the optional user supplied pointer |ptr|. * * If the |func| returns 0, this function calls the |func| with the - * next data. If the |func| returns nonzero, it will not call the + * next entry. If the |func| returns nonzero, it will not call the * |func| for further entries and return the return value of the * |func| immediately. Thus, this function returns 0 if all the * invocations of the |func| return 0, or nonzero value which the last * invocation of |func| returns. - * - * Don't use this function to free each data. Use - * ngtcp2_map_each_free() instead. */ -int ngtcp2_map_each(ngtcp2_map *map, int (*func)(void *data, void *ptr), +int ngtcp2_map_each(const ngtcp2_map *map, int (*func)(void *data, void *ptr), void *ptr); #ifndef WIN32 -void ngtcp2_map_print_distance(ngtcp2_map *map); -#endif /* !WIN32 */ +void ngtcp2_map_print_distance(const ngtcp2_map *map); +#endif /* !defined(WIN32) */ -#endif /* NGTCP2_MAP_H */ +#endif /* !defined(NGTCP2_MAP_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_mem.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_mem.c index bcce0b5cdfcf02..d30e1f986e97d3 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_mem.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_mem.c @@ -72,7 +72,7 @@ void *ngtcp2_mem_calloc(const ngtcp2_mem *mem, size_t nmemb, size_t size) { void *ngtcp2_mem_realloc(const ngtcp2_mem *mem, void *ptr, size_t size) { return mem->realloc(ptr, size, mem->user_data); } -#else /* MEMDEBUG */ +#else /* defined(MEMDEBUG) */ void *ngtcp2_mem_malloc_debug(const ngtcp2_mem *mem, size_t size, const char *func, const char *file, size_t line) { void *nptr = mem->malloc(size, mem->user_data); @@ -110,4 +110,4 @@ void *ngtcp2_mem_realloc_debug(const ngtcp2_mem *mem, void *ptr, size_t size, return nptr; } -#endif /* MEMDEBUG */ +#endif /* defined(MEMDEBUG) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_mem.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_mem.h index c99b6c59726891..9f818752125523 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_mem.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_mem.h @@ -28,7 +28,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -42,7 +42,7 @@ void ngtcp2_mem_free(const ngtcp2_mem *mem, void *ptr); void *ngtcp2_mem_calloc(const ngtcp2_mem *mem, size_t nmemb, size_t size); void *ngtcp2_mem_realloc(const ngtcp2_mem *mem, void *ptr, size_t size); -#else /* MEMDEBUG */ +#else /* defined(MEMDEBUG) */ void *ngtcp2_mem_malloc_debug(const ngtcp2_mem *mem, size_t size, const char *func, const char *file, size_t line); @@ -67,6 +67,6 @@ void *ngtcp2_mem_realloc_debug(const ngtcp2_mem *mem, void *ptr, size_t size, # define ngtcp2_mem_realloc(MEM, PTR, SIZE) \ ngtcp2_mem_realloc_debug((MEM), (PTR), (SIZE), __func__, __FILE__, __LINE__) -#endif /* MEMDEBUG */ +#endif /* defined(MEMDEBUG) */ -#endif /* NGTCP2_MEM_H */ +#endif /* !defined(NGTCP2_MEM_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_net.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_net.h index 4a2c4041d4d170..103a2fb2d80714 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_net.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_net.h @@ -30,70 +30,68 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #ifdef HAVE_ARPA_INET_H # include -#endif /* HAVE_ARPA_INET_H */ +#endif /* defined(HAVE_ARPA_INET_H) */ #ifdef HAVE_NETINET_IN_H # include -#endif /* HAVE_NETINET_IN_H */ +#endif /* defined(HAVE_NETINET_IN_H) */ #ifdef HAVE_BYTESWAP_H # include -#endif /* HAVE_BYTESWAP_H */ +#endif /* defined(HAVE_BYTESWAP_H) */ #ifdef HAVE_ENDIAN_H # include -#endif /* HAVE_ENDIAN_H */ +#endif /* defined(HAVE_ENDIAN_H) */ #ifdef HAVE_SYS_ENDIAN_H # include -#endif /* HAVE_SYS_ENDIAN_H */ +#endif /* defined(HAVE_SYS_ENDIAN_H) */ -#if defined(__APPLE__) +#ifdef __APPLE__ # include -#endif // __APPLE__ +#endif /* defined(__APPLE__) */ #include -#if defined(HAVE_BE64TOH) || \ - (defined(HAVE_DECL_BE64TOH) && HAVE_DECL_BE64TOH > 0) +#if HAVE_DECL_BE64TOH # define ngtcp2_ntohl64(N) be64toh(N) # define ngtcp2_htonl64(N) htobe64(N) -#else /* !HAVE_BE64TOH */ -# if defined(WORDS_BIGENDIAN) +#else /* !HAVE_DECL_BE64TOH */ +# ifdef WORDS_BIGENDIAN # define ngtcp2_ntohl64(N) (N) # define ngtcp2_htonl64(N) (N) -# else /* !WORDS_BIGENDIAN */ -# if defined(HAVE_BSWAP_64) || \ - (defined(HAVE_DECL_BSWAP_64) && HAVE_DECL_BSWAP_64 > 0) +# else /* !defined(WORDS_BIGENDIAN) */ +# if HAVE_DECL_BSWAP_64 # define ngtcp2_bswap64 bswap_64 # elif defined(WIN32) # define ngtcp2_bswap64 _byteswap_uint64 # elif defined(__APPLE__) # define ngtcp2_bswap64 OSSwapInt64 -# else /* !HAVE_BSWAP_64 && !WIN32 && !__APPLE__ */ +# else /* !(HAVE_DECL_BSWAP_64 || defined(WIN32) || defined(__APPLE__)) */ # define ngtcp2_bswap64(N) \ ((uint64_t)(ngtcp2_ntohl((uint32_t)(N))) << 32 | \ ngtcp2_ntohl((uint32_t)((N) >> 32))) -# endif /* !HAVE_BSWAP_64 && !WIN32 && !__APPLE__ */ +# endif /* !(HAVE_DECL_BSWAP_64 || defined(WIN32) || defined(__APPLE__)) */ # define ngtcp2_ntohl64(N) ngtcp2_bswap64(N) # define ngtcp2_htonl64(N) ngtcp2_bswap64(N) -# endif /* !WORDS_BIGENDIAN */ -#endif /* !HAVE_BE64TOH */ +# endif /* !defined(WORDS_BIGENDIAN) */ +#endif /* !HAVE_DECL_BE64TOH */ -#if defined(WIN32) +#ifdef WIN32 /* Windows requires ws2_32 library for ntonl family functions. We define inline functions for those function so that we don't have dependency on that lib. */ # ifdef _MSC_VER # define STIN static __inline -# else +# else /* !defined(_MSC_VER) */ # define STIN static inline -# endif +# endif /* !defined(_MSC_VER) */ STIN uint32_t ngtcp2_htonl(uint32_t hostlong) { uint32_t res; @@ -131,13 +129,13 @@ STIN uint16_t ngtcp2_ntohs(uint16_t netshort) { return res; } -#else /* !WIN32 */ +#else /* !defined(WIN32) */ # define ngtcp2_htonl htonl # define ngtcp2_htons htons # define ngtcp2_ntohl ntohl # define ngtcp2_ntohs ntohs -#endif /* !WIN32 */ +#endif /* !defined(WIN32) */ -#endif /* NGTCP2_NET_H */ +#endif /* !defined(NGTCP2_NET_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_objalloc.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_objalloc.h index ea73e788317681..cf23de7b2b7f20 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_objalloc.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_objalloc.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -67,9 +67,9 @@ void ngtcp2_objalloc_clear(ngtcp2_objalloc *objalloc); #ifndef NOMEMPOOL # define ngtcp2_objalloc_decl(NAME, TYPE, OPLENTFIELD) \ inline static void ngtcp2_objalloc_##NAME##_init( \ - ngtcp2_objalloc *objalloc, size_t nmemb, const ngtcp2_mem *mem) { \ + ngtcp2_objalloc *objalloc, size_t nmemb, const ngtcp2_mem *mem) { \ ngtcp2_objalloc_init( \ - objalloc, ((sizeof(TYPE) + 0xfu) & ~(uintptr_t)0xfu) * nmemb, mem); \ + objalloc, ((sizeof(TYPE) + 0xfu) & ~(uintptr_t)0xfu) * nmemb, mem); \ } \ \ TYPE *ngtcp2_objalloc_##NAME##_get(ngtcp2_objalloc *objalloc); \ @@ -78,7 +78,7 @@ void ngtcp2_objalloc_clear(ngtcp2_objalloc *objalloc); size_t len); \ \ inline static void ngtcp2_objalloc_##NAME##_release( \ - ngtcp2_objalloc *objalloc, TYPE *obj) { \ + ngtcp2_objalloc *objalloc, TYPE *obj) { \ ngtcp2_opl_push(&objalloc->opl, &obj->OPLENTFIELD); \ } @@ -90,7 +90,7 @@ void ngtcp2_objalloc_clear(ngtcp2_objalloc *objalloc); \ if (!oplent) { \ rv = \ - ngtcp2_balloc_get(&objalloc->balloc, (void **)&obj, sizeof(TYPE)); \ + ngtcp2_balloc_get(&objalloc->balloc, (void **)&obj, sizeof(TYPE)); \ if (rv != 0) { \ return NULL; \ } \ @@ -118,30 +118,30 @@ void ngtcp2_objalloc_clear(ngtcp2_objalloc *objalloc); \ return ngtcp2_struct_of(oplent, TYPE, OPLENTFIELD); \ } -#else /* NOMEMPOOL */ +#else /* defined(NOMEMPOOL) */ # define ngtcp2_objalloc_decl(NAME, TYPE, OPLENTFIELD) \ inline static void ngtcp2_objalloc_##NAME##_init( \ - ngtcp2_objalloc *objalloc, size_t nmemb, const ngtcp2_mem *mem) { \ + ngtcp2_objalloc *objalloc, size_t nmemb, const ngtcp2_mem *mem) { \ ngtcp2_objalloc_init( \ - objalloc, ((sizeof(TYPE) + 0xfu) & ~(uintptr_t)0xfu) * nmemb, mem); \ + objalloc, ((sizeof(TYPE) + 0xfu) & ~(uintptr_t)0xfu) * nmemb, mem); \ } \ \ inline static TYPE *ngtcp2_objalloc_##NAME##_get( \ - ngtcp2_objalloc *objalloc) { \ + ngtcp2_objalloc *objalloc) { \ return ngtcp2_mem_malloc(objalloc->balloc.mem, sizeof(TYPE)); \ } \ \ inline static TYPE *ngtcp2_objalloc_##NAME##_len_get( \ - ngtcp2_objalloc *objalloc, size_t len) { \ + ngtcp2_objalloc *objalloc, size_t len) { \ return ngtcp2_mem_malloc(objalloc->balloc.mem, len); \ } \ \ inline static void ngtcp2_objalloc_##NAME##_release( \ - ngtcp2_objalloc *objalloc, TYPE *obj) { \ + ngtcp2_objalloc *objalloc, TYPE *obj) { \ ngtcp2_mem_free(objalloc->balloc.mem, obj); \ } # define ngtcp2_objalloc_def(NAME, TYPE, OPLENTFIELD) -#endif /* NOMEMPOOL */ +#endif /* defined(NOMEMPOOL) */ -#endif /* NGTCP2_OBJALLOC_H */ +#endif /* !defined(NGTCP2_OBJALLOC_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_opl.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_opl.h index 714aa366304f0d..f2df3f6dccd45e 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_opl.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_opl.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -62,4 +62,4 @@ ngtcp2_opl_entry *ngtcp2_opl_pop(ngtcp2_opl *opl); void ngtcp2_opl_clear(ngtcp2_opl *opl); -#endif /* NGTCP2_OPL_H */ +#endif /* !defined(NGTCP2_OPL_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_path.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_path.h index 0c360e936231c8..a708378db32fbb 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_path.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_path.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -46,4 +46,4 @@ void ngtcp2_path_init(ngtcp2_path *path, const ngtcp2_addr *local, void ngtcp2_path_storage_init2(ngtcp2_path_storage *ps, const ngtcp2_path *path); -#endif /* NGTCP2_PATH_H */ +#endif /* !defined(NGTCP2_PATH_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_pkt.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_pkt.c index 1687ff254d94c7..5c82e1bd503f98 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_pkt.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_pkt.c @@ -83,16 +83,19 @@ int ngtcp2_pkt_decode_version_cid(ngtcp2_version_cid *dest, const uint8_t *data, dcidlen = data[5]; len += dcidlen; + if (datalen < len) { return NGTCP2_ERR_INVALID_ARGUMENT; } + scidlen = data[5 + 1 + dcidlen]; len += scidlen; + if (datalen < len) { return NGTCP2_ERR_INVALID_ARGUMENT; } - ngtcp2_get_uint32(&version, &data[1]); + ngtcp2_get_uint32be(&version, &data[1]); supported_version = ngtcp2_is_supported_version(version); @@ -120,6 +123,7 @@ int ngtcp2_pkt_decode_version_cid(ngtcp2_version_cid *dest, const uint8_t *data, if (!supported_version) { return NGTCP2_ERR_VERSION_NEGOTIATION; } + return 0; } @@ -145,16 +149,19 @@ void ngtcp2_pkt_hd_init(ngtcp2_pkt_hd *hd, uint8_t flags, uint8_t type, size_t len) { hd->flags = flags; hd->type = type; + if (dcid) { hd->dcid = *dcid; } else { ngtcp2_cid_zero(&hd->dcid); } + if (scid) { hd->scid = *scid; } else { ngtcp2_cid_zero(&hd->scid); } + hd->pkt_num = pkt_num; hd->token = NULL; hd->tokenlen = 0; @@ -170,10 +177,11 @@ ngtcp2_ssize ngtcp2_pkt_decode_hd_long(ngtcp2_pkt_hd *dest, const uint8_t *pkt, size_t dcil, scil; const uint8_t *p; size_t len = 0; - size_t n; size_t ntokenlen = 0; const uint8_t *token = NULL; size_t tokenlen = 0; + size_t nlonglen = 0; + size_t longlen = 0; uint64_t vi; uint8_t flags = NGTCP2_PKT_FLAG_LONG_FORM; @@ -185,7 +193,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_hd_long(ngtcp2_pkt_hd *dest, const uint8_t *pkt, return NGTCP2_ERR_INVALID_ARGUMENT; } - ngtcp2_get_uint32(&version, &pkt[1]); + ngtcp2_get_uint32be(&version, &pkt[1]); if (version == 0) { type = NGTCP2_PKT_VERSION_NEGOTIATION; @@ -227,6 +235,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_hd_long(ngtcp2_pkt_hd *dest, const uint8_t *pkt, p = &pkt[5]; dcil = *p; + if (dcil > NGTCP2_MAX_CIDLEN) { /* QUIC v1 implementation never expect to receive CID length more than NGTCP2_MAX_CIDLEN. */ @@ -240,9 +249,11 @@ ngtcp2_ssize ngtcp2_pkt_decode_hd_long(ngtcp2_pkt_hd *dest, const uint8_t *pkt, p += 1 + dcil; scil = *p; + if (scil > NGTCP2_MAX_CIDLEN) { return NGTCP2_ERR_INVALID_ARGUMENT; } + len += scil; if (pktlen < len) { @@ -264,6 +275,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_hd_long(ngtcp2_pkt_hd *dest, const uint8_t *pkt, if (pktlen - len < vi) { return NGTCP2_ERR_INVALID_ARGUMENT; } + tokenlen = (size_t)vi; len += tokenlen; @@ -285,12 +297,21 @@ ngtcp2_ssize ngtcp2_pkt_decode_hd_long(ngtcp2_pkt_hd *dest, const uint8_t *pkt, } /* Length */ - n = ngtcp2_get_uvarintlen(p); - len += n - 1; + nlonglen = ngtcp2_get_uvarintlen(p); + len += nlonglen - 1; if (pktlen < len) { return NGTCP2_ERR_INVALID_ARGUMENT; } + + ngtcp2_get_uvarint(&vi, p); +#if SIZE_MAX > UINT32_MAX + if (vi > SIZE_MAX) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } +#endif /* SIZE_MAX > UINT32_MAX */ + + longlen = (size_t)vi; } dest->flags = flags; @@ -309,24 +330,11 @@ ngtcp2_ssize ngtcp2_pkt_decode_hd_long(ngtcp2_pkt_hd *dest, const uint8_t *pkt, dest->tokenlen = tokenlen; p += ntokenlen + tokenlen; - switch (type) { - case NGTCP2_PKT_RETRY: - dest->len = 0; - break; - default: - if (!(flags & NGTCP2_PKT_FLAG_LONG_FORM)) { - assert(type == NGTCP2_PKT_VERSION_NEGOTIATION); - /* Version Negotiation is not a long header packet. */ - dest->len = 0; - break; - } + dest->len = longlen; - p = ngtcp2_get_uvarint(&vi, p); - if (vi > SIZE_MAX) { - return NGTCP2_ERR_INVALID_ARGUMENT; - } - dest->len = (size_t)vi; - } +#ifndef NDEBUG + p += nlonglen; +#endif /* !defined(NDEBUG) */ assert((size_t)(p - pkt) == len); @@ -401,6 +409,7 @@ ngtcp2_ssize ngtcp2_pkt_encode_hd_long(uint8_t *out, size_t outlen, *p = (uint8_t)(NGTCP2_HEADER_FORM_BIT | (ngtcp2_pkt_versioned_type(hd->version, hd->type) << 4) | (uint8_t)(hd->pkt_numlen - 1)); + if (!(hd->flags & NGTCP2_PKT_FLAG_FIXED_BIT_CLEAR)) { *p |= NGTCP2_FIXED_BIT_MASK; } @@ -409,16 +418,20 @@ ngtcp2_ssize ngtcp2_pkt_encode_hd_long(uint8_t *out, size_t outlen, p = ngtcp2_put_uint32be(p, hd->version); *p++ = (uint8_t)hd->dcid.datalen; + if (hd->dcid.datalen) { p = ngtcp2_cpymem(p, hd->dcid.data, hd->dcid.datalen); } + *p++ = (uint8_t)hd->scid.datalen; + if (hd->scid.datalen) { p = ngtcp2_cpymem(p, hd->scid.data, hd->scid.datalen); } if (hd->type == NGTCP2_PKT_INITIAL) { p = ngtcp2_put_uvarint(p, hd->tokenlen); + if (hd->tokenlen) { p = ngtcp2_cpymem(p, hd->token, hd->tokenlen); } @@ -446,9 +459,11 @@ ngtcp2_ssize ngtcp2_pkt_encode_hd_short(uint8_t *out, size_t outlen, p = out; *p = (uint8_t)(hd->pkt_numlen - 1); + if (!(hd->flags & NGTCP2_PKT_FLAG_FIXED_BIT_CLEAR)) { *p |= NGTCP2_FIXED_BIT_MASK; } + if (hd->flags & NGTCP2_PKT_FLAG_KEY_PHASE) { *p |= NGTCP2_SHORT_KEY_PHASE_BIT; } @@ -503,7 +518,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_frame(ngtcp2_frame *dest, const uint8_t *payload, payloadlen); case NGTCP2_FRAME_STREAM_DATA_BLOCKED: return ngtcp2_pkt_decode_stream_data_blocked_frame( - &dest->stream_data_blocked, payload, payloadlen); + &dest->stream_data_blocked, payload, payloadlen); case NGTCP2_FRAME_STREAMS_BLOCKED_BIDI: case NGTCP2_FRAME_STREAMS_BLOCKED_UNI: return ngtcp2_pkt_decode_streams_blocked_frame(&dest->streams_blocked, @@ -530,7 +545,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_frame(ngtcp2_frame *dest, const uint8_t *payload, payloadlen); case NGTCP2_FRAME_RETIRE_CONNECTION_ID: return ngtcp2_pkt_decode_retire_connection_id_frame( - &dest->retire_connection_id, payload, payloadlen); + &dest->retire_connection_id, payload, payloadlen); case NGTCP2_FRAME_HANDSHAKE_DONE: return ngtcp2_pkt_decode_handshake_done_frame(&dest->handshake_done, payload, payloadlen); @@ -613,6 +628,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_stream_frame(ngtcp2_stream *dest, if (payloadlen - len < vi) { return NGTCP2_ERR_FRAME_ENCODING; } + datalen = (size_t)vi; len += datalen; } else { @@ -752,7 +768,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_ack_frame(ngtcp2_ack *dest, } /* TODO We might not decode all ranges. It could be very large. */ - max_rangecnt = ngtcp2_min(NGTCP2_MAX_ACK_RANGES, rangecnt); + max_rangecnt = ngtcp2_min_size(NGTCP2_MAX_ACK_RANGES, rangecnt); p = payload + 1; @@ -770,7 +786,8 @@ ngtcp2_ssize ngtcp2_pkt_decode_ack_frame(ngtcp2_ack *dest, p = ngtcp2_get_uvarint(&range->gap, p); p = ngtcp2_get_uvarint(&range->len, p); } - for (i = max_rangecnt; i < rangecnt; ++i) { + + for (; i < rangecnt; ++i) { p += ngtcp2_get_uvarintlen(p); p += ngtcp2_get_uvarintlen(p); } @@ -820,18 +837,23 @@ ngtcp2_ssize ngtcp2_pkt_decode_reset_stream_frame(ngtcp2_reset_stream *dest, n = ngtcp2_get_uvarintlen(p); len += n - 1; + if (payloadlen < len) { return NGTCP2_ERR_FRAME_ENCODING; } + p += n; n = ngtcp2_get_uvarintlen(p); len += n - 1; + if (payloadlen < len) { return NGTCP2_ERR_FRAME_ENCODING; } + p += n; n = ngtcp2_get_uvarintlen(p); len += n - 1; + if (payloadlen < len) { return NGTCP2_ERR_FRAME_ENCODING; } @@ -849,7 +871,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_reset_stream_frame(ngtcp2_reset_stream *dest, } ngtcp2_ssize ngtcp2_pkt_decode_connection_close_frame( - ngtcp2_connection_close *dest, const uint8_t *payload, size_t payloadlen) { + ngtcp2_connection_close *dest, const uint8_t *payload, size_t payloadlen) { size_t len = 1 + 1 + 1; const uint8_t *p; size_t reasonlen; @@ -868,6 +890,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_connection_close_frame( n = ngtcp2_get_uvarintlen(p); len += n - 1; + if (payloadlen < len) { return NGTCP2_ERR_FRAME_ENCODING; } @@ -879,6 +902,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_connection_close_frame( n = ngtcp2_get_uvarintlen(p); len += n - 1; + if (payloadlen < len) { return NGTCP2_ERR_FRAME_ENCODING; } @@ -888,6 +912,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_connection_close_frame( nreasonlen = ngtcp2_get_uvarintlen(p); len += nreasonlen - 1; + if (payloadlen < len) { return NGTCP2_ERR_FRAME_ENCODING; } @@ -896,6 +921,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_connection_close_frame( if (payloadlen - len < vi) { return NGTCP2_ERR_FRAME_ENCODING; } + reasonlen = (size_t)vi; len += reasonlen; @@ -903,13 +929,16 @@ ngtcp2_ssize ngtcp2_pkt_decode_connection_close_frame( dest->type = type; p = ngtcp2_get_uvarint(&dest->error_code, p); + if (type == NGTCP2_FRAME_CONNECTION_CLOSE) { p = ngtcp2_get_uvarint(&dest->frame_type, p); } else { dest->frame_type = 0; } + dest->reasonlen = reasonlen; p += nreasonlen; + if (reasonlen == 0) { dest->reason = NULL; } else { @@ -951,7 +980,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_max_data_frame(ngtcp2_max_data *dest, } ngtcp2_ssize ngtcp2_pkt_decode_max_stream_data_frame( - ngtcp2_max_stream_data *dest, const uint8_t *payload, size_t payloadlen) { + ngtcp2_max_stream_data *dest, const uint8_t *payload, size_t payloadlen) { size_t len = 1 + 1 + 1; const uint8_t *p; size_t n; @@ -1055,10 +1084,8 @@ ngtcp2_ssize ngtcp2_pkt_decode_data_blocked_frame(ngtcp2_data_blocked *dest, return (ngtcp2_ssize)len; } -ngtcp2_ssize -ngtcp2_pkt_decode_stream_data_blocked_frame(ngtcp2_stream_data_blocked *dest, - const uint8_t *payload, - size_t payloadlen) { +ngtcp2_ssize ngtcp2_pkt_decode_stream_data_blocked_frame( + ngtcp2_stream_data_blocked *dest, const uint8_t *payload, size_t payloadlen) { size_t len = 1 + 1 + 1; const uint8_t *p; size_t n; @@ -1097,7 +1124,7 @@ ngtcp2_pkt_decode_stream_data_blocked_frame(ngtcp2_stream_data_blocked *dest, } ngtcp2_ssize ngtcp2_pkt_decode_streams_blocked_frame( - ngtcp2_streams_blocked *dest, const uint8_t *payload, size_t payloadlen) { + ngtcp2_streams_blocked *dest, const uint8_t *payload, size_t payloadlen) { size_t len = 1 + 1; const uint8_t *p; size_t n; @@ -1124,7 +1151,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_streams_blocked_frame( } ngtcp2_ssize ngtcp2_pkt_decode_new_connection_id_frame( - ngtcp2_new_connection_id *dest, const uint8_t *payload, size_t payloadlen) { + ngtcp2_new_connection_id *dest, const uint8_t *payload, size_t payloadlen) { size_t len = 1 + 1 + 1 + 1 + 16; const uint8_t *p; size_t n; @@ -1138,6 +1165,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_new_connection_id_frame( n = ngtcp2_get_uvarintlen(p); len += n - 1; + if (payloadlen < len) { return NGTCP2_ERR_FRAME_ENCODING; } @@ -1146,6 +1174,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_new_connection_id_frame( n = ngtcp2_get_uvarintlen(p); len += n - 1; + if (payloadlen < len) { return NGTCP2_ERR_FRAME_ENCODING; } @@ -1197,6 +1226,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_stop_sending_frame(ngtcp2_stop_sending *dest, if (payloadlen < len) { return NGTCP2_ERR_FRAME_ENCODING; } + p += n; n = ngtcp2_get_uvarintlen(p); len += n - 1; @@ -1307,6 +1337,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_crypto_frame(ngtcp2_stream *dest, p = ngtcp2_get_uvarint(&dest->offset, p); dest->data[0].len = datalen; p += ndatalen; + if (dest->data[0].len) { dest->data[0].base = (uint8_t *)p; p += dest->data[0].len; @@ -1347,6 +1378,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_new_token_frame(ngtcp2_new_token *dest, if (payloadlen - len < vi) { return NGTCP2_ERR_FRAME_ENCODING; } + datalen = (size_t)vi; len += datalen; @@ -1442,6 +1474,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_datagram_frame(ngtcp2_datagram *dest, datalen = (size_t)vi; len += datalen; + break; default: ngtcp2_unreachable(); @@ -1496,7 +1529,7 @@ ngtcp2_ssize ngtcp2_pkt_encode_frame(uint8_t *out, size_t outlen, return ngtcp2_pkt_encode_data_blocked_frame(out, outlen, &fr->data_blocked); case NGTCP2_FRAME_STREAM_DATA_BLOCKED: return ngtcp2_pkt_encode_stream_data_blocked_frame( - out, outlen, &fr->stream_data_blocked); + out, outlen, &fr->stream_data_blocked); case NGTCP2_FRAME_STREAMS_BLOCKED_BIDI: case NGTCP2_FRAME_STREAMS_BLOCKED_UNI: return ngtcp2_pkt_encode_streams_blocked_frame(out, outlen, @@ -1518,7 +1551,7 @@ ngtcp2_ssize ngtcp2_pkt_encode_frame(uint8_t *out, size_t outlen, return ngtcp2_pkt_encode_new_token_frame(out, outlen, &fr->new_token); case NGTCP2_FRAME_RETIRE_CONNECTION_ID: return ngtcp2_pkt_encode_retire_connection_id_frame( - out, outlen, &fr->retire_connection_id); + out, outlen, &fr->retire_connection_id); case NGTCP2_FRAME_HANDSHAKE_DONE: return ngtcp2_pkt_encode_handshake_done_frame(out, outlen, &fr->handshake_done); @@ -1586,7 +1619,7 @@ ngtcp2_ssize ngtcp2_pkt_encode_stream_frame(uint8_t *out, size_t outlen, } ngtcp2_ssize ngtcp2_pkt_encode_ack_frame(uint8_t *out, size_t outlen, - ngtcp2_ack *fr) { + const ngtcp2_ack *fr) { size_t len = 1 + ngtcp2_put_uvarintlen((uint64_t)fr->largest_ack) + ngtcp2_put_uvarintlen(fr->ack_delay) + ngtcp2_put_uvarintlen(fr->rangecnt) + @@ -1676,8 +1709,8 @@ ngtcp2_pkt_encode_connection_close_frame(uint8_t *out, size_t outlen, const ngtcp2_connection_close *fr) { size_t len = 1 + ngtcp2_put_uvarintlen(fr->error_code) + (fr->type == NGTCP2_FRAME_CONNECTION_CLOSE - ? ngtcp2_put_uvarintlen(fr->frame_type) - : 0) + + ? ngtcp2_put_uvarintlen(fr->frame_type) + : 0) + ngtcp2_put_uvarintlen(fr->reasonlen) + fr->reasonlen; uint8_t *p; @@ -1689,10 +1722,13 @@ ngtcp2_pkt_encode_connection_close_frame(uint8_t *out, size_t outlen, *p++ = (uint8_t)fr->type; p = ngtcp2_put_uvarint(p, fr->error_code); + if (fr->type == NGTCP2_FRAME_CONNECTION_CLOSE) { p = ngtcp2_put_uvarint(p, fr->frame_type); } + p = ngtcp2_put_uvarint(p, fr->reasonlen); + if (fr->reasonlen) { p = ngtcp2_cpymem(p, fr->reason, fr->reasonlen); } @@ -1796,7 +1832,7 @@ ngtcp2_pkt_encode_data_blocked_frame(uint8_t *out, size_t outlen, } ngtcp2_ssize ngtcp2_pkt_encode_stream_data_blocked_frame( - uint8_t *out, size_t outlen, const ngtcp2_stream_data_blocked *fr) { + uint8_t *out, size_t outlen, const ngtcp2_stream_data_blocked *fr) { size_t len = 1 + ngtcp2_put_uvarintlen((uint64_t)fr->stream_id) + ngtcp2_put_uvarintlen(fr->offset); uint8_t *p; @@ -1986,7 +2022,7 @@ ngtcp2_ssize ngtcp2_pkt_encode_new_token_frame(uint8_t *out, size_t outlen, } ngtcp2_ssize ngtcp2_pkt_encode_retire_connection_id_frame( - uint8_t *out, size_t outlen, const ngtcp2_retire_connection_id *fr) { + uint8_t *out, size_t outlen, const ngtcp2_retire_connection_id *fr) { size_t len = 1 + ngtcp2_put_uvarintlen(fr->seq); uint8_t *p; @@ -2023,9 +2059,9 @@ ngtcp2_ssize ngtcp2_pkt_encode_datagram_frame(uint8_t *out, size_t outlen, const ngtcp2_datagram *fr) { uint64_t datalen = ngtcp2_vec_len(fr->data, fr->datacnt); uint64_t len = - 1 + - (fr->type == NGTCP2_FRAME_DATAGRAM ? 0 : ngtcp2_put_uvarintlen(datalen)) + - datalen; + 1 + + (fr->type == NGTCP2_FRAME_DATAGRAM ? 0 : ngtcp2_put_uvarintlen(datalen)) + + datalen; uint8_t *p; size_t i; @@ -2039,6 +2075,7 @@ ngtcp2_ssize ngtcp2_pkt_encode_datagram_frame(uint8_t *out, size_t outlen, p = out; *p++ = (uint8_t)fr->type; + if (fr->type == NGTCP2_FRAME_DATAGRAM_LEN) { p = ngtcp2_put_uvarint(p, datalen); } @@ -2055,9 +2092,9 @@ ngtcp2_ssize ngtcp2_pkt_encode_datagram_frame(uint8_t *out, size_t outlen, } ngtcp2_ssize ngtcp2_pkt_write_version_negotiation( - uint8_t *dest, size_t destlen, uint8_t unused_random, const uint8_t *dcid, - size_t dcidlen, const uint8_t *scid, size_t scidlen, const uint32_t *sv, - size_t nsv) { + uint8_t *dest, size_t destlen, uint8_t unused_random, const uint8_t *dcid, + size_t dcidlen, const uint8_t *scid, size_t scidlen, const uint32_t *sv, + size_t nsv) { size_t len = 1 + 4 + 1 + dcidlen + 1 + scidlen + nsv * 4; uint8_t *p; size_t i; @@ -2074,10 +2111,13 @@ ngtcp2_ssize ngtcp2_pkt_write_version_negotiation( *p++ = 0xc0 | unused_random; p = ngtcp2_put_uint32be(p, 0); *p++ = (uint8_t)dcidlen; + if (dcidlen) { p = ngtcp2_cpymem(p, dcid, dcidlen); } + *p++ = (uint8_t)scidlen; + if (scidlen) { p = ngtcp2_cpymem(p, scid, scidlen); } @@ -2099,7 +2139,7 @@ size_t ngtcp2_pkt_decode_version_negotiation(uint32_t *dest, assert((payloadlen % sizeof(uint32_t)) == 0); for (; payload != end;) { - payload = ngtcp2_get_uint32(dest++, payload); + payload = ngtcp2_get_uint32be(dest++, payload); } return payloadlen / sizeof(uint32_t); @@ -2150,13 +2190,15 @@ int64_t ngtcp2_pkt_adjust_pkt_num(int64_t max_pkt_num, int64_t pkt_num, assert(cand <= (int64_t)NGTCP2_MAX_VARINT - win); return cand + win; } + if (cand > expected + hwin && cand >= win) { return cand - win; } + return cand; } -int ngtcp2_pkt_validate_ack(ngtcp2_ack *fr, int64_t min_pkt_num) { +int ngtcp2_pkt_validate_ack(const ngtcp2_ack *fr, int64_t min_pkt_num) { int64_t largest_ack = fr->largest_ack; size_t i; @@ -2208,7 +2250,7 @@ ngtcp2_pkt_write_stateless_reset(uint8_t *dest, size_t destlen, p = dest; - randlen = ngtcp2_min(destlen - NGTCP2_STATELESS_RESET_TOKENLEN, randlen); + randlen = ngtcp2_min_size(destlen - NGTCP2_STATELESS_RESET_TOKENLEN, randlen); p = ngtcp2_cpymem(p, rand, randlen); p = ngtcp2_cpymem(p, stateless_reset_token, NGTCP2_STATELESS_RESET_TOKENLEN); @@ -2218,10 +2260,10 @@ ngtcp2_pkt_write_stateless_reset(uint8_t *dest, size_t destlen, } ngtcp2_ssize ngtcp2_pkt_write_retry( - uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid, - const ngtcp2_cid *scid, const ngtcp2_cid *odcid, const uint8_t *token, - size_t tokenlen, ngtcp2_encrypt encrypt, const ngtcp2_crypto_aead *aead, - const ngtcp2_crypto_aead_ctx *aead_ctx) { + uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid, + const ngtcp2_cid *scid, const ngtcp2_cid *odcid, const uint8_t *token, + size_t tokenlen, ngtcp2_encrypt encrypt, const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx) { ngtcp2_pkt_hd hd; uint8_t pseudo_retry[1500]; ngtcp2_ssize pseudo_retrylen; @@ -2247,8 +2289,8 @@ ngtcp2_ssize ngtcp2_pkt_write_retry( /* len = */ 0); pseudo_retrylen = - ngtcp2_pkt_encode_pseudo_retry(pseudo_retry, sizeof(pseudo_retry), &hd, - /* unused = */ 0, odcid, token, tokenlen); + ngtcp2_pkt_encode_pseudo_retry(pseudo_retry, sizeof(pseudo_retry), &hd, + /* unused = */ 0, odcid, token, tokenlen); if (pseudo_retrylen < 0) { return pseudo_retrylen; } @@ -2285,8 +2327,8 @@ ngtcp2_ssize ngtcp2_pkt_write_retry( } ngtcp2_ssize ngtcp2_pkt_encode_pseudo_retry( - uint8_t *dest, size_t destlen, const ngtcp2_pkt_hd *hd, uint8_t unused, - const ngtcp2_cid *odcid, const uint8_t *token, size_t tokenlen) { + uint8_t *dest, size_t destlen, const ngtcp2_pkt_hd *hd, uint8_t unused, + const ngtcp2_cid *odcid, const uint8_t *token, size_t tokenlen) { uint8_t *p = dest; ngtcp2_ssize nwrite; @@ -2382,23 +2424,23 @@ size_t ngtcp2_pkt_stream_max_datalen(int64_t stream_id, uint64_t offset, if (left > 8 + 1073741823 && len > 1073741823) { #if SIZE_MAX > UINT32_MAX - len = ngtcp2_min(len, 4611686018427387903lu); + len = ngtcp2_min_uint64(len, 4611686018427387903lu); #endif /* SIZE_MAX > UINT32_MAX */ - return (size_t)ngtcp2_min(len, (uint64_t)(left - 8)); + return (size_t)ngtcp2_min_uint64(len, (uint64_t)(left - 8)); } if (left > 4 + 16383 && len > 16383) { - len = ngtcp2_min(len, 1073741823); - return (size_t)ngtcp2_min(len, (uint64_t)(left - 4)); + len = ngtcp2_min_uint64(len, 1073741823); + return (size_t)ngtcp2_min_uint64(len, (uint64_t)(left - 4)); } if (left > 2 + 63 && len > 63) { - len = ngtcp2_min(len, 16383); - return (size_t)ngtcp2_min(len, (uint64_t)(left - 2)); + len = ngtcp2_min_uint64(len, 16383); + return (size_t)ngtcp2_min_uint64(len, (uint64_t)(left - 2)); } - len = ngtcp2_min(len, 63); - return (size_t)ngtcp2_min(len, (uint64_t)(left - 1)); + len = ngtcp2_min_uint64(len, 63); + return (size_t)ngtcp2_min_uint64(len, (uint64_t)(left - 1)); } size_t ngtcp2_pkt_crypto_max_datalen(uint64_t offset, size_t len, size_t left) { @@ -2414,23 +2456,23 @@ size_t ngtcp2_pkt_crypto_max_datalen(uint64_t offset, size_t len, size_t left) { if (left > 8 + 1073741823 && len > 1073741823) { #if SIZE_MAX > UINT32_MAX - len = ngtcp2_min(len, 4611686018427387903lu); + len = ngtcp2_min_size(len, 4611686018427387903lu); #endif /* SIZE_MAX > UINT32_MAX */ - return ngtcp2_min(len, left - 8); + return ngtcp2_min_size(len, left - 8); } if (left > 4 + 16383 && len > 16383) { - len = ngtcp2_min(len, 1073741823); - return ngtcp2_min(len, left - 4); + len = ngtcp2_min_size(len, 1073741823); + return ngtcp2_min_size(len, left - 4); } if (left > 2 + 63 && len > 63) { - len = ngtcp2_min(len, 16383); - return ngtcp2_min(len, left - 2); + len = ngtcp2_min_size(len, 16383); + return ngtcp2_min_size(len, left - 2); } - len = ngtcp2_min(len, 63); - return ngtcp2_min(len, left - 1); + len = ngtcp2_min_size(len, 63); + return ngtcp2_min_size(len, left - 1); } size_t ngtcp2_pkt_datagram_framelen(size_t len) { @@ -2528,5 +2570,6 @@ int ngtcp2_pkt_verify_reserved_bits(uint8_t c) { if (c & NGTCP2_HEADER_FORM_BIT) { return (c & NGTCP2_LONG_RESERVED_BIT_MASK) == 0 ? 0 : NGTCP2_ERR_PROTO; } + return (c & NGTCP2_SHORT_RESERVED_BIT_MASK) == 0 ? 0 : NGTCP2_ERR_PROTO; } diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_pkt.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_pkt.h index feec4d32c97bdd..86ebecef7bc870 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_pkt.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_pkt.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -41,7 +41,6 @@ #define NGTCP2_LONG_RESERVED_BIT_MASK 0x0c /* Short header specific macros */ -#define NGTCP2_SHORT_SPIN_BIT_MASK 0x20 #define NGTCP2_SHORT_RESERVED_BIT_MASK 0x18 #define NGTCP2_SHORT_KEY_PHASE_BIT 0x04 @@ -53,6 +52,7 @@ LENGTH<1> + PKN<1> */ #define NGTCP2_MIN_LONG_HEADERLEN (1 + 4 + 1 + 1 + 1 + 1) +/* STREAM frame specific macros */ #define NGTCP2_STREAM_FIN_BIT 0x01 #define NGTCP2_STREAM_LEN_BIT 0x02 #define NGTCP2_STREAM_OFF_BIT 0x04 @@ -72,9 +72,6 @@ the beginning of the payload. */ #define NGTCP2_DATAGRAM_OVERHEAD (1 + 8) -/* NGTCP2_MIN_FRAME_PAYLOADLEN is the minimum frame payload length. */ -#define NGTCP2_MIN_FRAME_PAYLOADLEN 16 - /* NGTCP2_MAX_SERVER_STREAM_ID_BIDI is the maximum bidirectional server stream ID. */ #define NGTCP2_MAX_SERVER_STREAM_ID_BIDI ((int64_t)0x3ffffffffffffffdll) @@ -95,16 +92,15 @@ /* NGTCP2_MAX_PKT_NUM is the maximum packet number. */ #define NGTCP2_MAX_PKT_NUM ((int64_t)((1ll << 62) - 1)) -/* NGTCP2_MIN_PKT_EXPANDLEN is the minimum packet size expansion in - addition to the minimum DCID length to hide/trigger Stateless - Reset. */ +/* NGTCP2_MIN_PKT_EXPANDLEN is the minimum packet size expansion to + hide/trigger Stateless Reset. */ #define NGTCP2_MIN_PKT_EXPANDLEN 22 /* NGTCP2_RETRY_TAGLEN is the length of Retry packet integrity tag. */ #define NGTCP2_RETRY_TAGLEN 16 -/* NGTCP2_HARD_MAX_UDP_PAYLOAD_SIZE is the maximum UDP payload size - that this library can write. */ +/* NGTCP2_HARD_MAX_UDP_PAYLOAD_SIZE is the maximum UDP datagram + payload size that this library can write. */ #define NGTCP2_HARD_MAX_UDP_PAYLOAD_SIZE ((1 << 24) - 1) /* NGTCP2_PKT_LENGTHLEN is the number of bytes that is occupied by @@ -357,6 +353,11 @@ typedef union ngtcp2_frame { ngtcp2_retire_connection_id retire_connection_id; ngtcp2_handshake_done handshake_done; ngtcp2_datagram datagram; + /* Extend ngtcp2_frame so that ngtcp2_stream has at least additional + 3 ngtcp2_vec, totaling 4 slots, which can store HEADERS header, + HEADERS payload, DATA header, and DATA payload in the standard + sized ngtcp2_frame_chain. */ + uint8_t pad[sizeof(ngtcp2_stream) + sizeof(ngtcp2_vec) * 3]; } ngtcp2_frame; typedef struct ngtcp2_pkt_chain ngtcp2_pkt_chain; @@ -371,7 +372,7 @@ struct ngtcp2_pkt_chain { uint8_t *pkt; /* pktlen is length of a QUIC packet. */ size_t pktlen; - /* dgramlen is length of UDP datagram that a QUIC packet is + /* dgramlen is length of UDP datagram payload that a QUIC packet is included. */ size_t dgramlen; ngtcp2_tstamp ts; @@ -403,11 +404,11 @@ void ngtcp2_pkt_chain_del(ngtcp2_pkt_chain *pc, const ngtcp2_mem *mem); /* * ngtcp2_pkt_hd_init initializes |hd| with the given values. If - * |dcid| and/or |scid| is NULL, DCID and SCID of |hd| is empty - * respectively. |pkt_numlen| is the number of bytes used to encode - * |pkt_num| and either 1, 2, or 4. |version| is QUIC version for - * long header. |len| is the length field of Initial, 0RTT, and - * Handshake packets. + * |dcid| and/or |scid| is NULL, Destination Connection ID and/or + * Source Connection ID of |hd| is empty respectively. |pkt_numlen| + * is the number of bytes used to encode |pkt_num| and either 1, 2, or + * 4. |version| is QUIC version for long header. |len| is the length + * field of Initial, 0RTT, and Handshake packets. */ void ngtcp2_pkt_hd_init(ngtcp2_pkt_hd *hd, uint8_t flags, uint8_t type, const ngtcp2_cid *dcid, const ngtcp2_cid *scid, @@ -417,8 +418,8 @@ void ngtcp2_pkt_hd_init(ngtcp2_pkt_hd *hd, uint8_t flags, uint8_t type, /* * ngtcp2_pkt_encode_hd_long encodes |hd| as QUIC long header into * |out| which has length |outlen|. It returns the number of bytes - * written into |outlen| if it succeeds, or one of the following - * negative error codes: + * written into |out| if it succeeds, or one of the following negative + * error codes: * * NGTCP2_ERR_NOBUF * Buffer is too short @@ -429,8 +430,8 @@ ngtcp2_ssize ngtcp2_pkt_encode_hd_long(uint8_t *out, size_t outlen, /* * ngtcp2_pkt_encode_hd_short encodes |hd| as QUIC short header into * |out| which has length |outlen|. It returns the number of bytes - * written into |outlen| if it succeeds, or one of the following - * negative error codes: + * written into |out| if it succeeds, or one of the following negative + * error codes: * * NGTCP2_ERR_NOBUF * Buffer is too short @@ -457,7 +458,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_frame(ngtcp2_frame *dest, const uint8_t *payload, /** * @function * - * `ngtcp2_pkt_encode_frame` encodes a frame |fm| into the buffer + * `ngtcp2_pkt_encode_frame` encodes a frame |fr| into the buffer * pointed by |out| of length |outlen|. * * This function returns the number of bytes written to the buffer, or @@ -543,7 +544,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_ack_frame(ngtcp2_ack *dest, /* * ngtcp2_pkt_decode_padding_frame decodes contiguous PADDING frames * from |payload| of length |payloadlen|. It continues to parse - * frames as long as the frame type is PADDING. This finishes when it + * frames as long as the frame type is PADDING. It finishes when it * encounters the frame type which is not PADDING, or all input data * is read. The first byte (payload[0]) must be NGTCP2_FRAME_PADDING. * This function returns the exact number of bytes read to decode @@ -573,7 +574,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_reset_stream_frame(ngtcp2_reset_stream *dest, * ngtcp2_pkt_decode_connection_close_frame decodes CONNECTION_CLOSE * frame from |payload| of length |payloadlen|. The result is stored * in the object pointed by |dest|. CONNECTION_CLOSE frame must start - * at payload[0]. This function finishes it decodes one + * at payload[0]. This function finishes when it decodes one * CONNECTION_CLOSE frame, and returns the exact number of bytes read * to decode a frame if it succeeds, or one of the following negative * error codes: @@ -582,7 +583,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_reset_stream_frame(ngtcp2_reset_stream *dest, * Payload is too short to include CONNECTION_CLOSE frame. */ ngtcp2_ssize ngtcp2_pkt_decode_connection_close_frame( - ngtcp2_connection_close *dest, const uint8_t *payload, size_t payloadlen); + ngtcp2_connection_close *dest, const uint8_t *payload, size_t payloadlen); /* * ngtcp2_pkt_decode_max_data_frame decodes MAX_DATA frame from @@ -612,7 +613,7 @@ ngtcp2_ssize ngtcp2_pkt_decode_max_data_frame(ngtcp2_max_data *dest, * Payload is too short to include MAX_STREAM_DATA frame. */ ngtcp2_ssize ngtcp2_pkt_decode_max_stream_data_frame( - ngtcp2_max_stream_data *dest, const uint8_t *payload, size_t payloadlen); + ngtcp2_max_stream_data *dest, const uint8_t *payload, size_t payloadlen); /* * ngtcp2_pkt_decode_max_streams_frame decodes MAX_STREAMS frame from @@ -668,10 +669,8 @@ ngtcp2_ssize ngtcp2_pkt_decode_data_blocked_frame(ngtcp2_data_blocked *dest, * NGTCP2_ERR_FRAME_ENCODING * Payload is too short to include STREAM_DATA_BLOCKED frame. */ -ngtcp2_ssize -ngtcp2_pkt_decode_stream_data_blocked_frame(ngtcp2_stream_data_blocked *dest, - const uint8_t *payload, - size_t payloadlen); +ngtcp2_ssize ngtcp2_pkt_decode_stream_data_blocked_frame( + ngtcp2_stream_data_blocked *dest, const uint8_t *payload, size_t payloadlen); /* * ngtcp2_pkt_decode_streams_blocked_frame decodes STREAMS_BLOCKED @@ -686,7 +685,7 @@ ngtcp2_pkt_decode_stream_data_blocked_frame(ngtcp2_stream_data_blocked *dest, * Payload is too short to include STREAMS_BLOCKED frame. */ ngtcp2_ssize ngtcp2_pkt_decode_streams_blocked_frame( - ngtcp2_streams_blocked *dest, const uint8_t *payload, size_t payloadlen); + ngtcp2_streams_blocked *dest, const uint8_t *payload, size_t payloadlen); /* * ngtcp2_pkt_decode_new_connection_id_frame decodes NEW_CONNECTION_ID @@ -699,11 +698,11 @@ ngtcp2_ssize ngtcp2_pkt_decode_streams_blocked_frame( * * NGTCP2_ERR_FRAME_ENCODING * Payload is too short to include NEW_CONNECTION_ID frame; or the - * length of CID is strictly less than NGTCP2_MIN_CIDLEN or - * greater than NGTCP2_MAX_CIDLEN. + * length of Connection ID is strictly less than NGTCP2_MIN_CIDLEN + * or greater than NGTCP2_MAX_CIDLEN. */ ngtcp2_ssize ngtcp2_pkt_decode_new_connection_id_frame( - ngtcp2_new_connection_id *dest, const uint8_t *payload, size_t payloadlen); + ngtcp2_new_connection_id *dest, const uint8_t *payload, size_t payloadlen); /* * ngtcp2_pkt_decode_stop_sending_frame decodes STOP_SENDING frame @@ -784,20 +783,19 @@ ngtcp2_ssize ngtcp2_pkt_decode_new_token_frame(ngtcp2_new_token *dest, size_t payloadlen); /* - * ngtcp2_pkt_decode_retire_connection_id_frame decodes RETIRE_CONNECTION_ID - * frame from |payload| of length |payloadlen|. The result is stored in the - * object pointed by |dest|. RETIRE_CONNECTION_ID frame must start at - * payload[0]. This function finishes when it decodes one RETIRE_CONNECTION_ID - * frame, and returns the exact number of bytes read to decode a frame - * if it succeeds, or one of the following negative error codes: + * ngtcp2_pkt_decode_retire_connection_id_frame decodes + * RETIRE_CONNECTION_ID frame from |payload| of length |payloadlen|. + * The result is stored in the object pointed by |dest|. + * RETIRE_CONNECTION_ID frame must start at payload[0]. This function + * finishes when it decodes one RETIRE_CONNECTION_ID frame, and + * returns the exact number of bytes read to decode a frame if it + * succeeds, or one of the following negative error codes: * * NGTCP2_ERR_FRAME_ENCODING * Payload is too short to include RETIRE_CONNECTION_ID frame. */ -ngtcp2_ssize -ngtcp2_pkt_decode_retire_connection_id_frame(ngtcp2_retire_connection_id *dest, - const uint8_t *payload, - size_t payloadlen); +ngtcp2_ssize ngtcp2_pkt_decode_retire_connection_id_frame( + ngtcp2_retire_connection_id *dest, const uint8_t *payload, size_t payloadlen); /* * ngtcp2_pkt_decode_handshake_done_frame decodes HANDSHAKE_DONE frame @@ -846,9 +844,6 @@ ngtcp2_ssize ngtcp2_pkt_encode_stream_frame(uint8_t *out, size_t outlen, * ngtcp2_pkt_encode_ack_frame encodes ACK frame |fr| into the buffer * pointed by |out| of length |outlen|. * - * This function assigns & - * ~NGTCP2_FRAME_ACK to fr->flags. - * * This function returns the number of bytes written if it succeeds, * or one of the following negative error codes: * @@ -856,7 +851,7 @@ ngtcp2_ssize ngtcp2_pkt_encode_stream_frame(uint8_t *out, size_t outlen, * Buffer does not have enough capacity to write a frame. */ ngtcp2_ssize ngtcp2_pkt_encode_ack_frame(uint8_t *out, size_t outlen, - ngtcp2_ack *fr); + const ngtcp2_ack *fr); /* * ngtcp2_pkt_encode_padding_frame encodes PADDING frame |fr| into the @@ -980,7 +975,7 @@ ngtcp2_pkt_encode_data_blocked_frame(uint8_t *out, size_t outlen, * Buffer does not have enough capacity to write a frame. */ ngtcp2_ssize ngtcp2_pkt_encode_stream_data_blocked_frame( - uint8_t *out, size_t outlen, const ngtcp2_stream_data_blocked *fr); + uint8_t *out, size_t outlen, const ngtcp2_stream_data_blocked *fr); /* * ngtcp2_pkt_encode_streams_blocked_frame encodes STREAMS_BLOCKED @@ -1079,8 +1074,9 @@ ngtcp2_ssize ngtcp2_pkt_encode_new_token_frame(uint8_t *out, size_t outlen, const ngtcp2_new_token *fr); /* - * ngtcp2_pkt_encode_retire_connection_id_frame encodes RETIRE_CONNECTION_ID - * frame |fr| into the buffer pointed by |out| of length |outlen|. + * ngtcp2_pkt_encode_retire_connection_id_frame encodes + * RETIRE_CONNECTION_ID frame |fr| into the buffer pointed by |out| of + * length |outlen|. * * This function returns the number of bytes written if it succeeds, * or one of the following negative error codes: @@ -1089,7 +1085,7 @@ ngtcp2_ssize ngtcp2_pkt_encode_new_token_frame(uint8_t *out, size_t outlen, * Buffer does not have enough capacity to write a frame. */ ngtcp2_ssize ngtcp2_pkt_encode_retire_connection_id_frame( - uint8_t *out, size_t outlen, const ngtcp2_retire_connection_id *fr); + uint8_t *out, size_t outlen, const ngtcp2_retire_connection_id *fr); /* * ngtcp2_pkt_encode_handshake_done_frame encodes HANDSHAKE_DONE frame @@ -1119,7 +1115,7 @@ ngtcp2_ssize ngtcp2_pkt_encode_datagram_frame(uint8_t *out, size_t outlen, const ngtcp2_datagram *fr); /* - * ngtcp2_pkt_adjust_pkt_num find the full 64 bits packet number for + * ngtcp2_pkt_adjust_pkt_num finds the full 62 bits packet number for * |pkt_num|, which is encoded in |pkt_numlen| bytes. The * |max_pkt_num| is the highest successfully authenticated packet * number. @@ -1128,10 +1124,10 @@ int64_t ngtcp2_pkt_adjust_pkt_num(int64_t max_pkt_num, int64_t pkt_num, size_t pkt_numlen); /* - * ngtcp2_pkt_validate_ack checks that ack is malformed or not. - * |min_pkt_num| is the minimum packet number that an endpoint sends. - * It is an error to receive acknowledgements for a packet less than - * |min_pkt_num|. + * ngtcp2_pkt_validate_ack verifies whether |fr| is malformed or not. + * |min_pkt_num| is the minimum packet number that a local endpoint + * sends. It is an error to receive acknowledgements for a packet + * less than |min_pkt_num|. * * This function returns 0 if it succeeds, or one of the following * negative error codes: @@ -1141,13 +1137,13 @@ int64_t ngtcp2_pkt_adjust_pkt_num(int64_t max_pkt_num, int64_t pkt_num, * NGTCP2_ERR_PROTO * |fr| contains a packet number less than |min_pkt_num|. */ -int ngtcp2_pkt_validate_ack(ngtcp2_ack *fr, int64_t min_pkt_num); +int ngtcp2_pkt_validate_ack(const ngtcp2_ack *fr, int64_t min_pkt_num); /* * ngtcp2_pkt_stream_max_datalen returns the maximum number of bytes * which can be sent for stream denoted by |stream_id|. |offset| is - * an offset of within the stream. |len| is the estimated number of - * bytes to be sent. |left| is the size of buffer. If |left| is too + * an offset within the stream. |len| is the estimated number of + * bytes to send. |left| is the size of buffer. If |left| is too * small to write STREAM frame, this function returns (size_t)-1. */ size_t ngtcp2_pkt_stream_max_datalen(int64_t stream_id, uint64_t offset, @@ -1155,10 +1151,10 @@ size_t ngtcp2_pkt_stream_max_datalen(int64_t stream_id, uint64_t offset, /* * ngtcp2_pkt_crypto_max_datalen returns the maximum number of bytes - * which can be sent for crypto stream. |offset| is an offset of - * within the crypto stream. |len| is the estimated number of bytes - * to be sent. |left| is the size of buffer. If |left| is too small - * to write CRYPTO frame, this function returns (size_t)-1. + * which can be sent for crypto stream. |offset| is an offset within + * the crypto stream. |len| is the estimated number of bytes to send. + * |left| is the size of buffer. If |left| is too small to write + * CRYPTO frame, this function returns (size_t)-1. */ size_t ngtcp2_pkt_crypto_max_datalen(uint64_t offset, size_t len, size_t left); @@ -1191,8 +1187,8 @@ int ngtcp2_pkt_verify_reserved_bits(uint8_t c); * Buffer is too short. */ ngtcp2_ssize ngtcp2_pkt_encode_pseudo_retry( - uint8_t *dest, size_t destlen, const ngtcp2_pkt_hd *hd, uint8_t unused, - const ngtcp2_cid *odcid, const uint8_t *token, size_t tokenlen); + uint8_t *dest, size_t destlen, const ngtcp2_pkt_hd *hd, uint8_t unused, + const ngtcp2_cid *odcid, const uint8_t *token, size_t tokenlen); /* * ngtcp2_pkt_verify_retry_tag verifies Retry packet. The buffer @@ -1229,4 +1225,4 @@ uint8_t ngtcp2_pkt_versioned_type(uint32_t version, uint32_t pkt_type); */ uint8_t ngtcp2_pkt_get_type_long(uint32_t version, uint8_t c); -#endif /* NGTCP2_PKT_H */ +#endif /* !defined(NGTCP2_PKT_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_pktns_id.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_pktns_id.h index 66b0ee9e6c13cf..9cf8444d32d7eb 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_pktns_id.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_pktns_id.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -59,4 +59,4 @@ typedef enum ngtcp2_pktns_id { NGTCP2_PKTNS_ID_MAX } ngtcp2_pktns_id; -#endif /* NGTCP2_PKTNS_ID_H */ +#endif /* !defined(NGTCP2_PKTNS_ID_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_pmtud.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_pmtud.c index 771ef5e026d12d..ebd113f6746217 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_pmtud.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_pmtud.c @@ -33,18 +33,17 @@ for each probe. */ #define NGTCP2_PMTUD_PROBE_NUM_MAX 3 -static size_t mtu_probes[] = { - 1454 - 48, /* The well known MTU used by a domestic optic fiber - service in Japan. */ - 1390 - 48, /* Typical Tunneled MTU */ - 1280 - 48, /* IPv6 minimum MTU */ - 1492 - 48, /* PPPoE */ +static uint16_t pmtud_default_probes[] = { + 1454 - 48, /* The well known MTU used by a domestic optic fiber + service in Japan. */ + 1390 - 48, /* Typical Tunneled MTU */ + 1280 - 48, /* IPv6 minimum MTU */ + 1492 - 48, /* PPPoE */ }; -#define NGTCP2_MTU_PROBESLEN ngtcp2_arraylen(mtu_probes) - int ngtcp2_pmtud_new(ngtcp2_pmtud **ppmtud, size_t max_udp_payload_size, size_t hard_max_udp_payload_size, int64_t tx_pkt_num, + const uint16_t *probes, size_t probeslen, const ngtcp2_mem *mem) { ngtcp2_pmtud *pmtud = ngtcp2_mem_malloc(mem, sizeof(ngtcp2_pmtud)); @@ -61,11 +60,19 @@ int ngtcp2_pmtud_new(ngtcp2_pmtud **ppmtud, size_t max_udp_payload_size, pmtud->hard_max_udp_payload_size = hard_max_udp_payload_size; pmtud->min_fail_udp_payload_size = SIZE_MAX; - for (; pmtud->mtu_idx < NGTCP2_MTU_PROBESLEN; ++pmtud->mtu_idx) { - if (mtu_probes[pmtud->mtu_idx] > pmtud->hard_max_udp_payload_size) { + if (probeslen) { + pmtud->probes = probes; + pmtud->probeslen = probeslen; + } else { + pmtud->probes = pmtud_default_probes; + pmtud->probeslen = ngtcp2_arraylen(pmtud_default_probes); + } + + for (; pmtud->mtu_idx < pmtud->probeslen; ++pmtud->mtu_idx) { + if (pmtud->probes[pmtud->mtu_idx] > pmtud->hard_max_udp_payload_size) { continue; } - if (mtu_probes[pmtud->mtu_idx] > pmtud->max_udp_payload_size) { + if (pmtud->probes[pmtud->mtu_idx] > pmtud->max_udp_payload_size) { break; } } @@ -84,9 +91,9 @@ void ngtcp2_pmtud_del(ngtcp2_pmtud *pmtud) { } size_t ngtcp2_pmtud_probelen(ngtcp2_pmtud *pmtud) { - assert(pmtud->mtu_idx < NGTCP2_MTU_PROBESLEN); + assert(pmtud->mtu_idx < pmtud->probeslen); - return mtu_probes[pmtud->mtu_idx]; + return pmtud->probes[pmtud->mtu_idx]; } void ngtcp2_pmtud_probe_sent(ngtcp2_pmtud *pmtud, ngtcp2_duration pto, @@ -107,19 +114,19 @@ int ngtcp2_pmtud_require_probe(ngtcp2_pmtud *pmtud) { } static void pmtud_next_probe(ngtcp2_pmtud *pmtud) { - assert(pmtud->mtu_idx < NGTCP2_MTU_PROBESLEN); + assert(pmtud->mtu_idx < pmtud->probeslen); ++pmtud->mtu_idx; pmtud->num_pkts_sent = 0; pmtud->expiry = UINT64_MAX; - for (; pmtud->mtu_idx < NGTCP2_MTU_PROBESLEN; ++pmtud->mtu_idx) { - if (mtu_probes[pmtud->mtu_idx] <= pmtud->max_udp_payload_size || - mtu_probes[pmtud->mtu_idx] > pmtud->hard_max_udp_payload_size) { + for (; pmtud->mtu_idx < pmtud->probeslen; ++pmtud->mtu_idx) { + if (pmtud->probes[pmtud->mtu_idx] <= pmtud->max_udp_payload_size || + pmtud->probes[pmtud->mtu_idx] > pmtud->hard_max_udp_payload_size) { continue; } - if (mtu_probes[pmtud->mtu_idx] < pmtud->min_fail_udp_payload_size) { + if (pmtud->probes[pmtud->mtu_idx] < pmtud->min_fail_udp_payload_size) { break; } } @@ -127,11 +134,11 @@ static void pmtud_next_probe(ngtcp2_pmtud *pmtud) { void ngtcp2_pmtud_probe_success(ngtcp2_pmtud *pmtud, size_t payloadlen) { pmtud->max_udp_payload_size = - ngtcp2_max(pmtud->max_udp_payload_size, payloadlen); + ngtcp2_max_size(pmtud->max_udp_payload_size, payloadlen); - assert(pmtud->mtu_idx < NGTCP2_MTU_PROBESLEN); + assert(pmtud->mtu_idx < pmtud->probeslen); - if (mtu_probes[pmtud->mtu_idx] > pmtud->max_udp_payload_size) { + if (pmtud->probes[pmtud->mtu_idx] > pmtud->max_udp_payload_size) { return; } @@ -149,12 +156,12 @@ void ngtcp2_pmtud_handle_expiry(ngtcp2_pmtud *pmtud, ngtcp2_tstamp ts) { return; } - pmtud->min_fail_udp_payload_size = - ngtcp2_min(pmtud->min_fail_udp_payload_size, mtu_probes[pmtud->mtu_idx]); + pmtud->min_fail_udp_payload_size = ngtcp2_min_size( + pmtud->min_fail_udp_payload_size, pmtud->probes[pmtud->mtu_idx]); pmtud_next_probe(pmtud); } int ngtcp2_pmtud_finished(ngtcp2_pmtud *pmtud) { - return pmtud->mtu_idx >= NGTCP2_MTU_PROBESLEN; + return pmtud->mtu_idx >= pmtud->probeslen; } diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_pmtud.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_pmtud.h index 6b2e691cfc793a..53fc6a538292e0 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_pmtud.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_pmtud.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -58,6 +58,10 @@ typedef struct ngtcp2_pmtud { /* min_fail_udp_payload_size is the minimum UDP payload size that is known to fail. */ size_t min_fail_udp_payload_size; + /* probes is the array of UDP datagram payload size to probe. */ + const uint16_t *probes; + /* probeslen is the number of probes pointed by probes. */ + size_t probeslen; } ngtcp2_pmtud; /* @@ -70,6 +74,10 @@ typedef struct ngtcp2_pmtud { * larger than or equal to all UDP payload probe candidates. * Therefore, call ngtcp2_pmtud_finished to check this situation. * + * The array pointed by |pmtud_probes| of length |pmtud_probeslen| + * specifies UDP datagram payload size to probe. If |pmtud_probeslen| + * is zero, the default probes are used. + * * This function returns 0 if it succeeds, or one of the following * negative error codes: * @@ -78,6 +86,7 @@ typedef struct ngtcp2_pmtud { */ int ngtcp2_pmtud_new(ngtcp2_pmtud **ppmtud, size_t max_udp_payload_size, size_t hard_max_udp_payload_size, int64_t tx_pkt_num, + const uint16_t *pmtud_probes, size_t pmtud_probeslen, const ngtcp2_mem *mem); /* @@ -120,4 +129,4 @@ void ngtcp2_pmtud_handle_expiry(ngtcp2_pmtud *pmtud, ngtcp2_tstamp ts); */ int ngtcp2_pmtud_finished(ngtcp2_pmtud *pmtud); -#endif /* NGTCP2_PMTUD_H */ +#endif /* !defined(NGTCP2_PMTUD_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_ppe.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_ppe.c index f7c122b1ab406b..13ef2b24908905 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_ppe.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_ppe.c @@ -29,11 +29,13 @@ #include "ngtcp2_str.h" #include "ngtcp2_conv.h" +#include "ngtcp2_macro.h" void ngtcp2_ppe_init(ngtcp2_ppe *ppe, uint8_t *out, size_t outlen, - ngtcp2_crypto_cc *cc) { + size_t dgram_offset, ngtcp2_crypto_cc *cc) { ngtcp2_buf_init(&ppe->buf, out, outlen); + ppe->dgram_offset = dgram_offset; ppe->hdlen = 0; ppe->len_offset = 0; ppe->pkt_num_offset = 0; @@ -53,17 +55,22 @@ int ngtcp2_ppe_encode_hd(ngtcp2_ppe *ppe, const ngtcp2_pkt_hd *hd) { if (hd->flags & NGTCP2_PKT_FLAG_LONG_FORM) { ppe->len_offset = 1 + 4 + 1 + hd->dcid.datalen + 1 + hd->scid.datalen; + if (hd->type == NGTCP2_PKT_INITIAL) { ppe->len_offset += ngtcp2_put_uvarintlen(hd->tokenlen) + hd->tokenlen; } + ppe->pkt_num_offset = ppe->len_offset + NGTCP2_PKT_LENGTHLEN; + rv = ngtcp2_pkt_encode_hd_long( - buf->last, ngtcp2_buf_left(buf) - cc->aead.max_overhead, hd); + buf->last, ngtcp2_buf_left(buf) - cc->aead.max_overhead, hd); } else { ppe->pkt_num_offset = 1 + hd->dcid.datalen; + rv = ngtcp2_pkt_encode_hd_short( - buf->last, ngtcp2_buf_left(buf) - cc->aead.max_overhead, hd); + buf->last, ngtcp2_buf_left(buf) - cc->aead.max_overhead, hd); } + if (rv < 0) { return (int)rv; } @@ -72,7 +79,6 @@ int ngtcp2_ppe_encode_hd(ngtcp2_ppe *ppe, const ngtcp2_pkt_hd *hd) { ppe->pkt_numlen = hd->pkt_numlen; ppe->hdlen = (size_t)rv; - ppe->pkt_num = hd->pkt_num; return 0; @@ -88,7 +94,7 @@ int ngtcp2_ppe_encode_frame(ngtcp2_ppe *ppe, ngtcp2_frame *fr) { } rv = ngtcp2_pkt_encode_frame( - buf->last, ngtcp2_buf_left(buf) - cc->aead.max_overhead, fr); + buf->last, ngtcp2_buf_left(buf) - cc->aead.max_overhead, fr); if (rv < 0) { return (int)rv; } @@ -121,8 +127,8 @@ ngtcp2_ssize ngtcp2_ppe_final(ngtcp2_ppe *ppe, const uint8_t **ppkt) { if (ppe->len_offset) { ngtcp2_put_uvarint30( - buf->begin + ppe->len_offset, - (uint16_t)(payloadlen + ppe->pkt_numlen + cc->aead.max_overhead)); + buf->begin + ppe->len_offset, + (uint16_t)(payloadlen + ppe->pkt_numlen + cc->aead.max_overhead)); } ngtcp2_crypto_create_nonce(ppe->nonce, cc->ckm->iv.base, cc->ckm->iv.len, @@ -136,7 +142,7 @@ ngtcp2_ssize ngtcp2_ppe_final(ngtcp2_ppe *ppe, const uint8_t **ppkt) { buf->last = payload + payloadlen + cc->aead.max_overhead; - /* TODO Check that we have enough space to get sample */ + /* Make sure that we have enough space to get sample */ assert(ppe_sample_offset(ppe) + NGTCP2_HP_SAMPLELEN <= ngtcp2_buf_len(buf)); rv = cc->hp_mask(mask, &cc->hp, &cc->hp_ctx, @@ -164,7 +170,7 @@ ngtcp2_ssize ngtcp2_ppe_final(ngtcp2_ppe *ppe, const uint8_t **ppkt) { return (ngtcp2_ssize)ngtcp2_buf_len(buf); } -size_t ngtcp2_ppe_left(ngtcp2_ppe *ppe) { +size_t ngtcp2_ppe_left(const ngtcp2_ppe *ppe) { ngtcp2_crypto_cc *cc = ppe->cc; if (ngtcp2_buf_left(&ppe->buf) < cc->aead.max_overhead) { @@ -174,57 +180,57 @@ size_t ngtcp2_ppe_left(ngtcp2_ppe *ppe) { return ngtcp2_buf_left(&ppe->buf) - cc->aead.max_overhead; } -size_t ngtcp2_ppe_pktlen(ngtcp2_ppe *ppe) { +size_t ngtcp2_ppe_pktlen(const ngtcp2_ppe *ppe) { ngtcp2_crypto_cc *cc = ppe->cc; return ngtcp2_buf_len(&ppe->buf) + cc->aead.max_overhead; } -size_t ngtcp2_ppe_padding(ngtcp2_ppe *ppe) { - ngtcp2_crypto_cc *cc = ppe->cc; - ngtcp2_buf *buf = &ppe->buf; - size_t len; - - assert(ngtcp2_buf_left(buf) >= cc->aead.max_overhead); - - len = ngtcp2_buf_left(buf) - cc->aead.max_overhead; - memset(buf->last, 0, len); - buf->last += len; - - return len; -} - -size_t ngtcp2_ppe_padding_hp_sample(ngtcp2_ppe *ppe) { +size_t ngtcp2_ppe_padding_size(ngtcp2_ppe *ppe, size_t n) { ngtcp2_crypto_cc *cc = ppe->cc; ngtcp2_buf *buf = &ppe->buf; - size_t max_samplelen; + size_t pktlen = ngtcp2_buf_len(buf) + cc->aead.max_overhead; size_t len = 0; + size_t max_samplelen; - assert(cc->aead.max_overhead); + n = ngtcp2_min_size(n, ngtcp2_buf_cap(buf)); + if (pktlen < n) { + len = n - pktlen; + } + /* Ensure header protection sample */ max_samplelen = - ngtcp2_buf_len(buf) + cc->aead.max_overhead - ppe_sample_offset(ppe); + ngtcp2_buf_len(buf) + cc->aead.max_overhead - ppe_sample_offset(ppe); + if (max_samplelen < NGTCP2_HP_SAMPLELEN) { - len = NGTCP2_HP_SAMPLELEN - max_samplelen; - assert(ngtcp2_ppe_left(ppe) >= len); - memset(buf->last, 0, len); - buf->last += len; + len = ngtcp2_max_size(len, NGTCP2_HP_SAMPLELEN - max_samplelen); } + assert(ngtcp2_buf_left(buf) >= len + cc->aead.max_overhead); + + buf->last = ngtcp2_setmem(buf->last, 0, len); + return len; } -size_t ngtcp2_ppe_padding_size(ngtcp2_ppe *ppe, size_t n) { +size_t ngtcp2_ppe_dgram_padding(ngtcp2_ppe *ppe) { + return ngtcp2_ppe_dgram_padding_size(ppe, NGTCP2_MAX_UDP_PAYLOAD_SIZE); +} + +size_t ngtcp2_ppe_dgram_padding_size(ngtcp2_ppe *ppe, size_t n) { ngtcp2_crypto_cc *cc = ppe->cc; ngtcp2_buf *buf = &ppe->buf; - size_t pktlen = ngtcp2_buf_len(buf) + cc->aead.max_overhead; + size_t dgramlen = + ppe->dgram_offset + ngtcp2_buf_len(buf) + cc->aead.max_overhead; size_t len; - if (pktlen >= n) { + n = ngtcp2_min_size(n, ppe->dgram_offset + ngtcp2_buf_cap(buf)); + + if (dgramlen >= n) { return 0; } - len = n - pktlen; + len = n - dgramlen; buf->last = ngtcp2_setmem(buf->last, 0, len); return len; @@ -232,5 +238,6 @@ size_t ngtcp2_ppe_padding_size(ngtcp2_ppe *ppe, size_t n) { int ngtcp2_ppe_ensure_hp_sample(ngtcp2_ppe *ppe) { ngtcp2_buf *buf = &ppe->buf; + return ngtcp2_buf_left(buf) >= (4 - ppe->pkt_numlen) + NGTCP2_HP_SAMPLELEN; } diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_ppe.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_ppe.h index 2a069ef33451ab..660b1482b56671 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_ppe.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_ppe.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -36,11 +36,17 @@ #include "ngtcp2_crypto.h" /* - * ngtcp2_ppe is the Protected Packet Encoder. + * ngtcp2_ppe is the QUIC Packet Encoder. */ typedef struct ngtcp2_ppe { + /* buf is the buffer where a QUIC packet is written. */ ngtcp2_buf buf; + /* cc is the encryption context that includes callback functions to + encrypt a QUIC packet, and AEAD cipher, etc. */ ngtcp2_crypto_cc *cc; + /* dgram_offset is the offset in UDP datagram payload that this QUIC + packet is positioned at. */ + size_t dgram_offset; /* hdlen is the number of bytes for packet header written in buf. */ size_t hdlen; /* len_offset is the offset to Length field. */ @@ -53,7 +59,7 @@ typedef struct ngtcp2_ppe { /* pkt_num is the packet number written in buf. */ int64_t pkt_num; /* nonce is the buffer to store nonce. It should be equal or longer - than then length of IV. */ + than the length of IV. */ uint8_t nonce[32]; } ngtcp2_ppe; @@ -61,7 +67,7 @@ typedef struct ngtcp2_ppe { * ngtcp2_ppe_init initializes |ppe| with the given buffer. */ void ngtcp2_ppe_init(ngtcp2_ppe *ppe, uint8_t *out, size_t outlen, - ngtcp2_crypto_cc *cc); + size_t dgram_offset, ngtcp2_crypto_cc *cc); /* * ngtcp2_ppe_encode_hd encodes |hd|. @@ -86,7 +92,7 @@ int ngtcp2_ppe_encode_hd(ngtcp2_ppe *ppe, const ngtcp2_pkt_hd *hd); int ngtcp2_ppe_encode_frame(ngtcp2_ppe *ppe, ngtcp2_frame *fr); /* - * ngtcp2_ppe_final encrypts QUIC packet payload. If |**ppkt| is not + * ngtcp2_ppe_final encrypts QUIC packet payload. If |ppkt| is not * NULL, the pointer to the packet is assigned to it. * * This function returns the length of QUIC packet, including header, @@ -102,39 +108,46 @@ ngtcp2_ssize ngtcp2_ppe_final(ngtcp2_ppe *ppe, const uint8_t **ppkt); * ngtcp2_ppe_left returns the number of bytes left to write * additional frames. This does not count AEAD overhead. */ -size_t ngtcp2_ppe_left(ngtcp2_ppe *ppe); +size_t ngtcp2_ppe_left(const ngtcp2_ppe *ppe); /* * ngtcp2_ppe_pktlen returns the provisional packet length. It * includes AEAD overhead. */ -size_t ngtcp2_ppe_pktlen(ngtcp2_ppe *ppe); +size_t ngtcp2_ppe_pktlen(const ngtcp2_ppe *ppe); -/** - * @function +/* + * ngtcp2_ppe_dgram_padding is equivalent to call + * ngtcp2_ppe_dgram_padding_size(ppe, NGTCP2_MAX_UDP_PAYLOAD_SIZE). + * This function should be called just before calling + * ngtcp2_ppe_final(). * - * `ngtcp2_ppe_padding` encodes PADDING frames to the end of the - * buffer. This function returns the number of bytes padded. + * This function returns the number of bytes padded. */ -size_t ngtcp2_ppe_padding(ngtcp2_ppe *ppe); +size_t ngtcp2_ppe_dgram_padding(ngtcp2_ppe *ppe); /* - * ngtcp2_ppe_padding_hp_sample adds PADDING frame if the current - * payload does not have enough space for header protection sample. - * This function should be called just before calling - * ngtcp2_ppe_final(). + * ngtcp2_ppe_dgram_padding_size adds PADDING frame so that the size + * of a UDP datagram payload is at least |n| bytes long. If it is + * unable to add PADDING in that way, this function still adds PADDING + * frame as much as possible. This function should be called just + * before calling ngtcp2_ppe_final(). * * This function returns the number of bytes added as padding. */ -size_t ngtcp2_ppe_padding_hp_sample(ngtcp2_ppe *ppe); +size_t ngtcp2_ppe_dgram_padding_size(ngtcp2_ppe *ppe, size_t n); /* * ngtcp2_ppe_padding_size adds PADDING frame so that the size of QUIC - * packet is at least |n| bytes long. If it is unable to add PADDING - * in that way, this function still adds PADDING frame as much as - * possible. This function should be called just before calling - * ngtcp2_ppe_final(). For Short packet, this function should be - * called instead of ngtcp2_ppe_padding_hp_sample. + * packet is at least |n| bytes long and the current payload has + * enough space for header protection sample. If it is unable to add + * PADDING at least |n| bytes, this function still adds PADDING frames + * as much as possible. This function also adds PADDING frames so + * that the minimum padding requirement of header protection is met. + * Those padding may be larger than |n| bytes. It is recommended to + * make sure that ngtcp2_ppe_ensure_hp_sample succeeds after writing + * QUIC packet header. This function should be called just before + * calling ngtcp2_ppe_final(). * * This function returns the number of bytes added as padding. */ @@ -147,4 +160,4 @@ size_t ngtcp2_ppe_padding_size(ngtcp2_ppe *ppe, size_t n); */ int ngtcp2_ppe_ensure_hp_sample(ngtcp2_ppe *ppe); -#endif /* NGTCP2_PPE_H */ +#endif /* !defined(NGTCP2_PPE_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_pq.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_pq.c index 5e1003d7942e53..19e3e3e36aa5e6 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_pq.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_pq.c @@ -29,17 +29,20 @@ #include "ngtcp2_macro.h" -void ngtcp2_pq_init(ngtcp2_pq *pq, ngtcp2_less less, const ngtcp2_mem *mem) { - pq->mem = mem; - pq->capacity = 0; +void ngtcp2_pq_init(ngtcp2_pq *pq, ngtcp2_pq_less less, const ngtcp2_mem *mem) { pq->q = NULL; + pq->mem = mem; pq->length = 0; + pq->capacity = 0; pq->less = less; } void ngtcp2_pq_free(ngtcp2_pq *pq) { + if (!pq) { + return; + } + ngtcp2_mem_free(pq->mem, pq->q); - pq->q = NULL; } static void swap(ngtcp2_pq *pq, size_t i, size_t j) { @@ -54,11 +57,13 @@ static void swap(ngtcp2_pq *pq, size_t i, size_t j) { static void bubble_up(ngtcp2_pq *pq, size_t index) { size_t parent; - while (index != 0) { + + while (index) { parent = (index - 1) / 2; if (!pq->less(pq->q[index], pq->q[parent])) { return; } + swap(pq, parent, index); index = parent; } @@ -69,56 +74,64 @@ int ngtcp2_pq_push(ngtcp2_pq *pq, ngtcp2_pq_entry *item) { void *nq; size_t ncapacity; - ncapacity = ngtcp2_max(4, (pq->capacity * 2)); + ncapacity = ngtcp2_max_size(4, pq->capacity * 2); - nq = ngtcp2_mem_realloc(pq->mem, pq->q, - ncapacity * sizeof(ngtcp2_pq_entry *)); + nq = + ngtcp2_mem_realloc(pq->mem, pq->q, ncapacity * sizeof(ngtcp2_pq_entry *)); if (nq == NULL) { return NGTCP2_ERR_NOMEM; } + pq->capacity = ncapacity; pq->q = nq; } + pq->q[pq->length] = item; item->index = pq->length; ++pq->length; - bubble_up(pq, pq->length - 1); + bubble_up(pq, item->index); + return 0; } -ngtcp2_pq_entry *ngtcp2_pq_top(ngtcp2_pq *pq) { +ngtcp2_pq_entry *ngtcp2_pq_top(const ngtcp2_pq *pq) { assert(pq->length); return pq->q[0]; } static void bubble_down(ngtcp2_pq *pq, size_t index) { size_t i, j, minindex; + for (;;) { j = index * 2 + 1; minindex = index; + for (i = 0; i < 2; ++i, ++j) { if (j >= pq->length) { break; } + if (pq->less(pq->q[j], pq->q[minindex])) { minindex = j; } } + if (minindex == index) { return; } + swap(pq, index, minindex); index = minindex; } } void ngtcp2_pq_pop(ngtcp2_pq *pq) { - if (pq->length > 0) { - pq->q[0] = pq->q[pq->length - 1]; - pq->q[0]->index = 0; - --pq->length; - bubble_down(pq, 0); - } + assert(pq->length); + + pq->q[0] = pq->q[pq->length - 1]; + pq->q[0]->index = 0; + --pq->length; + bubble_down(pq, 0); } void ngtcp2_pq_remove(ngtcp2_pq *pq, ngtcp2_pq_entry *item) { @@ -145,20 +158,22 @@ void ngtcp2_pq_remove(ngtcp2_pq *pq, ngtcp2_pq_entry *item) { } } -int ngtcp2_pq_empty(ngtcp2_pq *pq) { return pq->length == 0; } +int ngtcp2_pq_empty(const ngtcp2_pq *pq) { return pq->length == 0; } -size_t ngtcp2_pq_size(ngtcp2_pq *pq) { return pq->length; } +size_t ngtcp2_pq_size(const ngtcp2_pq *pq) { return pq->length; } -int ngtcp2_pq_each(ngtcp2_pq *pq, ngtcp2_pq_item_cb fun, void *arg) { +int ngtcp2_pq_each(const ngtcp2_pq *pq, ngtcp2_pq_item_cb fun, void *arg) { size_t i; if (pq->length == 0) { return 0; } + for (i = 0; i < pq->length; ++i) { if ((*fun)(pq->q[i], arg)) { return 1; } } + return 0; } diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_pq.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_pq.h index 484c8f21f75de2..84961c9143978c 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_pq.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_pq.h @@ -28,7 +28,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -45,37 +45,39 @@ typedef struct ngtcp2_pq_entry { size_t index; } ngtcp2_pq_entry; -/* "less" function, return nonzero if |lhs| is less than |rhs|. */ -typedef int (*ngtcp2_less)(const ngtcp2_pq_entry *lhs, - const ngtcp2_pq_entry *rhs); +/* ngtcp2_pq_less is a "less" function, that returns nonzero if |lhs| + is considered to be less than |rhs|. */ +typedef int (*ngtcp2_pq_less)(const ngtcp2_pq_entry *lhs, + const ngtcp2_pq_entry *rhs); typedef struct ngtcp2_pq { - /* The pointer to the pointer to the item stored */ + /* q is a pointer to an array that stores the items. */ ngtcp2_pq_entry **q; - /* Memory allocator */ + /* mem is a memory allocator. */ const ngtcp2_mem *mem; - /* The number of items stored */ + /* length is the number of items stored. */ size_t length; - /* The maximum number of items this pq can store. This is - automatically extended when length is reached to this value. */ + /* capacity is the maximum number of items this queue can store. + This is automatically extended when length is reached to this + limit. */ size_t capacity; - /* The less function between items */ - ngtcp2_less less; + /* less is the less function to compare items. */ + ngtcp2_pq_less less; } ngtcp2_pq; /* - * Initializes priority queue |pq| with compare function |cmp|. + * ngtcp2_pq_init initializes |pq| with compare function |cmp|. */ -void ngtcp2_pq_init(ngtcp2_pq *pq, ngtcp2_less less, const ngtcp2_mem *mem); +void ngtcp2_pq_init(ngtcp2_pq *pq, ngtcp2_pq_less less, const ngtcp2_mem *mem); /* - * Deallocates any resources allocated for |pq|. The stored items are - * not freed by this function. + * ngtcp2_pq_free deallocates any resources allocated for |pq|. The + * stored items are not freed by this function. */ void ngtcp2_pq_free(ngtcp2_pq *pq); /* - * Adds |item| to the priority queue |pq|. + * ngtcp2_pq_push adds |item| to |pq|. * * This function returns 0 if it succeeds, or one of the following * negative error codes: @@ -86,41 +88,42 @@ void ngtcp2_pq_free(ngtcp2_pq *pq); int ngtcp2_pq_push(ngtcp2_pq *pq, ngtcp2_pq_entry *item); /* - * Returns item at the top of the queue |pq|. It is undefined if the - * queue is empty. + * ngtcp2_pq_top returns item at the top of |pq|. It is undefined if + * |pq| is empty. */ -ngtcp2_pq_entry *ngtcp2_pq_top(ngtcp2_pq *pq); +ngtcp2_pq_entry *ngtcp2_pq_top(const ngtcp2_pq *pq); /* - * Pops item at the top of the queue |pq|. The popped item is not - * freed by this function. + * ngtcp2_pq_pop pops item at the top of |pq|. The popped item is not + * freed by this function. It is undefined if |pq| is empty. */ void ngtcp2_pq_pop(ngtcp2_pq *pq); /* - * Returns nonzero if the queue |pq| is empty. + * ngtcp2_pq_empty returns nonzero if |pq| is empty. */ -int ngtcp2_pq_empty(ngtcp2_pq *pq); +int ngtcp2_pq_empty(const ngtcp2_pq *pq); /* - * Returns the number of items in the queue |pq|. + * ngtcp2_pq_size returns the number of items |pq| contains. */ -size_t ngtcp2_pq_size(ngtcp2_pq *pq); +size_t ngtcp2_pq_size(const ngtcp2_pq *pq); typedef int (*ngtcp2_pq_item_cb)(ngtcp2_pq_entry *item, void *arg); /* - * Applies |fun| to each item in |pq|. The |arg| is passed as arg - * parameter to callback function. This function must not change the - * ordering key. If the return value from callback is nonzero, this - * function returns 1 immediately without iterating remaining items. - * Otherwise this function returns 0. + * ngtcp2_pq_each applies |fun| to each item in |pq|. The |arg| is + * passed as arg parameter to callback function. This function must + * not change the ordering key. If the return value from callback is + * nonzero, this function returns 1 immediately without iterating + * remaining items. Otherwise this function returns 0. */ -int ngtcp2_pq_each(ngtcp2_pq *pq, ngtcp2_pq_item_cb fun, void *arg); +int ngtcp2_pq_each(const ngtcp2_pq *pq, ngtcp2_pq_item_cb fun, void *arg); /* - * Removes |item| from priority queue. + * ngtcp2_pq_remove removes |item| from |pq|. |pq| must contain + * |item| otherwise the behavior is undefined. */ void ngtcp2_pq_remove(ngtcp2_pq *pq, ngtcp2_pq_entry *item); -#endif /* NGTCP2_PQ_H */ +#endif /* !defined(NGTCP2_PQ_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_pv.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_pv.c index 314e005293c279..e4fee94eb558d3 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_pv.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_pv.c @@ -143,7 +143,7 @@ int ngtcp2_pv_validation_timed_out(ngtcp2_pv *pv, ngtcp2_tstamp ts) { ent = ngtcp2_ringbuf_get(&pv->ents.rb, ngtcp2_ringbuf_len(&pv->ents.rb) - 1); t = pv->started_ts + pv->timeout; - t = ngtcp2_max(t, ent->expiry); + t = ngtcp2_max_uint64(t, ent->expiry); return t <= ts; } diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_pv.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_pv.h index c9da15248a3e2b..e9573da497bee4 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_pv.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_pv.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -83,7 +83,7 @@ void ngtcp2_pv_entry_init(ngtcp2_pv_entry *pvent, const uint8_t *data, typedef struct ngtcp2_pv ngtcp2_pv; ngtcp2_static_ringbuf_def(pv_ents, NGTCP2_PV_MAX_ENTRIES, - sizeof(ngtcp2_pv_entry)); + sizeof(ngtcp2_pv_entry)) /* * ngtcp2_pv is the context of a single path validation. */ @@ -191,4 +191,4 @@ ngtcp2_tstamp ngtcp2_pv_next_expiry(ngtcp2_pv *pv); */ void ngtcp2_pv_cancel_expired_timer(ngtcp2_pv *pv, ngtcp2_tstamp ts); -#endif /* NGTCP2_PV_H */ +#endif /* !defined(NGTCP2_PV_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_qlog.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_qlog.c index 27675347794b2a..c0f920746a4dff 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_qlog.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_qlog.c @@ -184,13 +184,12 @@ static uint8_t *write_pair_cid_impl(uint8_t *p, const uint8_t *name, write_pair_cid_impl((DEST), (const uint8_t *)(NAME), sizeof(NAME) - 1, \ (VALUE)) -#define ngtcp2_make_vec_lit(S) \ - { (uint8_t *)(S), sizeof((S)) - 1 } +#define ngtcp2_make_vec_lit(S) {(uint8_t *)(S), sizeof((S)) - 1} static uint8_t *write_common_fields(uint8_t *p, const ngtcp2_cid *odcid) { p = write_verbatim( - p, "\"common_fields\":{\"protocol_type\":[\"QUIC\"],\"time_format\":" - "\"relative\",\"reference_time\":0,\"group_id\":"); + p, "\"common_fields\":{\"protocol_type\":[\"QUIC\"],\"time_format\":" + "\"relative\",\"reference_time\":0,\"group_id\":"); p = write_cid(p, odcid); *p++ = '}'; return p; @@ -198,7 +197,7 @@ static uint8_t *write_common_fields(uint8_t *p, const ngtcp2_cid *odcid) { static uint8_t *write_trace(uint8_t *p, int server, const ngtcp2_cid *odcid) { p = write_verbatim( - p, "\"trace\":{\"vantage_point\":{\"name\":\"ngtcp2\",\"type\":"); + p, "\"trace\":{\"vantage_point\":{\"name\":\"ngtcp2\",\"type\":"); if (server) { p = write_string(p, "server"); } else { @@ -219,7 +218,7 @@ void ngtcp2_qlog_start(ngtcp2_qlog *qlog, const ngtcp2_cid *odcid, int server) { } p = write_verbatim( - p, "\x1e{\"qlog_format\":\"JSON-SEQ\",\"qlog_version\":\"0.3\","); + p, "\x1e{\"qlog_format\":\"JSON-SEQ\",\"qlog_version\":\"0.3\","); p = write_trace(p, server, odcid); p = write_verbatim(p, "}\n"); @@ -243,9 +242,9 @@ static ngtcp2_vec vec_pkt_type_0rtt = ngtcp2_make_vec_lit("0RTT"); static ngtcp2_vec vec_pkt_type_1rtt = ngtcp2_make_vec_lit("1RTT"); static ngtcp2_vec vec_pkt_type_retry = ngtcp2_make_vec_lit("retry"); static ngtcp2_vec vec_pkt_type_version_negotiation = - ngtcp2_make_vec_lit("version_negotiation"); + ngtcp2_make_vec_lit("version_negotiation"); static ngtcp2_vec vec_pkt_type_stateless_reset = - ngtcp2_make_vec_lit("stateless_reset"); + ngtcp2_make_vec_lit("stateless_reset"); static ngtcp2_vec vec_pkt_type_unknown = ngtcp2_make_vec_lit("unknown"); static const ngtcp2_vec *qlog_pkt_type(const ngtcp2_pkt_hd *hd) { @@ -638,8 +637,8 @@ write_connection_close_frame(uint8_t *p, const ngtcp2_connection_close *fr) { */ #define NGTCP2_QLOG_CONNECTION_CLOSE_FRAME_OVERHEAD 131 - p = write_verbatim(p, - "{\"frame_type\":\"connection_close\",\"error_space\":"); + p = + write_verbatim(p, "{\"frame_type\":\"connection_close\",\"error_space\":"); if (fr->type == NGTCP2_FRAME_CONNECTION_CLOSE) { p = write_string(p, "transport"); } else { @@ -772,10 +771,10 @@ void ngtcp2_qlog_write_frame(ngtcp2_qlog *qlog, const ngtcp2_frame *fr) { case NGTCP2_FRAME_ACK_ECN: if (ngtcp2_buf_left(&qlog->buf) < NGTCP2_QLOG_ACK_FRAME_BASE_OVERHEAD + - (size_t)(fr->type == NGTCP2_FRAME_ACK_ECN - ? NGTCP2_QLOG_ACK_FRAME_ECN_OVERHEAD - : 0) + - NGTCP2_QLOG_ACK_FRAME_RANGE_OVERHEAD * (1 + fr->ack.rangecnt) + 1) { + (size_t)(fr->type == NGTCP2_FRAME_ACK_ECN + ? NGTCP2_QLOG_ACK_FRAME_ECN_OVERHEAD + : 0) + + NGTCP2_QLOG_ACK_FRAME_RANGE_OVERHEAD * (1 + fr->ack.rangecnt) + 1) { return; } p = write_ack_frame(p, &fr->ack); @@ -934,8 +933,8 @@ void ngtcp2_qlog_pkt_sent_end(ngtcp2_qlog *qlog, const ngtcp2_pkt_hd *hd, } void ngtcp2_qlog_parameters_set_transport_params( - ngtcp2_qlog *qlog, const ngtcp2_transport_params *params, int server, - ngtcp2_qlog_side side) { + ngtcp2_qlog *qlog, const ngtcp2_transport_params *params, int server, + ngtcp2_qlog_side side) { uint8_t buf[1024]; uint8_t *p = buf; const ngtcp2_preferred_addr *paddr; @@ -950,7 +949,7 @@ void ngtcp2_qlog_parameters_set_transport_params( *p++ = '{'; p = qlog_write_time(qlog, p); p = write_verbatim( - p, ",\"name\":\"transport:parameters_set\",\"data\":{\"owner\":"); + p, ",\"name\":\"transport:parameters_set\",\"data\":{\"owner\":"); if (side == NGTCP2_QLOG_SIDE_LOCAL) { p = write_string(p, "local"); @@ -982,8 +981,8 @@ void ngtcp2_qlog_parameters_set_transport_params( *p++ = ','; p = write_pair_duration(p, "max_idle_timeout", params->max_idle_timeout); *p++ = ','; - p = write_pair_number(p, "max_udp_payload_size", - params->max_udp_payload_size); + p = + write_pair_number(p, "max_udp_payload_size", params->max_udp_payload_size); *p++ = ','; p = write_pair_number(p, "ack_delay_exponent", params->ack_delay_exponent); *p++ = ','; @@ -1106,7 +1105,7 @@ void ngtcp2_qlog_pkt_lost(ngtcp2_qlog *qlog, ngtcp2_rtb_entry *ent) { *p++ = '{'; p = qlog_write_time(qlog, p); p = write_verbatim( - p, ",\"name\":\"recovery:packet_lost\",\"data\":{\"header\":"); + p, ",\"name\":\"recovery:packet_lost\",\"data\":{\"header\":"); hd.type = ent->hd.type; hd.flags = ent->hd.flags; @@ -1134,13 +1133,11 @@ void ngtcp2_qlog_retry_pkt_received(ngtcp2_qlog *qlog, const ngtcp2_pkt_hd *hd, *buf.last++ = '{'; buf.last = qlog_write_time(qlog, buf.last); buf.last = write_verbatim( - buf.last, - ",\"name\":\"transport:packet_received\",\"data\":{\"header\":"); + buf.last, ",\"name\":\"transport:packet_received\",\"data\":{\"header\":"); - if (ngtcp2_buf_left(&buf) < - NGTCP2_QLOG_PKT_HD_OVERHEAD + hd->tokenlen * 2 + - sizeof(",\"retry_token\":{\"data\":\"\"}}}\n") - 1 + - retry->tokenlen * 2) { + if (ngtcp2_buf_left(&buf) < NGTCP2_QLOG_PKT_HD_OVERHEAD + hd->tokenlen * 2 + + sizeof(",\"retry_token\":{\"data\":\"\"}}}\n") - + 1 + retry->tokenlen * 2) { return; } @@ -1154,7 +1151,7 @@ void ngtcp2_qlog_retry_pkt_received(ngtcp2_qlog *qlog, const ngtcp2_pkt_hd *hd, } void ngtcp2_qlog_stateless_reset_pkt_received( - ngtcp2_qlog *qlog, const ngtcp2_pkt_stateless_reset *sr) { + ngtcp2_qlog *qlog, const ngtcp2_pkt_stateless_reset *sr) { uint8_t buf[256]; uint8_t *p = buf; ngtcp2_pkt_hd hd = {0}; @@ -1169,7 +1166,7 @@ void ngtcp2_qlog_stateless_reset_pkt_received( *p++ = '{'; p = qlog_write_time(qlog, p); p = write_verbatim( - p, ",\"name\":\"transport:packet_received\",\"data\":{\"header\":"); + p, ",\"name\":\"transport:packet_received\",\"data\":{\"header\":"); p = write_pkt_hd(p, &hd); *p++ = ','; p = write_pair_hex(p, "stateless_reset_token", sr->stateless_reset_token, @@ -1199,8 +1196,7 @@ void ngtcp2_qlog_version_negotiation_pkt_received(ngtcp2_qlog *qlog, *buf.last++ = '{'; buf.last = qlog_write_time(qlog, buf.last); buf.last = write_verbatim( - buf.last, - ",\"name\":\"transport:packet_received\",\"data\":{\"header\":"); + buf.last, ",\"name\":\"transport:packet_received\",\"data\":{\"header\":"); buf.last = write_pkt_hd(buf.last, hd); buf.last = write_verbatim(buf.last, ",\"supported_versions\":["); diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_qlog.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_qlog.h index b9107c0e5c031a..d2a5f1038c0f42 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_qlog.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_qlog.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -120,8 +120,8 @@ void ngtcp2_qlog_pkt_sent_end(ngtcp2_qlog *qlog, const ngtcp2_pkt_hd *hd, * "local", otherwise "remote". */ void ngtcp2_qlog_parameters_set_transport_params( - ngtcp2_qlog *qlog, const ngtcp2_transport_params *params, int server, - ngtcp2_qlog_side side); + ngtcp2_qlog *qlog, const ngtcp2_transport_params *params, int server, + ngtcp2_qlog_side side); /* * ngtcp2_qlog_metrics_updated writes metrics_updated event of @@ -147,7 +147,7 @@ void ngtcp2_qlog_retry_pkt_received(ngtcp2_qlog *qlog, const ngtcp2_pkt_hd *hd, * event for a received Stateless Reset packet. */ void ngtcp2_qlog_stateless_reset_pkt_received( - ngtcp2_qlog *qlog, const ngtcp2_pkt_stateless_reset *sr); + ngtcp2_qlog *qlog, const ngtcp2_pkt_stateless_reset *sr); /* * ngtcp2_qlog_version_negotiation_pkt_received writes packet_received @@ -158,4 +158,4 @@ void ngtcp2_qlog_version_negotiation_pkt_received(ngtcp2_qlog *qlog, const uint32_t *sv, size_t nsv); -#endif /* NGTCP2_QLOG_H */ +#endif /* !defined(NGTCP2_QLOG_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_range.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_range.c index 9379496b7d4b53..7bbefc0175c595 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_range.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_range.c @@ -33,11 +33,13 @@ void ngtcp2_range_init(ngtcp2_range *r, uint64_t begin, uint64_t end) { ngtcp2_range ngtcp2_range_intersect(const ngtcp2_range *a, const ngtcp2_range *b) { ngtcp2_range r = {0, 0}; - uint64_t begin = ngtcp2_max(a->begin, b->begin); - uint64_t end = ngtcp2_min(a->end, b->end); + uint64_t begin = ngtcp2_max_uint64(a->begin, b->begin); + uint64_t end = ngtcp2_min_uint64(a->end, b->end); + if (begin < end) { ngtcp2_range_init(&r, begin, end); } + return r; } diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_range.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_range.h index a776c4ec4768ce..22cd2951859ef1 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_range.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_range.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -58,7 +58,7 @@ uint64_t ngtcp2_range_len(const ngtcp2_range *r); /* * ngtcp2_range_eq returns nonzero if |a| equals |b|, such that - * a->begin == b->begin, and a->end == b->end hold. + * a->begin == b->begin and a->end == b->end hold. */ int ngtcp2_range_eq(const ngtcp2_range *a, const ngtcp2_range *b); @@ -77,4 +77,4 @@ void ngtcp2_range_cut(ngtcp2_range *left, ngtcp2_range *right, */ int ngtcp2_range_not_after(const ngtcp2_range *a, const ngtcp2_range *b); -#endif /* NGTCP2_RANGE_H */ +#endif /* !defined(NGTCP2_RANGE_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_rcvry.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_rcvry.h index 4cb40882192a77..e6321061b59cd1 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_rcvry.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_rcvry.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -37,4 +37,4 @@ /* NGTCP2_GRANULARITY is kGranularity described in RFC 9002. */ #define NGTCP2_GRANULARITY NGTCP2_MILLISECONDS -#endif /* NGTCP2_RCVRY_H */ +#endif /* !defined(NGTCP2_RCVRY_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_ringbuf.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_ringbuf.c index ecfdeb63b3485b..41446739bf699d 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_ringbuf.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_ringbuf.c @@ -27,24 +27,29 @@ #include #ifdef WIN32 # include -#endif +#endif /* defined(WIN32) */ #include "ngtcp2_macro.h" -#if defined(_MSC_VER) && _MSC_VER < 1941 && !defined(__clang__) && \ - (defined(_M_ARM) || defined(_M_ARM64)) -static unsigned int __popcnt(unsigned int x) { - unsigned int c = 0; - for (; x; ++c) { - x &= x - 1; - } - return c; +#ifndef NDEBUG +static int ispow2(size_t n) { +# if defined(_MSC_VER) && !defined(__clang__) && \ + (defined(_M_ARM) || (defined(_M_ARM64) && _MSC_VER < 1941)) + return n && !(n & (n - 1)); +# elif defined(WIN32) + return 1 == __popcnt((unsigned int)n); +# else /* !((defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || \ + (defined(_M_ARM64) && _MSC_VER < 1941))) || defined(WIN32)) */ + return 1 == __builtin_popcount((unsigned int)n); +# endif /* !((defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || \ + (defined(_M_ARM64) && _MSC_VER < 1941))) || defined(WIN32)) */ } -#endif +#endif /* !defined(NDEBUG) */ int ngtcp2_ringbuf_init(ngtcp2_ringbuf *rb, size_t nmemb, size_t size, const ngtcp2_mem *mem) { uint8_t *buf = ngtcp2_mem_malloc(mem, nmemb * size); + if (buf == NULL) { return NGTCP2_ERR_NOMEM; } @@ -56,11 +61,7 @@ int ngtcp2_ringbuf_init(ngtcp2_ringbuf *rb, size_t nmemb, size_t size, void ngtcp2_ringbuf_buf_init(ngtcp2_ringbuf *rb, size_t nmemb, size_t size, uint8_t *buf, const ngtcp2_mem *mem) { -#ifdef WIN32 - assert(1 == __popcnt((unsigned int)nmemb)); -#else - assert(1 == __builtin_popcount((unsigned int)nmemb)); -#endif + assert(ispow2(nmemb)); rb->buf = buf; rb->mem = mem; @@ -114,9 +115,10 @@ void ngtcp2_ringbuf_resize(ngtcp2_ringbuf *rb, size_t len) { rb->len = len; } -void *ngtcp2_ringbuf_get(ngtcp2_ringbuf *rb, size_t offset) { +void *ngtcp2_ringbuf_get(const ngtcp2_ringbuf *rb, size_t offset) { assert(offset < rb->len); offset = (rb->first + offset) & rb->mask; + return &rb->buf[offset * rb->size]; } diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_ringbuf.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_ringbuf.h index b28a882c4bae84..6953ea6278f88d 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_ringbuf.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_ringbuf.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -51,7 +51,7 @@ typedef struct ngtcp2_ringbuf { /* * ngtcp2_ringbuf_init initializes |rb|. |nmemb| is the number of * elements that can be stored in this buffer. |size| is the size of - * each element. |size| must be power of 2. + * each element. |nmemb| must be power of 2. * * This function returns 0 if it succeeds, or one of the following * negative error codes: @@ -64,7 +64,7 @@ int ngtcp2_ringbuf_init(ngtcp2_ringbuf *rb, size_t nmemb, size_t size, /* * ngtcp2_ringbuf_buf_init initializes |rb| with given buffer and - * size. + * size. Same restrictions are applied as ngtcp2_ringbuf_init. */ void ngtcp2_ringbuf_buf_init(ngtcp2_ringbuf *rb, size_t nmemb, size_t size, uint8_t *buf, const ngtcp2_mem *mem); @@ -79,15 +79,16 @@ void ngtcp2_ringbuf_free(ngtcp2_ringbuf *rb); the buffer backward, and returns the pointer to the element. Caller can store data to the buffer pointed by the returned pointer. If this action exceeds the capacity of the ring buffer, - the last element is silently overwritten, and rb->len remains - unchanged. */ + this function returns the pointer to the last element, and rb->len + remains unchanged. */ void *ngtcp2_ringbuf_push_front(ngtcp2_ringbuf *rb); /* ngtcp2_ringbuf_push_back moves the offset to the last element in the buffer forward, and returns the pointer to the element. Caller can store data to the buffer pointed by the returned pointer. If - this action exceeds the capacity of the ring buffer, the first - element is silently overwritten, and rb->len remains unchanged. */ + this action exceeds the capacity of the ring buffer, this function + returns the pointer to the first element, and rb->len remains + unchanged. */ void *ngtcp2_ringbuf_push_back(ngtcp2_ringbuf *rb); /* @@ -106,7 +107,7 @@ void ngtcp2_ringbuf_resize(ngtcp2_ringbuf *rb, size_t len); /* ngtcp2_ringbuf_get returns the pointer to the element at |offset|. */ -void *ngtcp2_ringbuf_get(ngtcp2_ringbuf *rb, size_t offset); +void *ngtcp2_ringbuf_get(const ngtcp2_ringbuf *rb, size_t offset); /* ngtcp2_ringbuf_len returns the number of elements stored. */ #define ngtcp2_ringbuf_len(RB) ((RB)->len) @@ -115,9 +116,8 @@ void *ngtcp2_ringbuf_get(ngtcp2_ringbuf *rb, size_t offset); int ngtcp2_ringbuf_full(ngtcp2_ringbuf *rb); /* ngtcp2_static_ringbuf_def defines ngtcp2_ringbuf struct wrapper - which uses a statically allocated buffer that is suitable for a - usage that does not change buffer size with ngtcp2_ringbuf_resize. - ngtcp2_ringbuf_free should never be called for rb field. */ + which uses a statically allocated buffer. ngtcp2_ringbuf_free + should never be called for rb field. */ #define ngtcp2_static_ringbuf_def(NAME, NMEMB, SIZE) \ typedef struct ngtcp2_static_ringbuf_##NAME { \ ngtcp2_ringbuf rb; \ @@ -125,8 +125,8 @@ int ngtcp2_ringbuf_full(ngtcp2_ringbuf *rb); } ngtcp2_static_ringbuf_##NAME; \ \ static inline void ngtcp2_static_ringbuf_##NAME##_init( \ - ngtcp2_static_ringbuf_##NAME *srb) { \ + ngtcp2_static_ringbuf_##NAME *srb) { \ ngtcp2_ringbuf_buf_init(&srb->rb, (NMEMB), (SIZE), srb->buf, NULL); \ } -#endif /* NGTCP2_RINGBUF_H */ +#endif /* !defined(NGTCP2_RINGBUF_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_rob.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_rob.c index 5cac383f7bb166..ce6c2113ddf1ce 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_rob.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_rob.c @@ -56,7 +56,6 @@ int ngtcp2_rob_data_new(ngtcp2_rob_data **pd, uint64_t offset, size_t chunk, (*pd)->range.begin = offset; (*pd)->range.end = offset + chunk; (*pd)->begin = (uint8_t *)(*pd) + sizeof(ngtcp2_rob_data); - (*pd)->end = (*pd)->begin + chunk; return 0; } @@ -69,8 +68,8 @@ int ngtcp2_rob_init(ngtcp2_rob *rob, size_t chunk, const ngtcp2_mem *mem) { int rv; ngtcp2_rob_gap *g; - ngtcp2_ksl_init(&rob->gapksl, ngtcp2_ksl_range_compar, sizeof(ngtcp2_range), - mem); + ngtcp2_ksl_init(&rob->gapksl, ngtcp2_ksl_range_compar, + ngtcp2_ksl_range_search, sizeof(ngtcp2_range), mem); rv = ngtcp2_rob_gap_new(&g, 0, UINT64_MAX, mem); if (rv != 0) { @@ -82,8 +81,8 @@ int ngtcp2_rob_init(ngtcp2_rob *rob, size_t chunk, const ngtcp2_mem *mem) { goto fail_gapksl_ksl_insert; } - ngtcp2_ksl_init(&rob->dataksl, ngtcp2_ksl_range_compar, sizeof(ngtcp2_range), - mem); + ngtcp2_ksl_init(&rob->dataksl, ngtcp2_ksl_range_compar, + ngtcp2_ksl_range_search, sizeof(ngtcp2_range), mem); rob->chunk = chunk; rob->mem = mem; @@ -126,8 +125,8 @@ static int rob_write_data(ngtcp2_rob *rob, uint64_t offset, const uint8_t *data, ngtcp2_range range = {offset, offset + len}; ngtcp2_ksl_it it; - for (it = ngtcp2_ksl_lower_bound_compar(&rob->dataksl, &range, - ngtcp2_ksl_range_exclusive_compar); + for (it = ngtcp2_ksl_lower_bound_search(&rob->dataksl, &range, + ngtcp2_ksl_range_exclusive_search); len; ngtcp2_ksl_it_next(&it)) { if (ngtcp2_ksl_it_end(&it)) { d = NULL; @@ -149,7 +148,8 @@ static int rob_write_data(ngtcp2_rob *rob, uint64_t offset, const uint8_t *data, } } - n = (size_t)ngtcp2_min((uint64_t)len, d->range.begin + rob->chunk - offset); + n = (size_t)ngtcp2_min_uint64((uint64_t)len, + d->range.begin + rob->chunk - offset); memcpy(d->begin + (offset - d->range.begin), data, n); offset += n; data += n; @@ -166,8 +166,8 @@ int ngtcp2_rob_push(ngtcp2_rob *rob, uint64_t offset, const uint8_t *data, ngtcp2_range m, l, r, q = {offset, offset + datalen}; ngtcp2_ksl_it it; - it = ngtcp2_ksl_lower_bound_compar(&rob->gapksl, &q, - ngtcp2_ksl_range_exclusive_compar); + it = ngtcp2_ksl_lower_bound_search(&rob->gapksl, &q, + ngtcp2_ksl_range_exclusive_search); for (; !ngtcp2_ksl_it_end(&it);) { g = ngtcp2_ksl_it_get(&it); @@ -176,9 +176,11 @@ int ngtcp2_rob_push(ngtcp2_rob *rob, uint64_t offset, const uint8_t *data, if (!ngtcp2_range_len(&m)) { break; } + if (ngtcp2_range_eq(&g->range, &m)) { ngtcp2_ksl_remove_hint(&rob->gapksl, &it, &it, &g->range); ngtcp2_rob_gap_del(g, rob->mem); + rv = rob_write_data(rob, m.begin, data + (m.begin - offset), (size_t)ngtcp2_range_len(&m)); if (rv != 0) { @@ -187,17 +189,21 @@ int ngtcp2_rob_push(ngtcp2_rob *rob, uint64_t offset, const uint8_t *data, continue; } + ngtcp2_range_cut(&l, &r, &g->range, &m); + if (ngtcp2_range_len(&l)) { ngtcp2_ksl_update_key(&rob->gapksl, &g->range, &l); g->range = l; if (ngtcp2_range_len(&r)) { ngtcp2_rob_gap *ng; + rv = ngtcp2_rob_gap_new(&ng, r.begin, r.end, rob->mem); if (rv != 0) { return rv; } + rv = ngtcp2_ksl_insert(&rob->gapksl, &it, &ng->range, ng); if (rv != 0) { ngtcp2_rob_gap_del(ng, rob->mem); @@ -208,13 +214,16 @@ int ngtcp2_rob_push(ngtcp2_rob *rob, uint64_t offset, const uint8_t *data, ngtcp2_ksl_update_key(&rob->gapksl, &g->range, &r); g->range = r; } + rv = rob_write_data(rob, m.begin, data + (m.begin - offset), (size_t)ngtcp2_range_len(&m)); if (rv != 0) { return rv; } + ngtcp2_ksl_it_next(&it); } + return 0; } @@ -230,12 +239,16 @@ void ngtcp2_rob_remove_prefix(ngtcp2_rob *rob, uint64_t offset) { if (offset <= g->range.begin) { break; } + if (offset < g->range.end) { ngtcp2_range r = {offset, g->range.end}; + ngtcp2_ksl_update_key(&rob->gapksl, &g->range, &r); g->range.begin = offset; + break; } + ngtcp2_ksl_remove_hint(&rob->gapksl, &it, &it, &g->range); ngtcp2_rob_gap_del(g, rob->mem); } @@ -247,12 +260,13 @@ void ngtcp2_rob_remove_prefix(ngtcp2_rob *rob, uint64_t offset) { if (offset < d->range.begin + rob->chunk) { return; } + ngtcp2_ksl_remove_hint(&rob->dataksl, &it, &it, &d->range); ngtcp2_rob_data_del(d, rob->mem); } } -size_t ngtcp2_rob_data_at(ngtcp2_rob *rob, const uint8_t **pdest, +size_t ngtcp2_rob_data_at(const ngtcp2_rob *rob, const uint8_t **pdest, uint64_t offset) { ngtcp2_rob_gap *g; ngtcp2_rob_data *d; @@ -278,8 +292,9 @@ size_t ngtcp2_rob_data_at(ngtcp2_rob *rob, const uint8_t **pdest, *pdest = d->begin + (offset - d->range.begin); - return (size_t)(ngtcp2_min(g->range.begin, d->range.begin + rob->chunk) - - offset); + return ( + size_t)(ngtcp2_min_uint64(g->range.begin, d->range.begin + rob->chunk) - + offset); } void ngtcp2_rob_pop(ngtcp2_rob *rob, uint64_t offset, size_t len) { @@ -299,7 +314,7 @@ void ngtcp2_rob_pop(ngtcp2_rob *rob, uint64_t offset, size_t len) { ngtcp2_rob_data_del(d, rob->mem); } -uint64_t ngtcp2_rob_first_gap_offset(ngtcp2_rob *rob) { +uint64_t ngtcp2_rob_first_gap_offset(const ngtcp2_rob *rob) { ngtcp2_ksl_it it = ngtcp2_ksl_begin(&rob->gapksl); ngtcp2_rob_gap *g; @@ -312,6 +327,6 @@ uint64_t ngtcp2_rob_first_gap_offset(ngtcp2_rob *rob) { return g->range.begin; } -int ngtcp2_rob_data_buffered(ngtcp2_rob *rob) { +int ngtcp2_rob_data_buffered(const ngtcp2_rob *rob) { return ngtcp2_ksl_len(&rob->dataksl) != 0; } diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_rob.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_rob.h index 6518d56c539185..d53b5160b10230 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_rob.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_rob.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -70,12 +70,10 @@ void ngtcp2_rob_gap_del(ngtcp2_rob_gap *g, const ngtcp2_mem *mem); * ngtcp2_rob_data holds the buffered stream data. */ typedef struct ngtcp2_rob_data { - /* range is the range of this gap. */ + /* range is the range of this data. */ ngtcp2_range range; /* begin points to the buffer. */ uint8_t *begin; - /* end points to the one beyond of the last byte of the buffer */ - uint8_t *end; } ngtcp2_rob_data; /* @@ -110,8 +108,8 @@ typedef struct ngtcp2_rob { /* gapksl maintains the range of offset which is not received yet. Initially, its range is [0, UINT64_MAX). */ ngtcp2_ksl gapksl; - /* dataksl maintains the list of buffers which store received data - ordered by stream offset. */ + /* dataksl maintains the buffers which store received out-of-order + data ordered by stream offset. */ ngtcp2_ksl dataksl; /* mem is custom memory allocator */ const ngtcp2_mem *mem; @@ -137,8 +135,8 @@ int ngtcp2_rob_init(ngtcp2_rob *rob, size_t chunk, const ngtcp2_mem *mem); void ngtcp2_rob_free(ngtcp2_rob *rob); /* - * ngtcp2_rob_push adds new data of length |datalen| at the stream - * offset |offset|. + * ngtcp2_rob_push adds new data pointed by |data| of length |datalen| + * at the stream offset |offset|. * * This function returns 0 if it succeeds, or one of the following * negative error codes: @@ -151,7 +149,8 @@ int ngtcp2_rob_push(ngtcp2_rob *rob, uint64_t offset, const uint8_t *data, /* * ngtcp2_rob_remove_prefix removes gap up to |offset|, exclusive. It - * also removes data buffer if it is completely included in |offset|. + * also removes buffered data if it is completely included in + * |offset|. */ void ngtcp2_rob_remove_prefix(ngtcp2_rob *rob, uint64_t offset); @@ -159,9 +158,10 @@ void ngtcp2_rob_remove_prefix(ngtcp2_rob *rob, uint64_t offset); * ngtcp2_rob_data_at stores the pointer to the buffer of stream * offset |offset| to |*pdest| if it is available, and returns the * valid length of available data. If no data is available, it - * returns 0. + * returns 0. This function only returns the data before the first + * gap. It returns 0 even if data is available after the first gap. */ -size_t ngtcp2_rob_data_at(ngtcp2_rob *rob, const uint8_t **pdest, +size_t ngtcp2_rob_data_at(const ngtcp2_rob *rob, const uint8_t **pdest, uint64_t offset); /* @@ -181,11 +181,11 @@ void ngtcp2_rob_pop(ngtcp2_rob *rob, uint64_t offset, size_t len); * ngtcp2_rob_first_gap_offset returns the offset to the first gap. * If there is no gap, it returns UINT64_MAX. */ -uint64_t ngtcp2_rob_first_gap_offset(ngtcp2_rob *rob); +uint64_t ngtcp2_rob_first_gap_offset(const ngtcp2_rob *rob); /* * ngtcp2_rob_data_buffered returns nonzero if any data is buffered. */ -int ngtcp2_rob_data_buffered(ngtcp2_rob *rob); +int ngtcp2_rob_data_buffered(const ngtcp2_rob *rob); -#endif /* NGTCP2_ROB_H */ +#endif /* !defined(NGTCP2_ROB_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_rst.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_rst.c index b8587e3e9dbac8..89c89acdc265a2 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_rst.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_rst.c @@ -35,26 +35,25 @@ void ngtcp2_rs_init(ngtcp2_rs *rs) { rs->interval = UINT64_MAX; rs->delivered = 0; rs->prior_delivered = 0; - rs->prior_ts = 0; + rs->prior_ts = UINT64_MAX; rs->tx_in_flight = 0; rs->lost = 0; rs->prior_lost = 0; rs->send_elapsed = 0; rs->ack_elapsed = 0; + rs->last_end_seq = -1; rs->is_app_limited = 0; } void ngtcp2_rst_init(ngtcp2_rst *rst) { ngtcp2_rs_init(&rst->rs); - ngtcp2_window_filter_init(&rst->wf, 12); rst->delivered = 0; rst->delivered_ts = 0; rst->first_sent_ts = 0; rst->app_limited = 0; - rst->next_round_delivered = 0; - rst->round_count = 0; rst->is_cwnd_limited = 0; rst->lost = 0; + rst->last_seq = -1; } void ngtcp2_rst_on_pkt_sent(ngtcp2_rst *rst, ngtcp2_rtb_entry *ent, @@ -68,27 +67,21 @@ void ngtcp2_rst_on_pkt_sent(ngtcp2_rst *rst, ngtcp2_rtb_entry *ent, ent->rst.is_app_limited = rst->app_limited != 0; ent->rst.tx_in_flight = cstat->bytes_in_flight + ent->pktlen; ent->rst.lost = rst->lost; + ent->rst.end_seq = ++rst->last_seq; } -void ngtcp2_rst_on_ack_recv(ngtcp2_rst *rst, ngtcp2_conn_stat *cstat, - uint64_t pkt_delivered) { +void ngtcp2_rst_on_ack_recv(ngtcp2_rst *rst, ngtcp2_conn_stat *cstat) { ngtcp2_rs *rs = &rst->rs; - uint64_t rate; if (rst->app_limited && rst->delivered > rst->app_limited) { rst->app_limited = 0; } - if (pkt_delivered >= rst->next_round_delivered) { - rst->next_round_delivered = pkt_delivered; - ++rst->round_count; - } - - if (rs->prior_ts == 0) { + if (rs->prior_ts == UINT64_MAX) { return; } - rs->interval = ngtcp2_max(rs->send_elapsed, rs->ack_elapsed); + rs->interval = ngtcp2_max_uint64(rs->send_elapsed, rs->ack_elapsed); rs->delivered = rst->delivered - rs->prior_delivered; rs->lost = rst->lost - rs->prior_lost; @@ -102,12 +95,13 @@ void ngtcp2_rst_on_ack_recv(ngtcp2_rst *rst, ngtcp2_conn_stat *cstat, return; } - rate = rs->delivered * NGTCP2_SECONDS / rs->interval; + cstat->delivery_rate_sec = rs->delivered * NGTCP2_SECONDS / rs->interval; +} - if (rate > ngtcp2_window_filter_get_best(&rst->wf) || !rst->app_limited) { - ngtcp2_window_filter_update(&rst->wf, rate, rst->round_count); - cstat->delivery_rate_sec = ngtcp2_window_filter_get_best(&rst->wf); - } +static int rst_is_newest_pkt(const ngtcp2_rst *rst, const ngtcp2_rtb_entry *ent, + const ngtcp2_rs *rs) { + return ent->ts > rst->first_sent_ts || + (ent->ts == rst->first_sent_ts && ent->rst.end_seq > rs->last_end_seq); } void ngtcp2_rst_update_rate_sample(ngtcp2_rst *rst, const ngtcp2_rtb_entry *ent, @@ -117,7 +111,7 @@ void ngtcp2_rst_update_rate_sample(ngtcp2_rst *rst, const ngtcp2_rtb_entry *ent, rst->delivered += ent->pktlen; rst->delivered_ts = ts; - if (ent->rst.delivered > rs->prior_delivered) { + if (rs->prior_ts == UINT64_MAX || rst_is_newest_pkt(rst, ent, rs)) { rs->prior_delivered = ent->rst.delivered; rs->prior_ts = ent->rst.delivered_ts; rs->is_app_limited = ent->rst.is_app_limited; @@ -125,6 +119,7 @@ void ngtcp2_rst_update_rate_sample(ngtcp2_rst *rst, const ngtcp2_rtb_entry *ent, rs->ack_elapsed = rst->delivered_ts - ent->rst.delivered_ts; rs->tx_in_flight = ent->rst.tx_in_flight; rs->prior_lost = ent->rst.lost; + rs->last_end_seq = ent->rst.end_seq; rst->first_sent_ts = ent->ts; } } diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_rst.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_rst.h index c9e1e161b7766f..95616eee97d99f 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_rst.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_rst.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -51,6 +51,7 @@ typedef struct ngtcp2_rs { uint64_t prior_lost; ngtcp2_duration send_elapsed; ngtcp2_duration ack_elapsed; + int64_t last_end_seq; int is_app_limited; } ngtcp2_rs; @@ -58,18 +59,20 @@ void ngtcp2_rs_init(ngtcp2_rs *rs); /* * ngtcp2_rst implements delivery rate estimation described in - * https://tools.ietf.org/html/draft-cheng-iccrg-delivery-rate-estimation-00 + * https://ietf-wg-ccwg.github.io/draft-cardwell-ccwg-bbr/draft-cardwell-ccwg-bbr.html */ typedef struct ngtcp2_rst { ngtcp2_rs rs; - ngtcp2_window_filter wf; uint64_t delivered; ngtcp2_tstamp delivered_ts; ngtcp2_tstamp first_sent_ts; uint64_t app_limited; - uint64_t next_round_delivered; - uint64_t round_count; uint64_t lost; + /* last_seq is the sequence number of packets across all packet + number spaces. If we would adopt single packet number sequence + across all packet number spaces, we can replace this with a + packet number. */ + int64_t last_seq; int is_cwnd_limited; } ngtcp2_rst; @@ -77,10 +80,9 @@ void ngtcp2_rst_init(ngtcp2_rst *rst); void ngtcp2_rst_on_pkt_sent(ngtcp2_rst *rst, ngtcp2_rtb_entry *ent, const ngtcp2_conn_stat *cstat); -void ngtcp2_rst_on_ack_recv(ngtcp2_rst *rst, ngtcp2_conn_stat *cstat, - uint64_t pkt_delivered); +void ngtcp2_rst_on_ack_recv(ngtcp2_rst *rst, ngtcp2_conn_stat *cstat); void ngtcp2_rst_update_rate_sample(ngtcp2_rst *rst, const ngtcp2_rtb_entry *ent, ngtcp2_tstamp ts); void ngtcp2_rst_update_app_limited(ngtcp2_rst *rst, ngtcp2_conn_stat *cstat); -#endif /* NGTCP2_RST_H */ +#endif /* !defined(NGTCP2_RST_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_rtb.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_rtb.c index 5ebdce7d0e2715..4d417186e15854 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_rtb.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_rtb.c @@ -38,7 +38,7 @@ #include "ngtcp2_tstamp.h" #include "ngtcp2_frame_chain.h" -ngtcp2_objalloc_def(rtb_entry, ngtcp2_rtb_entry, oplent); +ngtcp2_objalloc_def(rtb_entry, ngtcp2_rtb_entry, oplent) static void rtb_entry_init(ngtcp2_rtb_entry *ent, const ngtcp2_pkt_hd *hd, ngtcp2_frame_chain *frc, ngtcp2_tstamp ts, @@ -53,7 +53,6 @@ static void rtb_entry_init(ngtcp2_rtb_entry *ent, const ngtcp2_pkt_hd *hd, ent->lost_ts = UINT64_MAX; ent->pktlen = pktlen; ent->flags = flags; - ent->next = NULL; } int ngtcp2_rtb_entry_objalloc_new(ngtcp2_rtb_entry **pent, @@ -82,19 +81,14 @@ void ngtcp2_rtb_entry_objalloc_del(ngtcp2_rtb_entry *ent, ngtcp2_objalloc_rtb_entry_release(objalloc, ent); } -static int greater(const ngtcp2_ksl_key *lhs, const ngtcp2_ksl_key *rhs) { - return *(int64_t *)lhs > *(int64_t *)rhs; -} - -void ngtcp2_rtb_init(ngtcp2_rtb *rtb, ngtcp2_pktns_id pktns_id, - ngtcp2_strm *crypto, ngtcp2_rst *rst, ngtcp2_cc *cc, +void ngtcp2_rtb_init(ngtcp2_rtb *rtb, ngtcp2_rst *rst, ngtcp2_cc *cc, int64_t cc_pkt_num, ngtcp2_log *log, ngtcp2_qlog *qlog, ngtcp2_objalloc *rtb_entry_objalloc, ngtcp2_objalloc *frc_objalloc, const ngtcp2_mem *mem) { rtb->rtb_entry_objalloc = rtb_entry_objalloc; rtb->frc_objalloc = frc_objalloc; - ngtcp2_ksl_init(&rtb->ents, greater, sizeof(int64_t), mem); - rtb->crypto = crypto; + ngtcp2_ksl_init(&rtb->ents, ngtcp2_ksl_int64_greater, + ngtcp2_ksl_int64_greater_search, sizeof(int64_t), mem); rtb->rst = rst; rtb->cc = cc; rtb->log = log; @@ -105,7 +99,6 @@ void ngtcp2_rtb_init(ngtcp2_rtb *rtb, ngtcp2_pktns_id pktns_id, rtb->num_retransmittable = 0; rtb->num_pto_eliciting = 0; rtb->probe_pkt_left = 0; - rtb->pktns_id = pktns_id; rtb->cc_pkt_num = cc_pkt_num; rtb->cc_bytes_in_flight = 0; rtb->persistent_congestion_start_ts = UINT64_MAX; @@ -145,9 +138,11 @@ static void rtb_on_add(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent, if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) { ++rtb->num_ack_eliciting; } + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE) { ++rtb->num_retransmittable; } + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING) { ++rtb->num_pto_eliciting; } @@ -209,10 +204,10 @@ static size_t rtb_on_remove(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent, #define NGTCP2_RECLAIM_FLAG_ON_LOSS 0x01u /* - * rtb_reclaim_frame queues unacknowledged frames included in |ent| - * for retransmission. The re-queued frames are not deleted from - * |ent|. It returns the number of frames queued. |flags| is bitwise - * OR of 0 or more of NGTCP2_RECLAIM_FLAG_*. + * rtb_reclaim_frame copies and queues frames included in |ent| for + * retransmission. The frames are not deleted from |ent|. It returns + * the number of frames queued. |flags| is bitwise OR of 0 or more of + * NGTCP2_RECLAIM_FLAG_*. */ static ngtcp2_ssize rtb_reclaim_frame(ngtcp2_rtb *rtb, uint8_t flags, ngtcp2_conn *conn, ngtcp2_pktns *pktns, @@ -229,11 +224,13 @@ static ngtcp2_ssize rtb_reclaim_frame(ngtcp2_rtb *rtb, uint8_t flags, /* TODO Reconsider the order of pfrc */ for (frc = ent->frc; frc; frc = frc->next) { fr = &frc->fr; + /* Check that a late ACK acknowledged this frame. */ if (frc->binder && (frc->binder->flags & NGTCP2_FRAME_CHAIN_BINDER_FLAG_ACK)) { continue; } + switch (frc->fr.type) { case NGTCP2_FRAME_STREAM: strm = ngtcp2_conn_find_stream(conn, fr->stream.stream_id); @@ -244,14 +241,15 @@ static ngtcp2_ssize rtb_reclaim_frame(ngtcp2_rtb *rtb, uint8_t flags, gap = ngtcp2_strm_get_unacked_range_after(strm, fr->stream.offset); range.begin = fr->stream.offset; - range.end = fr->stream.offset + - ngtcp2_vec_len(fr->stream.data, fr->stream.datacnt); + range.end = + fr->stream.offset + ngtcp2_vec_len(fr->stream.data, fr->stream.datacnt); range = ngtcp2_range_intersect(&range, &gap); + if (ngtcp2_range_len(&range) == 0) { if (!fr->stream.fin) { /* 0 length STREAM frame with offset == 0 must be - retransmitted if no non-empty data is sent to this stream - and no data in this stream is acknowledged. */ + retransmitted if no non-empty data are sent to this + stream, and no data in this stream are acknowledged. */ if (fr->stream.offset != 0 || fr->stream.datacnt != 0 || strm->tx.offset || (strm->flags & NGTCP2_STRM_FLAG_ANY_ACKED)) { continue; @@ -268,7 +266,7 @@ static ngtcp2_ssize rtb_reclaim_frame(ngtcp2_rtb *rtb, uint8_t flags, } rv = ngtcp2_frame_chain_stream_datacnt_objalloc_new( - &nfrc, fr->stream.datacnt, rtb->frc_objalloc, rtb->mem); + &nfrc, fr->stream.datacnt, rtb->frc_objalloc, rtb->mem); if (rv != 0) { return rv; } @@ -282,8 +280,10 @@ static ngtcp2_ssize rtb_reclaim_frame(ngtcp2_rtb *rtb, uint8_t flags, ngtcp2_frame_chain_objalloc_del(nfrc, rtb->frc_objalloc, rtb->mem); return rv; } + if (!ngtcp2_strm_is_tx_queued(strm)) { strm->cycle = ngtcp2_conn_tx_strmq_first_cycle(conn); + rv = ngtcp2_conn_tx_strmq_push(conn, strm); if (rv != 0) { return rv; @@ -294,20 +294,22 @@ static ngtcp2_ssize rtb_reclaim_frame(ngtcp2_rtb *rtb, uint8_t flags, continue; case NGTCP2_FRAME_CRYPTO: - /* Don't resend CRYPTO frame if the whole region it contains has - been acknowledged */ - gap = ngtcp2_strm_get_unacked_range_after(rtb->crypto, fr->stream.offset); + /* Do not resend CRYPTO frame if the whole region it contains + has been acknowledged */ + gap = ngtcp2_strm_get_unacked_range_after(&pktns->crypto.strm, + fr->stream.offset); range.begin = fr->stream.offset; - range.end = fr->stream.offset + - ngtcp2_vec_len(fr->stream.data, fr->stream.datacnt); + range.end = + fr->stream.offset + ngtcp2_vec_len(fr->stream.data, fr->stream.datacnt); range = ngtcp2_range_intersect(&range, &gap); + if (ngtcp2_range_len(&range) == 0) { continue; } rv = ngtcp2_frame_chain_stream_datacnt_objalloc_new( - &nfrc, fr->stream.datacnt, rtb->frc_objalloc, rtb->mem); + &nfrc, fr->stream.datacnt, rtb->frc_objalloc, rtb->mem); if (rv != 0) { return rv; } @@ -328,8 +330,8 @@ static ngtcp2_ssize rtb_reclaim_frame(ngtcp2_rtb *rtb, uint8_t flags, continue; case NGTCP2_FRAME_NEW_TOKEN: rv = ngtcp2_frame_chain_new_token_objalloc_new( - &nfrc, fr->new_token.token, fr->new_token.tokenlen, rtb->frc_objalloc, - rtb->mem); + &nfrc, fr->new_token.token, fr->new_token.tokenlen, rtb->frc_objalloc, + rtb->mem); if (rv != 0) { return rv; } @@ -366,7 +368,7 @@ static ngtcp2_ssize rtb_reclaim_frame(ngtcp2_rtb *rtb, uint8_t flags, case NGTCP2_FRAME_MAX_STREAM_DATA: strm = ngtcp2_conn_find_stream(conn, fr->max_stream_data.stream_id); if (strm == NULL || !ngtcp2_strm_require_retransmit_max_stream_data( - strm, &fr->max_stream_data)) { + strm, &fr->max_stream_data)) { continue; } @@ -374,7 +376,7 @@ static ngtcp2_ssize rtb_reclaim_frame(ngtcp2_rtb *rtb, uint8_t flags, case NGTCP2_FRAME_STREAM_DATA_BLOCKED: strm = ngtcp2_conn_find_stream(conn, fr->stream_data_blocked.stream_id); if (strm == NULL || !ngtcp2_strm_require_retransmit_stream_data_blocked( - strm, &fr->stream_data_blocked)) { + strm, &fr->stream_data_blocked)) { continue; } @@ -423,6 +425,7 @@ static int conn_process_lost_datagram(ngtcp2_conn *conn, if (rv != 0) { return NGTCP2_ERR_CALLBACK_FAILURE; } + break; } } @@ -430,10 +433,9 @@ static int conn_process_lost_datagram(ngtcp2_conn *conn, return 0; } -static int rtb_on_pkt_lost(ngtcp2_rtb *rtb, ngtcp2_ksl_it *it, - ngtcp2_rtb_entry *ent, ngtcp2_conn_stat *cstat, - ngtcp2_conn *conn, ngtcp2_pktns *pktns, - ngtcp2_tstamp ts) { +static int rtb_on_pkt_lost(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent, + ngtcp2_conn_stat *cstat, ngtcp2_conn *conn, + ngtcp2_pktns *pktns, ngtcp2_tstamp ts) { int rv; ngtcp2_ssize reclaimed; ngtcp2_cc *cc = rtb->cc; @@ -451,7 +453,7 @@ static int rtb_on_pkt_lost(ngtcp2_rtb *rtb, ngtcp2_ksl_it *it, } else if (rtb->cc->on_pkt_lost) { cc->on_pkt_lost(cc, cstat, ngtcp2_cc_pkt_init(&pkt, ent->hd.pkt_num, ent->pktlen, - rtb->pktns_id, ent->ts, ent->rst.lost, + pktns->id, ent->ts, ent->rst.lost, ent->rst.tx_in_flight, ent->rst.is_app_limited), ts); @@ -463,34 +465,25 @@ static int rtb_on_pkt_lost(ngtcp2_rtb *rtb, ngtcp2_ksl_it *it, ent->hd.pkt_num); assert(!(ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED)); assert(UINT64_MAX == ent->lost_ts); - - ent->flags |= NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED; - ent->lost_ts = ts; - - ++rtb->num_lost_pkts; - - ngtcp2_ksl_it_next(it); - - return 0; - } - - if (conn->callbacks.lost_datagram && - (ent->flags & NGTCP2_RTB_ENTRY_FLAG_DATAGRAM)) { - rv = conn_process_lost_datagram(conn, ent); - if (rv != 0) { - return rv; + } else { + if (conn->callbacks.lost_datagram && + (ent->flags & NGTCP2_RTB_ENTRY_FLAG_DATAGRAM)) { + rv = conn_process_lost_datagram(conn, ent); + if (rv != 0) { + return rv; + } } - } - if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE) { - assert(ent->frc); - assert(!(ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED)); - assert(UINT64_MAX == ent->lost_ts); + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE) { + assert(ent->frc); + assert(!(ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED)); + assert(UINT64_MAX == ent->lost_ts); - reclaimed = + reclaimed = rtb_reclaim_frame(rtb, NGTCP2_RECLAIM_FLAG_ON_LOSS, conn, pktns, ent); - if (reclaimed < 0) { - return (int)reclaimed; + if (reclaimed < 0) { + return (int)reclaimed; + } } } @@ -499,8 +492,6 @@ static int rtb_on_pkt_lost(ngtcp2_rtb *rtb, ngtcp2_ksl_it *it, ++rtb->num_lost_pkts; - ngtcp2_ksl_it_next(it); - return 0; } @@ -518,7 +509,7 @@ int ngtcp2_rtb_add(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent, return 0; } -ngtcp2_ksl_it ngtcp2_rtb_head(ngtcp2_rtb *rtb) { +ngtcp2_ksl_it ngtcp2_rtb_head(const ngtcp2_rtb *rtb) { return ngtcp2_ksl_begin(&rtb->ents); } @@ -566,22 +557,24 @@ static void conn_ack_crypto_data(ngtcp2_conn *conn, ngtcp2_pktns *pktns, return; } -static int rtb_process_acked_pkt(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent, - ngtcp2_conn *conn) { +static int process_acked_pkt(ngtcp2_rtb_entry *ent, ngtcp2_conn *conn, + ngtcp2_pktns *pktns) { ngtcp2_frame_chain *frc; uint64_t prev_stream_offset, stream_offset; ngtcp2_strm *strm; int rv; uint64_t datalen; - ngtcp2_strm *crypto = rtb->crypto; - ngtcp2_pktns *pktns = NULL; + ngtcp2_strm *crypto = &pktns->crypto.strm; if ((ent->flags & NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE) && conn->pmtud && conn->pmtud->tx_pkt_num <= ent->hd.pkt_num) { ngtcp2_pmtud_probe_success(conn->pmtud, ent->pktlen); - conn->dcid.current.max_udp_payload_size = - ngtcp2_max(conn->dcid.current.max_udp_payload_size, ent->pktlen); + if (conn->dcid.current.max_udp_payload_size < ent->pktlen) { + conn->dcid.current.max_udp_payload_size = ent->pktlen; + conn->cstat.max_tx_udp_payload_size = + ngtcp2_conn_get_path_max_tx_udp_payload_size(conn); + } if (ngtcp2_pmtud_finished(conn->pmtud)) { ngtcp2_conn_stop_pmtud(conn); @@ -611,23 +604,25 @@ static int rtb_process_acked_pkt(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent, } prev_stream_offset = ngtcp2_strm_get_acked_offset(strm); + rv = ngtcp2_strm_ack_data( - strm, frc->fr.stream.offset, - ngtcp2_vec_len(frc->fr.stream.data, frc->fr.stream.datacnt)); + strm, frc->fr.stream.offset, + ngtcp2_vec_len(frc->fr.stream.data, frc->fr.stream.datacnt)); if (rv != 0) { return rv; } if (conn->callbacks.acked_stream_data_offset) { stream_offset = ngtcp2_strm_get_acked_offset(strm); + datalen = stream_offset - prev_stream_offset; if (datalen == 0 && !frc->fr.stream.fin) { break; } rv = conn->callbacks.acked_stream_data_offset( - conn, strm->stream_id, prev_stream_offset, datalen, conn->user_data, - strm->stream_user_data); + conn, strm->stream_id, prev_stream_offset, datalen, conn->user_data, + strm->stream_user_data); if (rv != 0) { return NGTCP2_ERR_CALLBACK_FAILURE; } @@ -637,36 +632,25 @@ static int rtb_process_acked_pkt(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent, if (rv != 0) { return rv; } + break; case NGTCP2_FRAME_CRYPTO: prev_stream_offset = ngtcp2_strm_get_acked_offset(crypto); + rv = ngtcp2_strm_ack_data( - crypto, frc->fr.stream.offset, - ngtcp2_vec_len(frc->fr.stream.data, frc->fr.stream.datacnt)); + crypto, frc->fr.stream.offset, + ngtcp2_vec_len(frc->fr.stream.data, frc->fr.stream.datacnt)); if (rv != 0) { return rv; } stream_offset = ngtcp2_strm_get_acked_offset(crypto); + datalen = stream_offset - prev_stream_offset; if (datalen == 0) { break; } - switch (rtb->pktns_id) { - case NGTCP2_PKTNS_ID_INITIAL: - pktns = conn->in_pktns; - break; - case NGTCP2_PKTNS_ID_HANDSHAKE: - pktns = conn->hs_pktns; - break; - case NGTCP2_PKTNS_ID_APPLICATION: - pktns = &conn->pktns; - break; - default: - ngtcp2_unreachable(); - } - conn_ack_crypto_data(conn, pktns, datalen); break; @@ -675,11 +659,14 @@ static int rtb_process_acked_pkt(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent, if (strm == NULL) { break; } + strm->flags |= NGTCP2_STRM_FLAG_RESET_STREAM_ACKED; + rv = ngtcp2_conn_close_stream_if_shut_rdwr(conn, strm); if (rv != 0) { return rv; } + break; case NGTCP2_FRAME_RETIRE_CONNECTION_ID: ngtcp2_conn_untrack_retired_dcid_seq(conn, @@ -702,14 +689,17 @@ static int rtb_process_acked_pkt(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent, if (rv != 0) { return NGTCP2_ERR_CALLBACK_FAILURE; } + break; } } + return 0; } static void rtb_on_pkt_acked(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent, - ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts) { + ngtcp2_conn_stat *cstat, const ngtcp2_pktns *pktns, + ngtcp2_tstamp ts) { ngtcp2_cc *cc = rtb->cc; ngtcp2_cc_pkt pkt; @@ -718,7 +708,7 @@ static void rtb_on_pkt_acked(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent, if (cc->on_pkt_acked) { cc->on_pkt_acked(cc, cstat, ngtcp2_cc_pkt_init(&pkt, ent->hd.pkt_num, ent->pktlen, - rtb->pktns_id, ent->ts, ent->rst.lost, + pktns->id, ent->ts, ent->rst.lost, ent->rst.tx_in_flight, ent->rst.is_app_limited), ts); @@ -745,12 +735,13 @@ static void conn_verify_ecn(ngtcp2_conn *conn, ngtcp2_pktns *pktns, pktns->rx.ecn.ack.ect1 > fr->ecn.ect1 || pktns->rx.ecn.ack.ce > fr->ecn.ce || (fr->ecn.ect0 - pktns->rx.ecn.ack.ect0) + - (fr->ecn.ce - pktns->rx.ecn.ack.ce) < - ecn_acked || + (fr->ecn.ce - pktns->rx.ecn.ack.ce) < + ecn_acked || fr->ecn.ect0 > pktns->tx.ecn.ect0 || fr->ecn.ect1))) { ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, "path is not ECN capable"); conn->tx.ecn.state = NGTCP2_ECN_STATE_FAILED; + return; } @@ -762,7 +753,7 @@ static void conn_verify_ecn(ngtcp2_conn *conn, ngtcp2_pktns *pktns, if (fr->type == NGTCP2_FRAME_ACK_ECN) { if (cc->congestion_event && largest_pkt_sent_ts != UINT64_MAX && fr->ecn.ce > pktns->rx.ecn.ack.ce) { - cc->congestion_event(cc, cstat, largest_pkt_sent_ts, ts); + cc->congestion_event(cc, cstat, largest_pkt_sent_ts, 0, ts); } pktns->rx.ecn.ack.ect0 = fr->ecn.ect0; @@ -784,7 +775,7 @@ ngtcp2_ssize ngtcp2_rtb_recv_ack(ngtcp2_rtb *rtb, const ngtcp2_ack *fr, size_t i; int rv; ngtcp2_ksl_it it; - ngtcp2_ssize num_acked = 0; + size_t num_acked = 0; ngtcp2_tstamp largest_pkt_sent_ts = UINT64_MAX; int64_t pkt_num; ngtcp2_cc *cc = rtb->cc; @@ -820,6 +811,7 @@ ngtcp2_ssize ngtcp2_rtb_recv_ack(ngtcp2_rtb *rtb, const ngtcp2_ack *fr, conn_verify_ecn(conn, pktns, rtb->cc, cstat, fr, ecn_acked, largest_pkt_sent_ts, ts); } + return 0; } @@ -848,7 +840,7 @@ ngtcp2_ssize ngtcp2_rtb_recv_ack(ngtcp2_rtb *rtb, const ngtcp2_ack *fr, ++num_acked; } - for (i = 0; i < fr->rangecnt;) { + for (i = 0; i < fr->rangecnt; ++i) { largest_ack = min_ack - (int64_t)fr->ranges[i].gap - 2; min_ack = largest_ack - (int64_t)fr->ranges[i].len; @@ -862,6 +854,7 @@ ngtcp2_ssize ngtcp2_rtb_recv_ack(ngtcp2_rtb *rtb, const ngtcp2_ack *fr, if (pkt_num < min_ack) { break; } + ent = ngtcp2_ksl_it_get(&it); if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) { @@ -871,12 +864,11 @@ ngtcp2_ssize ngtcp2_rtb_recv_ack(ngtcp2_rtb *rtb, const ngtcp2_ack *fr, rtb_remove(rtb, &it, &acked_ent, ent, cstat); ++num_acked; } - - ++i; } if (largest_pkt_sent_ts != UINT64_MAX && ack_eliciting_pkt_acked) { - cc_ack.rtt = pkt_ts - largest_pkt_sent_ts; + cc_ack.rtt = + ngtcp2_max_uint64(pkt_ts - largest_pkt_sent_ts, NGTCP2_NANOSECONDS); rv = ngtcp2_conn_update_rtt(conn, cc_ack.rtt, fr->ack_delay_unscaled, ts); if (rv == 0 && cc->new_rtt_sample) { @@ -891,7 +883,7 @@ ngtcp2_ssize ngtcp2_rtb_recv_ack(ngtcp2_rtb *rtb, const ngtcp2_ack *fr, ++ecn_acked; } - rv = rtb_process_acked_pkt(rtb, ent, conn); + rv = process_acked_pkt(ent, conn, pktns); if (rv != 0) { goto fail; } @@ -903,7 +895,7 @@ ngtcp2_ssize ngtcp2_rtb_recv_ack(ngtcp2_rtb *rtb, const ngtcp2_ack *fr, cc_ack.pkt_delivered = ent->rst.delivered; } - rtb_on_pkt_acked(rtb, ent, cstat, ts); + rtb_on_pkt_acked(rtb, ent, cstat, pktns, ts); acked_ent = ent->next; ngtcp2_rtb_entry_objalloc_del(ent, rtb->rtb_entry_objalloc, rtb->frc_objalloc, rtb->mem); @@ -916,7 +908,7 @@ ngtcp2_ssize ngtcp2_rtb_recv_ack(ngtcp2_rtb *rtb, const ngtcp2_ack *fr, } else { /* For unit tests */ for (ent = acked_ent; ent; ent = acked_ent) { - rtb_on_pkt_acked(rtb, ent, cstat, ts); + rtb_on_pkt_acked(rtb, ent, cstat, pktns, ts); acked_ent = ent->next; ngtcp2_rtb_entry_objalloc_del(ent, rtb->rtb_entry_objalloc, rtb->frc_objalloc, rtb->mem); @@ -924,27 +916,29 @@ ngtcp2_ssize ngtcp2_rtb_recv_ack(ngtcp2_rtb *rtb, const ngtcp2_ack *fr, } if (rtb->cc->on_spurious_congestion && num_lost_pkts && - rtb->num_lost_pkts - rtb->num_lost_pmtud_pkts == 0) { + rtb->num_lost_pkts == rtb->num_lost_pmtud_pkts) { rtb->cc->on_spurious_congestion(cc, cstat, ts); } - ngtcp2_rst_on_ack_recv(rtb->rst, cstat, cc_ack.pkt_delivered); + if (num_acked) { + ngtcp2_rst_on_ack_recv(rtb->rst, cstat); - if (conn && num_acked > 0) { - rv = rtb_detect_lost_pkt(rtb, &cc_ack.bytes_lost, conn, pktns, cstat, ts); - if (rv != 0) { - return rv; + if (conn) { + rv = rtb_detect_lost_pkt(rtb, &cc_ack.bytes_lost, conn, pktns, cstat, ts); + if (rv != 0) { + return rv; + } } } rtb->rst->lost += cc_ack.bytes_lost; cc_ack.largest_pkt_sent_ts = largest_pkt_sent_ts; - if (cc->on_ack_recv) { + if (num_acked && cc->on_ack_recv) { cc->on_ack_recv(cc, cstat, &cc_ack, ts); } - return num_acked; + return (ngtcp2_ssize)num_acked; fail: for (ent = acked_ent; ent; ent = acked_ent) { @@ -958,7 +952,8 @@ ngtcp2_ssize ngtcp2_rtb_recv_ack(ngtcp2_rtb *rtb, const ngtcp2_ack *fr, static int rtb_pkt_lost(ngtcp2_rtb *rtb, ngtcp2_conn_stat *cstat, const ngtcp2_rtb_entry *ent, ngtcp2_duration loss_delay, - size_t pkt_thres, ngtcp2_tstamp ts) { + size_t pkt_thres, const ngtcp2_pktns *pktns, + ngtcp2_tstamp ts) { ngtcp2_tstamp loss_time; if (ngtcp2_tstamp_elapsed(ent->ts, loss_delay, ts) || @@ -966,27 +961,27 @@ static int rtb_pkt_lost(ngtcp2_rtb *rtb, ngtcp2_conn_stat *cstat, return 1; } - loss_time = cstat->loss_time[rtb->pktns_id]; + loss_time = cstat->loss_time[pktns->id]; if (loss_time == UINT64_MAX) { loss_time = ent->ts + loss_delay; } else { - loss_time = ngtcp2_min(loss_time, ent->ts + loss_delay); + loss_time = ngtcp2_min_uint64(loss_time, ent->ts + loss_delay); } - cstat->loss_time[rtb->pktns_id] = loss_time; + cstat->loss_time[pktns->id] = loss_time; return 0; } /* - * rtb_compute_pkt_loss_delay computes loss delay. + * compute_pkt_loss_delay computes loss delay. */ static ngtcp2_duration compute_pkt_loss_delay(const ngtcp2_conn_stat *cstat) { /* 9/8 is kTimeThreshold */ ngtcp2_duration loss_delay = - ngtcp2_max(cstat->latest_rtt, cstat->smoothed_rtt) * 9 / 8; - return ngtcp2_max(loss_delay, NGTCP2_GRANULARITY); + ngtcp2_max_uint64(cstat->latest_rtt, cstat->smoothed_rtt) * 9 / 8; + return ngtcp2_max_uint64(loss_delay, NGTCP2_GRANULARITY); } /* @@ -999,9 +994,9 @@ static int conn_all_ecn_pkt_lost(ngtcp2_conn *conn) { ngtcp2_pktns *pktns = &conn->pktns; return (!in_pktns || in_pktns->tx.ecn.validation_pkt_sent == - in_pktns->tx.ecn.validation_pkt_lost) && + in_pktns->tx.ecn.validation_pkt_lost) && (!hs_pktns || hs_pktns->tx.ecn.validation_pkt_sent == - hs_pktns->tx.ecn.validation_pkt_lost) && + hs_pktns->tx.ecn.validation_pkt_lost) && pktns->tx.ecn.validation_pkt_sent == pktns->tx.ecn.validation_pkt_lost; } @@ -1017,16 +1012,16 @@ static int rtb_detect_lost_pkt(ngtcp2_rtb *rtb, uint64_t *ppkt_lost, ngtcp2_cc *cc = rtb->cc; int rv; uint64_t pkt_thres = - rtb->cc_bytes_in_flight / cstat->max_tx_udp_payload_size / 2; + rtb->cc_bytes_in_flight / cstat->max_tx_udp_payload_size / 2; size_t ecn_pkt_lost = 0; ngtcp2_tstamp start_ts; ngtcp2_duration pto = ngtcp2_conn_compute_pto(conn, pktns); uint64_t bytes_lost = 0; ngtcp2_duration max_ack_delay; - pkt_thres = ngtcp2_max(pkt_thres, NGTCP2_PKT_THRESHOLD); - pkt_thres = ngtcp2_min(pkt_thres, 256); - cstat->loss_time[rtb->pktns_id] = UINT64_MAX; + pkt_thres = ngtcp2_max_uint64(pkt_thres, NGTCP2_PKT_THRESHOLD); + pkt_thres = ngtcp2_min_uint64(pkt_thres, 256); + cstat->loss_time[pktns->id] = UINT64_MAX; loss_delay = compute_pkt_loss_delay(cstat); it = ngtcp2_ksl_lower_bound(&rtb->ents, &rtb->largest_acked_tx_pkt_num); @@ -1037,25 +1032,27 @@ static int rtb_detect_lost_pkt(ngtcp2_rtb *rtb, uint64_t *ppkt_lost, break; } - if (rtb_pkt_lost(rtb, cstat, ent, loss_delay, (size_t)pkt_thres, ts)) { + if (rtb_pkt_lost(rtb, cstat, ent, loss_delay, (size_t)pkt_thres, pktns, + ts)) { /* All entries from ent are considered to be lost. */ latest_ts = oldest_ts = ent->ts; /* +1 to pick this packet for persistent congestion in the following loop. */ last_lost_pkt_num = ent->hd.pkt_num + 1; max_ack_delay = conn->remote.transport_params - ? conn->remote.transport_params->max_ack_delay - : 0; + ? conn->remote.transport_params->max_ack_delay + : 0; congestion_period = - (cstat->smoothed_rtt + - ngtcp2_max(4 * cstat->rttvar, NGTCP2_GRANULARITY) + max_ack_delay) * - NGTCP2_PERSISTENT_CONGESTION_THRESHOLD; + (cstat->smoothed_rtt + + ngtcp2_max_uint64(4 * cstat->rttvar, NGTCP2_GRANULARITY) + + max_ack_delay) * + NGTCP2_PERSISTENT_CONGESTION_THRESHOLD; - start_ts = ngtcp2_max(rtb->persistent_congestion_start_ts, - cstat->first_rtt_sample_ts); + start_ts = ngtcp2_max_uint64(rtb->persistent_congestion_start_ts, + cstat->first_rtt_sample_ts); - for (; !ngtcp2_ksl_it_end(&it);) { + for (; !ngtcp2_ksl_it_end(&it); ngtcp2_ksl_it_next(&it)) { ent = ngtcp2_ksl_it_get(&it); if (last_lost_pkt_num == ent->hd.pkt_num + 1 && ent->ts >= start_ts) { @@ -1066,12 +1063,12 @@ static int rtb_detect_lost_pkt(ngtcp2_rtb *rtb, uint64_t *ppkt_lost, } if ((ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED)) { - if (rtb->pktns_id != NGTCP2_PKTNS_ID_APPLICATION || + if (pktns->id != NGTCP2_PKTNS_ID_APPLICATION || last_lost_pkt_num == -1 || latest_ts - oldest_ts >= congestion_period) { break; } - ngtcp2_ksl_it_next(&it); + continue; } @@ -1081,7 +1078,7 @@ static int rtb_detect_lost_pkt(ngtcp2_rtb *rtb, uint64_t *ppkt_lost, } bytes_lost += rtb_on_remove(rtb, ent, cstat); - rv = rtb_on_pkt_lost(rtb, &it, ent, cstat, conn, pktns, ts); + rv = rtb_on_pkt_lost(rtb, ent, cstat, conn, pktns, ts); if (rv != 0) { return rv; } @@ -1098,13 +1095,16 @@ static int rtb_detect_lost_pkt(ngtcp2_rtb *rtb, uint64_t *ppkt_lost, if (conn->tx.ecn.validation_start_ts == UINT64_MAX) { break; } + if (ts - conn->tx.ecn.validation_start_ts < 3 * pto) { pktns->tx.ecn.validation_pkt_lost += ecn_pkt_lost; assert(pktns->tx.ecn.validation_pkt_sent >= pktns->tx.ecn.validation_pkt_lost); break; } + conn->tx.ecn.state = NGTCP2_ECN_STATE_UNKNOWN; + /* fall through */ case NGTCP2_ECN_STATE_UNKNOWN: pktns->tx.ecn.validation_pkt_lost += ecn_pkt_lost; @@ -1119,7 +1119,7 @@ static int rtb_detect_lost_pkt(ngtcp2_rtb *rtb, uint64_t *ppkt_lost, } if (cc->congestion_event) { - cc->congestion_event(cc, cstat, latest_ts, ts); + cc->congestion_event(cc, cstat, latest_ts, bytes_lost, ts); } loss_window = latest_ts - oldest_ts; @@ -1130,23 +1130,22 @@ static int rtb_detect_lost_pkt(ngtcp2_rtb *rtb, uint64_t *ppkt_lost, * persistent congestion there, then it is a lot easier to just * not enable it during handshake. */ - if (rtb->pktns_id == NGTCP2_PKTNS_ID_APPLICATION && loss_window > 0) { - if (loss_window >= congestion_period) { - ngtcp2_log_info(rtb->log, NGTCP2_LOG_EVENT_LDC, - "persistent congestion loss_window=%" PRIu64 - " congestion_period=%" PRIu64, - loss_window, congestion_period); - - /* Reset min_rtt, srtt, and rttvar here. Next new RTT - sample will be used to recalculate these values. */ - cstat->min_rtt = UINT64_MAX; - cstat->smoothed_rtt = conn->local.settings.initial_rtt; - cstat->rttvar = conn->local.settings.initial_rtt / 2; - cstat->first_rtt_sample_ts = UINT64_MAX; - - if (cc->on_persistent_congestion) { - cc->on_persistent_congestion(cc, cstat, ts); - } + if (pktns->id == NGTCP2_PKTNS_ID_APPLICATION && loss_window && + loss_window >= congestion_period) { + ngtcp2_log_info(rtb->log, NGTCP2_LOG_EVENT_LDC, + "persistent congestion loss_window=%" PRIu64 + " congestion_period=%" PRIu64, + loss_window, congestion_period); + + /* Reset min_rtt, srtt, and rttvar here. Next new RTT + sample will be used to recalculate these values. */ + cstat->min_rtt = UINT64_MAX; + cstat->smoothed_rtt = conn->local.settings.initial_rtt; + cstat->rttvar = conn->local.settings.initial_rtt / 2; + cstat->first_rtt_sample_ts = UINT64_MAX; + + if (cc->on_persistent_congestion) { + cc->on_persistent_congestion(cc, cstat, ts); } } @@ -1243,7 +1242,7 @@ void ngtcp2_rtb_remove_expired_lost_pkt(ngtcp2_rtb *rtb, ngtcp2_duration pto, } } -ngtcp2_tstamp ngtcp2_rtb_lost_pkt_ts(ngtcp2_rtb *rtb) { +ngtcp2_tstamp ngtcp2_rtb_lost_pkt_ts(const ngtcp2_rtb *rtb) { ngtcp2_ksl_it it; ngtcp2_rtb_entry *ent; @@ -1338,11 +1337,13 @@ static int rtb_on_pkt_lost_resched_move(ngtcp2_rtb *rtb, ngtcp2_conn *conn, ngtcp2_frame_chain_objalloc_del(frc, rtb->frc_objalloc, rtb->mem); break; } + rv = ngtcp2_strm_streamfrq_push(strm, frc); if (rv != 0) { ngtcp2_frame_chain_objalloc_del(frc, rtb->frc_objalloc, rtb->mem); return rv; } + if (!ngtcp2_strm_is_tx_queued(strm)) { strm->cycle = ngtcp2_conn_tx_strmq_first_cycle(conn); rv = ngtcp2_conn_tx_strmq_push(conn, strm); @@ -1350,6 +1351,7 @@ static int rtb_on_pkt_lost_resched_move(ngtcp2_rtb *rtb, ngtcp2_conn *conn, return rv; } } + break; case NGTCP2_FRAME_CRYPTO: frc = *pfrc; @@ -1363,6 +1365,7 @@ static int rtb_on_pkt_lost_resched_move(ngtcp2_rtb *rtb, ngtcp2_conn *conn, ngtcp2_frame_chain_objalloc_del(frc, rtb->frc_objalloc, rtb->mem); return rv; } + break; case NGTCP2_FRAME_DATAGRAM: case NGTCP2_FRAME_DATAGRAM_LEN: @@ -1379,6 +1382,7 @@ static int rtb_on_pkt_lost_resched_move(ngtcp2_rtb *rtb, ngtcp2_conn *conn, *pfrc = (*pfrc)->next; ngtcp2_frame_chain_objalloc_del(frc, rtb->frc_objalloc, rtb->mem); + break; default: pfrc = &(*pfrc)->next; @@ -1408,8 +1412,10 @@ int ngtcp2_rtb_remove_all(ngtcp2_rtb *rtb, ngtcp2_conn *conn, assert(0 == rv); rv = rtb_on_pkt_lost_resched_move(rtb, conn, pktns, ent); + ngtcp2_rtb_entry_objalloc_del(ent, rtb->rtb_entry_objalloc, rtb->frc_objalloc, rtb->mem); + if (rv != 0) { return rv; } @@ -1443,7 +1449,7 @@ void ngtcp2_rtb_remove_early_data(ngtcp2_rtb *rtb, ngtcp2_conn_stat *cstat) { } } -int ngtcp2_rtb_empty(ngtcp2_rtb *rtb) { +int ngtcp2_rtb_empty(const ngtcp2_rtb *rtb) { return ngtcp2_ksl_len(&rtb->ents) == 0; } @@ -1460,7 +1466,7 @@ ngtcp2_ssize ngtcp2_rtb_reclaim_on_pto(ngtcp2_rtb *rtb, ngtcp2_conn *conn, size_t atmost = num_pkts; it = ngtcp2_ksl_end(&rtb->ents); - for (; !ngtcp2_ksl_it_begin(&it) && num_pkts >= 1;) { + for (; !ngtcp2_ksl_it_begin(&it) && num_pkts;) { ngtcp2_ksl_it_prev(&it); ent = ngtcp2_ksl_it_get(&it); @@ -1473,13 +1479,13 @@ ngtcp2_ssize ngtcp2_rtb_reclaim_on_pto(ngtcp2_rtb *rtb, ngtcp2_conn *conn, assert(ent->frc); reclaimed = - rtb_reclaim_frame(rtb, NGTCP2_RECLAIM_FLAG_NONE, conn, pktns, ent); + rtb_reclaim_frame(rtb, NGTCP2_RECLAIM_FLAG_NONE, conn, pktns, ent); if (reclaimed < 0) { return reclaimed; } - /* Mark reclaimed even if reclaimed == 0 so that we can skip it in - the next run. */ + /* Mark ent reclaimed even if reclaimed == 0 so that we can skip + it in the next run. */ ent->flags |= NGTCP2_RTB_ENTRY_FLAG_PTO_RECLAIMED; assert(rtb->num_retransmittable); diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_rtb.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_rtb.h index a1ff208b19eac7..2ef772b2e14f4b 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_rtb.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_rtb.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -85,7 +85,7 @@ typedef struct ngtcp2_rtb_entry ngtcp2_rtb_entry; /* * ngtcp2_rtb_entry is an object stored in ngtcp2_rtb. It corresponds - * to the one packet which is waiting for its ACK. + * to the one packet which is waiting for its acknowledgement. */ struct ngtcp2_rtb_entry { union { @@ -98,10 +98,11 @@ struct ngtcp2_rtb_entry { uint8_t flags; } hd; ngtcp2_frame_chain *frc; - /* ts is the time point when a packet included in this entry is sent - to a peer. */ + /* ts is the time point when a packet included in this entry is + sent to a remote endpoint. */ ngtcp2_tstamp ts; - /* lost_ts is the time when this entry is marked lost. */ + /* lost_ts is the time when this entry is declared to be + lost. */ ngtcp2_tstamp lost_ts; /* pktlen is the length of QUIC packet */ size_t pktlen; @@ -111,6 +112,7 @@ struct ngtcp2_rtb_entry { ngtcp2_tstamp first_sent_ts; uint64_t tx_in_flight; uint64_t lost; + int64_t end_seq; int is_app_limited; } rst; /* flags is bitwise-OR of zero or more of @@ -122,11 +124,11 @@ struct ngtcp2_rtb_entry { }; }; -ngtcp2_objalloc_decl(rtb_entry, ngtcp2_rtb_entry, oplent); +ngtcp2_objalloc_decl(rtb_entry, ngtcp2_rtb_entry, oplent) /* - * ngtcp2_rtb_entry_new allocates ngtcp2_rtb_entry object, and assigns - * its pointer to |*pent|. + * ngtcp2_rtb_entry_objalloc_new allocates ngtcp2_rtb_entry object via + * |objalloc|, and assigns its pointer to |*pent|. */ int ngtcp2_rtb_entry_objalloc_new(ngtcp2_rtb_entry **pent, const ngtcp2_pkt_hd *hd, @@ -145,7 +147,7 @@ void ngtcp2_rtb_entry_objalloc_del(ngtcp2_rtb_entry *ent, const ngtcp2_mem *mem); /* - * ngtcp2_rtb tracks sent packets, and its ACK timeout for + * ngtcp2_rtb tracks sent packets, and its acknowledgement timeout for * retransmission. */ typedef struct ngtcp2_rtb { @@ -154,34 +156,33 @@ typedef struct ngtcp2_rtb { /* ents includes ngtcp2_rtb_entry sorted by decreasing order of packet number. */ ngtcp2_ksl ents; - /* crypto is CRYPTO stream. */ - ngtcp2_strm *crypto; ngtcp2_rst *rst; ngtcp2_cc *cc; ngtcp2_log *log; ngtcp2_qlog *qlog; const ngtcp2_mem *mem; /* largest_acked_tx_pkt_num is the largest packet number - acknowledged by the peer. */ + acknowledged by a remote endpoint. */ int64_t largest_acked_tx_pkt_num; - /* num_ack_eliciting is the number of ACK eliciting entries. */ + /* num_ack_eliciting is the number of ACK eliciting entries in + ents. */ size_t num_ack_eliciting; /* num_retransmittable is the number of packets which contain frames - that must be retransmitted on loss. */ + that must be retransmitted on loss in ents. */ size_t num_retransmittable; /* num_pto_eliciting is the number of packets that elicit PTO probe - packets. */ + packets in ents. */ size_t num_pto_eliciting; /* probe_pkt_left is the number of probe packet to send */ size_t probe_pkt_left; - /* pktns_id is the identifier of packet number space. */ - ngtcp2_pktns_id pktns_id; /* cc_pkt_num is the smallest packet number that is contributed to ngtcp2_conn_stat.bytes_in_flight. */ int64_t cc_pkt_num; /* cc_bytes_in_flight is the number of in-flight bytes that is contributed to ngtcp2_conn_stat.bytes_in_flight. It only - includes the bytes after congestion state is reset. */ + includes the bytes after congestion state is reset, that is only + count a packet whose packet number is greater than or equals to + cc_pkt_num. */ uint64_t cc_bytes_in_flight; /* persistent_congestion_start_ts is the time when persistent congestion evaluation is started. It happens roughly after @@ -199,8 +200,7 @@ typedef struct ngtcp2_rtb { /* * ngtcp2_rtb_init initializes |rtb|. */ -void ngtcp2_rtb_init(ngtcp2_rtb *rtb, ngtcp2_pktns_id pktns_id, - ngtcp2_strm *crypto, ngtcp2_rst *rst, ngtcp2_cc *cc, +void ngtcp2_rtb_init(ngtcp2_rtb *rtb, ngtcp2_rst *rst, ngtcp2_cc *cc, int64_t cc_pkt_num, ngtcp2_log *log, ngtcp2_qlog *qlog, ngtcp2_objalloc *rtb_entry_objalloc, ngtcp2_objalloc *frc_objalloc, const ngtcp2_mem *mem); @@ -227,13 +227,13 @@ int ngtcp2_rtb_add(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent, * which has the largest packet number. If there is no entry, * returned value satisfies ngtcp2_ksl_it_end(&it) != 0. */ -ngtcp2_ksl_it ngtcp2_rtb_head(ngtcp2_rtb *rtb); +ngtcp2_ksl_it ngtcp2_rtb_head(const ngtcp2_rtb *rtb); /* - * ngtcp2_rtb_recv_ack removes acked ngtcp2_rtb_entry from |rtb|. - * |pkt_num| is a packet number which includes |fr|. |pkt_ts| is the - * timestamp when packet is received. |ts| should be the current - * time. Usually they are the same, but for buffered packets, + * ngtcp2_rtb_recv_ack removes an acknowledged ngtcp2_rtb_entry from + * |rtb|. |pkt_num| is a packet number which includes |fr|. |pkt_ts| + * is the timestamp when packet is received. |ts| should be the + * current time. Usually they are the same, but for buffered packets, * |pkt_ts| would be earlier than |ts|. * * This function returns the number of newly acknowledged packets if @@ -252,7 +252,7 @@ ngtcp2_ssize ngtcp2_rtb_recv_ack(ngtcp2_rtb *rtb, const ngtcp2_ack *fr, /* * ngtcp2_rtb_detect_lost_pkt detects lost packets and prepends the * frames contained them to |*pfrc|. Even when this function fails, - * some frames might be prepended to |*pfrc| and the caller should + * some frames might be prepended to |*pfrc|, and the caller should * handle them. */ int ngtcp2_rtb_detect_lost_pkt(ngtcp2_rtb *rtb, ngtcp2_conn *conn, @@ -266,16 +266,16 @@ void ngtcp2_rtb_remove_expired_lost_pkt(ngtcp2_rtb *rtb, ngtcp2_duration pto, ngtcp2_tstamp ts); /* - * ngtcp2_rtb_lost_pkt_ts returns the earliest time when the still - * retained packet was lost. It returns UINT64_MAX if no such packet - * exists. + * ngtcp2_rtb_lost_pkt_ts returns the timestamp when an oldest lost + * packet tracked by |rtb| was declared lost. It returns UINT64_MAX + * if no such packet exists. */ -ngtcp2_tstamp ngtcp2_rtb_lost_pkt_ts(ngtcp2_rtb *rtb); +ngtcp2_tstamp ngtcp2_rtb_lost_pkt_ts(const ngtcp2_rtb *rtb); /* - * ngtcp2_rtb_remove_all removes all packets from |rtb| and prepends + * ngtcp2_rtb_remove_all removes all packets from |rtb|, and prepends * all frames to |*pfrc|. Even when this function fails, some frames - * might be prepended to |*pfrc| and the caller should handle them. + * might be prepended to |*pfrc|, and the caller should handle them. */ int ngtcp2_rtb_remove_all(ngtcp2_rtb *rtb, ngtcp2_conn *conn, ngtcp2_pktns *pktns, ngtcp2_conn_stat *cstat); @@ -286,9 +286,9 @@ int ngtcp2_rtb_remove_all(ngtcp2_rtb *rtb, ngtcp2_conn *conn, void ngtcp2_rtb_remove_early_data(ngtcp2_rtb *rtb, ngtcp2_conn_stat *cstat); /* - * ngtcp2_rtb_empty returns nonzero if |rtb| have no entry. + * ngtcp2_rtb_empty returns nonzero if |rtb| has no entry. */ -int ngtcp2_rtb_empty(ngtcp2_rtb *rtb); +int ngtcp2_rtb_empty(const ngtcp2_rtb *rtb); /* * ngtcp2_rtb_reset_cc_state resets congestion state in |rtb|. @@ -298,15 +298,15 @@ int ngtcp2_rtb_empty(ngtcp2_rtb *rtb); void ngtcp2_rtb_reset_cc_state(ngtcp2_rtb *rtb, int64_t cc_pkt_num); /* - * ngtcp2_rtb_remove_expired_lost_pkt ensures that the number of lost - * packets at most |n|. + * ngtcp2_rtb_remove_excessive_lost_pkt ensures that the number of + * lost packets is at most |n|. */ void ngtcp2_rtb_remove_excessive_lost_pkt(ngtcp2_rtb *rtb, size_t n); /* * ngtcp2_rtb_reclaim_on_pto reclaims up to |num_pkts| packets which - * are in-flight and not marked lost to send them in PTO probe. The - * reclaimed frames are chained to |*pfrc|. + * are in-flight and not marked lost. The reclaimed frames may be + * sent in a PTO probe packet. * * This function returns the number of packets reclaimed if it * succeeds, or one of the following negative error codes: @@ -317,4 +317,4 @@ void ngtcp2_rtb_remove_excessive_lost_pkt(ngtcp2_rtb *rtb, size_t n); ngtcp2_ssize ngtcp2_rtb_reclaim_on_pto(ngtcp2_rtb *rtb, ngtcp2_conn *conn, ngtcp2_pktns *pktns, size_t num_pkts); -#endif /* NGTCP2_RTB_H */ +#endif /* !defined(NGTCP2_RTB_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_settings.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_settings.c new file mode 100644 index 00000000000000..77a68bd112e3b2 --- /dev/null +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_settings.c @@ -0,0 +1,91 @@ +/* + * ngtcp2 + * + * Copyright (c) 2024 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_settings.h" + +#include +#include + +#include "ngtcp2_unreachable.h" + +void ngtcp2_settings_default_versioned(int settings_version, + ngtcp2_settings *settings) { + size_t len = ngtcp2_settingslen_version(settings_version); + + memset(settings, 0, len); + + switch (settings_version) { + case NGTCP2_SETTINGS_VERSION: + case NGTCP2_SETTINGS_V1: + settings->cc_algo = NGTCP2_CC_ALGO_CUBIC; + settings->initial_rtt = NGTCP2_DEFAULT_INITIAL_RTT; + settings->ack_thresh = 2; + settings->max_tx_udp_payload_size = 1500 - 48; + settings->handshake_timeout = UINT64_MAX; + + break; + } +} + +static void settings_copy(ngtcp2_settings *dest, const ngtcp2_settings *src, + int settings_version) { + assert(settings_version != NGTCP2_SETTINGS_VERSION); + + memcpy(dest, src, ngtcp2_settingslen_version(settings_version)); +} + +const ngtcp2_settings * +ngtcp2_settings_convert_to_latest(ngtcp2_settings *dest, int settings_version, + const ngtcp2_settings *src) { + if (settings_version == NGTCP2_SETTINGS_VERSION) { + return src; + } + + ngtcp2_settings_default(dest); + + settings_copy(dest, src, settings_version); + + return dest; +} + +void ngtcp2_settings_convert_to_old(int settings_version, ngtcp2_settings *dest, + const ngtcp2_settings *src) { + assert(settings_version != NGTCP2_SETTINGS_VERSION); + + settings_copy(dest, src, settings_version); +} + +size_t ngtcp2_settingslen_version(int settings_version) { + ngtcp2_settings settings; + + switch (settings_version) { + case NGTCP2_SETTINGS_VERSION: + return sizeof(settings); + case NGTCP2_SETTINGS_V1: + return offsetof(ngtcp2_settings, initial_pkt_num) + + sizeof(settings.initial_pkt_num); + default: + ngtcp2_unreachable(); + } +} diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_settings.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_settings.h new file mode 100644 index 00000000000000..80466d43e47a7a --- /dev/null +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_settings.h @@ -0,0 +1,73 @@ +/* + * ngtcp2 + * + * Copyright (c) 2024 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_SETTINGS_H +#define NGTCP2_SETTINGS_H + +#ifdef HAVE_CONFIG_H +# include +#endif /* defined(HAVE_CONFIG_H) */ + +#include + +/* + * ngtcp2_settings_convert_to_latest converts |src| of version + * |settings_version| to the latest version NGTCP2_SETTINGS_VERSION. + * + * |dest| must point to the latest version. |src| may be the older + * version, and if so, it may have fewer fields. Accessing those + * fields causes undefined behavior. + * + * If |settings_version| == NGTCP2_SETTINGS_VERSION, no conversion is + * made, and |src| is returned. Otherwise, first |dest| is + * initialized via ngtcp2_settings_default, and then all valid fields + * in |src| are copied into |dest|. Finally, |dest| is returned. + */ +const ngtcp2_settings * +ngtcp2_settings_convert_to_latest(ngtcp2_settings *dest, int settings_version, + const ngtcp2_settings *src); + +/* + * ngtcp2_settings_convert_to_old converts |src| of the latest version + * to |dest| of version |settings_version|. + * + * |settings_version| must not be the latest version + * NGTCP2_SETTINGS_VERSION. + * + * |dest| points to the older version, and it may have fewer fields. + * Accessing those fields causes undefined behavior. + * + * This function copies all valid fields in version |settings_version| + * from |src| to |dest|. + */ +void ngtcp2_settings_convert_to_old(int settings_version, ngtcp2_settings *dest, + const ngtcp2_settings *src); + +/* + * ngtcp2_settingslen_version returns the effective length of + * ngtcp2_settings at the version |settings_version|. + */ +size_t ngtcp2_settingslen_version(int settings_version); + +#endif /* !defined(NGTCP2_SETTINGS_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_str.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_str.h index deb75e356d70d4..f970c153e805a8 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_str.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_str.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -91,4 +91,4 @@ char *ngtcp2_encode_printable_ascii(char *dest, const uint8_t *data, */ int ngtcp2_cmemeq(const uint8_t *a, const uint8_t *b, size_t n); -#endif /* NGTCP2_STR_H */ +#endif /* !defined(NGTCP2_STR_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_strm.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_strm.c index c00e86fa8c1afa..8ea969c4addbdc 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_strm.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_strm.c @@ -32,16 +32,13 @@ #include "ngtcp2_vec.h" #include "ngtcp2_frame_chain.h" -static int offset_less(const ngtcp2_ksl_key *lhs, const ngtcp2_ksl_key *rhs) { - return *(int64_t *)lhs < *(int64_t *)rhs; -} - void ngtcp2_strm_init(ngtcp2_strm *strm, int64_t stream_id, uint32_t flags, uint64_t max_rx_offset, uint64_t max_tx_offset, void *stream_user_data, ngtcp2_objalloc *frc_objalloc, const ngtcp2_mem *mem) { - strm->frc_objalloc = frc_objalloc; + strm->pe.index = NGTCP2_PQ_BAD_INDEX; strm->cycle = 0; + strm->frc_objalloc = frc_objalloc; strm->tx.acked_offset = NULL; strm->tx.cont_acked_offset = 0; strm->tx.streamfrq = NULL; @@ -56,13 +53,12 @@ void ngtcp2_strm_init(ngtcp2_strm *strm, int64_t stream_id, uint32_t flags, strm->rx.rob = NULL; strm->rx.cont_offset = 0; strm->rx.last_offset = 0; + strm->rx.max_offset = strm->rx.unsent_max_offset = strm->rx.window = + max_rx_offset; + strm->mem = mem; strm->stream_id = stream_id; - strm->flags = flags; strm->stream_user_data = stream_user_data; - strm->rx.window = strm->rx.max_offset = strm->rx.unsent_max_offset = - max_rx_offset; - strm->pe.index = NGTCP2_PQ_BAD_INDEX; - strm->mem = mem; + strm->flags = flags; strm->app_error_code = 0; } @@ -114,7 +110,7 @@ static int strm_rob_init(ngtcp2_strm *strm) { return 0; } -uint64_t ngtcp2_strm_rx_offset(ngtcp2_strm *strm) { +uint64_t ngtcp2_strm_rx_offset(const ngtcp2_strm *strm) { if (strm->rx.rob == NULL) { return strm->rx.cont_offset; } @@ -123,7 +119,7 @@ uint64_t ngtcp2_strm_rx_offset(ngtcp2_strm *strm) { /* strm_rob_heavily_fragmented returns nonzero if the number of gaps in |rob| exceeds the limit. */ -static int strm_rob_heavily_fragmented(ngtcp2_rob *rob) { +static int strm_rob_heavily_fragmented(const ngtcp2_rob *rob) { return ngtcp2_ksl_len(&rob->gapksl) >= 5000; } @@ -180,7 +176,8 @@ static int strm_streamfrq_init(ngtcp2_strm *strm) { return NGTCP2_ERR_NOMEM; } - ngtcp2_ksl_init(streamfrq, offset_less, sizeof(uint64_t), strm->mem); + ngtcp2_ksl_init(streamfrq, ngtcp2_ksl_uint64_less, + ngtcp2_ksl_uint64_less_search, sizeof(uint64_t), strm->mem); strm->tx.streamfrq = streamfrq; @@ -271,6 +268,7 @@ static int strm_streamfrq_unacked_pop(ngtcp2_strm *strm, } ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + continue; } @@ -306,11 +304,12 @@ static int strm_streamfrq_unacked_pop(ngtcp2_strm *strm, fr->data[0].len -= (size_t)base_offset; *pfrc = frc; + return 0; } rv = ngtcp2_frame_chain_stream_datacnt_objalloc_new( - &nfrc, fr->datacnt - end_idx, strm->frc_objalloc, strm->mem); + &nfrc, fr->datacnt - end_idx, strm->frc_objalloc, strm->mem); if (rv != 0) { ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); return rv; @@ -336,6 +335,7 @@ static int strm_streamfrq_unacked_pop(ngtcp2_strm *strm, assert(ngtcp2_err_is_fatal(rv)); ngtcp2_frame_chain_objalloc_del(nfrc, strm->frc_objalloc, strm->mem); ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + return rv; } @@ -350,14 +350,17 @@ static int strm_streamfrq_unacked_pop(ngtcp2_strm *strm, fr->fin = 0; fr->offset = offset + base_offset; fr->datacnt = end_idx - idx; + if (end_base_offset) { assert(fr->data[fr->datacnt - 1].len > end_base_offset); fr->data[fr->datacnt - 1].len = (size_t)end_base_offset; } + fr->data[0].base += base_offset; fr->data[0].len -= (size_t)base_offset; *pfrc = frc; + return 0; } @@ -367,7 +370,7 @@ static int strm_streamfrq_unacked_pop(ngtcp2_strm *strm, int ngtcp2_strm_streamfrq_pop(ngtcp2_strm *strm, ngtcp2_frame_chain **pfrc, size_t left) { ngtcp2_stream *fr, *nfr; - ngtcp2_frame_chain *frc, *nfrc; + ngtcp2_frame_chain *frc, *nfrc, *sfrc; int rv; size_t nmerged; uint64_t datalen; @@ -385,6 +388,7 @@ int ngtcp2_strm_streamfrq_pop(ngtcp2_strm *strm, ngtcp2_frame_chain **pfrc, if (rv != 0) { return rv; } + if (frc == NULL) { *pfrc = NULL; return 0; @@ -401,7 +405,9 @@ int ngtcp2_strm_streamfrq_pop(ngtcp2_strm *strm, ngtcp2_frame_chain **pfrc, ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); return rv; } + *pfrc = NULL; + return 0; } @@ -410,13 +416,13 @@ int ngtcp2_strm_streamfrq_pop(ngtcp2_strm *strm, ngtcp2_frame_chain **pfrc, acnt = fr->datacnt; bcnt = 0; - ngtcp2_vec_split(a, &acnt, b, &bcnt, left, NGTCP2_MAX_STREAM_DATACNT); + ngtcp2_vec_split(b, &bcnt, a, &acnt, left, NGTCP2_MAX_STREAM_DATACNT); assert(acnt > 0); assert(bcnt > 0); rv = ngtcp2_frame_chain_stream_datacnt_objalloc_new( - &nfrc, bcnt, strm->frc_objalloc, strm->mem); + &nfrc, bcnt, strm->frc_objalloc, strm->mem); if (rv != 0) { assert(ngtcp2_err_is_fatal(rv)); ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); @@ -437,11 +443,12 @@ int ngtcp2_strm_streamfrq_pop(ngtcp2_strm *strm, ngtcp2_frame_chain **pfrc, assert(ngtcp2_err_is_fatal(rv)); ngtcp2_frame_chain_objalloc_del(nfrc, strm->frc_objalloc, strm->mem); ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + return rv; } rv = ngtcp2_frame_chain_stream_datacnt_objalloc_new( - &nfrc, acnt, strm->frc_objalloc, strm->mem); + &nfrc, acnt, strm->frc_objalloc, strm->mem); if (rv != 0) { assert(ngtcp2_err_is_fatal(rv)); ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); @@ -491,7 +498,9 @@ int ngtcp2_strm_streamfrq_pop(ngtcp2_strm *strm, ngtcp2_frame_chain **pfrc, break; } - nmerged = ngtcp2_vec_merge(a, &acnt, nfr->data, &nfr->datacnt, left, + bcnt = nfr->datacnt; + + nmerged = ngtcp2_vec_merge(a, &acnt, nfr->data, &bcnt, left, NGTCP2_MAX_STREAM_DATACNT); if (nmerged == 0) { rv = ngtcp2_ksl_insert(strm->tx.streamfrq, NULL, &nfr->offset, nfrc); @@ -499,27 +508,56 @@ int ngtcp2_strm_streamfrq_pop(ngtcp2_strm *strm, ngtcp2_frame_chain **pfrc, assert(ngtcp2_err_is_fatal(rv)); ngtcp2_frame_chain_objalloc_del(nfrc, strm->frc_objalloc, strm->mem); ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + return rv; } + break; } datalen += nmerged; left -= nmerged; - if (nfr->datacnt == 0) { + if (bcnt == 0) { fr->fin = nfr->fin; ngtcp2_frame_chain_objalloc_del(nfrc, strm->frc_objalloc, strm->mem); continue; } - nfr->offset += nmerged; + if (nfr->datacnt <= NGTCP2_FRAME_CHAIN_STREAM_DATACNT_THRES || + bcnt > NGTCP2_FRAME_CHAIN_STREAM_DATACNT_THRES) { + nfr->offset += nmerged; + nfr->datacnt = bcnt; + + rv = ngtcp2_ksl_insert(strm->tx.streamfrq, NULL, &nfr->offset, nfrc); + if (rv != 0) { + ngtcp2_frame_chain_objalloc_del(nfrc, strm->frc_objalloc, strm->mem); + ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + return rv; + } + } else { + rv = ngtcp2_frame_chain_stream_datacnt_objalloc_new( + &sfrc, bcnt, strm->frc_objalloc, strm->mem); + if (rv != 0) { + ngtcp2_frame_chain_objalloc_del(nfrc, strm->frc_objalloc, strm->mem); + ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + return rv; + } + + sfrc->fr.stream = nfrc->fr.stream; + sfrc->fr.stream.offset += nmerged; + sfrc->fr.stream.datacnt = bcnt; + ngtcp2_vec_copy(sfrc->fr.stream.data, nfrc->fr.stream.data, bcnt); - rv = ngtcp2_ksl_insert(strm->tx.streamfrq, NULL, &nfr->offset, nfrc); - if (rv != 0) { ngtcp2_frame_chain_objalloc_del(nfrc, strm->frc_objalloc, strm->mem); - ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); - return rv; + + rv = ngtcp2_ksl_insert(strm->tx.streamfrq, NULL, &sfrc->fr.stream.offset, + sfrc); + if (rv != 0) { + ngtcp2_frame_chain_objalloc_del(sfrc, strm->frc_objalloc, strm->mem); + ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + return rv; + } } break; @@ -531,13 +569,14 @@ int ngtcp2_strm_streamfrq_pop(ngtcp2_strm *strm, ngtcp2_frame_chain **pfrc, } *pfrc = frc; + return 0; } assert(acnt > fr->datacnt); rv = ngtcp2_frame_chain_stream_datacnt_objalloc_new( - &nfrc, acnt, strm->frc_objalloc, strm->mem); + &nfrc, acnt, strm->frc_objalloc, strm->mem); if (rv != 0) { ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); return rv; @@ -555,7 +594,7 @@ int ngtcp2_strm_streamfrq_pop(ngtcp2_strm *strm, ngtcp2_frame_chain **pfrc, return 0; } -uint64_t ngtcp2_strm_streamfrq_unacked_offset(ngtcp2_strm *strm) { +uint64_t ngtcp2_strm_streamfrq_unacked_offset(const ngtcp2_strm *strm) { ngtcp2_frame_chain *frc; ngtcp2_stream *fr; ngtcp2_range gap; @@ -577,9 +616,11 @@ uint64_t ngtcp2_strm_streamfrq_unacked_offset(ngtcp2_strm *strm) { if (gap.begin <= fr->offset) { return fr->offset; } + if (gap.begin < fr->offset + datalen) { return gap.begin; } + if (fr->offset + datalen == gap.begin && fr->fin && !(strm->flags & NGTCP2_STRM_FLAG_FIN_ACKED)) { return fr->offset + datalen; @@ -589,17 +630,18 @@ uint64_t ngtcp2_strm_streamfrq_unacked_offset(ngtcp2_strm *strm) { return (uint64_t)-1; } -ngtcp2_frame_chain *ngtcp2_strm_streamfrq_top(ngtcp2_strm *strm) { +ngtcp2_frame_chain *ngtcp2_strm_streamfrq_top(const ngtcp2_strm *strm) { ngtcp2_ksl_it it; assert(strm->tx.streamfrq); assert(ngtcp2_ksl_len(strm->tx.streamfrq)); it = ngtcp2_ksl_begin(strm->tx.streamfrq); + return ngtcp2_ksl_it_get(&it); } -int ngtcp2_strm_streamfrq_empty(ngtcp2_strm *strm) { +int ngtcp2_strm_streamfrq_empty(const ngtcp2_strm *strm) { return strm->tx.streamfrq == NULL || ngtcp2_ksl_len(strm->tx.streamfrq) == 0; } @@ -616,14 +658,15 @@ void ngtcp2_strm_streamfrq_clear(ngtcp2_strm *strm) { frc = ngtcp2_ksl_it_get(&it); ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); } + ngtcp2_ksl_clear(strm->tx.streamfrq); } -int ngtcp2_strm_is_tx_queued(ngtcp2_strm *strm) { +int ngtcp2_strm_is_tx_queued(const ngtcp2_strm *strm) { return strm->pe.index != NGTCP2_PQ_BAD_INDEX; } -int ngtcp2_strm_is_all_tx_data_acked(ngtcp2_strm *strm) { +int ngtcp2_strm_is_all_tx_data_acked(const ngtcp2_strm *strm) { if (strm->tx.acked_offset == NULL) { return strm->tx.cont_acked_offset == strm->tx.offset; } @@ -632,12 +675,12 @@ int ngtcp2_strm_is_all_tx_data_acked(ngtcp2_strm *strm) { strm->tx.offset; } -int ngtcp2_strm_is_all_tx_data_fin_acked(ngtcp2_strm *strm) { +int ngtcp2_strm_is_all_tx_data_fin_acked(const ngtcp2_strm *strm) { return (strm->flags & NGTCP2_STRM_FLAG_FIN_ACKED) && ngtcp2_strm_is_all_tx_data_acked(strm); } -ngtcp2_range ngtcp2_strm_get_unacked_range_after(ngtcp2_strm *strm, +ngtcp2_range ngtcp2_strm_get_unacked_range_after(const ngtcp2_strm *strm, uint64_t offset) { ngtcp2_range gap; @@ -650,7 +693,7 @@ ngtcp2_range ngtcp2_strm_get_unacked_range_after(ngtcp2_strm *strm, return ngtcp2_gaptr_get_first_gap_after(strm->tx.acked_offset, offset); } -uint64_t ngtcp2_strm_get_acked_offset(ngtcp2_strm *strm) { +uint64_t ngtcp2_strm_get_acked_offset(const ngtcp2_strm *strm) { if (strm->tx.acked_offset == NULL) { return strm->tx.cont_acked_offset; } @@ -660,7 +703,7 @@ uint64_t ngtcp2_strm_get_acked_offset(ngtcp2_strm *strm) { static int strm_acked_offset_init(ngtcp2_strm *strm) { ngtcp2_gaptr *acked_offset = - ngtcp2_mem_malloc(strm->mem, sizeof(*acked_offset)); + ngtcp2_mem_malloc(strm->mem, sizeof(*acked_offset)); if (acked_offset == NULL) { return NGTCP2_ERR_NOMEM; @@ -688,7 +731,7 @@ int ngtcp2_strm_ack_data(ngtcp2_strm *strm, uint64_t offset, uint64_t len) { } rv = - ngtcp2_gaptr_push(strm->tx.acked_offset, 0, strm->tx.cont_acked_offset); + ngtcp2_gaptr_push(strm->tx.acked_offset, 0, strm->tx.cont_acked_offset); if (rv != 0) { return rv; } @@ -709,24 +752,24 @@ void ngtcp2_strm_set_app_error_code(ngtcp2_strm *strm, strm->app_error_code = app_error_code; } -int ngtcp2_strm_require_retransmit_reset_stream(ngtcp2_strm *strm) { +int ngtcp2_strm_require_retransmit_reset_stream(const ngtcp2_strm *strm) { return !ngtcp2_strm_is_all_tx_data_fin_acked(strm); } -int ngtcp2_strm_require_retransmit_stop_sending(ngtcp2_strm *strm) { +int ngtcp2_strm_require_retransmit_stop_sending(const ngtcp2_strm *strm) { return !(strm->flags & NGTCP2_STRM_FLAG_SHUT_RD) || ngtcp2_strm_rx_offset(strm) != strm->rx.last_offset; } -int ngtcp2_strm_require_retransmit_max_stream_data(ngtcp2_strm *strm, - ngtcp2_max_stream_data *fr) { +int ngtcp2_strm_require_retransmit_max_stream_data( + const ngtcp2_strm *strm, const ngtcp2_max_stream_data *fr) { return fr->max_stream_data == strm->rx.max_offset && !(strm->flags & (NGTCP2_STRM_FLAG_SHUT_RD | NGTCP2_STRM_FLAG_STOP_SENDING)); } int ngtcp2_strm_require_retransmit_stream_data_blocked( - ngtcp2_strm *strm, ngtcp2_stream_data_blocked *fr) { + const ngtcp2_strm *strm, const ngtcp2_stream_data_blocked *fr) { return fr->offset == strm->tx.max_offset && !(strm->flags & NGTCP2_STRM_FLAG_SHUT_WR); } diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_strm.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_strm.h index 385302a5eafa9f..c72f8b9dc89aca 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_strm.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_strm.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -86,6 +86,9 @@ typedef struct ngtcp2_frame_chain ngtcp2_frame_chain; received from the remote endpoint. In this case, NGTCP2_STRM_FLAG_SHUT_WR is also set. */ #define NGTCP2_STRM_FLAG_STOP_SENDING_RECVED 0x800u +/* NGTCP2_STRM_FLAG_ANY_SENT indicates that any STREAM frame, + including empty one, has been sent. */ +#define NGTCP2_STRM_FLAG_ANY_SENT 0x1000u typedef struct ngtcp2_strm ngtcp2_strm; @@ -105,13 +108,13 @@ struct ngtcp2_strm { acked_offset is used instead. */ uint64_t cont_acked_offset; /* streamfrq contains STREAM or CRYPTO frame for - retransmission. The flow control credits have been paid - when they are transmitted first time. There are no + retransmission. The flow control credits have already been + paid when they are transmitted first time. There are no restriction regarding flow control for retransmission. */ ngtcp2_ksl *streamfrq; - /* offset is the next offset of outgoing data. In other words, it - is the number of bytes sent in this stream without - duplication. */ + /* offset is the next offset of new outgoing data. In other + words, it is the number of bytes sent in this stream + without duplication. */ uint64_t offset; /* max_tx_offset is the maximum offset that local endpoint can send for this stream. */ @@ -200,7 +203,7 @@ void ngtcp2_strm_free(ngtcp2_strm *strm); * ngtcp2_strm_rx_offset returns the minimum offset of stream data * which is not received yet. */ -uint64_t ngtcp2_strm_rx_offset(ngtcp2_strm *strm); +uint64_t ngtcp2_strm_rx_offset(const ngtcp2_strm *strm); /* * ngtcp2_strm_recv_reordering handles reordered data. @@ -215,8 +218,8 @@ int ngtcp2_strm_recv_reordering(ngtcp2_strm *strm, const uint8_t *data, size_t datalen, uint64_t offset); /* - * ngtcp2_strm_update_rx_offset tells that data up to offset bytes are - * received in order. + * ngtcp2_strm_update_rx_offset tells that data up to |offset| bytes + * are received in order. */ void ngtcp2_strm_update_rx_offset(ngtcp2_strm *strm, uint64_t offset); @@ -227,13 +230,14 @@ void ngtcp2_strm_update_rx_offset(ngtcp2_strm *strm, uint64_t offset); void ngtcp2_strm_discard_reordered_data(ngtcp2_strm *strm); /* - * ngtcp2_strm_shutdown shutdowns |strm|. |flags| should be - * NGTCP2_STRM_FLAG_SHUT_RD, and/or NGTCP2_STRM_FLAG_SHUT_WR. + * ngtcp2_strm_shutdown shutdowns |strm|. |flags| should be one of + * NGTCP2_STRM_FLAG_SHUT_RD, NGTCP2_STRM_FLAG_SHUT_WR, and + * NGTCP2_STRM_FLAG_SHUT_RDWR. */ void ngtcp2_strm_shutdown(ngtcp2_strm *strm, uint32_t flags); /* - * ngtcp2_strm_streamfrq_push pushes |frc| to streamfrq for + * ngtcp2_strm_streamfrq_push pushes |frc| to strm->tx.streamfrq for * retransmission. * * This function returns 0 if it succeeds, or one of the following @@ -245,11 +249,12 @@ void ngtcp2_strm_shutdown(ngtcp2_strm *strm, uint32_t flags); int ngtcp2_strm_streamfrq_push(ngtcp2_strm *strm, ngtcp2_frame_chain *frc); /* - * ngtcp2_strm_streamfrq_pop pops the first ngtcp2_frame_chain and - * assigns it to |*pfrc|. This function splits into or merges several - * ngtcp2_frame_chain objects so that the returned ngtcp2_frame_chain - * has at most |left| data length. If there is no frames to send, - * this function returns 0 and |*pfrc| is NULL. + * ngtcp2_strm_streamfrq_pop assigns a ngtcp2_frame_chain that only + * contains unacknowledged stream data with smallest offset to |*pfrc| + * for retransmission. The assigned ngtcp2_frame_chain has stream + * data at most |left| bytes. strm->tx.streamfrq is adjusted to + * exclude the portion of data included in it. If there is no stream + * data to send, this function returns 0 and |*pfrc| is NULL. * * This function returns 0 if it succeeds, or one of the following * negative error codes: @@ -264,18 +269,18 @@ int ngtcp2_strm_streamfrq_pop(ngtcp2_strm *strm, ngtcp2_frame_chain **pfrc, * ngtcp2_strm_streamfrq_unacked_offset returns the smallest offset of * unacknowledged stream data held in strm->tx.streamfrq. */ -uint64_t ngtcp2_strm_streamfrq_unacked_offset(ngtcp2_strm *strm); +uint64_t ngtcp2_strm_streamfrq_unacked_offset(const ngtcp2_strm *strm); /* * ngtcp2_strm_streamfrq_top returns the first ngtcp2_frame_chain. * The queue must not be empty. */ -ngtcp2_frame_chain *ngtcp2_strm_streamfrq_top(ngtcp2_strm *strm); +ngtcp2_frame_chain *ngtcp2_strm_streamfrq_top(const ngtcp2_strm *strm); /* * ngtcp2_strm_streamfrq_empty returns nonzero if streamfrq is empty. */ -int ngtcp2_strm_streamfrq_empty(ngtcp2_strm *strm); +int ngtcp2_strm_streamfrq_empty(const ngtcp2_strm *strm); /* * ngtcp2_strm_streamfrq_clear removes all frames from streamfrq. @@ -285,26 +290,26 @@ void ngtcp2_strm_streamfrq_clear(ngtcp2_strm *strm); /* * ngtcp2_strm_is_tx_queued returns nonzero if |strm| is queued. */ -int ngtcp2_strm_is_tx_queued(ngtcp2_strm *strm); +int ngtcp2_strm_is_tx_queued(const ngtcp2_strm *strm); /* * ngtcp2_strm_is_all_tx_data_acked returns nonzero if all outgoing * data for |strm| which have sent so far have been acknowledged. */ -int ngtcp2_strm_is_all_tx_data_acked(ngtcp2_strm *strm); +int ngtcp2_strm_is_all_tx_data_acked(const ngtcp2_strm *strm); /* * ngtcp2_strm_is_all_tx_data_fin_acked behaves like * ngtcp2_strm_is_all_tx_data_acked, but it also requires that STREAM * frame with fin bit set is acknowledged. */ -int ngtcp2_strm_is_all_tx_data_fin_acked(ngtcp2_strm *strm); +int ngtcp2_strm_is_all_tx_data_fin_acked(const ngtcp2_strm *strm); /* * ngtcp2_strm_get_unacked_range_after returns the range that is not - * acknowledged yet and intersects or comes after |offset|. + * acknowledged yet and includes or comes after |offset|. */ -ngtcp2_range ngtcp2_strm_get_unacked_range_after(ngtcp2_strm *strm, +ngtcp2_range ngtcp2_strm_get_unacked_range_after(const ngtcp2_strm *strm, uint64_t offset); /* @@ -312,11 +317,11 @@ ngtcp2_range ngtcp2_strm_get_unacked_range_after(ngtcp2_strm *strm, * this offset have been acknowledged by a remote endpoint. It * returns 0 if no data is acknowledged. */ -uint64_t ngtcp2_strm_get_acked_offset(ngtcp2_strm *strm); +uint64_t ngtcp2_strm_get_acked_offset(const ngtcp2_strm *strm); /* - * ngtcp2_strm_ack_data tells |strm| that the data [offset, - * offset+len) is acknowledged by a remote endpoint. + * ngtcp2_strm_ack_data tells |strm| that the data [|offset|, |offset| + * + |len|) is acknowledged by a remote endpoint. */ int ngtcp2_strm_ack_data(ngtcp2_strm *strm, uint64_t offset, uint64_t len); @@ -331,26 +336,26 @@ void ngtcp2_strm_set_app_error_code(ngtcp2_strm *strm, uint64_t app_error_code); * ngtcp2_strm_require_retransmit_reset_stream returns nonzero if * RESET_STREAM frame should be retransmitted. */ -int ngtcp2_strm_require_retransmit_reset_stream(ngtcp2_strm *strm); +int ngtcp2_strm_require_retransmit_reset_stream(const ngtcp2_strm *strm); /* * ngtcp2_strm_require_retransmit_stop_sending returns nonzero if * STOP_SENDING frame should be retransmitted. */ -int ngtcp2_strm_require_retransmit_stop_sending(ngtcp2_strm *strm); +int ngtcp2_strm_require_retransmit_stop_sending(const ngtcp2_strm *strm); /* * ngtcp2_strm_require_retransmit_max_stream_data returns nonzero if * MAX_STREAM_DATA frame should be retransmitted. */ -int ngtcp2_strm_require_retransmit_max_stream_data(ngtcp2_strm *strm, - ngtcp2_max_stream_data *fr); +int ngtcp2_strm_require_retransmit_max_stream_data( + const ngtcp2_strm *strm, const ngtcp2_max_stream_data *fr); /* * ngtcp2_strm_require_retransmit_stream_data_blocked returns nonzero * if STREAM_DATA_BLOCKED frame frame should be retransmitted. */ int ngtcp2_strm_require_retransmit_stream_data_blocked( - ngtcp2_strm *strm, ngtcp2_stream_data_blocked *fr); + const ngtcp2_strm *strm, const ngtcp2_stream_data_blocked *fr); -#endif /* NGTCP2_STRM_H */ +#endif /* !defined(NGTCP2_STRM_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_transport_params.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_transport_params.c new file mode 100644 index 00000000000000..dda59c48858185 --- /dev/null +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_transport_params.c @@ -0,0 +1,886 @@ +/* + * ngtcp2 + * + * Copyright (c) 2023 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_transport_params.h" + +#include +#include + +#include "ngtcp2_conv.h" +#include "ngtcp2_str.h" +#include "ngtcp2_mem.h" +#include "ngtcp2_unreachable.h" + +void ngtcp2_transport_params_default_versioned( + int transport_params_version, ngtcp2_transport_params *params) { + size_t len; + + switch (transport_params_version) { + case NGTCP2_TRANSPORT_PARAMS_VERSION: + len = sizeof(*params); + + break; + default: + ngtcp2_unreachable(); + } + + memset(params, 0, len); + + switch (transport_params_version) { + case NGTCP2_TRANSPORT_PARAMS_VERSION: + params->max_udp_payload_size = NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE; + params->active_connection_id_limit = + NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT; + params->ack_delay_exponent = NGTCP2_DEFAULT_ACK_DELAY_EXPONENT; + params->max_ack_delay = NGTCP2_DEFAULT_MAX_ACK_DELAY; + + break; + } +} + +/* + * varint_paramlen returns the length of a single transport parameter + * which has variable integer in its parameter. + */ +static size_t varint_paramlen(ngtcp2_transport_param_id id, uint64_t param) { + size_t valuelen = ngtcp2_put_uvarintlen(param); + return ngtcp2_put_uvarintlen(id) + ngtcp2_put_uvarintlen(valuelen) + valuelen; +} + +/* + * write_varint_param writes parameter |id| of the given |value| in + * varint encoding. It returns p + the number of bytes written. + */ +static uint8_t *write_varint_param(uint8_t *p, ngtcp2_transport_param_id id, + uint64_t value) { + p = ngtcp2_put_uvarint(p, id); + p = ngtcp2_put_uvarint(p, ngtcp2_put_uvarintlen(value)); + return ngtcp2_put_uvarint(p, value); +} + +/* + * zero_paramlen returns the length of a single transport parameter + * which has zero length value in its parameter. + */ +static size_t zero_paramlen(ngtcp2_transport_param_id id) { + return ngtcp2_put_uvarintlen(id) + 1; +} + +/* + * write_zero_param writes parameter |id| that has zero length value. + * It returns p + the number of bytes written. + */ +static uint8_t *write_zero_param(uint8_t *p, ngtcp2_transport_param_id id) { + p = ngtcp2_put_uvarint(p, id); + *p++ = 0; + + return p; +} + +/* + * cid_paramlen returns the length of a single transport parameter + * which has |cid| as value. + */ +static size_t cid_paramlen(ngtcp2_transport_param_id id, + const ngtcp2_cid *cid) { + return ngtcp2_put_uvarintlen(id) + ngtcp2_put_uvarintlen(cid->datalen) + + cid->datalen; +} + +/* + * write_cid_param writes parameter |id| of the given |cid|. It + * returns p + the number of bytes written. + */ +static uint8_t *write_cid_param(uint8_t *p, ngtcp2_transport_param_id id, + const ngtcp2_cid *cid) { + assert(cid->datalen == 0 || cid->datalen >= NGTCP2_MIN_CIDLEN); + assert(cid->datalen <= NGTCP2_MAX_CIDLEN); + + p = ngtcp2_put_uvarint(p, id); + p = ngtcp2_put_uvarint(p, cid->datalen); + if (cid->datalen) { + p = ngtcp2_cpymem(p, cid->data, cid->datalen); + } + return p; +} + +static const uint8_t empty_address[16]; + +ngtcp2_ssize ngtcp2_transport_params_encode_versioned( + uint8_t *dest, size_t destlen, int transport_params_version, + const ngtcp2_transport_params *params) { + uint8_t *p; + size_t len = 0; + /* For some reason, gcc 7.3.0 requires this initialization. */ + size_t preferred_addrlen = 0; + size_t version_infolen = 0; + const ngtcp2_sockaddr_in *sa_in; + const ngtcp2_sockaddr_in6 *sa_in6; + ngtcp2_transport_params paramsbuf; + + params = ngtcp2_transport_params_convert_to_latest( + ¶msbuf, transport_params_version, params); + + if (params->original_dcid_present) { + len += + cid_paramlen(NGTCP2_TRANSPORT_PARAM_ORIGINAL_DESTINATION_CONNECTION_ID, + ¶ms->original_dcid); + } + + if (params->stateless_reset_token_present) { + len += ngtcp2_put_uvarintlen(NGTCP2_TRANSPORT_PARAM_STATELESS_RESET_TOKEN) + + ngtcp2_put_uvarintlen(NGTCP2_STATELESS_RESET_TOKENLEN) + + NGTCP2_STATELESS_RESET_TOKENLEN; + } + + if (params->preferred_addr_present) { + assert(params->preferred_addr.cid.datalen >= NGTCP2_MIN_CIDLEN); + assert(params->preferred_addr.cid.datalen <= NGTCP2_MAX_CIDLEN); + preferred_addrlen = 4 /* ipv4Address */ + 2 /* ipv4Port */ + + 16 /* ipv6Address */ + 2 /* ipv6Port */ + + 1 + params->preferred_addr.cid.datalen /* CID */ + + NGTCP2_STATELESS_RESET_TOKENLEN; + len += ngtcp2_put_uvarintlen(NGTCP2_TRANSPORT_PARAM_PREFERRED_ADDRESS) + + ngtcp2_put_uvarintlen(preferred_addrlen) + preferred_addrlen; + } + if (params->retry_scid_present) { + len += cid_paramlen(NGTCP2_TRANSPORT_PARAM_RETRY_SOURCE_CONNECTION_ID, + ¶ms->retry_scid); + } + + if (params->initial_scid_present) { + len += cid_paramlen(NGTCP2_TRANSPORT_PARAM_INITIAL_SOURCE_CONNECTION_ID, + ¶ms->initial_scid); + } + + if (params->initial_max_stream_data_bidi_local) { + len += + varint_paramlen(NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_LOCAL, + params->initial_max_stream_data_bidi_local); + } + if (params->initial_max_stream_data_bidi_remote) { + len += varint_paramlen( + NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_REMOTE, + params->initial_max_stream_data_bidi_remote); + } + if (params->initial_max_stream_data_uni) { + len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_UNI, + params->initial_max_stream_data_uni); + } + if (params->initial_max_data) { + len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_DATA, + params->initial_max_data); + } + if (params->initial_max_streams_bidi) { + len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_BIDI, + params->initial_max_streams_bidi); + } + if (params->initial_max_streams_uni) { + len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_UNI, + params->initial_max_streams_uni); + } + if (params->max_udp_payload_size != + NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE) { + len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_MAX_UDP_PAYLOAD_SIZE, + params->max_udp_payload_size); + } + if (params->ack_delay_exponent != NGTCP2_DEFAULT_ACK_DELAY_EXPONENT) { + len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_ACK_DELAY_EXPONENT, + params->ack_delay_exponent); + } + if (params->disable_active_migration) { + len += zero_paramlen(NGTCP2_TRANSPORT_PARAM_DISABLE_ACTIVE_MIGRATION); + } + if (params->max_ack_delay != NGTCP2_DEFAULT_MAX_ACK_DELAY) { + len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_MAX_ACK_DELAY, + params->max_ack_delay / NGTCP2_MILLISECONDS); + } + if (params->max_idle_timeout) { + len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_MAX_IDLE_TIMEOUT, + params->max_idle_timeout / NGTCP2_MILLISECONDS); + } + if (params->active_connection_id_limit && + params->active_connection_id_limit != + NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT) { + len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_ACTIVE_CONNECTION_ID_LIMIT, + params->active_connection_id_limit); + } + if (params->max_datagram_frame_size) { + len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_MAX_DATAGRAM_FRAME_SIZE, + params->max_datagram_frame_size); + } + if (params->grease_quic_bit) { + len += zero_paramlen(NGTCP2_TRANSPORT_PARAM_GREASE_QUIC_BIT); + } + if (params->version_info_present) { + version_infolen = + sizeof(uint32_t) + params->version_info.available_versionslen; + len += ngtcp2_put_uvarintlen(NGTCP2_TRANSPORT_PARAM_VERSION_INFORMATION) + + ngtcp2_put_uvarintlen(version_infolen) + version_infolen; + } + + if (dest == NULL && destlen == 0) { + return (ngtcp2_ssize)len; + } + + if (destlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = dest; + + if (params->original_dcid_present) { + p = write_cid_param( + p, NGTCP2_TRANSPORT_PARAM_ORIGINAL_DESTINATION_CONNECTION_ID, + ¶ms->original_dcid); + } + + if (params->stateless_reset_token_present) { + p = ngtcp2_put_uvarint(p, NGTCP2_TRANSPORT_PARAM_STATELESS_RESET_TOKEN); + p = ngtcp2_put_uvarint(p, sizeof(params->stateless_reset_token)); + p = ngtcp2_cpymem(p, params->stateless_reset_token, + sizeof(params->stateless_reset_token)); + } + + if (params->preferred_addr_present) { + p = ngtcp2_put_uvarint(p, NGTCP2_TRANSPORT_PARAM_PREFERRED_ADDRESS); + p = ngtcp2_put_uvarint(p, preferred_addrlen); + + if (params->preferred_addr.ipv4_present) { + sa_in = ¶ms->preferred_addr.ipv4; + p = ngtcp2_cpymem(p, &sa_in->sin_addr, sizeof(sa_in->sin_addr)); + p = ngtcp2_put_uint16(p, sa_in->sin_port); + } else { + p = ngtcp2_cpymem(p, empty_address, sizeof(sa_in->sin_addr)); + p = ngtcp2_put_uint16(p, 0); + } + + if (params->preferred_addr.ipv6_present) { + sa_in6 = ¶ms->preferred_addr.ipv6; + p = ngtcp2_cpymem(p, &sa_in6->sin6_addr, sizeof(sa_in6->sin6_addr)); + p = ngtcp2_put_uint16(p, sa_in6->sin6_port); + } else { + p = ngtcp2_cpymem(p, empty_address, sizeof(sa_in6->sin6_addr)); + p = ngtcp2_put_uint16(p, 0); + } + + *p++ = (uint8_t)params->preferred_addr.cid.datalen; + if (params->preferred_addr.cid.datalen) { + p = ngtcp2_cpymem(p, params->preferred_addr.cid.data, + params->preferred_addr.cid.datalen); + } + p = ngtcp2_cpymem(p, params->preferred_addr.stateless_reset_token, + sizeof(params->preferred_addr.stateless_reset_token)); + } + + if (params->retry_scid_present) { + p = write_cid_param(p, NGTCP2_TRANSPORT_PARAM_RETRY_SOURCE_CONNECTION_ID, + ¶ms->retry_scid); + } + + if (params->initial_scid_present) { + p = write_cid_param(p, NGTCP2_TRANSPORT_PARAM_INITIAL_SOURCE_CONNECTION_ID, + ¶ms->initial_scid); + } + + if (params->initial_max_stream_data_bidi_local) { + p = write_varint_param( + p, NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_LOCAL, + params->initial_max_stream_data_bidi_local); + } + + if (params->initial_max_stream_data_bidi_remote) { + p = write_varint_param( + p, NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_REMOTE, + params->initial_max_stream_data_bidi_remote); + } + + if (params->initial_max_stream_data_uni) { + p = + write_varint_param(p, NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_UNI, + params->initial_max_stream_data_uni); + } + + if (params->initial_max_data) { + p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_DATA, + params->initial_max_data); + } + + if (params->initial_max_streams_bidi) { + p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_BIDI, + params->initial_max_streams_bidi); + } + + if (params->initial_max_streams_uni) { + p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_UNI, + params->initial_max_streams_uni); + } + + if (params->max_udp_payload_size != + NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE) { + p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_MAX_UDP_PAYLOAD_SIZE, + params->max_udp_payload_size); + } + + if (params->ack_delay_exponent != NGTCP2_DEFAULT_ACK_DELAY_EXPONENT) { + p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_ACK_DELAY_EXPONENT, + params->ack_delay_exponent); + } + + if (params->disable_active_migration) { + p = write_zero_param(p, NGTCP2_TRANSPORT_PARAM_DISABLE_ACTIVE_MIGRATION); + } + + if (params->max_ack_delay != NGTCP2_DEFAULT_MAX_ACK_DELAY) { + p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_MAX_ACK_DELAY, + params->max_ack_delay / NGTCP2_MILLISECONDS); + } + + if (params->max_idle_timeout) { + p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_MAX_IDLE_TIMEOUT, + params->max_idle_timeout / NGTCP2_MILLISECONDS); + } + + if (params->active_connection_id_limit && + params->active_connection_id_limit != + NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT) { + p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_ACTIVE_CONNECTION_ID_LIMIT, + params->active_connection_id_limit); + } + + if (params->max_datagram_frame_size) { + p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_MAX_DATAGRAM_FRAME_SIZE, + params->max_datagram_frame_size); + } + + if (params->grease_quic_bit) { + p = write_zero_param(p, NGTCP2_TRANSPORT_PARAM_GREASE_QUIC_BIT); + } + + if (params->version_info_present) { + p = ngtcp2_put_uvarint(p, NGTCP2_TRANSPORT_PARAM_VERSION_INFORMATION); + p = ngtcp2_put_uvarint(p, version_infolen); + p = ngtcp2_put_uint32be(p, params->version_info.chosen_version); + if (params->version_info.available_versionslen) { + p = ngtcp2_cpymem(p, params->version_info.available_versions, + params->version_info.available_versionslen); + } + } + + assert((size_t)(p - dest) == len); + + return (ngtcp2_ssize)len; +} + +/* + * decode_varint decodes a single varint from the buffer pointed by + * |*pp| of length |end - *pp|. If it decodes an integer + * successfully, it stores the integer in |*pdest|, increment |*pp| by + * the number of bytes read from |*pp|, and returns 0. Otherwise it + * returns -1. + */ +static int decode_varint(uint64_t *pdest, const uint8_t **pp, + const uint8_t *end) { + const uint8_t *p = *pp; + size_t len; + + if (p == end) { + return -1; + } + + len = ngtcp2_get_uvarintlen(p); + if ((uint64_t)(end - p) < len) { + return -1; + } + + *pp = ngtcp2_get_uvarint(pdest, p); + + return 0; +} + +/* + * decode_varint_param decodes length prefixed value from the buffer + * pointed by |*pp| of length |end - *pp|. The length and value are + * encoded in varint form. If it decodes a value successfully, it + * stores the value in |*pdest|, increment |*pp| by the number of + * bytes read from |*pp|, and returns 0. Otherwise it returns -1. + */ +static int decode_varint_param(uint64_t *pdest, const uint8_t **pp, + const uint8_t *end) { + const uint8_t *p = *pp; + uint64_t valuelen; + + if (decode_varint(&valuelen, &p, end) != 0) { + return -1; + } + + if (p == end) { + return -1; + } + + if ((uint64_t)(end - p) < valuelen) { + return -1; + } + + if (ngtcp2_get_uvarintlen(p) != valuelen) { + return -1; + } + + *pp = ngtcp2_get_uvarint(pdest, p); + + return 0; +} + +/* + * decode_zero_param decodes zero length value from the buffer pointed + * by |*pp| of length |end - *pp|. The length is encoded in varint + * form. If it decodes zero length value successfully, it increments + * |*pp| by 1, and returns 0. Otherwise it returns -1. + */ +static int decode_zero_param(const uint8_t **pp, const uint8_t *end) { + if (*pp == end || **pp != 0) { + return -1; + } + + ++*pp; + + return 0; +} + +/* + * decode_cid_param decodes length prefixed ngtcp2_cid from the buffer + * pointed by |*pp| of length |end - *pp|. The length is encoded in + * varint form. If it decodes a value successfully, it stores the + * value in |*pdest|, increment |*pp| by the number of read from + * |*pp|, and returns the number of bytes read. Otherwise it returns + * the one of the negative error code: + * + * NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM + * Could not decode Connection ID. + */ +static int decode_cid_param(ngtcp2_cid *pdest, const uint8_t **pp, + const uint8_t *end) { + const uint8_t *p = *pp; + uint64_t valuelen; + + if (decode_varint(&valuelen, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + + if ((valuelen != 0 && valuelen < NGTCP2_MIN_CIDLEN) || + valuelen > NGTCP2_MAX_CIDLEN || (size_t)(end - p) < valuelen) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + + ngtcp2_cid_init(pdest, p, (size_t)valuelen); + + p += valuelen; + + *pp = p; + + return 0; +} + +int ngtcp2_transport_params_decode_versioned(int transport_params_version, + ngtcp2_transport_params *dest, + const uint8_t *data, + size_t datalen) { + const uint8_t *p, *end, *lend; + size_t len; + uint64_t param_type; + uint64_t valuelen; + int rv; + ngtcp2_sockaddr_in *sa_in; + ngtcp2_sockaddr_in6 *sa_in6; + uint32_t version; + ngtcp2_transport_params *params, paramsbuf; + + if (transport_params_version == NGTCP2_TRANSPORT_PARAMS_VERSION) { + params = dest; + } else { + params = ¶msbuf; + } + + /* Set default values */ + memset(params, 0, sizeof(*params)); + params->max_udp_payload_size = NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE; + params->ack_delay_exponent = NGTCP2_DEFAULT_ACK_DELAY_EXPONENT; + params->max_ack_delay = NGTCP2_DEFAULT_MAX_ACK_DELAY; + params->active_connection_id_limit = + NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT; + + p = data; + end = data + datalen; + + for (; (size_t)(end - p) >= 2;) { + if (decode_varint(¶m_type, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + + switch (param_type) { + case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_LOCAL: + if (decode_varint_param(¶ms->initial_max_stream_data_bidi_local, &p, + end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + break; + case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_REMOTE: + if (decode_varint_param(¶ms->initial_max_stream_data_bidi_remote, &p, + end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + break; + case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_UNI: + if (decode_varint_param(¶ms->initial_max_stream_data_uni, &p, end) != + 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + break; + case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_DATA: + if (decode_varint_param(¶ms->initial_max_data, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + break; + case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_BIDI: + if (decode_varint_param(¶ms->initial_max_streams_bidi, &p, end) != + 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if (params->initial_max_streams_bidi > NGTCP2_MAX_STREAMS) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + break; + case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_UNI: + if (decode_varint_param(¶ms->initial_max_streams_uni, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if (params->initial_max_streams_uni > NGTCP2_MAX_STREAMS) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + break; + case NGTCP2_TRANSPORT_PARAM_MAX_IDLE_TIMEOUT: + if (decode_varint_param(¶ms->max_idle_timeout, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + params->max_idle_timeout *= NGTCP2_MILLISECONDS; + break; + case NGTCP2_TRANSPORT_PARAM_MAX_UDP_PAYLOAD_SIZE: + if (decode_varint_param(¶ms->max_udp_payload_size, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + break; + case NGTCP2_TRANSPORT_PARAM_STATELESS_RESET_TOKEN: + if (decode_varint(&valuelen, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if ((size_t)valuelen != sizeof(params->stateless_reset_token)) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if ((size_t)(end - p) < sizeof(params->stateless_reset_token)) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + + p = ngtcp2_get_bytes(params->stateless_reset_token, p, + sizeof(params->stateless_reset_token)); + params->stateless_reset_token_present = 1; + + break; + case NGTCP2_TRANSPORT_PARAM_ACK_DELAY_EXPONENT: + if (decode_varint_param(¶ms->ack_delay_exponent, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if (params->ack_delay_exponent > 20) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + break; + case NGTCP2_TRANSPORT_PARAM_PREFERRED_ADDRESS: + if (decode_varint(&valuelen, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if ((size_t)(end - p) < valuelen) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + len = 4 /* ipv4Address */ + 2 /* ipv4Port */ + 16 /* ipv6Address */ + + 2 /* ipv6Port */ + + 1 /* cid length */ + NGTCP2_STATELESS_RESET_TOKENLEN; + if (valuelen < len) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + + sa_in = ¶ms->preferred_addr.ipv4; + + p = ngtcp2_get_bytes(&sa_in->sin_addr, p, sizeof(sa_in->sin_addr)); + p = ngtcp2_get_uint16(&sa_in->sin_port, p); + + if (sa_in->sin_port || memcmp(empty_address, &sa_in->sin_addr, + sizeof(sa_in->sin_addr)) != 0) { + sa_in->sin_family = NGTCP2_AF_INET; + params->preferred_addr.ipv4_present = 1; + } + + sa_in6 = ¶ms->preferred_addr.ipv6; + + p = ngtcp2_get_bytes(&sa_in6->sin6_addr, p, sizeof(sa_in6->sin6_addr)); + p = ngtcp2_get_uint16(&sa_in6->sin6_port, p); + + if (sa_in6->sin6_port || memcmp(empty_address, &sa_in6->sin6_addr, + sizeof(sa_in6->sin6_addr)) != 0) { + sa_in6->sin6_family = NGTCP2_AF_INET6; + params->preferred_addr.ipv6_present = 1; + } + + /* cid */ + params->preferred_addr.cid.datalen = *p++; + len += params->preferred_addr.cid.datalen; + if (valuelen != len || + params->preferred_addr.cid.datalen > NGTCP2_MAX_CIDLEN || + params->preferred_addr.cid.datalen < NGTCP2_MIN_CIDLEN) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if (params->preferred_addr.cid.datalen) { + p = ngtcp2_get_bytes(params->preferred_addr.cid.data, p, + params->preferred_addr.cid.datalen); + } + + /* stateless reset token */ + p = + ngtcp2_get_bytes(params->preferred_addr.stateless_reset_token, p, + sizeof(params->preferred_addr.stateless_reset_token)); + params->preferred_addr_present = 1; + break; + case NGTCP2_TRANSPORT_PARAM_DISABLE_ACTIVE_MIGRATION: + if (decode_zero_param(&p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + params->disable_active_migration = 1; + break; + case NGTCP2_TRANSPORT_PARAM_ORIGINAL_DESTINATION_CONNECTION_ID: + rv = decode_cid_param(¶ms->original_dcid, &p, end); + if (rv != 0) { + return rv; + } + params->original_dcid_present = 1; + break; + case NGTCP2_TRANSPORT_PARAM_RETRY_SOURCE_CONNECTION_ID: + rv = decode_cid_param(¶ms->retry_scid, &p, end); + if (rv != 0) { + return rv; + } + params->retry_scid_present = 1; + break; + case NGTCP2_TRANSPORT_PARAM_INITIAL_SOURCE_CONNECTION_ID: + rv = decode_cid_param(¶ms->initial_scid, &p, end); + if (rv != 0) { + return rv; + } + params->initial_scid_present = 1; + break; + case NGTCP2_TRANSPORT_PARAM_MAX_ACK_DELAY: + if (decode_varint_param(¶ms->max_ack_delay, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if (params->max_ack_delay >= 16384) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + params->max_ack_delay *= NGTCP2_MILLISECONDS; + break; + case NGTCP2_TRANSPORT_PARAM_ACTIVE_CONNECTION_ID_LIMIT: + if (decode_varint_param(¶ms->active_connection_id_limit, &p, end) != + 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + break; + case NGTCP2_TRANSPORT_PARAM_MAX_DATAGRAM_FRAME_SIZE: + if (decode_varint_param(¶ms->max_datagram_frame_size, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + break; + case NGTCP2_TRANSPORT_PARAM_GREASE_QUIC_BIT: + if (decode_zero_param(&p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + params->grease_quic_bit = 1; + break; + case NGTCP2_TRANSPORT_PARAM_VERSION_INFORMATION: + if (decode_varint(&valuelen, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if ((size_t)(end - p) < valuelen) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if (valuelen < sizeof(uint32_t) || (valuelen & 0x3)) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + p = ngtcp2_get_uint32be(¶ms->version_info.chosen_version, p); + if (params->version_info.chosen_version == 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if (valuelen > sizeof(uint32_t)) { + params->version_info.available_versions = (uint8_t *)p; + params->version_info.available_versionslen = + (size_t)valuelen - sizeof(uint32_t); + + for (lend = p + (valuelen - sizeof(uint32_t)); p != lend;) { + p = ngtcp2_get_uint32be(&version, p); + if (version == 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + } + } + params->version_info_present = 1; + break; + default: + /* Ignore unknown parameter */ + if (decode_varint(&valuelen, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if ((size_t)(end - p) < valuelen) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + p += valuelen; + break; + } + } + + if (end - p != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + + if (transport_params_version != NGTCP2_TRANSPORT_PARAMS_VERSION) { + ngtcp2_transport_params_convert_to_old(transport_params_version, dest, + params); + } + + return 0; +} + +static int transport_params_copy_new(ngtcp2_transport_params **pdest, + const ngtcp2_transport_params *src, + const ngtcp2_mem *mem) { + size_t len = sizeof(**pdest); + ngtcp2_transport_params *dest; + uint8_t *p; + + if (src->version_info_present) { + len += src->version_info.available_versionslen; + } + + dest = ngtcp2_mem_malloc(mem, len); + if (dest == NULL) { + return NGTCP2_ERR_NOMEM; + } + + *dest = *src; + + if (src->version_info_present && src->version_info.available_versionslen) { + p = (uint8_t *)dest + sizeof(*dest); + memcpy(p, src->version_info.available_versions, + src->version_info.available_versionslen); + dest->version_info.available_versions = p; + } + + *pdest = dest; + + return 0; +} + +int ngtcp2_transport_params_decode_new(ngtcp2_transport_params **pparams, + const uint8_t *data, size_t datalen, + const ngtcp2_mem *mem) { + int rv; + ngtcp2_transport_params params; + + rv = ngtcp2_transport_params_decode(¶ms, data, datalen); + if (rv < 0) { + return rv; + } + + if (mem == NULL) { + mem = ngtcp2_mem_default(); + } + + return transport_params_copy_new(pparams, ¶ms, mem); +} + +void ngtcp2_transport_params_del(ngtcp2_transport_params *params, + const ngtcp2_mem *mem) { + if (params == NULL) { + return; + } + + if (mem == NULL) { + mem = ngtcp2_mem_default(); + } + + ngtcp2_mem_free(mem, params); +} + +int ngtcp2_transport_params_copy_new(ngtcp2_transport_params **pdest, + const ngtcp2_transport_params *src, + const ngtcp2_mem *mem) { + if (src == NULL) { + *pdest = NULL; + return 0; + } + + return transport_params_copy_new(pdest, src, mem); +} + +static void transport_params_copy(ngtcp2_transport_params *dest, + const ngtcp2_transport_params *src, + int transport_params_version) { + assert(transport_params_version != NGTCP2_TRANSPORT_PARAMS_VERSION); + + switch (transport_params_version) { + case NGTCP2_TRANSPORT_PARAMS_V1: + memcpy(dest, src, + offsetof(ngtcp2_transport_params, version_info_present) + + sizeof(src->version_info_present)); + + break; + } +} + +const ngtcp2_transport_params * +ngtcp2_transport_params_convert_to_latest(ngtcp2_transport_params *dest, + int transport_params_version, + const ngtcp2_transport_params *src) { + if (transport_params_version == NGTCP2_TRANSPORT_PARAMS_VERSION) { + return src; + } + + ngtcp2_transport_params_default(dest); + + transport_params_copy(dest, src, transport_params_version); + + return dest; +} + +void ngtcp2_transport_params_convert_to_old( + int transport_params_version, ngtcp2_transport_params *dest, + const ngtcp2_transport_params *src) { + assert(transport_params_version != NGTCP2_TRANSPORT_PARAMS_VERSION); + + transport_params_copy(dest, src, transport_params_version); +} diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_conversion.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_transport_params.h similarity index 55% rename from deps/ngtcp2/ngtcp2/lib/ngtcp2_conversion.h rename to deps/ngtcp2/ngtcp2/lib/ngtcp2_transport_params.h index 3457a8f2053aba..c077f06a9dd717 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_conversion.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_transport_params.h @@ -22,15 +22,62 @@ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef NGTCP2_CONVERSION_H -#define NGTCP2_CONVERSION_H +#ifndef NGTCP2_TRANSPORT_PARAMS_H +#define NGTCP2_TRANSPORT_PARAMS_H #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include +/* ngtcp2_transport_param_id is the registry of QUIC transport + parameter ID. */ +typedef uint64_t ngtcp2_transport_param_id; + +#define NGTCP2_TRANSPORT_PARAM_ORIGINAL_DESTINATION_CONNECTION_ID 0x00 +#define NGTCP2_TRANSPORT_PARAM_MAX_IDLE_TIMEOUT 0x01 +#define NGTCP2_TRANSPORT_PARAM_STATELESS_RESET_TOKEN 0x02 +#define NGTCP2_TRANSPORT_PARAM_MAX_UDP_PAYLOAD_SIZE 0x03 +#define NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_DATA 0x04 +#define NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_LOCAL 0x05 +#define NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_REMOTE 0x06 +#define NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_UNI 0x07 +#define NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_BIDI 0x08 +#define NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_UNI 0x09 +#define NGTCP2_TRANSPORT_PARAM_ACK_DELAY_EXPONENT 0x0a +#define NGTCP2_TRANSPORT_PARAM_MAX_ACK_DELAY 0x0b +#define NGTCP2_TRANSPORT_PARAM_DISABLE_ACTIVE_MIGRATION 0x0c +#define NGTCP2_TRANSPORT_PARAM_PREFERRED_ADDRESS 0x0d +#define NGTCP2_TRANSPORT_PARAM_ACTIVE_CONNECTION_ID_LIMIT 0x0e +#define NGTCP2_TRANSPORT_PARAM_INITIAL_SOURCE_CONNECTION_ID 0x0f +#define NGTCP2_TRANSPORT_PARAM_RETRY_SOURCE_CONNECTION_ID 0x10 +/* https://datatracker.ietf.org/doc/html/rfc9221 */ +#define NGTCP2_TRANSPORT_PARAM_MAX_DATAGRAM_FRAME_SIZE 0x20 +#define NGTCP2_TRANSPORT_PARAM_GREASE_QUIC_BIT 0x2ab2 +/* https://datatracker.ietf.org/doc/html/rfc9368 */ +#define NGTCP2_TRANSPORT_PARAM_VERSION_INFORMATION 0x11 + +/* NGTCP2_MAX_STREAMS is the maximum number of streams. */ +#define NGTCP2_MAX_STREAMS (1LL << 60) + +/* + * ngtcp2_transport_params_copy_new makes a copy of |src|, and assigns + * it to |*pdest|. If |src| is NULL, NULL is assigned to |*pdest|. + * + * Caller is responsible to call ngtcp2_transport_params_del to free + * the memory assigned to |*pdest|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +int ngtcp2_transport_params_copy_new(ngtcp2_transport_params **pdest, + const ngtcp2_transport_params *src, + const ngtcp2_mem *mem); + /* * ngtcp2_transport_params_convert_to_latest converts |src| of version * |transport_params_version| to the latest version @@ -68,4 +115,4 @@ void ngtcp2_transport_params_convert_to_old(int transport_params_version, ngtcp2_transport_params *dest, const ngtcp2_transport_params *src); -#endif /* NGTCP2_CONVERSION_H */ +#endif /* !defined(NGTCP2_TRANSPORT_PARAMS_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_tstamp.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_tstamp.h index 9a210a320dc1ca..2b1bb51d87edc6 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_tstamp.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_tstamp.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -65,4 +65,4 @@ static inline int ngtcp2_tstamp_not_elapsed(ngtcp2_tstamp base, return base != UINT64_MAX && (base >= UINT64_MAX - d || base + d > ts); } -#endif /* NGTCP2_TSTAMP_H */ +#endif /* !defined(NGTCP2_TSTAMP_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_unreachable.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_unreachable.c index 7c7d9ae78e914d..5ab1db72cc5384 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_unreachable.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_unreachable.c @@ -26,20 +26,22 @@ #include #include +#include #ifdef HAVE_UNISTD_H +# define NGTCP2_UNREACHABLE_LOG # include -#endif /* HAVE_UNISTD_H */ -#include -#ifdef WIN32 +#elif defined(WIN32) +# define NGTCP2_UNREACHABLE_LOG # include -#endif /* WIN32 */ +#endif /* defined(WIN32) */ void ngtcp2_unreachable_fail(const char *file, int line, const char *func) { +#ifdef NGTCP2_UNREACHABLE_LOG char *buf; size_t buflen; int rv; -#define NGTCP2_UNREACHABLE_TEMPLATE "%s:%d %s: Unreachable.\n" +# define NGTCP2_UNREACHABLE_TEMPLATE "%s:%d %s: Unreachable.\n" rv = snprintf(NULL, 0, NGTCP2_UNREACHABLE_TEMPLATE, file, line, func); if (rv < 0) { @@ -58,14 +60,15 @@ void ngtcp2_unreachable_fail(const char *file, int line, const char *func) { abort(); } -#ifndef WIN32 +# ifndef WIN32 while (write(STDERR_FILENO, buf, (size_t)rv) == -1 && errno == EINTR) ; -#else /* WIN32 */ +# else /* defined(WIN32) */ _write(_fileno(stderr), buf, (unsigned int)rv); -#endif /* WIN32 */ +# endif /* defined(WIN32) */ free(buf); +#endif /* defined(NGTCP2_UNREACHABLE_LOG) */ abort(); } diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_unreachable.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_unreachable.h index a5276fd505463f..ca7123865279f9 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_unreachable.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_unreachable.h @@ -27,26 +27,26 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include #ifdef __FILE_NAME__ # define NGTCP2_FILE_NAME __FILE_NAME__ -#else /* !__FILE_NAME__ */ +#else /* !defined(__FILE_NAME__) */ # define NGTCP2_FILE_NAME "(file)" -#endif /* !__FILE_NAME__ */ +#endif /* !defined(__FILE_NAME__) */ #define ngtcp2_unreachable() \ ngtcp2_unreachable_fail(NGTCP2_FILE_NAME, __LINE__, __func__) #ifdef _MSC_VER __declspec(noreturn) -#endif /* _MSC_VER */ +#endif /* defined(_MSC_VER) */ void ngtcp2_unreachable_fail(const char *file, int line, const char *func) #ifndef _MSC_VER __attribute__((noreturn)) -#endif /* !_MSC_VER */ +#endif /* !defined(_MSC_VER) */ ; -#endif /* NGTCP2_UNREACHABLE_H */ +#endif /* !defined(NGTCP2_UNREACHABLE_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_vec.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_vec.c index dbc7b668042695..0b9c92d47d7d88 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_vec.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_vec.c @@ -50,6 +50,7 @@ int ngtcp2_vec_new(ngtcp2_vec **pvec, const uint8_t *data, size_t datalen, p = (uint8_t *)(*pvec) + sizeof(ngtcp2_vec); (*pvec)->base = p; (*pvec)->len = datalen; + if (datalen) { /* p = */ ngtcp2_cpymem(p, data, datalen); } @@ -89,8 +90,8 @@ int64_t ngtcp2_vec_len_varint(const ngtcp2_vec *vec, size_t n) { return (int64_t)res; } -ngtcp2_ssize ngtcp2_vec_split(ngtcp2_vec *src, size_t *psrccnt, ngtcp2_vec *dst, - size_t *pdstcnt, size_t left, size_t maxcnt) { +ngtcp2_ssize ngtcp2_vec_split(ngtcp2_vec *dst, size_t *pdstcnt, ngtcp2_vec *src, + size_t *psrccnt, size_t left, size_t maxcnt) { size_t i; size_t srccnt = *psrccnt; size_t nmove; @@ -203,6 +204,7 @@ size_t ngtcp2_vec_merge(ngtcp2_vec *dst, size_t *pdstcnt, ngtcp2_vec *src, } else { dst[(*pdstcnt)++] = *b; } + left -= b->len; } @@ -222,11 +224,14 @@ size_t ngtcp2_vec_copy_at_most(ngtcp2_vec *dst, size_t dstcnt, ++i; continue; } + dst[j] = src[i]; + if (dst[j].len > left) { dst[j].len = left; return j + 1; } + left -= dst[j].len; ++i; ++j; diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_vec.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_vec.h index a39c4392fd2627..f7611efcb7d6ce 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_vec.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_vec.h @@ -27,7 +27,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -74,19 +74,21 @@ uint64_t ngtcp2_vec_len(const ngtcp2_vec *vec, size_t n); int64_t ngtcp2_vec_len_varint(const ngtcp2_vec *vec, size_t n); /* - * ngtcp2_vec_split splits |src| to |dst| so that the sum of the - * length in |src| does not exceed |left| bytes. The |maxcnt| is the - * maximum number of elements which |dst| array can contain. The - * caller must set |*psrccnt| to the number of elements of |src|. - * Similarly, the caller must set |*pdstcnt| to the number of elements - * of |dst|. The split does not necessarily occur at the boundary of - * ngtcp2_vec object. After split has done, this function updates - * |*psrccnt| and |*pdstcnt|. This function returns the number of - * bytes moved from |src| to |dst|. If split cannot be made because - * doing so exceeds |maxcnt|, this function returns -1. + * ngtcp2_vec_split splits |src| at the data position where + * ngtcp2_vec_len(|src|) does not exceed |left| bytes. The removed + * vectors are moved to |dst|. The existing elements in |dst| are + * moved forward to make up a space. The |maxcnt| is the maximum + * number of elements which |dst| array can contain. The caller must + * set |*psrccnt| to the number of elements of |src|. Similarly, the + * caller must set |*pdstcnt| to the number of elements of |dst|. The + * split does not necessarily occur at the boundary of ngtcp2_vec + * object. After split has done, this function updates |*psrccnt| and + * |*pdstcnt|. This function returns the number of bytes moved from + * |src| to |dst|. If split cannot be made because doing so exceeds + * |maxcnt|, this function returns -1. */ -ngtcp2_ssize ngtcp2_vec_split(ngtcp2_vec *src, size_t *psrccnt, ngtcp2_vec *dst, - size_t *pdstcnt, size_t left, size_t maxcnt); +ngtcp2_ssize ngtcp2_vec_split(ngtcp2_vec *dst, size_t *pdstcnt, ngtcp2_vec *src, + size_t *psrccnt, size_t left, size_t maxcnt); /* * ngtcp2_vec_merge merges |src| into |dst| by moving at most |left| @@ -117,4 +119,4 @@ size_t ngtcp2_vec_copy_at_most(ngtcp2_vec *dst, size_t dstcnt, */ void ngtcp2_vec_copy(ngtcp2_vec *dst, const ngtcp2_vec *src, size_t cnt); -#endif /* NGTCP2_VEC_H */ +#endif /* !defined(NGTCP2_VEC_H) */ diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_version.c b/deps/ngtcp2/ngtcp2/lib/ngtcp2_version.c index b31162c3ebe0d7..d2557e9ef8a580 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_version.c +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_version.c @@ -24,7 +24,7 @@ */ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include diff --git a/deps/ngtcp2/ngtcp2/lib/ngtcp2_window_filter.h b/deps/ngtcp2/ngtcp2/lib/ngtcp2_window_filter.h index 50415f10b8c37b..c90a9fdb9078da 100644 --- a/deps/ngtcp2/ngtcp2/lib/ngtcp2_window_filter.h +++ b/deps/ngtcp2/ngtcp2/lib/ngtcp2_window_filter.h @@ -37,7 +37,7 @@ #ifdef HAVE_CONFIG_H # include -#endif /* HAVE_CONFIG_H */ +#endif /* defined(HAVE_CONFIG_H) */ #include @@ -62,4 +62,4 @@ void ngtcp2_window_filter_reset(ngtcp2_window_filter *wf, uint64_t new_sample, uint64_t ngtcp2_window_filter_get_best(ngtcp2_window_filter *wf); -#endif /* NGTCP2_WINDOW_FILTER_H */ +#endif /* !defined(NGTCP2_WINDOW_FILTER_H) */ diff --git a/deps/npm/docs/content/commands/npm-ls.md b/deps/npm/docs/content/commands/npm-ls.md index 3abedf9cd8e4e6..69bc86f85f3508 100644 --- a/deps/npm/docs/content/commands/npm-ls.md +++ b/deps/npm/docs/content/commands/npm-ls.md @@ -27,7 +27,7 @@ packages will *also* show the paths to the specified packages. For example, running `npm ls promzard` in npm's source tree will show: ```bash -npm@10.9.0 /path/to/npm +npm@10.9.2 /path/to/npm └─┬ init-package-json@0.0.4 └── promzard@0.1.5 ``` diff --git a/deps/npm/docs/content/commands/npm.md b/deps/npm/docs/content/commands/npm.md index d92892ec544eb9..029b9fa7631544 100644 --- a/deps/npm/docs/content/commands/npm.md +++ b/deps/npm/docs/content/commands/npm.md @@ -14,7 +14,7 @@ Note: This command is unaware of workspaces. ### Version -10.9.0 +10.9.2 ### Description diff --git a/deps/npm/docs/output/commands/npm-access.html b/deps/npm/docs/output/commands/npm-access.html index 0a839663fcb016..9ce3c00e3525d8 100644 --- a/deps/npm/docs/output/commands/npm-access.html +++ b/deps/npm/docs/output/commands/npm-access.html @@ -141,9 +141,9 @@
-

+

npm-access - @10.9.0 + @10.9.2

Set access level on published packages
diff --git a/deps/npm/docs/output/commands/npm-adduser.html b/deps/npm/docs/output/commands/npm-adduser.html index 2ac615ad13978d..5f6627229d2c8b 100644 --- a/deps/npm/docs/output/commands/npm-adduser.html +++ b/deps/npm/docs/output/commands/npm-adduser.html @@ -141,9 +141,9 @@
-

+

npm-adduser - @10.9.0 + @10.9.2

Add a registry user account
diff --git a/deps/npm/docs/output/commands/npm-audit.html b/deps/npm/docs/output/commands/npm-audit.html index d7374ccfa66089..a8934d172e3602 100644 --- a/deps/npm/docs/output/commands/npm-audit.html +++ b/deps/npm/docs/output/commands/npm-audit.html @@ -141,9 +141,9 @@
-

+

npm-audit - @10.9.0 + @10.9.2

Run a security audit
diff --git a/deps/npm/docs/output/commands/npm-bugs.html b/deps/npm/docs/output/commands/npm-bugs.html index 9186bc100ae8f6..3ec74a05673259 100644 --- a/deps/npm/docs/output/commands/npm-bugs.html +++ b/deps/npm/docs/output/commands/npm-bugs.html @@ -141,9 +141,9 @@
-

+

npm-bugs - @10.9.0 + @10.9.2

Report bugs for a package in a web browser
diff --git a/deps/npm/docs/output/commands/npm-cache.html b/deps/npm/docs/output/commands/npm-cache.html index 55cec217dbdfb7..0a4fd00a276a47 100644 --- a/deps/npm/docs/output/commands/npm-cache.html +++ b/deps/npm/docs/output/commands/npm-cache.html @@ -141,9 +141,9 @@
-

+

npm-cache - @10.9.0 + @10.9.2

Manipulates packages cache
diff --git a/deps/npm/docs/output/commands/npm-ci.html b/deps/npm/docs/output/commands/npm-ci.html index efe858777565a9..3af67dc55c81db 100644 --- a/deps/npm/docs/output/commands/npm-ci.html +++ b/deps/npm/docs/output/commands/npm-ci.html @@ -141,9 +141,9 @@
-

+

npm-ci - @10.9.0 + @10.9.2

Clean install a project
diff --git a/deps/npm/docs/output/commands/npm-completion.html b/deps/npm/docs/output/commands/npm-completion.html index 4a91a94498f33b..3f8b509e4ec8d6 100644 --- a/deps/npm/docs/output/commands/npm-completion.html +++ b/deps/npm/docs/output/commands/npm-completion.html @@ -141,9 +141,9 @@
-

+

npm-completion - @10.9.0 + @10.9.2

Tab Completion for npm
diff --git a/deps/npm/docs/output/commands/npm-config.html b/deps/npm/docs/output/commands/npm-config.html index d18998fea8471d..c9b9a2c7550bcc 100644 --- a/deps/npm/docs/output/commands/npm-config.html +++ b/deps/npm/docs/output/commands/npm-config.html @@ -141,9 +141,9 @@
-

+

npm-config - @10.9.0 + @10.9.2

Manage the npm configuration files
diff --git a/deps/npm/docs/output/commands/npm-dedupe.html b/deps/npm/docs/output/commands/npm-dedupe.html index 194ea085383df3..0aa8bf5a5bde77 100644 --- a/deps/npm/docs/output/commands/npm-dedupe.html +++ b/deps/npm/docs/output/commands/npm-dedupe.html @@ -141,9 +141,9 @@
-

+

npm-dedupe - @10.9.0 + @10.9.2

Reduce duplication in the package tree
diff --git a/deps/npm/docs/output/commands/npm-deprecate.html b/deps/npm/docs/output/commands/npm-deprecate.html index ae40adfbbab051..0019583ee2135a 100644 --- a/deps/npm/docs/output/commands/npm-deprecate.html +++ b/deps/npm/docs/output/commands/npm-deprecate.html @@ -141,9 +141,9 @@
-

+

npm-deprecate - @10.9.0 + @10.9.2

Deprecate a version of a package
diff --git a/deps/npm/docs/output/commands/npm-diff.html b/deps/npm/docs/output/commands/npm-diff.html index 257b1c21572083..fe2123ee60fdc8 100644 --- a/deps/npm/docs/output/commands/npm-diff.html +++ b/deps/npm/docs/output/commands/npm-diff.html @@ -141,9 +141,9 @@
-

+

npm-diff - @10.9.0 + @10.9.2

The registry diff command
diff --git a/deps/npm/docs/output/commands/npm-dist-tag.html b/deps/npm/docs/output/commands/npm-dist-tag.html index 08e7770a0c1745..dce3f752ba4eaa 100644 --- a/deps/npm/docs/output/commands/npm-dist-tag.html +++ b/deps/npm/docs/output/commands/npm-dist-tag.html @@ -141,9 +141,9 @@
-

+

npm-dist-tag - @10.9.0 + @10.9.2

Modify package distribution tags
diff --git a/deps/npm/docs/output/commands/npm-docs.html b/deps/npm/docs/output/commands/npm-docs.html index 8b647251f36740..caef5afe3b1bd4 100644 --- a/deps/npm/docs/output/commands/npm-docs.html +++ b/deps/npm/docs/output/commands/npm-docs.html @@ -141,9 +141,9 @@
-

+

npm-docs - @10.9.0 + @10.9.2

Open documentation for a package in a web browser
diff --git a/deps/npm/docs/output/commands/npm-doctor.html b/deps/npm/docs/output/commands/npm-doctor.html index 110ec2f9b20bf0..d9f7a71450ab75 100644 --- a/deps/npm/docs/output/commands/npm-doctor.html +++ b/deps/npm/docs/output/commands/npm-doctor.html @@ -141,9 +141,9 @@
-

+

npm-doctor - @10.9.0 + @10.9.2

Check the health of your npm environment
diff --git a/deps/npm/docs/output/commands/npm-edit.html b/deps/npm/docs/output/commands/npm-edit.html index 9e4f7b361a5cb6..ab835ddcd352f9 100644 --- a/deps/npm/docs/output/commands/npm-edit.html +++ b/deps/npm/docs/output/commands/npm-edit.html @@ -141,9 +141,9 @@
-

+

npm-edit - @10.9.0 + @10.9.2

Edit an installed package
diff --git a/deps/npm/docs/output/commands/npm-exec.html b/deps/npm/docs/output/commands/npm-exec.html index 695fa35ab825c2..b5b8f66bdbb54f 100644 --- a/deps/npm/docs/output/commands/npm-exec.html +++ b/deps/npm/docs/output/commands/npm-exec.html @@ -141,9 +141,9 @@
-

+

npm-exec - @10.9.0 + @10.9.2

Run a command from a local or remote npm package
diff --git a/deps/npm/docs/output/commands/npm-explain.html b/deps/npm/docs/output/commands/npm-explain.html index e79255ff5fa6c6..812ce85f73a130 100644 --- a/deps/npm/docs/output/commands/npm-explain.html +++ b/deps/npm/docs/output/commands/npm-explain.html @@ -141,9 +141,9 @@
-

+

npm-explain - @10.9.0 + @10.9.2

Explain installed packages
diff --git a/deps/npm/docs/output/commands/npm-explore.html b/deps/npm/docs/output/commands/npm-explore.html index e296f4146aff98..20d761c0db149c 100644 --- a/deps/npm/docs/output/commands/npm-explore.html +++ b/deps/npm/docs/output/commands/npm-explore.html @@ -141,9 +141,9 @@
-

+

npm-explore - @10.9.0 + @10.9.2

Browse an installed package
diff --git a/deps/npm/docs/output/commands/npm-find-dupes.html b/deps/npm/docs/output/commands/npm-find-dupes.html index a8c914a0dd3d44..9b2810cf1b17ac 100644 --- a/deps/npm/docs/output/commands/npm-find-dupes.html +++ b/deps/npm/docs/output/commands/npm-find-dupes.html @@ -141,9 +141,9 @@
-

+

npm-find-dupes - @10.9.0 + @10.9.2

Find duplication in the package tree
diff --git a/deps/npm/docs/output/commands/npm-fund.html b/deps/npm/docs/output/commands/npm-fund.html index 36a63253439b60..91e0a71c3b32af 100644 --- a/deps/npm/docs/output/commands/npm-fund.html +++ b/deps/npm/docs/output/commands/npm-fund.html @@ -141,9 +141,9 @@
-

+

npm-fund - @10.9.0 + @10.9.2

Retrieve funding information
diff --git a/deps/npm/docs/output/commands/npm-help-search.html b/deps/npm/docs/output/commands/npm-help-search.html index 76dea45d852e75..35e2ec587c48e8 100644 --- a/deps/npm/docs/output/commands/npm-help-search.html +++ b/deps/npm/docs/output/commands/npm-help-search.html @@ -141,9 +141,9 @@
-

+

npm-help-search - @10.9.0 + @10.9.2

Search npm help documentation
diff --git a/deps/npm/docs/output/commands/npm-help.html b/deps/npm/docs/output/commands/npm-help.html index e6b14af2f9ec6c..17403f14c89427 100644 --- a/deps/npm/docs/output/commands/npm-help.html +++ b/deps/npm/docs/output/commands/npm-help.html @@ -141,9 +141,9 @@
-

+

npm-help - @10.9.0 + @10.9.2

Get help on npm
diff --git a/deps/npm/docs/output/commands/npm-hook.html b/deps/npm/docs/output/commands/npm-hook.html index 393700a9a7165a..d06784faf03624 100644 --- a/deps/npm/docs/output/commands/npm-hook.html +++ b/deps/npm/docs/output/commands/npm-hook.html @@ -141,9 +141,9 @@
-

+

npm-hook - @10.9.0 + @10.9.2

Manage registry hooks
diff --git a/deps/npm/docs/output/commands/npm-init.html b/deps/npm/docs/output/commands/npm-init.html index 8ff01b2f7a76d0..430763db6ba6af 100644 --- a/deps/npm/docs/output/commands/npm-init.html +++ b/deps/npm/docs/output/commands/npm-init.html @@ -141,9 +141,9 @@
-

+

npm-init - @10.9.0 + @10.9.2

Create a package.json file
diff --git a/deps/npm/docs/output/commands/npm-install-ci-test.html b/deps/npm/docs/output/commands/npm-install-ci-test.html index b0f9d237ed8e98..6a29d2d54f679e 100644 --- a/deps/npm/docs/output/commands/npm-install-ci-test.html +++ b/deps/npm/docs/output/commands/npm-install-ci-test.html @@ -141,9 +141,9 @@
-

+

npm-install-ci-test - @10.9.0 + @10.9.2

Install a project with a clean slate and run tests
diff --git a/deps/npm/docs/output/commands/npm-install-test.html b/deps/npm/docs/output/commands/npm-install-test.html index b0fbd63887fff5..32bd2271074fb6 100644 --- a/deps/npm/docs/output/commands/npm-install-test.html +++ b/deps/npm/docs/output/commands/npm-install-test.html @@ -141,9 +141,9 @@
-

+

npm-install-test - @10.9.0 + @10.9.2

Install package(s) and run tests
diff --git a/deps/npm/docs/output/commands/npm-install.html b/deps/npm/docs/output/commands/npm-install.html index fa57e02eaf9ad5..db7d717d18160b 100644 --- a/deps/npm/docs/output/commands/npm-install.html +++ b/deps/npm/docs/output/commands/npm-install.html @@ -141,9 +141,9 @@
-

+

npm-install - @10.9.0 + @10.9.2

Install a package
diff --git a/deps/npm/docs/output/commands/npm-link.html b/deps/npm/docs/output/commands/npm-link.html index 4e461ebefafd42..5778cc2a6268d2 100644 --- a/deps/npm/docs/output/commands/npm-link.html +++ b/deps/npm/docs/output/commands/npm-link.html @@ -141,9 +141,9 @@
-

+

npm-link - @10.9.0 + @10.9.2

Symlink a package folder
diff --git a/deps/npm/docs/output/commands/npm-login.html b/deps/npm/docs/output/commands/npm-login.html index 9c1584ca36bc41..81555fcecefd3e 100644 --- a/deps/npm/docs/output/commands/npm-login.html +++ b/deps/npm/docs/output/commands/npm-login.html @@ -141,9 +141,9 @@
-

+

npm-login - @10.9.0 + @10.9.2

Login to a registry user account
diff --git a/deps/npm/docs/output/commands/npm-logout.html b/deps/npm/docs/output/commands/npm-logout.html index 8908b329395254..1b6cdf8b923034 100644 --- a/deps/npm/docs/output/commands/npm-logout.html +++ b/deps/npm/docs/output/commands/npm-logout.html @@ -141,9 +141,9 @@
-

+

npm-logout - @10.9.0 + @10.9.2

Log out of the registry
diff --git a/deps/npm/docs/output/commands/npm-ls.html b/deps/npm/docs/output/commands/npm-ls.html index 2615a492a75e38..c9aa847abf1dd4 100644 --- a/deps/npm/docs/output/commands/npm-ls.html +++ b/deps/npm/docs/output/commands/npm-ls.html @@ -141,9 +141,9 @@
-

+

npm-ls - @10.9.0 + @10.9.2

List installed packages
@@ -168,7 +168,7 @@

Description

the results to only the paths to the packages named. Note that nested packages will also show the paths to the specified packages. For example, running npm ls promzard in npm's source tree will show:

-
npm@10.9.0 /path/to/npm
+
npm@10.9.2 /path/to/npm
 └─┬ init-package-json@0.0.4
   └── promzard@0.1.5
 
diff --git a/deps/npm/docs/output/commands/npm-org.html b/deps/npm/docs/output/commands/npm-org.html index 66d823f7f3c2de..6c45111f034994 100644 --- a/deps/npm/docs/output/commands/npm-org.html +++ b/deps/npm/docs/output/commands/npm-org.html @@ -141,9 +141,9 @@
-

+

npm-org - @10.9.0 + @10.9.2

Manage orgs
diff --git a/deps/npm/docs/output/commands/npm-outdated.html b/deps/npm/docs/output/commands/npm-outdated.html index 6002ff1818da2c..9be28fdfd239e1 100644 --- a/deps/npm/docs/output/commands/npm-outdated.html +++ b/deps/npm/docs/output/commands/npm-outdated.html @@ -141,9 +141,9 @@
-

+

npm-outdated - @10.9.0 + @10.9.2

Check for outdated packages
diff --git a/deps/npm/docs/output/commands/npm-owner.html b/deps/npm/docs/output/commands/npm-owner.html index e8b7a05274aa91..fc0899e5e5d66b 100644 --- a/deps/npm/docs/output/commands/npm-owner.html +++ b/deps/npm/docs/output/commands/npm-owner.html @@ -141,9 +141,9 @@
-

+

npm-owner - @10.9.0 + @10.9.2

Manage package owners
diff --git a/deps/npm/docs/output/commands/npm-pack.html b/deps/npm/docs/output/commands/npm-pack.html index 596ba9a35e3ac5..359d463780e510 100644 --- a/deps/npm/docs/output/commands/npm-pack.html +++ b/deps/npm/docs/output/commands/npm-pack.html @@ -141,9 +141,9 @@
-

+

npm-pack - @10.9.0 + @10.9.2

Create a tarball from a package
diff --git a/deps/npm/docs/output/commands/npm-ping.html b/deps/npm/docs/output/commands/npm-ping.html index de393dbd0b5d71..4b3fbbc698cbb7 100644 --- a/deps/npm/docs/output/commands/npm-ping.html +++ b/deps/npm/docs/output/commands/npm-ping.html @@ -141,9 +141,9 @@
-

+

npm-ping - @10.9.0 + @10.9.2

Ping npm registry
diff --git a/deps/npm/docs/output/commands/npm-pkg.html b/deps/npm/docs/output/commands/npm-pkg.html index ffb153cc440ce4..f1fe76e8073150 100644 --- a/deps/npm/docs/output/commands/npm-pkg.html +++ b/deps/npm/docs/output/commands/npm-pkg.html @@ -141,9 +141,9 @@
-

+

npm-pkg - @10.9.0 + @10.9.2

Manages your package.json
diff --git a/deps/npm/docs/output/commands/npm-prefix.html b/deps/npm/docs/output/commands/npm-prefix.html index da63644f2df42a..9708e55234ab4c 100644 --- a/deps/npm/docs/output/commands/npm-prefix.html +++ b/deps/npm/docs/output/commands/npm-prefix.html @@ -141,9 +141,9 @@
-

+

npm-prefix - @10.9.0 + @10.9.2

Display prefix
diff --git a/deps/npm/docs/output/commands/npm-profile.html b/deps/npm/docs/output/commands/npm-profile.html index c3679196a85bc4..6bb08b1fda63a6 100644 --- a/deps/npm/docs/output/commands/npm-profile.html +++ b/deps/npm/docs/output/commands/npm-profile.html @@ -141,9 +141,9 @@
-

+

npm-profile - @10.9.0 + @10.9.2

Change settings on your registry profile
diff --git a/deps/npm/docs/output/commands/npm-prune.html b/deps/npm/docs/output/commands/npm-prune.html index 4e844ab9f02cbb..818522d63754f9 100644 --- a/deps/npm/docs/output/commands/npm-prune.html +++ b/deps/npm/docs/output/commands/npm-prune.html @@ -141,9 +141,9 @@
-

+

npm-prune - @10.9.0 + @10.9.2

Remove extraneous packages
diff --git a/deps/npm/docs/output/commands/npm-publish.html b/deps/npm/docs/output/commands/npm-publish.html index b808cc29a15744..37c19e0b8f357c 100644 --- a/deps/npm/docs/output/commands/npm-publish.html +++ b/deps/npm/docs/output/commands/npm-publish.html @@ -141,9 +141,9 @@
-

+

npm-publish - @10.9.0 + @10.9.2

Publish a package
diff --git a/deps/npm/docs/output/commands/npm-query.html b/deps/npm/docs/output/commands/npm-query.html index d85a2f6c27ff79..4a0210e62f58ea 100644 --- a/deps/npm/docs/output/commands/npm-query.html +++ b/deps/npm/docs/output/commands/npm-query.html @@ -141,9 +141,9 @@
-

+

npm-query - @10.9.0 + @10.9.2

Dependency selector query
diff --git a/deps/npm/docs/output/commands/npm-rebuild.html b/deps/npm/docs/output/commands/npm-rebuild.html index ff28b35b86bced..0a356397c4a95b 100644 --- a/deps/npm/docs/output/commands/npm-rebuild.html +++ b/deps/npm/docs/output/commands/npm-rebuild.html @@ -141,9 +141,9 @@
-

+

npm-rebuild - @10.9.0 + @10.9.2

Rebuild a package
diff --git a/deps/npm/docs/output/commands/npm-repo.html b/deps/npm/docs/output/commands/npm-repo.html index 8ee7e9d6d2d074..ad86f81682375d 100644 --- a/deps/npm/docs/output/commands/npm-repo.html +++ b/deps/npm/docs/output/commands/npm-repo.html @@ -141,9 +141,9 @@
-

+

npm-repo - @10.9.0 + @10.9.2

Open package repository page in the browser
diff --git a/deps/npm/docs/output/commands/npm-restart.html b/deps/npm/docs/output/commands/npm-restart.html index e3171bf280910b..4a2244b048ecbc 100644 --- a/deps/npm/docs/output/commands/npm-restart.html +++ b/deps/npm/docs/output/commands/npm-restart.html @@ -141,9 +141,9 @@
-

+

npm-restart - @10.9.0 + @10.9.2

Restart a package
diff --git a/deps/npm/docs/output/commands/npm-root.html b/deps/npm/docs/output/commands/npm-root.html index 4e2e82e5bb259c..499bc84358b3e5 100644 --- a/deps/npm/docs/output/commands/npm-root.html +++ b/deps/npm/docs/output/commands/npm-root.html @@ -141,9 +141,9 @@
-

+

npm-root - @10.9.0 + @10.9.2

Display npm root
diff --git a/deps/npm/docs/output/commands/npm-run-script.html b/deps/npm/docs/output/commands/npm-run-script.html index 4673e733f49fcd..9c0ef4fedbc16e 100644 --- a/deps/npm/docs/output/commands/npm-run-script.html +++ b/deps/npm/docs/output/commands/npm-run-script.html @@ -141,9 +141,9 @@
-

+

npm-run-script - @10.9.0 + @10.9.2

Run arbitrary package scripts
diff --git a/deps/npm/docs/output/commands/npm-sbom.html b/deps/npm/docs/output/commands/npm-sbom.html index 00508eca6cd91a..b648df1654e8a6 100644 --- a/deps/npm/docs/output/commands/npm-sbom.html +++ b/deps/npm/docs/output/commands/npm-sbom.html @@ -141,9 +141,9 @@
-

+

npm-sbom - @10.9.0 + @10.9.2

Generate a Software Bill of Materials (SBOM)
diff --git a/deps/npm/docs/output/commands/npm-search.html b/deps/npm/docs/output/commands/npm-search.html index edf4e437e65400..bfd70c2a8abe92 100644 --- a/deps/npm/docs/output/commands/npm-search.html +++ b/deps/npm/docs/output/commands/npm-search.html @@ -141,9 +141,9 @@
-

+

npm-search - @10.9.0 + @10.9.2

Search for packages
diff --git a/deps/npm/docs/output/commands/npm-shrinkwrap.html b/deps/npm/docs/output/commands/npm-shrinkwrap.html index f225abaed9218f..60d198f85ce67e 100644 --- a/deps/npm/docs/output/commands/npm-shrinkwrap.html +++ b/deps/npm/docs/output/commands/npm-shrinkwrap.html @@ -141,9 +141,9 @@
-

+

npm-shrinkwrap - @10.9.0 + @10.9.2

Lock down dependency versions for publication
diff --git a/deps/npm/docs/output/commands/npm-star.html b/deps/npm/docs/output/commands/npm-star.html index 04b36628ad2176..ccda8bb3297d70 100644 --- a/deps/npm/docs/output/commands/npm-star.html +++ b/deps/npm/docs/output/commands/npm-star.html @@ -141,9 +141,9 @@
-

+

npm-star - @10.9.0 + @10.9.2

Mark your favorite packages
diff --git a/deps/npm/docs/output/commands/npm-stars.html b/deps/npm/docs/output/commands/npm-stars.html index 3183aa047f1f89..2f9619190f0122 100644 --- a/deps/npm/docs/output/commands/npm-stars.html +++ b/deps/npm/docs/output/commands/npm-stars.html @@ -141,9 +141,9 @@
-

+

npm-stars - @10.9.0 + @10.9.2

View packages marked as favorites
diff --git a/deps/npm/docs/output/commands/npm-start.html b/deps/npm/docs/output/commands/npm-start.html index b81caefe0a4431..fad3ce05c3a2c9 100644 --- a/deps/npm/docs/output/commands/npm-start.html +++ b/deps/npm/docs/output/commands/npm-start.html @@ -141,9 +141,9 @@
-

+

npm-start - @10.9.0 + @10.9.2

Start a package
diff --git a/deps/npm/docs/output/commands/npm-stop.html b/deps/npm/docs/output/commands/npm-stop.html index 85d4c782a736e7..bc70086d9991d5 100644 --- a/deps/npm/docs/output/commands/npm-stop.html +++ b/deps/npm/docs/output/commands/npm-stop.html @@ -141,9 +141,9 @@
-

+

npm-stop - @10.9.0 + @10.9.2

Stop a package
diff --git a/deps/npm/docs/output/commands/npm-team.html b/deps/npm/docs/output/commands/npm-team.html index 6ad869fa3bed9e..e03442dc68bf9c 100644 --- a/deps/npm/docs/output/commands/npm-team.html +++ b/deps/npm/docs/output/commands/npm-team.html @@ -141,9 +141,9 @@
-

+

npm-team - @10.9.0 + @10.9.2

Manage organization teams and team memberships
diff --git a/deps/npm/docs/output/commands/npm-test.html b/deps/npm/docs/output/commands/npm-test.html index bce16c92e4f087..b30af4ed9b3b30 100644 --- a/deps/npm/docs/output/commands/npm-test.html +++ b/deps/npm/docs/output/commands/npm-test.html @@ -141,9 +141,9 @@
-

+

npm-test - @10.9.0 + @10.9.2

Test a package
diff --git a/deps/npm/docs/output/commands/npm-token.html b/deps/npm/docs/output/commands/npm-token.html index 99fcafd9b8f53b..353709607b4bd4 100644 --- a/deps/npm/docs/output/commands/npm-token.html +++ b/deps/npm/docs/output/commands/npm-token.html @@ -141,9 +141,9 @@
-

+

npm-token - @10.9.0 + @10.9.2

Manage your authentication tokens
diff --git a/deps/npm/docs/output/commands/npm-uninstall.html b/deps/npm/docs/output/commands/npm-uninstall.html index dd8e7a2234604e..633dbbe58f933e 100644 --- a/deps/npm/docs/output/commands/npm-uninstall.html +++ b/deps/npm/docs/output/commands/npm-uninstall.html @@ -141,9 +141,9 @@
-

+

npm-uninstall - @10.9.0 + @10.9.2

Remove a package
diff --git a/deps/npm/docs/output/commands/npm-unpublish.html b/deps/npm/docs/output/commands/npm-unpublish.html index 56e2c59f62addb..e4f4090936d4ff 100644 --- a/deps/npm/docs/output/commands/npm-unpublish.html +++ b/deps/npm/docs/output/commands/npm-unpublish.html @@ -141,9 +141,9 @@
-

+

npm-unpublish - @10.9.0 + @10.9.2

Remove a package from the registry
diff --git a/deps/npm/docs/output/commands/npm-unstar.html b/deps/npm/docs/output/commands/npm-unstar.html index 710d8a947d665f..741ed8e707a4f9 100644 --- a/deps/npm/docs/output/commands/npm-unstar.html +++ b/deps/npm/docs/output/commands/npm-unstar.html @@ -141,9 +141,9 @@
-

+

npm-unstar - @10.9.0 + @10.9.2

Remove an item from your favorite packages
diff --git a/deps/npm/docs/output/commands/npm-update.html b/deps/npm/docs/output/commands/npm-update.html index e587ec92f8614a..287ed19fe9f5f6 100644 --- a/deps/npm/docs/output/commands/npm-update.html +++ b/deps/npm/docs/output/commands/npm-update.html @@ -141,9 +141,9 @@
-

+

npm-update - @10.9.0 + @10.9.2

Update packages
diff --git a/deps/npm/docs/output/commands/npm-version.html b/deps/npm/docs/output/commands/npm-version.html index 196a0236093673..43b978ffa94c80 100644 --- a/deps/npm/docs/output/commands/npm-version.html +++ b/deps/npm/docs/output/commands/npm-version.html @@ -141,9 +141,9 @@
-

+

npm-version - @10.9.0 + @10.9.2

Bump a package version
diff --git a/deps/npm/docs/output/commands/npm-view.html b/deps/npm/docs/output/commands/npm-view.html index 0eebee3037748d..1e388cfb922462 100644 --- a/deps/npm/docs/output/commands/npm-view.html +++ b/deps/npm/docs/output/commands/npm-view.html @@ -141,9 +141,9 @@
-

+

npm-view - @10.9.0 + @10.9.2

View registry info
diff --git a/deps/npm/docs/output/commands/npm-whoami.html b/deps/npm/docs/output/commands/npm-whoami.html index 0d1fa5ea9b87d7..944a762ad4aea1 100644 --- a/deps/npm/docs/output/commands/npm-whoami.html +++ b/deps/npm/docs/output/commands/npm-whoami.html @@ -141,9 +141,9 @@
-

+

npm-whoami - @10.9.0 + @10.9.2

Display npm username
diff --git a/deps/npm/docs/output/commands/npm.html b/deps/npm/docs/output/commands/npm.html index 005009c1dba277..eae0fdc7b20659 100644 --- a/deps/npm/docs/output/commands/npm.html +++ b/deps/npm/docs/output/commands/npm.html @@ -141,9 +141,9 @@
-

+

npm - @10.9.0 + @10.9.2

javascript package manager
@@ -158,7 +158,7 @@

Table of contents

Note: This command is unaware of workspaces.

Version

-

10.9.0

+

10.9.2

Description

npm is the package manager for the Node JavaScript platform. It puts modules in place so that node can find them, and manages dependency diff --git a/deps/npm/docs/output/commands/npx.html b/deps/npm/docs/output/commands/npx.html index d3239ed320bb70..ec5547306359f7 100644 --- a/deps/npm/docs/output/commands/npx.html +++ b/deps/npm/docs/output/commands/npx.html @@ -141,9 +141,9 @@

-

+

npx - @10.9.0 + @10.9.2

Run a command from a local or remote npm package
diff --git a/deps/npm/docs/output/configuring-npm/folders.html b/deps/npm/docs/output/configuring-npm/folders.html index f23aef6354f73d..daca0c019a942b 100644 --- a/deps/npm/docs/output/configuring-npm/folders.html +++ b/deps/npm/docs/output/configuring-npm/folders.html @@ -141,9 +141,9 @@
-

+

folders - @10.9.0 + @10.9.2

Folder Structures Used by npm
diff --git a/deps/npm/docs/output/configuring-npm/install.html b/deps/npm/docs/output/configuring-npm/install.html index cddbcaae2de8be..abda3bcc06ab62 100644 --- a/deps/npm/docs/output/configuring-npm/install.html +++ b/deps/npm/docs/output/configuring-npm/install.html @@ -141,9 +141,9 @@
-

+

install - @10.9.0 + @10.9.2

Download and install node and npm
diff --git a/deps/npm/docs/output/configuring-npm/npm-global.html b/deps/npm/docs/output/configuring-npm/npm-global.html index f23aef6354f73d..daca0c019a942b 100644 --- a/deps/npm/docs/output/configuring-npm/npm-global.html +++ b/deps/npm/docs/output/configuring-npm/npm-global.html @@ -141,9 +141,9 @@
-

+

folders - @10.9.0 + @10.9.2

Folder Structures Used by npm
diff --git a/deps/npm/docs/output/configuring-npm/npm-json.html b/deps/npm/docs/output/configuring-npm/npm-json.html index 0cd27f8d82fdd8..1645e91a762b8f 100644 --- a/deps/npm/docs/output/configuring-npm/npm-json.html +++ b/deps/npm/docs/output/configuring-npm/npm-json.html @@ -141,9 +141,9 @@
-

+

package.json - @10.9.0 + @10.9.2

Specifics of npm's package.json handling
diff --git a/deps/npm/docs/output/configuring-npm/npm-shrinkwrap-json.html b/deps/npm/docs/output/configuring-npm/npm-shrinkwrap-json.html index 5f20e99541a7a7..65f0b0c184bef9 100644 --- a/deps/npm/docs/output/configuring-npm/npm-shrinkwrap-json.html +++ b/deps/npm/docs/output/configuring-npm/npm-shrinkwrap-json.html @@ -141,9 +141,9 @@
-

+

npm-shrinkwrap.json - @10.9.0 + @10.9.2

A publishable lockfile
diff --git a/deps/npm/docs/output/configuring-npm/npmrc.html b/deps/npm/docs/output/configuring-npm/npmrc.html index a20c3d807d3d83..e5e94afe63a94c 100644 --- a/deps/npm/docs/output/configuring-npm/npmrc.html +++ b/deps/npm/docs/output/configuring-npm/npmrc.html @@ -141,9 +141,9 @@
-

+

npmrc - @10.9.0 + @10.9.2

The npm config files
diff --git a/deps/npm/docs/output/configuring-npm/package-json.html b/deps/npm/docs/output/configuring-npm/package-json.html index 0cd27f8d82fdd8..1645e91a762b8f 100644 --- a/deps/npm/docs/output/configuring-npm/package-json.html +++ b/deps/npm/docs/output/configuring-npm/package-json.html @@ -141,9 +141,9 @@
-

+

package.json - @10.9.0 + @10.9.2

Specifics of npm's package.json handling
diff --git a/deps/npm/docs/output/configuring-npm/package-lock-json.html b/deps/npm/docs/output/configuring-npm/package-lock-json.html index 58598df7ae4fbc..80fa8bf8bd4ac8 100644 --- a/deps/npm/docs/output/configuring-npm/package-lock-json.html +++ b/deps/npm/docs/output/configuring-npm/package-lock-json.html @@ -141,9 +141,9 @@
-

+

package-lock.json - @10.9.0 + @10.9.2

A manifestation of the manifest
diff --git a/deps/npm/docs/output/using-npm/config.html b/deps/npm/docs/output/using-npm/config.html index 5d70bb7e8e803e..487cc56439e9ed 100644 --- a/deps/npm/docs/output/using-npm/config.html +++ b/deps/npm/docs/output/using-npm/config.html @@ -141,9 +141,9 @@
-

+

config - @10.9.0 + @10.9.2

More than you probably want to know about npm configuration
diff --git a/deps/npm/docs/output/using-npm/dependency-selectors.html b/deps/npm/docs/output/using-npm/dependency-selectors.html index 008377b3ab55ff..24e34408de9c52 100644 --- a/deps/npm/docs/output/using-npm/dependency-selectors.html +++ b/deps/npm/docs/output/using-npm/dependency-selectors.html @@ -141,9 +141,9 @@
-

+

Dependency Selector Syntax & Querying - @10.9.0 + @10.9.2

Dependency Selector Syntax & Querying
diff --git a/deps/npm/docs/output/using-npm/developers.html b/deps/npm/docs/output/using-npm/developers.html index e75223cd623a54..d7848c6e6647f3 100644 --- a/deps/npm/docs/output/using-npm/developers.html +++ b/deps/npm/docs/output/using-npm/developers.html @@ -141,9 +141,9 @@
-

+

developers - @10.9.0 + @10.9.2

Developer Guide
diff --git a/deps/npm/docs/output/using-npm/logging.html b/deps/npm/docs/output/using-npm/logging.html index 5499bd53e285f2..d1e6b0a7532253 100644 --- a/deps/npm/docs/output/using-npm/logging.html +++ b/deps/npm/docs/output/using-npm/logging.html @@ -141,9 +141,9 @@
-

+

Logging - @10.9.0 + @10.9.2

Why, What & How We Log
diff --git a/deps/npm/docs/output/using-npm/orgs.html b/deps/npm/docs/output/using-npm/orgs.html index c9ed80f6421c5e..5b7007b3618406 100644 --- a/deps/npm/docs/output/using-npm/orgs.html +++ b/deps/npm/docs/output/using-npm/orgs.html @@ -141,9 +141,9 @@
-

+

orgs - @10.9.0 + @10.9.2

Working with Teams & Orgs
diff --git a/deps/npm/docs/output/using-npm/package-spec.html b/deps/npm/docs/output/using-npm/package-spec.html index a370699bb555bd..eee81153269653 100644 --- a/deps/npm/docs/output/using-npm/package-spec.html +++ b/deps/npm/docs/output/using-npm/package-spec.html @@ -141,9 +141,9 @@
-

+

package-spec - @10.9.0 + @10.9.2

Package name specifier
diff --git a/deps/npm/docs/output/using-npm/registry.html b/deps/npm/docs/output/using-npm/registry.html index 9d9f8a539333c9..bd464be3fd67c9 100644 --- a/deps/npm/docs/output/using-npm/registry.html +++ b/deps/npm/docs/output/using-npm/registry.html @@ -141,9 +141,9 @@
-

+

registry - @10.9.0 + @10.9.2

The JavaScript Package Registry
diff --git a/deps/npm/docs/output/using-npm/removal.html b/deps/npm/docs/output/using-npm/removal.html index 2668dd9dd8a440..f0ccb74eb50811 100644 --- a/deps/npm/docs/output/using-npm/removal.html +++ b/deps/npm/docs/output/using-npm/removal.html @@ -141,9 +141,9 @@
-

+

removal - @10.9.0 + @10.9.2

Cleaning the Slate
diff --git a/deps/npm/docs/output/using-npm/scope.html b/deps/npm/docs/output/using-npm/scope.html index 0441f8ef703e95..35ed91f147f107 100644 --- a/deps/npm/docs/output/using-npm/scope.html +++ b/deps/npm/docs/output/using-npm/scope.html @@ -141,9 +141,9 @@
-

+

scope - @10.9.0 + @10.9.2

Scoped packages
diff --git a/deps/npm/docs/output/using-npm/scripts.html b/deps/npm/docs/output/using-npm/scripts.html index ff37c2ede18062..9982862814e73a 100644 --- a/deps/npm/docs/output/using-npm/scripts.html +++ b/deps/npm/docs/output/using-npm/scripts.html @@ -141,9 +141,9 @@
-

+

scripts - @10.9.0 + @10.9.2

How npm handles the "scripts" field
diff --git a/deps/npm/docs/output/using-npm/workspaces.html b/deps/npm/docs/output/using-npm/workspaces.html index 19195cc868db4b..5e2756479c7636 100644 --- a/deps/npm/docs/output/using-npm/workspaces.html +++ b/deps/npm/docs/output/using-npm/workspaces.html @@ -141,9 +141,9 @@
-

+

workspaces - @10.9.0 + @10.9.2

Working with workspaces
diff --git a/deps/npm/lib/cli.js b/deps/npm/lib/cli.js index e11729fe3205b9..00b4fc0bd7fb72 100644 --- a/deps/npm/lib/cli.js +++ b/deps/npm/lib/cli.js @@ -1,3 +1,11 @@ +try { + const { enableCompileCache } = require('node:module') + /* istanbul ignore next */ + if (enableCompileCache) { + enableCompileCache() + } +} catch (e) { /* istanbul ignore next */ } + const validateEngines = require('./cli/validate-engines.js') const cliEntry = require('node:path').resolve(__dirname, 'cli/entry.js') diff --git a/deps/npm/man/man1/npm-access.1 b/deps/npm/man/man1/npm-access.1 index 602193f3b8e3e8..96beab28fc365d 100644 --- a/deps/npm/man/man1/npm-access.1 +++ b/deps/npm/man/man1/npm-access.1 @@ -1,4 +1,4 @@ -.TH "NPM-ACCESS" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-ACCESS" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-access\fR - Set access level on published packages .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-adduser.1 b/deps/npm/man/man1/npm-adduser.1 index 467384ec0ec40e..0096f28229122d 100644 --- a/deps/npm/man/man1/npm-adduser.1 +++ b/deps/npm/man/man1/npm-adduser.1 @@ -1,4 +1,4 @@ -.TH "NPM-ADDUSER" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-ADDUSER" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-adduser\fR - Add a registry user account .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-audit.1 b/deps/npm/man/man1/npm-audit.1 index 64bf5d5461a157..38b977d3871d59 100644 --- a/deps/npm/man/man1/npm-audit.1 +++ b/deps/npm/man/man1/npm-audit.1 @@ -1,4 +1,4 @@ -.TH "NPM-AUDIT" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-AUDIT" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-audit\fR - Run a security audit .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-bugs.1 b/deps/npm/man/man1/npm-bugs.1 index c25b04b6814137..92c57c2c3c4146 100644 --- a/deps/npm/man/man1/npm-bugs.1 +++ b/deps/npm/man/man1/npm-bugs.1 @@ -1,4 +1,4 @@ -.TH "NPM-BUGS" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-BUGS" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-bugs\fR - Report bugs for a package in a web browser .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-cache.1 b/deps/npm/man/man1/npm-cache.1 index e9da044a5b4479..da188a0914e016 100644 --- a/deps/npm/man/man1/npm-cache.1 +++ b/deps/npm/man/man1/npm-cache.1 @@ -1,4 +1,4 @@ -.TH "NPM-CACHE" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-CACHE" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-cache\fR - Manipulates packages cache .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-ci.1 b/deps/npm/man/man1/npm-ci.1 index 1ad1419cb1f98f..1ae5e934e2d2b7 100644 --- a/deps/npm/man/man1/npm-ci.1 +++ b/deps/npm/man/man1/npm-ci.1 @@ -1,4 +1,4 @@ -.TH "NPM-CI" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-CI" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-ci\fR - Clean install a project .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-completion.1 b/deps/npm/man/man1/npm-completion.1 index 6f246c436cead3..ea44a0c9c56a51 100644 --- a/deps/npm/man/man1/npm-completion.1 +++ b/deps/npm/man/man1/npm-completion.1 @@ -1,4 +1,4 @@ -.TH "NPM-COMPLETION" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-COMPLETION" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-completion\fR - Tab Completion for npm .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-config.1 b/deps/npm/man/man1/npm-config.1 index 1bef58e188b68c..dbe609ba3b727f 100644 --- a/deps/npm/man/man1/npm-config.1 +++ b/deps/npm/man/man1/npm-config.1 @@ -1,4 +1,4 @@ -.TH "NPM-CONFIG" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-CONFIG" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-config\fR - Manage the npm configuration files .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-dedupe.1 b/deps/npm/man/man1/npm-dedupe.1 index b1e21f0478ed4f..530de6344d5f4e 100644 --- a/deps/npm/man/man1/npm-dedupe.1 +++ b/deps/npm/man/man1/npm-dedupe.1 @@ -1,4 +1,4 @@ -.TH "NPM-DEDUPE" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-DEDUPE" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-dedupe\fR - Reduce duplication in the package tree .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-deprecate.1 b/deps/npm/man/man1/npm-deprecate.1 index 31e9e56cd95382..0333772b97c690 100644 --- a/deps/npm/man/man1/npm-deprecate.1 +++ b/deps/npm/man/man1/npm-deprecate.1 @@ -1,4 +1,4 @@ -.TH "NPM-DEPRECATE" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-DEPRECATE" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-deprecate\fR - Deprecate a version of a package .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-diff.1 b/deps/npm/man/man1/npm-diff.1 index c29ae3266d1543..168f4397430854 100644 --- a/deps/npm/man/man1/npm-diff.1 +++ b/deps/npm/man/man1/npm-diff.1 @@ -1,4 +1,4 @@ -.TH "NPM-DIFF" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-DIFF" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-diff\fR - The registry diff command .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-dist-tag.1 b/deps/npm/man/man1/npm-dist-tag.1 index 46e5cf8ac3492a..97e2927ac1e2fa 100644 --- a/deps/npm/man/man1/npm-dist-tag.1 +++ b/deps/npm/man/man1/npm-dist-tag.1 @@ -1,4 +1,4 @@ -.TH "NPM-DIST-TAG" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-DIST-TAG" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-dist-tag\fR - Modify package distribution tags .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-docs.1 b/deps/npm/man/man1/npm-docs.1 index 6cda2d87abe163..6b09a835bbf318 100644 --- a/deps/npm/man/man1/npm-docs.1 +++ b/deps/npm/man/man1/npm-docs.1 @@ -1,4 +1,4 @@ -.TH "NPM-DOCS" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-DOCS" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-docs\fR - Open documentation for a package in a web browser .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-doctor.1 b/deps/npm/man/man1/npm-doctor.1 index 7a67ed1e6b32b1..9a0374b0896b52 100644 --- a/deps/npm/man/man1/npm-doctor.1 +++ b/deps/npm/man/man1/npm-doctor.1 @@ -1,4 +1,4 @@ -.TH "NPM-DOCTOR" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-DOCTOR" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-doctor\fR - Check the health of your npm environment .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-edit.1 b/deps/npm/man/man1/npm-edit.1 index 5ae7e24b67d60e..779feeede13656 100644 --- a/deps/npm/man/man1/npm-edit.1 +++ b/deps/npm/man/man1/npm-edit.1 @@ -1,4 +1,4 @@ -.TH "NPM-EDIT" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-EDIT" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-edit\fR - Edit an installed package .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-exec.1 b/deps/npm/man/man1/npm-exec.1 index 20c21e67919d65..3b5fe4c425ee3e 100644 --- a/deps/npm/man/man1/npm-exec.1 +++ b/deps/npm/man/man1/npm-exec.1 @@ -1,4 +1,4 @@ -.TH "NPM-EXEC" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-EXEC" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-exec\fR - Run a command from a local or remote npm package .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-explain.1 b/deps/npm/man/man1/npm-explain.1 index 65ef0e086e2ce0..5f86d19236fdb0 100644 --- a/deps/npm/man/man1/npm-explain.1 +++ b/deps/npm/man/man1/npm-explain.1 @@ -1,4 +1,4 @@ -.TH "NPM-EXPLAIN" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-EXPLAIN" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-explain\fR - Explain installed packages .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-explore.1 b/deps/npm/man/man1/npm-explore.1 index 6b92a81c2abc0d..eb71089b62446f 100644 --- a/deps/npm/man/man1/npm-explore.1 +++ b/deps/npm/man/man1/npm-explore.1 @@ -1,4 +1,4 @@ -.TH "NPM-EXPLORE" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-EXPLORE" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-explore\fR - Browse an installed package .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-find-dupes.1 b/deps/npm/man/man1/npm-find-dupes.1 index 261313e1b23ad1..88a42adcec1f20 100644 --- a/deps/npm/man/man1/npm-find-dupes.1 +++ b/deps/npm/man/man1/npm-find-dupes.1 @@ -1,4 +1,4 @@ -.TH "NPM-FIND-DUPES" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-FIND-DUPES" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-find-dupes\fR - Find duplication in the package tree .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-fund.1 b/deps/npm/man/man1/npm-fund.1 index b87d57d20c0f50..31c5ceb7ff8c1f 100644 --- a/deps/npm/man/man1/npm-fund.1 +++ b/deps/npm/man/man1/npm-fund.1 @@ -1,4 +1,4 @@ -.TH "NPM-FUND" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-FUND" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-fund\fR - Retrieve funding information .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-help-search.1 b/deps/npm/man/man1/npm-help-search.1 index 9660ca29697e9f..c954795c2778c7 100644 --- a/deps/npm/man/man1/npm-help-search.1 +++ b/deps/npm/man/man1/npm-help-search.1 @@ -1,4 +1,4 @@ -.TH "NPM-HELP-SEARCH" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-HELP-SEARCH" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-help-search\fR - Search npm help documentation .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-help.1 b/deps/npm/man/man1/npm-help.1 index a8c70fdc88a9ae..2d0fc6e3a2c0e7 100644 --- a/deps/npm/man/man1/npm-help.1 +++ b/deps/npm/man/man1/npm-help.1 @@ -1,4 +1,4 @@ -.TH "NPM-HELP" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-HELP" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-help\fR - Get help on npm .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-hook.1 b/deps/npm/man/man1/npm-hook.1 index 2c07a5afadbb64..8d5d9334692b9e 100644 --- a/deps/npm/man/man1/npm-hook.1 +++ b/deps/npm/man/man1/npm-hook.1 @@ -1,4 +1,4 @@ -.TH "NPM-HOOK" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-HOOK" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-hook\fR - Manage registry hooks .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-init.1 b/deps/npm/man/man1/npm-init.1 index cb7339dc0f4ff0..1c1b008baa1deb 100644 --- a/deps/npm/man/man1/npm-init.1 +++ b/deps/npm/man/man1/npm-init.1 @@ -1,4 +1,4 @@ -.TH "NPM-INIT" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-INIT" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-init\fR - Create a package.json file .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-install-ci-test.1 b/deps/npm/man/man1/npm-install-ci-test.1 index 52defde693a506..4bbbb51140521b 100644 --- a/deps/npm/man/man1/npm-install-ci-test.1 +++ b/deps/npm/man/man1/npm-install-ci-test.1 @@ -1,4 +1,4 @@ -.TH "NPM-INSTALL-CI-TEST" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-INSTALL-CI-TEST" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-install-ci-test\fR - Install a project with a clean slate and run tests .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-install-test.1 b/deps/npm/man/man1/npm-install-test.1 index e2023e1a985474..eef940c066c709 100644 --- a/deps/npm/man/man1/npm-install-test.1 +++ b/deps/npm/man/man1/npm-install-test.1 @@ -1,4 +1,4 @@ -.TH "NPM-INSTALL-TEST" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-INSTALL-TEST" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-install-test\fR - Install package(s) and run tests .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-install.1 b/deps/npm/man/man1/npm-install.1 index 59b3a8c81ac73f..bbe4a43c10863e 100644 --- a/deps/npm/man/man1/npm-install.1 +++ b/deps/npm/man/man1/npm-install.1 @@ -1,4 +1,4 @@ -.TH "NPM-INSTALL" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-INSTALL" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-install\fR - Install a package .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-link.1 b/deps/npm/man/man1/npm-link.1 index e6106495400fdc..9aca1f4a7fb80d 100644 --- a/deps/npm/man/man1/npm-link.1 +++ b/deps/npm/man/man1/npm-link.1 @@ -1,4 +1,4 @@ -.TH "NPM-LINK" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-LINK" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-link\fR - Symlink a package folder .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-login.1 b/deps/npm/man/man1/npm-login.1 index 36f2bf2d0f35dc..a5e0cb71bb0d98 100644 --- a/deps/npm/man/man1/npm-login.1 +++ b/deps/npm/man/man1/npm-login.1 @@ -1,4 +1,4 @@ -.TH "NPM-LOGIN" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-LOGIN" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-login\fR - Login to a registry user account .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-logout.1 b/deps/npm/man/man1/npm-logout.1 index 78cba39ccd85c9..8ac69429ea320d 100644 --- a/deps/npm/man/man1/npm-logout.1 +++ b/deps/npm/man/man1/npm-logout.1 @@ -1,4 +1,4 @@ -.TH "NPM-LOGOUT" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-LOGOUT" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-logout\fR - Log out of the registry .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-ls.1 b/deps/npm/man/man1/npm-ls.1 index b0364fe1531753..47d20120898b59 100644 --- a/deps/npm/man/man1/npm-ls.1 +++ b/deps/npm/man/man1/npm-ls.1 @@ -1,4 +1,4 @@ -.TH "NPM-LS" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-LS" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-ls\fR - List installed packages .SS "Synopsis" @@ -20,7 +20,7 @@ Positional arguments are \fBname@version-range\fR identifiers, which will limit .P .RS 2 .nf -npm@10.9.0 /path/to/npm +npm@10.9.2 /path/to/npm └─┬ init-package-json@0.0.4 └── promzard@0.1.5 .fi diff --git a/deps/npm/man/man1/npm-org.1 b/deps/npm/man/man1/npm-org.1 index 0465a6b5bddb54..88df3325a344fd 100644 --- a/deps/npm/man/man1/npm-org.1 +++ b/deps/npm/man/man1/npm-org.1 @@ -1,4 +1,4 @@ -.TH "NPM-ORG" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-ORG" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-org\fR - Manage orgs .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-outdated.1 b/deps/npm/man/man1/npm-outdated.1 index ad1cf07823630f..17aabbd0857183 100644 --- a/deps/npm/man/man1/npm-outdated.1 +++ b/deps/npm/man/man1/npm-outdated.1 @@ -1,4 +1,4 @@ -.TH "NPM-OUTDATED" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-OUTDATED" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-outdated\fR - Check for outdated packages .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-owner.1 b/deps/npm/man/man1/npm-owner.1 index 5842e5a51d125f..f01e67e6464e7f 100644 --- a/deps/npm/man/man1/npm-owner.1 +++ b/deps/npm/man/man1/npm-owner.1 @@ -1,4 +1,4 @@ -.TH "NPM-OWNER" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-OWNER" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-owner\fR - Manage package owners .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-pack.1 b/deps/npm/man/man1/npm-pack.1 index 6cc2acd4b580f7..b9b56a31e30eef 100644 --- a/deps/npm/man/man1/npm-pack.1 +++ b/deps/npm/man/man1/npm-pack.1 @@ -1,4 +1,4 @@ -.TH "NPM-PACK" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-PACK" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-pack\fR - Create a tarball from a package .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-ping.1 b/deps/npm/man/man1/npm-ping.1 index ad03cbbb6a1398..2d16431b030ea8 100644 --- a/deps/npm/man/man1/npm-ping.1 +++ b/deps/npm/man/man1/npm-ping.1 @@ -1,4 +1,4 @@ -.TH "NPM-PING" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-PING" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-ping\fR - Ping npm registry .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-pkg.1 b/deps/npm/man/man1/npm-pkg.1 index 2f45876234ec3d..e55a0280d1211b 100644 --- a/deps/npm/man/man1/npm-pkg.1 +++ b/deps/npm/man/man1/npm-pkg.1 @@ -1,4 +1,4 @@ -.TH "NPM-PKG" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-PKG" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-pkg\fR - Manages your package.json .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-prefix.1 b/deps/npm/man/man1/npm-prefix.1 index cac44344c71d79..dca8d0c47497b4 100644 --- a/deps/npm/man/man1/npm-prefix.1 +++ b/deps/npm/man/man1/npm-prefix.1 @@ -1,4 +1,4 @@ -.TH "NPM-PREFIX" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-PREFIX" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-prefix\fR - Display prefix .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-profile.1 b/deps/npm/man/man1/npm-profile.1 index 28e3f41244ace1..89837b408f5a67 100644 --- a/deps/npm/man/man1/npm-profile.1 +++ b/deps/npm/man/man1/npm-profile.1 @@ -1,4 +1,4 @@ -.TH "NPM-PROFILE" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-PROFILE" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-profile\fR - Change settings on your registry profile .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-prune.1 b/deps/npm/man/man1/npm-prune.1 index 7634c75f0901fa..243426dffaf20f 100644 --- a/deps/npm/man/man1/npm-prune.1 +++ b/deps/npm/man/man1/npm-prune.1 @@ -1,4 +1,4 @@ -.TH "NPM-PRUNE" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-PRUNE" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-prune\fR - Remove extraneous packages .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-publish.1 b/deps/npm/man/man1/npm-publish.1 index c6724b6fe2ce51..1110b097182988 100644 --- a/deps/npm/man/man1/npm-publish.1 +++ b/deps/npm/man/man1/npm-publish.1 @@ -1,4 +1,4 @@ -.TH "NPM-PUBLISH" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-PUBLISH" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-publish\fR - Publish a package .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-query.1 b/deps/npm/man/man1/npm-query.1 index 0a3c6e665e7ecf..983f2cad388940 100644 --- a/deps/npm/man/man1/npm-query.1 +++ b/deps/npm/man/man1/npm-query.1 @@ -1,4 +1,4 @@ -.TH "NPM-QUERY" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-QUERY" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-query\fR - Dependency selector query .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-rebuild.1 b/deps/npm/man/man1/npm-rebuild.1 index e537cec373e3c6..396d9adf779cb1 100644 --- a/deps/npm/man/man1/npm-rebuild.1 +++ b/deps/npm/man/man1/npm-rebuild.1 @@ -1,4 +1,4 @@ -.TH "NPM-REBUILD" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-REBUILD" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-rebuild\fR - Rebuild a package .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-repo.1 b/deps/npm/man/man1/npm-repo.1 index 869541f6faaa51..b9c7bbdfbc233c 100644 --- a/deps/npm/man/man1/npm-repo.1 +++ b/deps/npm/man/man1/npm-repo.1 @@ -1,4 +1,4 @@ -.TH "NPM-REPO" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-REPO" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-repo\fR - Open package repository page in the browser .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-restart.1 b/deps/npm/man/man1/npm-restart.1 index b0e3b0deead7ef..1112bf9180cece 100644 --- a/deps/npm/man/man1/npm-restart.1 +++ b/deps/npm/man/man1/npm-restart.1 @@ -1,4 +1,4 @@ -.TH "NPM-RESTART" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-RESTART" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-restart\fR - Restart a package .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-root.1 b/deps/npm/man/man1/npm-root.1 index 94a132557e3979..2456bdba6d1816 100644 --- a/deps/npm/man/man1/npm-root.1 +++ b/deps/npm/man/man1/npm-root.1 @@ -1,4 +1,4 @@ -.TH "NPM-ROOT" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-ROOT" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-root\fR - Display npm root .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-run-script.1 b/deps/npm/man/man1/npm-run-script.1 index 424c733d6a0523..a05362361848d2 100644 --- a/deps/npm/man/man1/npm-run-script.1 +++ b/deps/npm/man/man1/npm-run-script.1 @@ -1,4 +1,4 @@ -.TH "NPM-RUN-SCRIPT" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-RUN-SCRIPT" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-run-script\fR - Run arbitrary package scripts .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-sbom.1 b/deps/npm/man/man1/npm-sbom.1 index 7bb8199c8b11c8..f784a53edd4c08 100644 --- a/deps/npm/man/man1/npm-sbom.1 +++ b/deps/npm/man/man1/npm-sbom.1 @@ -1,4 +1,4 @@ -.TH "NPM-SBOM" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-SBOM" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-sbom\fR - Generate a Software Bill of Materials (SBOM) .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-search.1 b/deps/npm/man/man1/npm-search.1 index 4fe67890bc949a..3ee7bcb7cb4e92 100644 --- a/deps/npm/man/man1/npm-search.1 +++ b/deps/npm/man/man1/npm-search.1 @@ -1,4 +1,4 @@ -.TH "NPM-SEARCH" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-SEARCH" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-search\fR - Search for packages .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-shrinkwrap.1 b/deps/npm/man/man1/npm-shrinkwrap.1 index ed3c0cb03ccca7..d7e64493a90754 100644 --- a/deps/npm/man/man1/npm-shrinkwrap.1 +++ b/deps/npm/man/man1/npm-shrinkwrap.1 @@ -1,4 +1,4 @@ -.TH "NPM-SHRINKWRAP" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-SHRINKWRAP" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-shrinkwrap\fR - Lock down dependency versions for publication .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-star.1 b/deps/npm/man/man1/npm-star.1 index 83d0ff230ec1bb..7c3a528ec3a04f 100644 --- a/deps/npm/man/man1/npm-star.1 +++ b/deps/npm/man/man1/npm-star.1 @@ -1,4 +1,4 @@ -.TH "NPM-STAR" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-STAR" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-star\fR - Mark your favorite packages .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-stars.1 b/deps/npm/man/man1/npm-stars.1 index bdeffec7a3b1b5..61d60d9534da40 100644 --- a/deps/npm/man/man1/npm-stars.1 +++ b/deps/npm/man/man1/npm-stars.1 @@ -1,4 +1,4 @@ -.TH "NPM-STARS" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-STARS" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-stars\fR - View packages marked as favorites .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-start.1 b/deps/npm/man/man1/npm-start.1 index c3cb48c6f72576..4c687a1ecba49e 100644 --- a/deps/npm/man/man1/npm-start.1 +++ b/deps/npm/man/man1/npm-start.1 @@ -1,4 +1,4 @@ -.TH "NPM-START" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-START" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-start\fR - Start a package .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-stop.1 b/deps/npm/man/man1/npm-stop.1 index 36ef105209b7f0..dadebe98c52834 100644 --- a/deps/npm/man/man1/npm-stop.1 +++ b/deps/npm/man/man1/npm-stop.1 @@ -1,4 +1,4 @@ -.TH "NPM-STOP" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-STOP" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-stop\fR - Stop a package .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-team.1 b/deps/npm/man/man1/npm-team.1 index 6712c8f92b23c8..42ee63f4743b6e 100644 --- a/deps/npm/man/man1/npm-team.1 +++ b/deps/npm/man/man1/npm-team.1 @@ -1,4 +1,4 @@ -.TH "NPM-TEAM" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-TEAM" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-team\fR - Manage organization teams and team memberships .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-test.1 b/deps/npm/man/man1/npm-test.1 index 5e63e9c6d76ab3..80eb6a1d7bb9e7 100644 --- a/deps/npm/man/man1/npm-test.1 +++ b/deps/npm/man/man1/npm-test.1 @@ -1,4 +1,4 @@ -.TH "NPM-TEST" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-TEST" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-test\fR - Test a package .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-token.1 b/deps/npm/man/man1/npm-token.1 index 29a2c361574121..1d14f81553b5c9 100644 --- a/deps/npm/man/man1/npm-token.1 +++ b/deps/npm/man/man1/npm-token.1 @@ -1,4 +1,4 @@ -.TH "NPM-TOKEN" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-TOKEN" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-token\fR - Manage your authentication tokens .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-uninstall.1 b/deps/npm/man/man1/npm-uninstall.1 index 0c10a60c1f07e7..f5e25641fc57c5 100644 --- a/deps/npm/man/man1/npm-uninstall.1 +++ b/deps/npm/man/man1/npm-uninstall.1 @@ -1,4 +1,4 @@ -.TH "NPM-UNINSTALL" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-UNINSTALL" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-uninstall\fR - Remove a package .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-unpublish.1 b/deps/npm/man/man1/npm-unpublish.1 index 098af2e59310bc..fcf62bbd7da263 100644 --- a/deps/npm/man/man1/npm-unpublish.1 +++ b/deps/npm/man/man1/npm-unpublish.1 @@ -1,4 +1,4 @@ -.TH "NPM-UNPUBLISH" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-UNPUBLISH" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-unpublish\fR - Remove a package from the registry .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-unstar.1 b/deps/npm/man/man1/npm-unstar.1 index 7326984aa8b4d2..e9edf3ab6634dd 100644 --- a/deps/npm/man/man1/npm-unstar.1 +++ b/deps/npm/man/man1/npm-unstar.1 @@ -1,4 +1,4 @@ -.TH "NPM-UNSTAR" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-UNSTAR" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-unstar\fR - Remove an item from your favorite packages .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-update.1 b/deps/npm/man/man1/npm-update.1 index 797a9e601cf3d2..052008d73485c9 100644 --- a/deps/npm/man/man1/npm-update.1 +++ b/deps/npm/man/man1/npm-update.1 @@ -1,4 +1,4 @@ -.TH "NPM-UPDATE" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-UPDATE" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-update\fR - Update packages .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-version.1 b/deps/npm/man/man1/npm-version.1 index d0a7dcee6fea65..f79a0c84c3f8fd 100644 --- a/deps/npm/man/man1/npm-version.1 +++ b/deps/npm/man/man1/npm-version.1 @@ -1,4 +1,4 @@ -.TH "NPM-VERSION" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-VERSION" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-version\fR - Bump a package version .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-view.1 b/deps/npm/man/man1/npm-view.1 index 2925f8d7d2cffe..e862ddc4b8780d 100644 --- a/deps/npm/man/man1/npm-view.1 +++ b/deps/npm/man/man1/npm-view.1 @@ -1,4 +1,4 @@ -.TH "NPM-VIEW" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-VIEW" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-view\fR - View registry info .SS "Synopsis" diff --git a/deps/npm/man/man1/npm-whoami.1 b/deps/npm/man/man1/npm-whoami.1 index f283db5c9d247a..a003f634a2b293 100644 --- a/deps/npm/man/man1/npm-whoami.1 +++ b/deps/npm/man/man1/npm-whoami.1 @@ -1,4 +1,4 @@ -.TH "NPM-WHOAMI" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM-WHOAMI" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-whoami\fR - Display npm username .SS "Synopsis" diff --git a/deps/npm/man/man1/npm.1 b/deps/npm/man/man1/npm.1 index b2034bc50fa5e3..28e8774ad1b4d0 100644 --- a/deps/npm/man/man1/npm.1 +++ b/deps/npm/man/man1/npm.1 @@ -1,4 +1,4 @@ -.TH "NPM" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPM" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm\fR - javascript package manager .SS "Synopsis" @@ -12,7 +12,7 @@ npm Note: This command is unaware of workspaces. .SS "Version" .P -10.9.0 +10.9.2 .SS "Description" .P npm is the package manager for the Node JavaScript platform. It puts modules in place so that node can find them, and manages dependency conflicts intelligently. diff --git a/deps/npm/man/man1/npx.1 b/deps/npm/man/man1/npx.1 index a1000443cc665e..9b4ba1af1dc365 100644 --- a/deps/npm/man/man1/npx.1 +++ b/deps/npm/man/man1/npx.1 @@ -1,4 +1,4 @@ -.TH "NPX" "1" "October 2024" "NPM@10.9.0" "" +.TH "NPX" "1" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpx\fR - Run a command from a local or remote npm package .SS "Synopsis" diff --git a/deps/npm/man/man5/folders.5 b/deps/npm/man/man5/folders.5 index 6e3045729c46ca..8eb8a8230333f9 100644 --- a/deps/npm/man/man5/folders.5 +++ b/deps/npm/man/man5/folders.5 @@ -1,4 +1,4 @@ -.TH "FOLDERS" "5" "October 2024" "NPM@10.9.0" "" +.TH "FOLDERS" "5" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBfolders\fR - Folder Structures Used by npm .SS "Description" diff --git a/deps/npm/man/man5/install.5 b/deps/npm/man/man5/install.5 index f5fabf02d7431b..0b75adf308685a 100644 --- a/deps/npm/man/man5/install.5 +++ b/deps/npm/man/man5/install.5 @@ -1,4 +1,4 @@ -.TH "INSTALL" "5" "October 2024" "NPM@10.9.0" "" +.TH "INSTALL" "5" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBinstall\fR - Download and install node and npm .SS "Description" diff --git a/deps/npm/man/man5/npm-global.5 b/deps/npm/man/man5/npm-global.5 index 6e3045729c46ca..8eb8a8230333f9 100644 --- a/deps/npm/man/man5/npm-global.5 +++ b/deps/npm/man/man5/npm-global.5 @@ -1,4 +1,4 @@ -.TH "FOLDERS" "5" "October 2024" "NPM@10.9.0" "" +.TH "FOLDERS" "5" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBfolders\fR - Folder Structures Used by npm .SS "Description" diff --git a/deps/npm/man/man5/npm-json.5 b/deps/npm/man/man5/npm-json.5 index 49773dda01d706..81fa9a8c95b8dc 100644 --- a/deps/npm/man/man5/npm-json.5 +++ b/deps/npm/man/man5/npm-json.5 @@ -1,4 +1,4 @@ -.TH "PACKAGE.JSON" "5" "October 2024" "NPM@10.9.0" "" +.TH "PACKAGE.JSON" "5" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBpackage.json\fR - Specifics of npm's package.json handling .SS "Description" diff --git a/deps/npm/man/man5/npm-shrinkwrap-json.5 b/deps/npm/man/man5/npm-shrinkwrap-json.5 index d1f394d23aa1ce..7e8efb285e959e 100644 --- a/deps/npm/man/man5/npm-shrinkwrap-json.5 +++ b/deps/npm/man/man5/npm-shrinkwrap-json.5 @@ -1,4 +1,4 @@ -.TH "NPM-SHRINKWRAP.JSON" "5" "October 2024" "NPM@10.9.0" "" +.TH "NPM-SHRINKWRAP.JSON" "5" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpm-shrinkwrap.json\fR - A publishable lockfile .SS "Description" diff --git a/deps/npm/man/man5/npmrc.5 b/deps/npm/man/man5/npmrc.5 index 3b78f989c23545..947a0fe433faa3 100644 --- a/deps/npm/man/man5/npmrc.5 +++ b/deps/npm/man/man5/npmrc.5 @@ -1,4 +1,4 @@ -.TH "NPMRC" "5" "October 2024" "NPM@10.9.0" "" +.TH "NPMRC" "5" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBnpmrc\fR - The npm config files .SS "Description" diff --git a/deps/npm/man/man5/package-json.5 b/deps/npm/man/man5/package-json.5 index 49773dda01d706..81fa9a8c95b8dc 100644 --- a/deps/npm/man/man5/package-json.5 +++ b/deps/npm/man/man5/package-json.5 @@ -1,4 +1,4 @@ -.TH "PACKAGE.JSON" "5" "October 2024" "NPM@10.9.0" "" +.TH "PACKAGE.JSON" "5" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBpackage.json\fR - Specifics of npm's package.json handling .SS "Description" diff --git a/deps/npm/man/man5/package-lock-json.5 b/deps/npm/man/man5/package-lock-json.5 index df3dcca1c4fa75..78deecab3757c1 100644 --- a/deps/npm/man/man5/package-lock-json.5 +++ b/deps/npm/man/man5/package-lock-json.5 @@ -1,4 +1,4 @@ -.TH "PACKAGE-LOCK.JSON" "5" "October 2024" "NPM@10.9.0" "" +.TH "PACKAGE-LOCK.JSON" "5" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBpackage-lock.json\fR - A manifestation of the manifest .SS "Description" diff --git a/deps/npm/man/man7/config.7 b/deps/npm/man/man7/config.7 index ee166c9d4ccee9..0ce1f25f38d5d3 100644 --- a/deps/npm/man/man7/config.7 +++ b/deps/npm/man/man7/config.7 @@ -1,4 +1,4 @@ -.TH "CONFIG" "7" "October 2024" "NPM@10.9.0" "" +.TH "CONFIG" "7" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBconfig\fR - More than you probably want to know about npm configuration .SS "Description" diff --git a/deps/npm/man/man7/dependency-selectors.7 b/deps/npm/man/man7/dependency-selectors.7 index 54d80fee4dc58d..e15648cb400af4 100644 --- a/deps/npm/man/man7/dependency-selectors.7 +++ b/deps/npm/man/man7/dependency-selectors.7 @@ -1,4 +1,4 @@ -.TH "QUERYING" "7" "October 2024" "NPM@10.9.0" "" +.TH "QUERYING" "7" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBQuerying\fR - Dependency Selector Syntax & Querying .SS "Description" diff --git a/deps/npm/man/man7/developers.7 b/deps/npm/man/man7/developers.7 index aa907f3f51deda..a5d429c60a5cef 100644 --- a/deps/npm/man/man7/developers.7 +++ b/deps/npm/man/man7/developers.7 @@ -1,4 +1,4 @@ -.TH "DEVELOPERS" "7" "October 2024" "NPM@10.9.0" "" +.TH "DEVELOPERS" "7" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBdevelopers\fR - Developer Guide .SS "Description" diff --git a/deps/npm/man/man7/logging.7 b/deps/npm/man/man7/logging.7 index f36ae6e4703048..200ad578584521 100644 --- a/deps/npm/man/man7/logging.7 +++ b/deps/npm/man/man7/logging.7 @@ -1,4 +1,4 @@ -.TH "LOGGING" "7" "October 2024" "NPM@10.9.0" "" +.TH "LOGGING" "7" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBLogging\fR - Why, What & How We Log .SS "Description" diff --git a/deps/npm/man/man7/orgs.7 b/deps/npm/man/man7/orgs.7 index 2866d7dd9800a5..8f12f6e0434a19 100644 --- a/deps/npm/man/man7/orgs.7 +++ b/deps/npm/man/man7/orgs.7 @@ -1,4 +1,4 @@ -.TH "ORGS" "7" "October 2024" "NPM@10.9.0" "" +.TH "ORGS" "7" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBorgs\fR - Working with Teams & Orgs .SS "Description" diff --git a/deps/npm/man/man7/package-spec.7 b/deps/npm/man/man7/package-spec.7 index 0d76a5f3017253..f8a0ac73903272 100644 --- a/deps/npm/man/man7/package-spec.7 +++ b/deps/npm/man/man7/package-spec.7 @@ -1,4 +1,4 @@ -.TH "PACKAGE-SPEC" "7" "October 2024" "NPM@10.9.0" "" +.TH "PACKAGE-SPEC" "7" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBpackage-spec\fR - Package name specifier .SS "Description" diff --git a/deps/npm/man/man7/registry.7 b/deps/npm/man/man7/registry.7 index 61ad2702f7b235..1e8134daaf751d 100644 --- a/deps/npm/man/man7/registry.7 +++ b/deps/npm/man/man7/registry.7 @@ -1,4 +1,4 @@ -.TH "REGISTRY" "7" "October 2024" "NPM@10.9.0" "" +.TH "REGISTRY" "7" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBregistry\fR - The JavaScript Package Registry .SS "Description" diff --git a/deps/npm/man/man7/removal.7 b/deps/npm/man/man7/removal.7 index a76b743553c304..74a7703f177e47 100644 --- a/deps/npm/man/man7/removal.7 +++ b/deps/npm/man/man7/removal.7 @@ -1,4 +1,4 @@ -.TH "REMOVAL" "7" "October 2024" "NPM@10.9.0" "" +.TH "REMOVAL" "7" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBremoval\fR - Cleaning the Slate .SS "Synopsis" diff --git a/deps/npm/man/man7/scope.7 b/deps/npm/man/man7/scope.7 index 94c22ccbd9f8fa..ccf0d9e7ee99c4 100644 --- a/deps/npm/man/man7/scope.7 +++ b/deps/npm/man/man7/scope.7 @@ -1,4 +1,4 @@ -.TH "SCOPE" "7" "October 2024" "NPM@10.9.0" "" +.TH "SCOPE" "7" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBscope\fR - Scoped packages .SS "Description" diff --git a/deps/npm/man/man7/scripts.7 b/deps/npm/man/man7/scripts.7 index 925d3181dc9174..1758473fc89dbb 100644 --- a/deps/npm/man/man7/scripts.7 +++ b/deps/npm/man/man7/scripts.7 @@ -1,4 +1,4 @@ -.TH "SCRIPTS" "7" "October 2024" "NPM@10.9.0" "" +.TH "SCRIPTS" "7" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBscripts\fR - How npm handles the "scripts" field .SS "Description" diff --git a/deps/npm/man/man7/workspaces.7 b/deps/npm/man/man7/workspaces.7 index 1812eb73eb7ece..cd7df95b81cf3e 100644 --- a/deps/npm/man/man7/workspaces.7 +++ b/deps/npm/man/man7/workspaces.7 @@ -1,4 +1,4 @@ -.TH "WORKSPACES" "7" "October 2024" "NPM@10.9.0" "" +.TH "WORKSPACES" "7" "December 2024" "NPM@10.9.2" "" .SH "NAME" \fBworkspaces\fR - Working with workspaces .SS "Description" diff --git a/deps/npm/node_modules/@isaacs/cliui/node_modules/ansi-regex/index.js b/deps/npm/node_modules/@isaacs/cliui/node_modules/ansi-regex/index.js index 130a0929b8ce8c..ddfdba39a783a4 100644 --- a/deps/npm/node_modules/@isaacs/cliui/node_modules/ansi-regex/index.js +++ b/deps/npm/node_modules/@isaacs/cliui/node_modules/ansi-regex/index.js @@ -1,7 +1,9 @@ export default function ansiRegex({onlyFirst = false} = {}) { + // Valid string terminator sequences are BEL, ESC\, and 0x9c + const ST = '(?:\\u0007|\\u001B\\u005C|\\u009C)'; const pattern = [ - '[\\u001B\\u009B][[\\]()#;?]*(?:(?:(?:(?:;[-a-zA-Z\\d\\/#&.:=?%@~_]+)*|[a-zA-Z\\d]+(?:;[-a-zA-Z\\d\\/#&.:=?%@~_]*)*)?\\u0007)', - '(?:(?:\\d{1,4}(?:;\\d{0,4})*)?[\\dA-PR-TZcf-ntqry=><~]))' + `[\\u001B\\u009B][[\\]()#;?]*(?:(?:(?:(?:;[-a-zA-Z\\d\\/#&.:=?%@~_]+)*|[a-zA-Z\\d]+(?:;[-a-zA-Z\\d\\/#&.:=?%@~_]*)*)?${ST})`, + '(?:(?:\\d{1,4}(?:;\\d{0,4})*)?[\\dA-PR-TZcf-nq-uy=><~]))', ].join('|'); return new RegExp(pattern, onlyFirst ? undefined : 'g'); diff --git a/deps/npm/node_modules/@isaacs/cliui/node_modules/ansi-regex/package.json b/deps/npm/node_modules/@isaacs/cliui/node_modules/ansi-regex/package.json index 7bbb563bf2a70a..49f3f61021512b 100644 --- a/deps/npm/node_modules/@isaacs/cliui/node_modules/ansi-regex/package.json +++ b/deps/npm/node_modules/@isaacs/cliui/node_modules/ansi-regex/package.json @@ -1,6 +1,6 @@ { "name": "ansi-regex", - "version": "6.0.1", + "version": "6.1.0", "description": "Regular expression for matching ANSI escape codes", "license": "MIT", "repository": "chalk/ansi-regex", @@ -12,6 +12,8 @@ }, "type": "module", "exports": "./index.js", + "types": "./index.d.ts", + "sideEffects": false, "engines": { "node": ">=12" }, @@ -51,8 +53,9 @@ "pattern" ], "devDependencies": { + "ansi-escapes": "^5.0.0", "ava": "^3.15.0", - "tsd": "^0.14.0", - "xo": "^0.38.2" + "tsd": "^0.21.0", + "xo": "^0.54.2" } } diff --git a/deps/npm/node_modules/@npmcli/map-workspaces/lib/index.js b/deps/npm/node_modules/@npmcli/map-workspaces/lib/index.js index 1b39d2e3f67e08..f38b8cd33b74f7 100644 --- a/deps/npm/node_modules/@npmcli/map-workspaces/lib/index.js +++ b/deps/npm/node_modules/@npmcli/map-workspaces/lib/index.js @@ -134,7 +134,7 @@ async function mapWorkspaces (opts = {}) { try { pkg = await pkgJson.normalize(path.join(opts.cwd, match)) } catch (err) { - if (err.code === 'ENOENT') { + if (err.code === 'ENOENT' || err.code === 'ENOTDIR') { continue } else { throw err diff --git a/deps/npm/node_modules/@npmcli/map-workspaces/package.json b/deps/npm/node_modules/@npmcli/map-workspaces/package.json index f2667f25e9d5d2..78a515e027b011 100644 --- a/deps/npm/node_modules/@npmcli/map-workspaces/package.json +++ b/deps/npm/node_modules/@npmcli/map-workspaces/package.json @@ -1,6 +1,6 @@ { "name": "@npmcli/map-workspaces", - "version": "4.0.1", + "version": "4.0.2", "main": "lib/index.js", "files": [ "bin/", @@ -44,7 +44,7 @@ }, "devDependencies": { "@npmcli/eslint-config": "^5.0.0", - "@npmcli/template-oss": "4.23.3", + "@npmcli/template-oss": "4.23.4", "tap": "^16.0.1" }, "dependencies": { @@ -55,7 +55,7 @@ }, "templateOSS": { "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.23.3", + "version": "4.23.4", "publish": "true" } } diff --git a/deps/npm/node_modules/node-gyp/node_modules/isexe/LICENSE b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/LICENSE similarity index 90% rename from deps/npm/node_modules/node-gyp/node_modules/isexe/LICENSE rename to deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/LICENSE index c925dbe826b670..a03cd0ed0b338b 100644 --- a/deps/npm/node_modules/node-gyp/node_modules/isexe/LICENSE +++ b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/LICENSE @@ -1,6 +1,6 @@ The ISC License -Copyright (c) 2016-2022 Isaac Z. Schlueter and Contributors +Copyright (c) Isaac Z. Schlueter, Kat Marchán, npm, Inc., and Contributors Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above diff --git a/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/README.md b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/README.md new file mode 100644 index 00000000000000..dbb0051de23a4d --- /dev/null +++ b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/README.md @@ -0,0 +1,283 @@ +# pacote + +Fetches package manifests and tarballs from the npm registry. + +## USAGE + +```js +const pacote = require('pacote') + +// get a package manifest +pacote.manifest('foo@1.x').then(manifest => console.log('got it', manifest)) + +// extract a package into a folder +pacote.extract('github:npm/cli', 'some/path', options) + .then(({from, resolved, integrity}) => { + console.log('extracted!', from, resolved, integrity) + }) + +pacote.tarball('https://server.com/package.tgz').then(data => { + console.log('got ' + data.length + ' bytes of tarball data') +}) +``` + +`pacote` works with any kind of package specifier that npm can install. If +you can pass it to the npm CLI, you can pass it to pacote. (In fact, that's +exactly what the npm CLI does.) + +Anything that you can do with one kind of package, you can do with another. + +Data that isn't relevant (like a packument for a tarball) will be +simulated. + +`prepare` scripts will be run when generating tarballs from `git` and +`directory` locations, to simulate what _would_ be published to the +registry, so that you get a working package instead of just raw source +code that might need to be transpiled. + +## CLI + +This module exports a command line interface that can do most of what is +described below. Run `pacote -h` to learn more. + +``` +Pacote - The JavaScript Package Handler, v10.1.1 + +Usage: + + pacote resolve + Resolve a specifier and output the fully resolved target + Returns integrity and from if '--long' flag is set. + + pacote manifest + Fetch a manifest and print to stdout + + pacote packument + Fetch a full packument and print to stdout + + pacote tarball [] + Fetch a package tarball and save to + If is missing or '-', the tarball will be streamed to stdout. + + pacote extract + Extract a package to the destination folder. + +Configuration values all match the names of configs passed to npm, or +options passed to Pacote. Additional flags for this executable: + + --long Print an object from 'resolve', including integrity and spec. + --json Print result objects as JSON rather than node's default. + (This is the default if stdout is not a TTY.) + --help -h Print this helpful text. + +For example '--cache=/path/to/folder' will use that folder as the cache. +``` + +## API + +The `spec` refers to any kind of package specifier that npm can install. +If you can pass it to the npm CLI, you can pass it to pacote. (In fact, +that's exactly what the npm CLI does.) + +See below for valid `opts` values. + +* `pacote.resolve(spec, opts)` Resolve a specifier like `foo@latest` or + `github:user/project` all the way to a tarball url, tarball file, or git + repo with commit hash. + +* `pacote.extract(spec, dest, opts)` Extract a package's tarball into a + destination folder. Returns a promise that resolves to the + `{from,resolved,integrity}` of the extracted package. + +* `pacote.manifest(spec, opts)` Fetch (or simulate) a package's manifest + (basically, the `package.json` file, plus a bit of metadata). + See below for more on manifests and packuments. Returns a Promise that + resolves to the manifest object. + +* `pacote.packument(spec, opts)` Fetch (or simulate) a package's packument + (basically, the top-level package document listing all the manifests that + the registry returns). See below for more on manifests and packuments. + Returns a Promise that resolves to the packument object. + +* `pacote.tarball(spec, opts)` Get a package tarball data as a buffer in + memory. Returns a Promise that resolves to the tarball data Buffer, with + `from`, `resolved`, and `integrity` fields attached. + +* `pacote.tarball.file(spec, dest, opts)` Save a package tarball data to + a file on disk. Returns a Promise that resolves to + `{from,integrity,resolved}` of the fetched tarball. + +* `pacote.tarball.stream(spec, streamHandler, opts)` Fetch a tarball and + make the stream available to the `streamHandler` function. + + This is mostly an internal function, but it is exposed because it does + provide some functionality that may be difficult to achieve otherwise. + + The `streamHandler` function MUST return a Promise that resolves when + the stream (and all associated work) is ended, or rejects if the stream + has an error. + + The `streamHandler` function MAY be called multiple times, as Pacote + retries requests in some scenarios, such as cache corruption or + retriable network failures. + +### Options + +Options are passed to +[`npm-registry-fetch`](http://npm.im/npm-registry-fetch) and +[`cacache`](http://npm.im/cacache), so in addition to these, anything for +those modules can be given to pacote as well. + +Options object is cloned, and mutated along the way to add integrity, +resolved, and other properties, as they are determined. + +* `cache` Where to store cache entries and temp files. Passed to + [`cacache`](http://npm.im/cacache). Defaults to the same cache directory + that npm will use by default, based on platform and environment. +* `where` Base folder for resolving relative `file:` dependencies. +* `resolved` Shortcut for looking up resolved values. Should be specified + if known. +* `integrity` Expected integrity of fetched package tarball. If specified, + tarballs with mismatched integrity values will raise an `EINTEGRITY` + error. +* `umask` Permission mode mask for extracted files and directories. + Defaults to `0o22`. See "Extracted File Modes" below. +* `fmode` Minimum permission mode for extracted files. Defaults to + `0o666`. See "Extracted File Modes" below. +* `dmode` Minimum permission mode for extracted directories. Defaults to + `0o777`. See "Extracted File Modes" below. +* `preferOnline` Prefer to revalidate cache entries, even when it would not + be strictly necessary. Default `false`. +* `before` When picking a manifest from a packument, only consider + packages published before the specified date. Default `null`. +* `defaultTag` The default `dist-tag` to use when choosing a manifest from a + packument. Defaults to `latest`. +* `registry` The npm registry to use by default. Defaults to + `https://registry.npmjs.org/`. +* `fullMetadata` Fetch the full metadata from the registry for packuments, + including information not strictly required for installation (author, + description, etc.) Defaults to `true` when `before` is set, since the + version publish time is part of the extended packument metadata. +* `fullReadJson` Use the slower `read-package-json` package insted of + `read-package-json-fast` in order to include extra fields like "readme" in + the manifest. Defaults to `false`. +* `packumentCache` For registry packuments only, you may provide a `Map` + object which will be used to cache packument requests between pacote + calls. This allows you to easily avoid hitting the registry multiple + times (even just to validate the cache) for a given packument, since it + is unlikely to change in the span of a single command. +* `verifySignatures` A boolean that will make pacote verify the + integrity signature of a manifest, if present. There must be a + configured `_keys` entry in the config that is scoped to the + registry the manifest is being fetched from. +* `verifyAttestations` A boolean that will make pacote verify Sigstore + attestations, if present. There must be a configured `_keys` entry in the + config that is scoped to the registry the manifest is being fetched from. +* `tufCache` Where to store metadata/target files when retrieving the package + attestation key material via TUF. Defaults to the same cache directory that + npm will use by default, based on platform and environment. + +### Advanced API + +Each different type of fetcher is exposed for more advanced usage such as +using helper methods from this classes: + +* `DirFetcher` +* `FileFetcher` +* `GitFetcher` +* `RegistryFetcher` +* `RemoteFetcher` + +## Extracted File Modes + +Files are extracted with a mode matching the following formula: + +``` +( (tarball entry mode value) | (minimum mode option) ) ~ (umask) +``` + +This is in order to prevent unreadable files or unlistable directories from +cluttering a project's `node_modules` folder, even if the package tarball +specifies that the file should be inaccessible. + +It also prevents files from being group- or world-writable without explicit +opt-in by the user, because all file and directory modes are masked against +the `umask` value. + +So, a file which is `0o771` in the tarball, using the default `fmode` of +`0o666` and `umask` of `0o22`, will result in a file mode of `0o755`: + +``` +(0o771 | 0o666) => 0o777 +(0o777 ~ 0o22) => 0o755 +``` + +In almost every case, the defaults are appropriate. To respect exactly +what is in the package tarball (even if this makes an unusable system), set +both `dmode` and `fmode` options to `0`. Otherwise, the `umask` config +should be used in most cases where file mode modifications are required, +and this functions more or less the same as the `umask` value in most Unix +systems. + +## Extracted File Ownership + +When running as `root` on Unix systems, all extracted files and folders +will have their owning `uid` and `gid` values set to match the ownership +of the containing folder. + +This prevents `root`-owned files showing up in a project's `node_modules` +folder when a user runs `sudo npm install`. + +## Manifests + +A `manifest` is similar to a `package.json` file. However, it has a few +pieces of extra metadata, and sometimes lacks metadata that is inessential +to package installation. + +In addition to the common `package.json` fields, manifests include: + +* `manifest._resolved` The tarball url or file path where the package + artifact can be found. +* `manifest._from` A normalized form of the spec passed in as an argument. +* `manifest._integrity` The integrity value for the package artifact. +* `manifest._id` The canonical spec of this package version: name@version. +* `manifest.dist` Registry manifests (those included in a packument) have a + `dist` object. Only `tarball` is required, though at least one of + `shasum` or `integrity` is almost always present. + + * `tarball` The url to the associated package artifact. (Copied by + Pacote to `manifest._resolved`.) + * `integrity` The integrity SRI string for the artifact. This may not + be present for older packages on the npm registry. (Copied by Pacote + to `manifest._integrity`.) + * `shasum` Legacy integrity value. Hexadecimal-encoded sha1 hash. + (Converted to an SRI string and copied by Pacote to + `manifest._integrity` when `dist.integrity` is not present.) + * `fileCount` Number of files in the tarball. + * `unpackedSize` Size on disk of the package when unpacked. + * `signatures` Signatures of the shasum. Includes the keyid that + correlates to a [`key from the npm + registry`](https://registry.npmjs.org/-/npm/v1/keys) + +## Packuments + +A packument is the top-level package document that lists the set of +manifests for available versions for a package. + +When a packument is fetched with `accept: +application/vnd.npm.install-v1+json` in the HTTP headers, only the most +minimum necessary metadata is returned. Additional metadata is returned +when fetched with only `accept: application/json`. + +For Pacote's purposes, the following fields are relevant: + +* `versions` An object where each key is a version, and each value is the + manifest for that version. +* `dist-tags` An object mapping dist-tags to version numbers. This is how + `foo@latest` gets turned into `foo@1.2.3`. +* `time` In the full packument, an object mapping version numbers to + publication times, for the `opts.before` functionality. + +Pacote adds the following field, regardless of the accept header: + +* `_contentLength` The size of the packument. diff --git a/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/bin/index.js b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/bin/index.js new file mode 100755 index 00000000000000..f35b62ca71a537 --- /dev/null +++ b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/bin/index.js @@ -0,0 +1,158 @@ +#!/usr/bin/env node + +const run = conf => { + const pacote = require('../') + switch (conf._[0]) { + case 'resolve': + case 'manifest': + case 'packument': + if (conf._[0] === 'resolve' && conf.long) { + return pacote.manifest(conf._[1], conf).then(mani => ({ + resolved: mani._resolved, + integrity: mani._integrity, + from: mani._from, + })) + } + return pacote[conf._[0]](conf._[1], conf) + + case 'tarball': + if (!conf._[2] || conf._[2] === '-') { + return pacote.tarball.stream(conf._[1], stream => { + stream.pipe( + conf.testStdout || + /* istanbul ignore next */ + process.stdout + ) + // make sure it resolves something falsey + return stream.promise().then(() => { + return false + }) + }, conf) + } else { + return pacote.tarball.file(conf._[1], conf._[2], conf) + } + + case 'extract': + return pacote.extract(conf._[1], conf._[2], conf) + + default: /* istanbul ignore next */ { + throw new Error(`bad command: ${conf._[0]}`) + } + } +} + +const version = require('../package.json').version +const usage = () => +`Pacote - The JavaScript Package Handler, v${version} + +Usage: + + pacote resolve + Resolve a specifier and output the fully resolved target + Returns integrity and from if '--long' flag is set. + + pacote manifest + Fetch a manifest and print to stdout + + pacote packument + Fetch a full packument and print to stdout + + pacote tarball [] + Fetch a package tarball and save to + If is missing or '-', the tarball will be streamed to stdout. + + pacote extract + Extract a package to the destination folder. + +Configuration values all match the names of configs passed to npm, or +options passed to Pacote. Additional flags for this executable: + + --long Print an object from 'resolve', including integrity and spec. + --json Print result objects as JSON rather than node's default. + (This is the default if stdout is not a TTY.) + --help -h Print this helpful text. + +For example '--cache=/path/to/folder' will use that folder as the cache. +` + +const shouldJSON = (conf, result) => + conf.json || + !process.stdout.isTTY && + conf.json === undefined && + result && + typeof result === 'object' + +const pretty = (conf, result) => + shouldJSON(conf, result) ? JSON.stringify(result, 0, 2) : result + +let addedLogListener = false +const main = args => { + const conf = parse(args) + if (conf.help || conf.h) { + return console.log(usage()) + } + + if (!addedLogListener) { + process.on('log', console.error) + addedLogListener = true + } + + try { + return run(conf) + .then(result => result && console.log(pretty(conf, result))) + .catch(er => { + console.error(er) + process.exit(1) + }) + } catch (er) { + console.error(er.message) + console.error(usage()) + } +} + +const parseArg = arg => { + const split = arg.slice(2).split('=') + const k = split.shift() + const v = split.join('=') + const no = /^no-/.test(k) && !v + const key = (no ? k.slice(3) : k) + .replace(/^tag$/, 'defaultTag') + .replace(/-([a-z])/g, (_, c) => c.toUpperCase()) + const value = v ? v.replace(/^~/, process.env.HOME) : !no + return { key, value } +} + +const parse = args => { + const conf = { + _: [], + cache: process.env.HOME + '/.npm/_cacache', + } + let dashdash = false + args.forEach(arg => { + if (dashdash) { + conf._.push(arg) + } else if (arg === '--') { + dashdash = true + } else if (arg === '-h') { + conf.help = true + } else if (/^--/.test(arg)) { + const { key, value } = parseArg(arg) + conf[key] = value + } else { + conf._.push(arg) + } + }) + return conf +} + +if (module === require.main) { + main(process.argv.slice(2)) +} else { + module.exports = { + main, + run, + usage, + parseArg, + parse, + } +} diff --git a/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/dir.js b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/dir.js new file mode 100644 index 00000000000000..04846eb8a6e221 --- /dev/null +++ b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/dir.js @@ -0,0 +1,105 @@ +const { resolve } = require('node:path') +const packlist = require('npm-packlist') +const runScript = require('@npmcli/run-script') +const tar = require('tar') +const { Minipass } = require('minipass') +const Fetcher = require('./fetcher.js') +const FileFetcher = require('./file.js') +const _ = require('./util/protected.js') +const tarCreateOptions = require('./util/tar-create-options.js') + +class DirFetcher extends Fetcher { + constructor (spec, opts) { + super(spec, opts) + // just the fully resolved filename + this.resolved = this.spec.fetchSpec + + this.tree = opts.tree || null + this.Arborist = opts.Arborist || null + } + + // exposes tarCreateOptions as public API + static tarCreateOptions (manifest) { + return tarCreateOptions(manifest) + } + + get types () { + return ['directory'] + } + + #prepareDir () { + return this.manifest().then(mani => { + if (!mani.scripts || !mani.scripts.prepare) { + return + } + if (this.opts.ignoreScripts) { + return + } + + // we *only* run prepare. + // pre/post-pack is run by the npm CLI for publish and pack, + // but this function is *also* run when installing git deps + const stdio = this.opts.foregroundScripts ? 'inherit' : 'pipe' + + return runScript({ + // this || undefined is because runScript will be unhappy with the default null value + scriptShell: this.opts.scriptShell || undefined, + pkg: mani, + event: 'prepare', + path: this.resolved, + stdio, + env: { + npm_package_resolved: this.resolved, + npm_package_integrity: this.integrity, + npm_package_json: resolve(this.resolved, 'package.json'), + }, + }) + }) + } + + [_.tarballFromResolved] () { + if (!this.tree && !this.Arborist) { + throw new Error('DirFetcher requires either a tree or an Arborist constructor to pack') + } + + const stream = new Minipass() + stream.resolved = this.resolved + stream.integrity = this.integrity + + const { prefix, workspaces } = this.opts + + // run the prepare script, get the list of files, and tar it up + // pipe to the stream, and proxy errors the chain. + this.#prepareDir() + .then(async () => { + if (!this.tree) { + const arb = new this.Arborist({ path: this.resolved }) + this.tree = await arb.loadActual() + } + return packlist(this.tree, { path: this.resolved, prefix, workspaces }) + }) + .then(files => tar.c(tarCreateOptions(this.package), files) + .on('error', er => stream.emit('error', er)).pipe(stream)) + .catch(er => stream.emit('error', er)) + return stream + } + + manifest () { + if (this.package) { + return Promise.resolve(this.package) + } + + return this[_.readPackageJson](this.resolved) + .then(mani => this.package = { + ...mani, + _integrity: this.integrity && String(this.integrity), + _resolved: this.resolved, + _from: this.from, + }) + } + + packument () { + return FileFetcher.prototype.packument.apply(this) + } +} +module.exports = DirFetcher diff --git a/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/fetcher.js b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/fetcher.js new file mode 100644 index 00000000000000..f2ac97619d3af1 --- /dev/null +++ b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/fetcher.js @@ -0,0 +1,497 @@ +// This is the base class that the other fetcher types in lib +// all descend from. +// It handles the unpacking and retry logic that is shared among +// all of the other Fetcher types. + +const { basename, dirname } = require('node:path') +const { rm, mkdir } = require('node:fs/promises') +const PackageJson = require('@npmcli/package-json') +const cacache = require('cacache') +const fsm = require('fs-minipass') +const getContents = require('@npmcli/installed-package-contents') +const npa = require('npm-package-arg') +const retry = require('promise-retry') +const ssri = require('ssri') +const tar = require('tar') +const { Minipass } = require('minipass') +const { log } = require('proc-log') +const _ = require('./util/protected.js') +const cacheDir = require('./util/cache-dir.js') +const isPackageBin = require('./util/is-package-bin.js') +const removeTrailingSlashes = require('./util/trailing-slashes.js') + +// Pacote is only concerned with the package.json contents +const packageJsonPrepare = (p) => PackageJson.prepare(p).then(pkg => pkg.content) +const packageJsonNormalize = (p) => PackageJson.normalize(p).then(pkg => pkg.content) + +class FetcherBase { + constructor (spec, opts) { + if (!opts || typeof opts !== 'object') { + throw new TypeError('options object is required') + } + this.spec = npa(spec, opts.where) + + this.allowGitIgnore = !!opts.allowGitIgnore + + // a bit redundant because presumably the caller already knows this, + // but it makes it easier to not have to keep track of the requested + // spec when we're dispatching thousands of these at once, and normalizing + // is nice. saveSpec is preferred if set, because it turns stuff like + // x/y#committish into github:x/y#committish. use name@rawSpec for + // registry deps so that we turn xyz and xyz@ -> xyz@ + this.from = this.spec.registry + ? `${this.spec.name}@${this.spec.rawSpec}` : this.spec.saveSpec + + this.#assertType() + // clone the opts object so that others aren't upset when we mutate it + // by adding/modifying the integrity value. + this.opts = { ...opts } + + this.cache = opts.cache || cacheDir().cacache + this.tufCache = opts.tufCache || cacheDir().tufcache + this.resolved = opts.resolved || null + + // default to caching/verifying with sha512, that's what we usually have + // need to change this default, or start overriding it, when sha512 + // is no longer strong enough. + this.defaultIntegrityAlgorithm = opts.defaultIntegrityAlgorithm || 'sha512' + + if (typeof opts.integrity === 'string') { + this.opts.integrity = ssri.parse(opts.integrity) + } + + this.package = null + this.type = this.constructor.name + this.fmode = opts.fmode || 0o666 + this.dmode = opts.dmode || 0o777 + // we don't need a default umask, because we don't chmod files coming + // out of package tarballs. they're forced to have a mode that is + // valid, regardless of what's in the tarball entry, and then we let + // the process's umask setting do its job. but if configured, we do + // respect it. + this.umask = opts.umask || 0 + + this.preferOnline = !!opts.preferOnline + this.preferOffline = !!opts.preferOffline + this.offline = !!opts.offline + + this.before = opts.before + this.fullMetadata = this.before ? true : !!opts.fullMetadata + this.fullReadJson = !!opts.fullReadJson + this[_.readPackageJson] = this.fullReadJson + ? packageJsonPrepare + : packageJsonNormalize + + // rrh is a registry hostname or 'never' or 'always' + // defaults to registry.npmjs.org + this.replaceRegistryHost = (!opts.replaceRegistryHost || opts.replaceRegistryHost === 'npmjs') ? + 'registry.npmjs.org' : opts.replaceRegistryHost + + this.defaultTag = opts.defaultTag || 'latest' + this.registry = removeTrailingSlashes(opts.registry || 'https://registry.npmjs.org') + + // command to run 'prepare' scripts on directories and git dirs + // To use pacote with yarn, for example, set npmBin to 'yarn' + // and npmCliConfig with yarn's equivalents. + this.npmBin = opts.npmBin || 'npm' + + // command to install deps for preparing + this.npmInstallCmd = opts.npmInstallCmd || ['install', '--force'] + + // XXX fill more of this in based on what we know from this.opts + // we explicitly DO NOT fill in --tag, though, since we are often + // going to be packing in the context of a publish, which may set + // a dist-tag, but certainly wants to keep defaulting to latest. + this.npmCliConfig = opts.npmCliConfig || [ + `--cache=${dirname(this.cache)}`, + `--prefer-offline=${!!this.preferOffline}`, + `--prefer-online=${!!this.preferOnline}`, + `--offline=${!!this.offline}`, + ...(this.before ? [`--before=${this.before.toISOString()}`] : []), + '--no-progress', + '--no-save', + '--no-audit', + // override any omit settings from the environment + '--include=dev', + '--include=peer', + '--include=optional', + // we need the actual things, not just the lockfile + '--no-package-lock-only', + '--no-dry-run', + ] + } + + get integrity () { + return this.opts.integrity || null + } + + set integrity (i) { + if (!i) { + return + } + + i = ssri.parse(i) + const current = this.opts.integrity + + // do not ever update an existing hash value, but do + // merge in NEW algos and hashes that we don't already have. + if (current) { + current.merge(i) + } else { + this.opts.integrity = i + } + } + + get notImplementedError () { + return new Error('not implemented in this fetcher type: ' + this.type) + } + + // override in child classes + // Returns a Promise that resolves to this.resolved string value + resolve () { + return this.resolved ? Promise.resolve(this.resolved) + : Promise.reject(this.notImplementedError) + } + + packument () { + return Promise.reject(this.notImplementedError) + } + + // override in child class + // returns a manifest containing: + // - name + // - version + // - _resolved + // - _integrity + // - plus whatever else was in there (corgi, full metadata, or pj file) + manifest () { + return Promise.reject(this.notImplementedError) + } + + // private, should be overridden. + // Note that they should *not* calculate or check integrity or cache, + // but *just* return the raw tarball data stream. + [_.tarballFromResolved] () { + throw this.notImplementedError + } + + // public, should not be overridden + tarball () { + return this.tarballStream(stream => stream.concat().then(data => { + data.integrity = this.integrity && String(this.integrity) + data.resolved = this.resolved + data.from = this.from + return data + })) + } + + // private + // Note: cacache will raise a EINTEGRITY error if the integrity doesn't match + #tarballFromCache () { + const startTime = Date.now() + const stream = cacache.get.stream.byDigest(this.cache, this.integrity, this.opts) + const elapsedTime = Date.now() - startTime + // cache is good, so log it as a hit in particular since there was no fetch logged + log.http( + 'cache', + `${this.spec} ${elapsedTime}ms (cache hit)` + ) + return stream + } + + get [_.cacheFetches] () { + return true + } + + #istream (stream) { + // if not caching this, just return it + if (!this.opts.cache || !this[_.cacheFetches]) { + // instead of creating a new integrity stream, we only piggyback on the + // provided stream's events + if (stream.hasIntegrityEmitter) { + stream.on('integrity', i => this.integrity = i) + return stream + } + + const istream = ssri.integrityStream(this.opts) + istream.on('integrity', i => this.integrity = i) + stream.on('error', err => istream.emit('error', err)) + return stream.pipe(istream) + } + + // we have to return a stream that gets ALL the data, and proxies errors, + // but then pipe from the original tarball stream into the cache as well. + // To do this without losing any data, and since the cacache put stream + // is not a passthrough, we have to pipe from the original stream into + // the cache AFTER we pipe into the middleStream. Since the cache stream + // has an asynchronous flush to write its contents to disk, we need to + // defer the middleStream end until the cache stream ends. + const middleStream = new Minipass() + stream.on('error', err => middleStream.emit('error', err)) + stream.pipe(middleStream, { end: false }) + const cstream = cacache.put.stream( + this.opts.cache, + `pacote:tarball:${this.from}`, + this.opts + ) + cstream.on('integrity', i => this.integrity = i) + cstream.on('error', err => stream.emit('error', err)) + stream.pipe(cstream) + + // eslint-disable-next-line promise/catch-or-return + cstream.promise().catch(() => {}).then(() => middleStream.end()) + return middleStream + } + + pickIntegrityAlgorithm () { + return this.integrity ? this.integrity.pickAlgorithm(this.opts) + : this.defaultIntegrityAlgorithm + } + + // TODO: check error class, once those are rolled out to our deps + isDataCorruptionError (er) { + return er.code === 'EINTEGRITY' || er.code === 'Z_DATA_ERROR' + } + + // override the types getter + get types () { + return false + } + + #assertType () { + if (this.types && !this.types.includes(this.spec.type)) { + throw new TypeError(`Wrong spec type (${ + this.spec.type + }) for ${ + this.constructor.name + }. Supported types: ${this.types.join(', ')}`) + } + } + + // We allow ENOENTs from cacache, but not anywhere else. + // An ENOENT trying to read a tgz file, for example, is Right Out. + isRetriableError (er) { + // TODO: check error class, once those are rolled out to our deps + return this.isDataCorruptionError(er) || + er.code === 'ENOENT' || + er.code === 'EISDIR' + } + + // Mostly internal, but has some uses + // Pass in a function which returns a promise + // Function will be called 1 or more times with streams that may fail. + // Retries: + // Function MUST handle errors on the stream by rejecting the promise, + // so that retry logic can pick it up and either retry or fail whatever + // promise it was making (ie, failing extraction, etc.) + // + // The return value of this method is a Promise that resolves the same + // as whatever the streamHandler resolves to. + // + // This should never be overridden by child classes, but it is public. + tarballStream (streamHandler) { + // Only short-circuit via cache if we have everything else we'll need, + // and the user has not expressed a preference for checking online. + + const fromCache = ( + !this.preferOnline && + this.integrity && + this.resolved + ) ? streamHandler(this.#tarballFromCache()).catch(er => { + if (this.isDataCorruptionError(er)) { + log.warn('tarball', `cached data for ${ + this.spec + } (${this.integrity}) seems to be corrupted. Refreshing cache.`) + return this.cleanupCached().then(() => { + throw er + }) + } else { + throw er + } + }) : null + + const fromResolved = er => { + if (er) { + if (!this.isRetriableError(er)) { + throw er + } + log.silly('tarball', `no local data for ${ + this.spec + }. Extracting by manifest.`) + } + return this.resolve().then(() => retry(tryAgain => + streamHandler(this.#istream(this[_.tarballFromResolved]())) + .catch(streamErr => { + // Most likely data integrity. A cache ENOENT error is unlikely + // here, since we're definitely not reading from the cache, but it + // IS possible that the fetch subsystem accessed the cache, and the + // entry got blown away or something. Try one more time to be sure. + if (this.isRetriableError(streamErr)) { + log.warn('tarball', `tarball data for ${ + this.spec + } (${this.integrity}) seems to be corrupted. Trying again.`) + return this.cleanupCached().then(() => tryAgain(streamErr)) + } + throw streamErr + }), { retries: 1, minTimeout: 0, maxTimeout: 0 })) + } + + return fromCache ? fromCache.catch(fromResolved) : fromResolved() + } + + cleanupCached () { + return cacache.rm.content(this.cache, this.integrity, this.opts) + } + + #empty (path) { + return getContents({ path, depth: 1 }).then(contents => Promise.all( + contents.map(entry => rm(entry, { recursive: true, force: true })))) + } + + async #mkdir (dest) { + await this.#empty(dest) + return await mkdir(dest, { recursive: true }) + } + + // extraction is always the same. the only difference is where + // the tarball comes from. + async extract (dest) { + await this.#mkdir(dest) + return this.tarballStream((tarball) => this.#extract(dest, tarball)) + } + + #toFile (dest) { + return this.tarballStream(str => new Promise((res, rej) => { + const writer = new fsm.WriteStream(dest) + str.on('error', er => writer.emit('error', er)) + writer.on('error', er => rej(er)) + writer.on('close', () => res({ + integrity: this.integrity && String(this.integrity), + resolved: this.resolved, + from: this.from, + })) + str.pipe(writer) + })) + } + + // don't use this.#mkdir because we don't want to rimraf anything + async tarballFile (dest) { + const dir = dirname(dest) + await mkdir(dir, { recursive: true }) + return this.#toFile(dest) + } + + #extract (dest, tarball) { + const extractor = tar.x(this.#tarxOptions({ cwd: dest })) + const p = new Promise((resolve, reject) => { + extractor.on('end', () => { + resolve({ + resolved: this.resolved, + integrity: this.integrity && String(this.integrity), + from: this.from, + }) + }) + + extractor.on('error', er => { + log.warn('tar', er.message) + log.silly('tar', er) + reject(er) + }) + + tarball.on('error', er => reject(er)) + }) + + tarball.pipe(extractor) + return p + } + + // always ensure that entries are at least as permissive as our configured + // dmode/fmode, but never more permissive than the umask allows. + #entryMode (path, mode, type) { + const m = /Directory|GNUDumpDir/.test(type) ? this.dmode + : /File$/.test(type) ? this.fmode + : /* istanbul ignore next - should never happen in a pkg */ 0 + + // make sure package bins are executable + const exe = isPackageBin(this.package, path) ? 0o111 : 0 + // always ensure that files are read/writable by the owner + return ((mode | m) & ~this.umask) | exe | 0o600 + } + + #tarxOptions ({ cwd }) { + const sawIgnores = new Set() + return { + cwd, + noChmod: true, + noMtime: true, + filter: (name, entry) => { + if (/Link$/.test(entry.type)) { + return false + } + entry.mode = this.#entryMode(entry.path, entry.mode, entry.type) + // this replicates the npm pack behavior where .gitignore files + // are treated like .npmignore files, but only if a .npmignore + // file is not present. + if (/File$/.test(entry.type)) { + const base = basename(entry.path) + if (base === '.npmignore') { + sawIgnores.add(entry.path) + } else if (base === '.gitignore' && !this.allowGitIgnore) { + // rename, but only if there's not already a .npmignore + const ni = entry.path.replace(/\.gitignore$/, '.npmignore') + if (sawIgnores.has(ni)) { + return false + } + entry.path = ni + } + return true + } + }, + strip: 1, + onwarn: /* istanbul ignore next - we can trust that tar logs */ + (code, msg, data) => { + log.warn('tar', code, msg) + log.silly('tar', code, msg, data) + }, + umask: this.umask, + // always ignore ownership info from tarball metadata + preserveOwner: false, + } + } +} + +module.exports = FetcherBase + +// Child classes +const GitFetcher = require('./git.js') +const RegistryFetcher = require('./registry.js') +const FileFetcher = require('./file.js') +const DirFetcher = require('./dir.js') +const RemoteFetcher = require('./remote.js') + +// Get an appropriate fetcher object from a spec and options +FetcherBase.get = (rawSpec, opts = {}) => { + const spec = npa(rawSpec, opts.where) + switch (spec.type) { + case 'git': + return new GitFetcher(spec, opts) + + case 'remote': + return new RemoteFetcher(spec, opts) + + case 'version': + case 'range': + case 'tag': + case 'alias': + return new RegistryFetcher(spec.subSpec || spec, opts) + + case 'file': + return new FileFetcher(spec, opts) + + case 'directory': + return new DirFetcher(spec, opts) + + default: + throw new TypeError('Unknown spec type: ' + spec.type) + } +} diff --git a/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/file.js b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/file.js new file mode 100644 index 00000000000000..2021325085e4f0 --- /dev/null +++ b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/file.js @@ -0,0 +1,94 @@ +const { resolve } = require('node:path') +const { stat, chmod } = require('node:fs/promises') +const cacache = require('cacache') +const fsm = require('fs-minipass') +const Fetcher = require('./fetcher.js') +const _ = require('./util/protected.js') + +class FileFetcher extends Fetcher { + constructor (spec, opts) { + super(spec, opts) + // just the fully resolved filename + this.resolved = this.spec.fetchSpec + } + + get types () { + return ['file'] + } + + manifest () { + if (this.package) { + return Promise.resolve(this.package) + } + + // have to unpack the tarball for this. + return cacache.tmp.withTmp(this.cache, this.opts, dir => + this.extract(dir) + .then(() => this[_.readPackageJson](dir)) + .then(mani => this.package = { + ...mani, + _integrity: this.integrity && String(this.integrity), + _resolved: this.resolved, + _from: this.from, + })) + } + + #exeBins (pkg, dest) { + if (!pkg.bin) { + return Promise.resolve() + } + + return Promise.all(Object.keys(pkg.bin).map(async k => { + const script = resolve(dest, pkg.bin[k]) + // Best effort. Ignore errors here, the only result is that + // a bin script is not executable. But if it's missing or + // something, we just leave it for a later stage to trip over + // when we can provide a more useful contextual error. + try { + const st = await stat(script) + const mode = st.mode | 0o111 + if (mode === st.mode) { + return + } + await chmod(script, mode) + } catch { + // Ignore errors here + } + })) + } + + extract (dest) { + // if we've already loaded the manifest, then the super got it. + // but if not, read the unpacked manifest and chmod properly. + return super.extract(dest) + .then(result => this.package ? result + : this[_.readPackageJson](dest).then(pkg => + this.#exeBins(pkg, dest)).then(() => result)) + } + + [_.tarballFromResolved] () { + // create a read stream and return it + return new fsm.ReadStream(this.resolved) + } + + packument () { + // simulate based on manifest + return this.manifest().then(mani => ({ + name: mani.name, + 'dist-tags': { + [this.defaultTag]: mani.version, + }, + versions: { + [mani.version]: { + ...mani, + dist: { + tarball: `file:${this.resolved}`, + integrity: this.integrity && String(this.integrity), + }, + }, + }, + })) + } +} + +module.exports = FileFetcher diff --git a/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/git.js b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/git.js new file mode 100644 index 00000000000000..077193a86f026f --- /dev/null +++ b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/git.js @@ -0,0 +1,317 @@ +const cacache = require('cacache') +const git = require('@npmcli/git') +const npa = require('npm-package-arg') +const pickManifest = require('npm-pick-manifest') +const { Minipass } = require('minipass') +const { log } = require('proc-log') +const DirFetcher = require('./dir.js') +const Fetcher = require('./fetcher.js') +const FileFetcher = require('./file.js') +const RemoteFetcher = require('./remote.js') +const _ = require('./util/protected.js') +const addGitSha = require('./util/add-git-sha.js') +const npm = require('./util/npm.js') + +const hashre = /^[a-f0-9]{40}$/ + +// get the repository url. +// prefer https if there's auth, since ssh will drop that. +// otherwise, prefer ssh if available (more secure). +// We have to add the git+ back because npa suppresses it. +const repoUrl = (h, opts) => + h.sshurl && !(h.https && h.auth) && addGitPlus(h.sshurl(opts)) || + h.https && addGitPlus(h.https(opts)) + +// add git+ to the url, but only one time. +const addGitPlus = url => url && `git+${url}`.replace(/^(git\+)+/, 'git+') + +class GitFetcher extends Fetcher { + constructor (spec, opts) { + super(spec, opts) + + // we never want to compare integrity for git dependencies: npm/rfcs#525 + if (this.opts.integrity) { + delete this.opts.integrity + log.warn(`skipping integrity check for git dependency ${this.spec.fetchSpec}`) + } + + this.resolvedRef = null + if (this.spec.hosted) { + this.from = this.spec.hosted.shortcut({ noCommittish: false }) + } + + // shortcut: avoid full clone when we can go straight to the tgz + // if we have the full sha and it's a hosted git platform + if (this.spec.gitCommittish && hashre.test(this.spec.gitCommittish)) { + this.resolvedSha = this.spec.gitCommittish + // use hosted.tarball() when we shell to RemoteFetcher later + this.resolved = this.spec.hosted + ? repoUrl(this.spec.hosted, { noCommittish: false }) + : this.spec.rawSpec + } else { + this.resolvedSha = '' + } + + this.Arborist = opts.Arborist || null + } + + // just exposed to make it easier to test all the combinations + static repoUrl (hosted, opts) { + return repoUrl(hosted, opts) + } + + get types () { + return ['git'] + } + + resolve () { + // likely a hosted git repo with a sha, so get the tarball url + // but in general, no reason to resolve() more than necessary! + if (this.resolved) { + return super.resolve() + } + + // fetch the git repo and then look at the current hash + const h = this.spec.hosted + // try to use ssh, fall back to git. + return h + ? this.#resolvedFromHosted(h) + : this.#resolvedFromRepo(this.spec.fetchSpec) + } + + // first try https, since that's faster and passphrase-less for + // public repos, and supports private repos when auth is provided. + // Fall back to SSH to support private repos + // NB: we always store the https url in resolved field if auth + // is present, otherwise ssh if the hosted type provides it + #resolvedFromHosted (hosted) { + return this.#resolvedFromRepo(hosted.https && hosted.https()).catch(er => { + // Throw early since we know pathspec errors will fail again if retried + if (er instanceof git.errors.GitPathspecError) { + throw er + } + const ssh = hosted.sshurl && hosted.sshurl() + // no fallthrough if we can't fall through or have https auth + if (!ssh || hosted.auth) { + throw er + } + return this.#resolvedFromRepo(ssh) + }) + } + + #resolvedFromRepo (gitRemote) { + // XXX make this a custom error class + if (!gitRemote) { + return Promise.reject(new Error(`No git url for ${this.spec}`)) + } + const gitRange = this.spec.gitRange + const name = this.spec.name + return git.revs(gitRemote, this.opts).then(remoteRefs => { + return gitRange ? pickManifest({ + versions: remoteRefs.versions, + 'dist-tags': remoteRefs['dist-tags'], + name, + }, gitRange, this.opts) + : this.spec.gitCommittish ? + remoteRefs.refs[this.spec.gitCommittish] || + remoteRefs.refs[remoteRefs.shas[this.spec.gitCommittish]] + : remoteRefs.refs.HEAD // no git committish, get default head + }).then(revDoc => { + // the committish provided isn't in the rev list + // things like HEAD~3 or @yesterday can land here. + if (!revDoc || !revDoc.sha) { + return this.#resolvedFromClone() + } + + this.resolvedRef = revDoc + this.resolvedSha = revDoc.sha + this.#addGitSha(revDoc.sha) + return this.resolved + }) + } + + #setResolvedWithSha (withSha) { + // we haven't cloned, so a tgz download is still faster + // of course, if it's not a known host, we can't do that. + this.resolved = !this.spec.hosted ? withSha + : repoUrl(npa(withSha).hosted, { noCommittish: false }) + } + + // when we get the git sha, we affix it to our spec to build up + // either a git url with a hash, or a tarball download URL + #addGitSha (sha) { + this.#setResolvedWithSha(addGitSha(this.spec, sha)) + } + + #resolvedFromClone () { + // do a full or shallow clone, then look at the HEAD + // kind of wasteful, but no other option, really + return this.#clone(() => this.resolved) + } + + #prepareDir (dir) { + return this[_.readPackageJson](dir).then(mani => { + // no need if we aren't going to do any preparation. + const scripts = mani.scripts + if (!mani.workspaces && (!scripts || !( + scripts.postinstall || + scripts.build || + scripts.preinstall || + scripts.install || + scripts.prepack || + scripts.prepare))) { + return + } + + // to avoid cases where we have an cycle of git deps that depend + // on one another, we only ever do preparation for one instance + // of a given git dep along the chain of installations. + // Note that this does mean that a dependency MAY in theory end up + // trying to run its prepare script using a dependency that has not + // been properly prepared itself, but that edge case is smaller + // and less hazardous than a fork bomb of npm and git commands. + const noPrepare = !process.env._PACOTE_NO_PREPARE_ ? [] + : process.env._PACOTE_NO_PREPARE_.split('\n') + if (noPrepare.includes(this.resolved)) { + log.info('prepare', 'skip prepare, already seen', this.resolved) + return + } + noPrepare.push(this.resolved) + + // the DirFetcher will do its own preparation to run the prepare scripts + // All we have to do is put the deps in place so that it can succeed. + return npm( + this.npmBin, + [].concat(this.npmInstallCmd).concat(this.npmCliConfig), + dir, + { ...process.env, _PACOTE_NO_PREPARE_: noPrepare.join('\n') }, + { message: 'git dep preparation failed' } + ) + }) + } + + [_.tarballFromResolved] () { + const stream = new Minipass() + stream.resolved = this.resolved + stream.from = this.from + + // check it out and then shell out to the DirFetcher tarball packer + this.#clone(dir => this.#prepareDir(dir) + .then(() => new Promise((res, rej) => { + if (!this.Arborist) { + throw new Error('GitFetcher requires an Arborist constructor to pack a tarball') + } + const df = new DirFetcher(`file:${dir}`, { + ...this.opts, + Arborist: this.Arborist, + resolved: null, + integrity: null, + }) + const dirStream = df[_.tarballFromResolved]() + dirStream.on('error', rej) + dirStream.on('end', res) + dirStream.pipe(stream) + }))).catch( + /* istanbul ignore next: very unlikely and hard to test */ + er => stream.emit('error', er) + ) + return stream + } + + // clone a git repo into a temp folder (or fetch and unpack if possible) + // handler accepts a directory, and returns a promise that resolves + // when we're done with it, at which point, cacache deletes it + // + // TODO: after cloning, create a tarball of the folder, and add to the cache + // with cacache.put.stream(), using a key that's deterministic based on the + // spec and repo, so that we don't ever clone the same thing multiple times. + #clone (handler, tarballOk = true) { + const o = { tmpPrefix: 'git-clone' } + const ref = this.resolvedSha || this.spec.gitCommittish + const h = this.spec.hosted + const resolved = this.resolved + + // can be set manually to false to fall back to actual git clone + tarballOk = tarballOk && + h && resolved === repoUrl(h, { noCommittish: false }) && h.tarball + + return cacache.tmp.withTmp(this.cache, o, async tmp => { + // if we're resolved, and have a tarball url, shell out to RemoteFetcher + if (tarballOk) { + const nameat = this.spec.name ? `${this.spec.name}@` : '' + return new RemoteFetcher(h.tarball({ noCommittish: false }), { + ...this.opts, + allowGitIgnore: true, + pkgid: `git:${nameat}${this.resolved}`, + resolved: this.resolved, + integrity: null, // it'll always be different, if we have one + }).extract(tmp).then(() => handler(tmp), er => { + // fall back to ssh download if tarball fails + if (er.constructor.name.match(/^Http/)) { + return this.#clone(handler, false) + } else { + throw er + } + }) + } + + const sha = await ( + h ? this.#cloneHosted(ref, tmp) + : this.#cloneRepo(this.spec.fetchSpec, ref, tmp) + ) + this.resolvedSha = sha + if (!this.resolved) { + await this.#addGitSha(sha) + } + return handler(tmp) + }) + } + + // first try https, since that's faster and passphrase-less for + // public repos, and supports private repos when auth is provided. + // Fall back to SSH to support private repos + // NB: we always store the https url in resolved field if auth + // is present, otherwise ssh if the hosted type provides it + #cloneHosted (ref, tmp) { + const hosted = this.spec.hosted + return this.#cloneRepo(hosted.https({ noCommittish: true }), ref, tmp) + .catch(er => { + // Throw early since we know pathspec errors will fail again if retried + if (er instanceof git.errors.GitPathspecError) { + throw er + } + const ssh = hosted.sshurl && hosted.sshurl({ noCommittish: true }) + // no fallthrough if we can't fall through or have https auth + if (!ssh || hosted.auth) { + throw er + } + return this.#cloneRepo(ssh, ref, tmp) + }) + } + + #cloneRepo (repo, ref, tmp) { + const { opts, spec } = this + return git.clone(repo, ref, tmp, { ...opts, spec }) + } + + manifest () { + if (this.package) { + return Promise.resolve(this.package) + } + + return this.spec.hosted && this.resolved + ? FileFetcher.prototype.manifest.apply(this) + : this.#clone(dir => + this[_.readPackageJson](dir) + .then(mani => this.package = { + ...mani, + _resolved: this.resolved, + _from: this.from, + })) + } + + packument () { + return FileFetcher.prototype.packument.apply(this) + } +} +module.exports = GitFetcher diff --git a/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/index.js b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/index.js new file mode 100644 index 00000000000000..f35314d275d5fd --- /dev/null +++ b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/index.js @@ -0,0 +1,23 @@ +const { get } = require('./fetcher.js') +const GitFetcher = require('./git.js') +const RegistryFetcher = require('./registry.js') +const FileFetcher = require('./file.js') +const DirFetcher = require('./dir.js') +const RemoteFetcher = require('./remote.js') + +const tarball = (spec, opts) => get(spec, opts).tarball() +tarball.stream = (spec, handler, opts) => get(spec, opts).tarballStream(handler) +tarball.file = (spec, dest, opts) => get(spec, opts).tarballFile(dest) + +module.exports = { + GitFetcher, + RegistryFetcher, + FileFetcher, + DirFetcher, + RemoteFetcher, + resolve: (spec, opts) => get(spec, opts).resolve(), + extract: (spec, dest, opts) => get(spec, opts).extract(dest), + manifest: (spec, opts) => get(spec, opts).manifest(), + packument: (spec, opts) => get(spec, opts).packument(), + tarball, +} diff --git a/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/registry.js b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/registry.js new file mode 100644 index 00000000000000..1ecf4ee1773499 --- /dev/null +++ b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/registry.js @@ -0,0 +1,369 @@ +const crypto = require('node:crypto') +const PackageJson = require('@npmcli/package-json') +const pickManifest = require('npm-pick-manifest') +const ssri = require('ssri') +const npa = require('npm-package-arg') +const sigstore = require('sigstore') +const fetch = require('npm-registry-fetch') +const Fetcher = require('./fetcher.js') +const RemoteFetcher = require('./remote.js') +const pacoteVersion = require('../package.json').version +const removeTrailingSlashes = require('./util/trailing-slashes.js') +const _ = require('./util/protected.js') + +// Corgis are cute. 🐕🐶 +const corgiDoc = 'application/vnd.npm.install-v1+json; q=1.0, application/json; q=0.8, */*' +const fullDoc = 'application/json' + +// Some really old packages have no time field in their packument so we need a +// cutoff date. +const MISSING_TIME_CUTOFF = '2015-01-01T00:00:00.000Z' + +class RegistryFetcher extends Fetcher { + #cacheKey + constructor (spec, opts) { + super(spec, opts) + + // you usually don't want to fetch the same packument multiple times in + // the span of a given script or command, no matter how many pacote calls + // are made, so this lets us avoid doing that. It's only relevant for + // registry fetchers, because other types simulate their packument from + // the manifest, which they memoize on this.package, so it's very cheap + // already. + this.packumentCache = this.opts.packumentCache || null + + this.registry = fetch.pickRegistry(spec, opts) + this.packumentUrl = `${removeTrailingSlashes(this.registry)}/${this.spec.escapedName}` + this.#cacheKey = `${this.fullMetadata ? 'full' : 'corgi'}:${this.packumentUrl}` + + const parsed = new URL(this.registry) + const regKey = `//${parsed.host}${parsed.pathname}` + // unlike the nerf-darted auth keys, this one does *not* allow a mismatch + // of trailing slashes. It must match exactly. + if (this.opts[`${regKey}:_keys`]) { + this.registryKeys = this.opts[`${regKey}:_keys`] + } + + // XXX pacote <=9 has some logic to ignore opts.resolved if + // the resolved URL doesn't go to the same registry. + // Consider reproducing that here, to throw away this.resolved + // in that case. + } + + async resolve () { + // fetching the manifest sets resolved and (if present) integrity + await this.manifest() + if (!this.resolved) { + throw Object.assign( + new Error('Invalid package manifest: no `dist.tarball` field'), + { package: this.spec.toString() } + ) + } + return this.resolved + } + + #headers () { + return { + // npm will override UA, but ensure that we always send *something* + 'user-agent': this.opts.userAgent || + `pacote/${pacoteVersion} node/${process.version}`, + ...(this.opts.headers || {}), + 'pacote-version': pacoteVersion, + 'pacote-req-type': 'packument', + 'pacote-pkg-id': `registry:${this.spec.name}`, + accept: this.fullMetadata ? fullDoc : corgiDoc, + } + } + + async packument () { + // note this might be either an in-flight promise for a request, + // or the actual packument, but we never want to make more than + // one request at a time for the same thing regardless. + if (this.packumentCache?.has(this.#cacheKey)) { + return this.packumentCache.get(this.#cacheKey) + } + + // npm-registry-fetch the packument + // set the appropriate header for corgis if fullMetadata isn't set + // return the res.json() promise + try { + const res = await fetch(this.packumentUrl, { + ...this.opts, + headers: this.#headers(), + spec: this.spec, + + // never check integrity for packuments themselves + integrity: null, + }) + const packument = await res.json() + const contentLength = res.headers.get('content-length') + if (contentLength) { + packument._contentLength = Number(contentLength) + } + this.packumentCache?.set(this.#cacheKey, packument) + return packument + } catch (err) { + this.packumentCache?.delete(this.#cacheKey) + if (err.code !== 'E404' || this.fullMetadata) { + throw err + } + // possible that corgis are not supported by this registry + this.fullMetadata = true + return this.packument() + } + } + + async manifest () { + if (this.package) { + return this.package + } + + // When verifying signatures, we need to fetch the full/uncompressed + // packument to get publish time as this is not included in the + // corgi/compressed packument. + if (this.opts.verifySignatures) { + this.fullMetadata = true + } + + const packument = await this.packument() + const steps = PackageJson.normalizeSteps.filter(s => s !== '_attributes') + const mani = await new PackageJson().fromContent(pickManifest(packument, this.spec.fetchSpec, { + ...this.opts, + defaultTag: this.defaultTag, + before: this.before, + })).normalize({ steps }).then(p => p.content) + + /* XXX add ETARGET and E403 revalidation of cached packuments here */ + + // add _time from packument if fetched with fullMetadata + const time = packument.time?.[mani.version] + if (time) { + mani._time = time + } + + // add _resolved and _integrity from dist object + const { dist } = mani + if (dist) { + this.resolved = mani._resolved = dist.tarball + mani._from = this.from + const distIntegrity = dist.integrity ? ssri.parse(dist.integrity) + : dist.shasum ? ssri.fromHex(dist.shasum, 'sha1', { ...this.opts }) + : null + if (distIntegrity) { + if (this.integrity && !this.integrity.match(distIntegrity)) { + // only bork if they have algos in common. + // otherwise we end up breaking if we have saved a sha512 + // previously for the tarball, but the manifest only + // provides a sha1, which is possible for older publishes. + // Otherwise, this is almost certainly a case of holding it + // wrong, and will result in weird or insecure behavior + // later on when building package tree. + for (const algo of Object.keys(this.integrity)) { + if (distIntegrity[algo]) { + throw Object.assign(new Error( + `Integrity checksum failed when using ${algo}: ` + + `wanted ${this.integrity} but got ${distIntegrity}.` + ), { code: 'EINTEGRITY' }) + } + } + } + // made it this far, the integrity is worthwhile. accept it. + // the setter here will take care of merging it into what we already + // had. + this.integrity = distIntegrity + } + } + if (this.integrity) { + mani._integrity = String(this.integrity) + if (dist.signatures) { + if (this.opts.verifySignatures) { + // validate and throw on error, then set _signatures + const message = `${mani._id}:${mani._integrity}` + for (const signature of dist.signatures) { + const publicKey = this.registryKeys && + this.registryKeys.filter(key => (key.keyid === signature.keyid))[0] + if (!publicKey) { + throw Object.assign(new Error( + `${mani._id} has a registry signature with keyid: ${signature.keyid} ` + + 'but no corresponding public key can be found' + ), { code: 'EMISSINGSIGNATUREKEY' }) + } + + const publishedTime = Date.parse(mani._time || MISSING_TIME_CUTOFF) + const validPublicKey = !publicKey.expires || + publishedTime < Date.parse(publicKey.expires) + if (!validPublicKey) { + throw Object.assign(new Error( + `${mani._id} has a registry signature with keyid: ${signature.keyid} ` + + `but the corresponding public key has expired ${publicKey.expires}` + ), { code: 'EEXPIREDSIGNATUREKEY' }) + } + const verifier = crypto.createVerify('SHA256') + verifier.write(message) + verifier.end() + const valid = verifier.verify( + publicKey.pemkey, + signature.sig, + 'base64' + ) + if (!valid) { + throw Object.assign(new Error( + `${mani._id} has an invalid registry signature with ` + + `keyid: ${publicKey.keyid} and signature: ${signature.sig}` + ), { + code: 'EINTEGRITYSIGNATURE', + keyid: publicKey.keyid, + signature: signature.sig, + resolved: mani._resolved, + integrity: mani._integrity, + }) + } + } + mani._signatures = dist.signatures + } else { + mani._signatures = dist.signatures + } + } + + if (dist.attestations) { + if (this.opts.verifyAttestations) { + // Always fetch attestations from the current registry host + const attestationsPath = new URL(dist.attestations.url).pathname + const attestationsUrl = removeTrailingSlashes(this.registry) + attestationsPath + const res = await fetch(attestationsUrl, { + ...this.opts, + // disable integrity check for attestations json payload, we check the + // integrity in the verification steps below + integrity: null, + }) + const { attestations } = await res.json() + const bundles = attestations.map(({ predicateType, bundle }) => { + const statement = JSON.parse( + Buffer.from(bundle.dsseEnvelope.payload, 'base64').toString('utf8') + ) + const keyid = bundle.dsseEnvelope.signatures[0].keyid + const signature = bundle.dsseEnvelope.signatures[0].sig + + return { + predicateType, + bundle, + statement, + keyid, + signature, + } + }) + + const attestationKeyIds = bundles.map((b) => b.keyid).filter((k) => !!k) + const attestationRegistryKeys = (this.registryKeys || []) + .filter(key => attestationKeyIds.includes(key.keyid)) + if (!attestationRegistryKeys.length) { + throw Object.assign(new Error( + `${mani._id} has attestations but no corresponding public key(s) can be found` + ), { code: 'EMISSINGSIGNATUREKEY' }) + } + + for (const { predicateType, bundle, keyid, signature, statement } of bundles) { + const publicKey = attestationRegistryKeys.find(key => key.keyid === keyid) + // Publish attestations have a keyid set and a valid public key must be found + if (keyid) { + if (!publicKey) { + throw Object.assign(new Error( + `${mani._id} has attestations with keyid: ${keyid} ` + + 'but no corresponding public key can be found' + ), { code: 'EMISSINGSIGNATUREKEY' }) + } + + const integratedTime = new Date( + Number( + bundle.verificationMaterial.tlogEntries[0].integratedTime + ) * 1000 + ) + const validPublicKey = !publicKey.expires || + (integratedTime < Date.parse(publicKey.expires)) + if (!validPublicKey) { + throw Object.assign(new Error( + `${mani._id} has attestations with keyid: ${keyid} ` + + `but the corresponding public key has expired ${publicKey.expires}` + ), { code: 'EEXPIREDSIGNATUREKEY' }) + } + } + + const subject = { + name: statement.subject[0].name, + sha512: statement.subject[0].digest.sha512, + } + + // Only type 'version' can be turned into a PURL + const purl = this.spec.type === 'version' ? npa.toPurl(this.spec) : this.spec + // Verify the statement subject matches the package, version + if (subject.name !== purl) { + throw Object.assign(new Error( + `${mani._id} package name and version (PURL): ${purl} ` + + `doesn't match what was signed: ${subject.name}` + ), { code: 'EATTESTATIONSUBJECT' }) + } + + // Verify the statement subject matches the tarball integrity + const integrityHexDigest = ssri.parse(this.integrity).hexDigest() + if (subject.sha512 !== integrityHexDigest) { + throw Object.assign(new Error( + `${mani._id} package integrity (hex digest): ` + + `${integrityHexDigest} ` + + `doesn't match what was signed: ${subject.sha512}` + ), { code: 'EATTESTATIONSUBJECT' }) + } + + try { + // Provenance attestations are signed with a signing certificate + // (including the key) so we don't need to return a public key. + // + // Publish attestations are signed with a keyid so we need to + // specify a public key from the keys endpoint: `registry-host.tld/-/npm/v1/keys` + const options = { + tufCachePath: this.tufCache, + tufForceCache: true, + keySelector: publicKey ? () => publicKey.pemkey : undefined, + } + await sigstore.verify(bundle, options) + } catch (e) { + throw Object.assign(new Error( + `${mani._id} failed to verify attestation: ${e.message}` + ), { + code: 'EATTESTATIONVERIFY', + predicateType, + keyid, + signature, + resolved: mani._resolved, + integrity: mani._integrity, + }) + } + } + mani._attestations = dist.attestations + } else { + mani._attestations = dist.attestations + } + } + } + + this.package = mani + return this.package + } + + [_.tarballFromResolved] () { + // we use a RemoteFetcher to get the actual tarball stream + return new RemoteFetcher(this.resolved, { + ...this.opts, + resolved: this.resolved, + pkgid: `registry:${this.spec.name}@${this.resolved}`, + })[_.tarballFromResolved]() + } + + get types () { + return [ + 'tag', + 'version', + 'range', + ] + } +} +module.exports = RegistryFetcher diff --git a/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/remote.js b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/remote.js new file mode 100644 index 00000000000000..bd321e65a1f18a --- /dev/null +++ b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/remote.js @@ -0,0 +1,89 @@ +const fetch = require('npm-registry-fetch') +const { Minipass } = require('minipass') +const Fetcher = require('./fetcher.js') +const FileFetcher = require('./file.js') +const _ = require('./util/protected.js') +const pacoteVersion = require('../package.json').version + +class RemoteFetcher extends Fetcher { + constructor (spec, opts) { + super(spec, opts) + this.resolved = this.spec.fetchSpec + const resolvedURL = new URL(this.resolved) + if (this.replaceRegistryHost !== 'never' + && (this.replaceRegistryHost === 'always' + || this.replaceRegistryHost === resolvedURL.host)) { + this.resolved = new URL(resolvedURL.pathname, this.registry).href + } + + // nam is a fermented pork sausage that is good to eat + const nameat = this.spec.name ? `${this.spec.name}@` : '' + this.pkgid = opts.pkgid ? opts.pkgid : `remote:${nameat}${this.resolved}` + } + + // Don't need to cache tarball fetches in pacote, because make-fetch-happen + // will write into cacache anyway. + get [_.cacheFetches] () { + return false + } + + [_.tarballFromResolved] () { + const stream = new Minipass() + stream.hasIntegrityEmitter = true + + const fetchOpts = { + ...this.opts, + headers: this.#headers(), + spec: this.spec, + integrity: this.integrity, + algorithms: [this.pickIntegrityAlgorithm()], + } + + // eslint-disable-next-line promise/always-return + fetch(this.resolved, fetchOpts).then(res => { + res.body.on('error', + /* istanbul ignore next - exceedingly rare and hard to simulate */ + er => stream.emit('error', er) + ) + + res.body.on('integrity', i => { + this.integrity = i + stream.emit('integrity', i) + }) + + res.body.pipe(stream) + }).catch(er => stream.emit('error', er)) + + return stream + } + + #headers () { + return { + // npm will override this, but ensure that we always send *something* + 'user-agent': this.opts.userAgent || + `pacote/${pacoteVersion} node/${process.version}`, + ...(this.opts.headers || {}), + 'pacote-version': pacoteVersion, + 'pacote-req-type': 'tarball', + 'pacote-pkg-id': this.pkgid, + ...(this.integrity ? { 'pacote-integrity': String(this.integrity) } + : {}), + ...(this.opts.headers || {}), + } + } + + get types () { + return ['remote'] + } + + // getting a packument and/or manifest is the same as with a file: spec. + // unpack the tarball stream, and then read from the package.json file. + packument () { + return FileFetcher.prototype.packument.apply(this) + } + + manifest () { + return FileFetcher.prototype.manifest.apply(this) + } +} +module.exports = RemoteFetcher diff --git a/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/util/add-git-sha.js b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/util/add-git-sha.js new file mode 100644 index 00000000000000..843fe5b600cafa --- /dev/null +++ b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/util/add-git-sha.js @@ -0,0 +1,15 @@ +// add a sha to a git remote url spec +const addGitSha = (spec, sha) => { + if (spec.hosted) { + const h = spec.hosted + const opt = { noCommittish: true } + const base = h.https && h.auth ? h.https(opt) : h.shortcut(opt) + + return `${base}#${sha}` + } else { + // don't use new URL for this, because it doesn't handle scp urls + return spec.rawSpec.replace(/#.*$/, '') + `#${sha}` + } +} + +module.exports = addGitSha diff --git a/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/util/cache-dir.js b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/util/cache-dir.js new file mode 100644 index 00000000000000..ba5683a7bb5bf3 --- /dev/null +++ b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/util/cache-dir.js @@ -0,0 +1,15 @@ +const { resolve } = require('node:path') +const { tmpdir, homedir } = require('node:os') + +module.exports = (fakePlatform = false) => { + const temp = tmpdir() + const uidOrPid = process.getuid ? process.getuid() : process.pid + const home = homedir() || resolve(temp, 'npm-' + uidOrPid) + const platform = fakePlatform || process.platform + const cacheExtra = platform === 'win32' ? 'npm-cache' : '.npm' + const cacheRoot = (platform === 'win32' && process.env.LOCALAPPDATA) || home + return { + cacache: resolve(cacheRoot, cacheExtra, '_cacache'), + tufcache: resolve(cacheRoot, cacheExtra, '_tuf'), + } +} diff --git a/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/util/is-package-bin.js b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/util/is-package-bin.js new file mode 100644 index 00000000000000..49a3f73f537ce9 --- /dev/null +++ b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/util/is-package-bin.js @@ -0,0 +1,25 @@ +// Function to determine whether a path is in the package.bin set. +// Used to prevent issues when people publish a package from a +// windows machine, and then install with --no-bin-links. +// +// Note: this is not possible in remote or file fetchers, since +// we don't have the manifest until AFTER we've unpacked. But the +// main use case is registry fetching with git a distant second, +// so that's an acceptable edge case to not handle. + +const binObj = (name, bin) => + typeof bin === 'string' ? { [name]: bin } : bin + +const hasBin = (pkg, path) => { + const bin = binObj(pkg.name, pkg.bin) + const p = path.replace(/^[^\\/]*\//, '') + for (const kv of Object.entries(bin)) { + if (kv[1] === p) { + return true + } + } + return false +} + +module.exports = (pkg, path) => + pkg && pkg.bin ? hasBin(pkg, path) : false diff --git a/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/util/npm.js b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/util/npm.js new file mode 100644 index 00000000000000..a3005c255565fb --- /dev/null +++ b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/util/npm.js @@ -0,0 +1,14 @@ +// run an npm command +const spawn = require('@npmcli/promise-spawn') + +module.exports = (npmBin, npmCommand, cwd, env, extra) => { + const isJS = npmBin.endsWith('.js') + const cmd = isJS ? process.execPath : npmBin + const args = (isJS ? [npmBin] : []).concat(npmCommand) + // when installing to run the `prepare` script for a git dep, we need + // to ensure that we don't run into a cycle of checking out packages + // in temp directories. this lets us link previously-seen repos that + // are also being prepared. + + return spawn(cmd, args, { cwd, env }, extra) +} diff --git a/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/util/protected.js b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/util/protected.js new file mode 100644 index 00000000000000..e05203b481e6aa --- /dev/null +++ b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/util/protected.js @@ -0,0 +1,5 @@ +module.exports = { + cacheFetches: Symbol.for('pacote.Fetcher._cacheFetches'), + readPackageJson: Symbol.for('package.Fetcher._readPackageJson'), + tarballFromResolved: Symbol.for('pacote.Fetcher._tarballFromResolved'), +} diff --git a/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/util/tar-create-options.js b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/util/tar-create-options.js new file mode 100644 index 00000000000000..d070f0f7ba2d4e --- /dev/null +++ b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/util/tar-create-options.js @@ -0,0 +1,31 @@ +const isPackageBin = require('./is-package-bin.js') + +const tarCreateOptions = manifest => ({ + cwd: manifest._resolved, + prefix: 'package/', + portable: true, + gzip: { + // forcing the level to 9 seems to avoid some + // platform specific optimizations that cause + // integrity mismatch errors due to differing + // end results after compression + level: 9, + }, + + // ensure that package bins are always executable + // Note that npm-packlist is already filtering out + // anything that is not a regular file, ignored by + // .npmignore or package.json "files", etc. + filter: (path, stat) => { + if (isPackageBin(manifest, path)) { + stat.mode |= 0o111 + } + return true + }, + + // Provide a specific date in the 1980s for the benefit of zip, + // which is confounded by files dated at the Unix epoch 0. + mtime: new Date('1985-10-26T08:15:00.000Z'), +}) + +module.exports = tarCreateOptions diff --git a/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/util/trailing-slashes.js b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/util/trailing-slashes.js new file mode 100644 index 00000000000000..c50cb6173b92eb --- /dev/null +++ b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/lib/util/trailing-slashes.js @@ -0,0 +1,10 @@ +const removeTrailingSlashes = (input) => { + // in order to avoid regexp redos detection + let output = input + while (output.endsWith('/')) { + output = output.slice(0, -1) + } + return output +} + +module.exports = removeTrailingSlashes diff --git a/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/package.json b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/package.json new file mode 100644 index 00000000000000..335c7a6c87bd3c --- /dev/null +++ b/deps/npm/node_modules/@npmcli/metavuln-calculator/node_modules/pacote/package.json @@ -0,0 +1,79 @@ +{ + "name": "pacote", + "version": "20.0.0", + "description": "JavaScript package downloader", + "author": "GitHub Inc.", + "bin": { + "pacote": "bin/index.js" + }, + "license": "ISC", + "main": "lib/index.js", + "scripts": { + "test": "tap", + "snap": "tap", + "lint": "npm run eslint", + "postlint": "template-oss-check", + "lintfix": "npm run eslint -- --fix", + "posttest": "npm run lint", + "template-oss-apply": "template-oss-apply --force", + "eslint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"" + }, + "tap": { + "timeout": 300, + "nyc-arg": [ + "--exclude", + "tap-snapshots/**" + ] + }, + "devDependencies": { + "@npmcli/arborist": "^7.1.0", + "@npmcli/eslint-config": "^5.0.0", + "@npmcli/template-oss": "4.23.3", + "hosted-git-info": "^8.0.0", + "mutate-fs": "^2.1.1", + "nock": "^13.2.4", + "npm-registry-mock": "^1.3.2", + "tap": "^16.0.1" + }, + "files": [ + "bin/", + "lib/" + ], + "keywords": [ + "packages", + "npm", + "git" + ], + "dependencies": { + "@npmcli/git": "^6.0.0", + "@npmcli/installed-package-contents": "^3.0.0", + "@npmcli/package-json": "^6.0.0", + "@npmcli/promise-spawn": "^8.0.0", + "@npmcli/run-script": "^9.0.0", + "cacache": "^19.0.0", + "fs-minipass": "^3.0.0", + "minipass": "^7.0.2", + "npm-package-arg": "^12.0.0", + "npm-packlist": "^9.0.0", + "npm-pick-manifest": "^10.0.0", + "npm-registry-fetch": "^18.0.0", + "proc-log": "^5.0.0", + "promise-retry": "^2.0.1", + "sigstore": "^3.0.0", + "ssri": "^12.0.0", + "tar": "^6.1.11" + }, + "engines": { + "node": "^18.17.0 || >=20.5.0" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/npm/pacote.git" + }, + "templateOSS": { + "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", + "version": "4.23.3", + "windowsCI": false, + "publish": "true" + } +} diff --git a/deps/npm/node_modules/@npmcli/metavuln-calculator/package.json b/deps/npm/node_modules/@npmcli/metavuln-calculator/package.json index d4c3cf54d83ea7..df0b8f2f4faf1c 100644 --- a/deps/npm/node_modules/@npmcli/metavuln-calculator/package.json +++ b/deps/npm/node_modules/@npmcli/metavuln-calculator/package.json @@ -1,6 +1,6 @@ { "name": "@npmcli/metavuln-calculator", - "version": "8.0.0", + "version": "8.0.1", "main": "lib/index.js", "files": [ "bin/", @@ -41,7 +41,7 @@ "dependencies": { "cacache": "^19.0.0", "json-parse-even-better-errors": "^4.0.0", - "pacote": "^19.0.0", + "pacote": "^20.0.0", "proc-log": "^5.0.0", "semver": "^7.3.5" }, diff --git a/deps/npm/node_modules/@npmcli/package-json/lib/index.js b/deps/npm/node_modules/@npmcli/package-json/lib/index.js index f165ee23b75ab9..23f326dd59359f 100644 --- a/deps/npm/node_modules/@npmcli/package-json/lib/index.js +++ b/deps/npm/node_modules/@npmcli/package-json/lib/index.js @@ -7,6 +7,7 @@ const updateScripts = require('./update-scripts.js') const updateWorkspaces = require('./update-workspaces.js') const normalize = require('./normalize.js') const { read, parse } = require('./read-package.js') +const { packageSort } = require('./sort.js') // a list of handy specialized helper functions that take // care of special cases that are handled by the npm cli @@ -230,19 +231,23 @@ class PackageJson { return this } - async save () { + async save ({ sort } = {}) { if (!this.#canSave) { throw new Error('No package.json to save to') } const { [Symbol.for('indent')]: indent, [Symbol.for('newline')]: newline, + ...rest } = this.content const format = indent === undefined ? ' ' : indent const eol = newline === undefined ? '\n' : newline + + const content = sort ? packageSort(rest) : rest + const fileContent = `${ - JSON.stringify(this.content, null, format) + JSON.stringify(content, null, format) }\n` .replace(/\n/g, eol) diff --git a/deps/npm/node_modules/@npmcli/package-json/lib/sort.js b/deps/npm/node_modules/@npmcli/package-json/lib/sort.js new file mode 100644 index 00000000000000..0bd0d5199da583 --- /dev/null +++ b/deps/npm/node_modules/@npmcli/package-json/lib/sort.js @@ -0,0 +1,101 @@ +/** + * arbitrary sort order for package.json largely pulled from: + * https://github.com/keithamus/sort-package-json/blob/main/defaultRules.md + * + * cross checked with: + * https://github.com/npm/types/blob/main/types/index.d.ts#L104 + * https://docs.npmjs.com/cli/configuring-npm/package-json + */ +function packageSort (json) { + const { + name, + version, + private: isPrivate, + description, + keywords, + homepage, + bugs, + repository, + funding, + license, + author, + maintainers, + contributors, + type, + imports, + exports, + main, + browser, + types, + bin, + man, + directories, + files, + workspaces, + scripts, + config, + dependencies, + devDependencies, + peerDependencies, + peerDependenciesMeta, + optionalDependencies, + bundledDependencies, + bundleDependencies, + engines, + os, + cpu, + publishConfig, + devEngines, + licenses, + overrides, + ...rest + } = json + + return { + ...(typeof name !== 'undefined' ? { name } : {}), + ...(typeof version !== 'undefined' ? { version } : {}), + ...(typeof isPrivate !== 'undefined' ? { private: isPrivate } : {}), + ...(typeof description !== 'undefined' ? { description } : {}), + ...(typeof keywords !== 'undefined' ? { keywords } : {}), + ...(typeof homepage !== 'undefined' ? { homepage } : {}), + ...(typeof bugs !== 'undefined' ? { bugs } : {}), + ...(typeof repository !== 'undefined' ? { repository } : {}), + ...(typeof funding !== 'undefined' ? { funding } : {}), + ...(typeof license !== 'undefined' ? { license } : {}), + ...(typeof author !== 'undefined' ? { author } : {}), + ...(typeof maintainers !== 'undefined' ? { maintainers } : {}), + ...(typeof contributors !== 'undefined' ? { contributors } : {}), + ...(typeof type !== 'undefined' ? { type } : {}), + ...(typeof imports !== 'undefined' ? { imports } : {}), + ...(typeof exports !== 'undefined' ? { exports } : {}), + ...(typeof main !== 'undefined' ? { main } : {}), + ...(typeof browser !== 'undefined' ? { browser } : {}), + ...(typeof types !== 'undefined' ? { types } : {}), + ...(typeof bin !== 'undefined' ? { bin } : {}), + ...(typeof man !== 'undefined' ? { man } : {}), + ...(typeof directories !== 'undefined' ? { directories } : {}), + ...(typeof files !== 'undefined' ? { files } : {}), + ...(typeof workspaces !== 'undefined' ? { workspaces } : {}), + ...(typeof scripts !== 'undefined' ? { scripts } : {}), + ...(typeof config !== 'undefined' ? { config } : {}), + ...(typeof dependencies !== 'undefined' ? { dependencies } : {}), + ...(typeof devDependencies !== 'undefined' ? { devDependencies } : {}), + ...(typeof peerDependencies !== 'undefined' ? { peerDependencies } : {}), + ...(typeof peerDependenciesMeta !== 'undefined' ? { peerDependenciesMeta } : {}), + ...(typeof optionalDependencies !== 'undefined' ? { optionalDependencies } : {}), + ...(typeof bundledDependencies !== 'undefined' ? { bundledDependencies } : {}), + ...(typeof bundleDependencies !== 'undefined' ? { bundleDependencies } : {}), + ...(typeof engines !== 'undefined' ? { engines } : {}), + ...(typeof os !== 'undefined' ? { os } : {}), + ...(typeof cpu !== 'undefined' ? { cpu } : {}), + ...(typeof publishConfig !== 'undefined' ? { publishConfig } : {}), + ...(typeof devEngines !== 'undefined' ? { devEngines } : {}), + ...(typeof licenses !== 'undefined' ? { licenses } : {}), + ...(typeof overrides !== 'undefined' ? { overrides } : {}), + ...rest, + } +} + +module.exports = { + packageSort, +} diff --git a/deps/npm/node_modules/@npmcli/package-json/package.json b/deps/npm/node_modules/@npmcli/package-json/package.json index e766e8ccb4bbf5..97070e27d0d22e 100644 --- a/deps/npm/node_modules/@npmcli/package-json/package.json +++ b/deps/npm/node_modules/@npmcli/package-json/package.json @@ -1,7 +1,17 @@ { "name": "@npmcli/package-json", - "version": "6.0.1", + "version": "6.1.0", "description": "Programmatic API to update package.json", + "keywords": [ + "npm", + "oss" + ], + "repository": { + "type": "git", + "url": "git+https://github.com/npm/package-json.git" + }, + "license": "ISC", + "author": "GitHub Inc.", "main": "lib/index.js", "files": [ "bin/", @@ -18,19 +28,6 @@ "template-oss-apply": "template-oss-apply --force", "eslint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"" }, - "keywords": [ - "npm", - "oss" - ], - "author": "GitHub Inc.", - "license": "ISC", - "devDependencies": { - "@npmcli/eslint-config": "^5.0.0", - "@npmcli/template-oss": "4.23.3", - "read-package-json": "^7.0.0", - "read-package-json-fast": "^4.0.0", - "tap": "^16.0.1" - }, "dependencies": { "@npmcli/git": "^6.0.0", "glob": "^10.2.2", @@ -40,16 +37,19 @@ "proc-log": "^5.0.0", "semver": "^7.5.3" }, - "repository": { - "type": "git", - "url": "git+https://github.com/npm/package-json.git" + "devDependencies": { + "@npmcli/eslint-config": "^5.0.0", + "@npmcli/template-oss": "4.23.5", + "read-package-json": "^7.0.0", + "read-package-json-fast": "^4.0.0", + "tap": "^16.0.1" }, "engines": { "node": "^18.17.0 || >=20.5.0" }, "templateOSS": { "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.23.3", + "version": "4.23.5", "publish": "true" }, "tap": { diff --git a/deps/npm/node_modules/@npmcli/promise-spawn/lib/index.js b/deps/npm/node_modules/@npmcli/promise-spawn/lib/index.js index e147cb8f9c746f..aa7b55d8f038d4 100644 --- a/deps/npm/node_modules/@npmcli/promise-spawn/lib/index.js +++ b/deps/npm/node_modules/@npmcli/promise-spawn/lib/index.js @@ -131,9 +131,19 @@ const open = (_args, opts = {}, extra = {}) => { let platform = process.platform // process.platform === 'linux' may actually indicate WSL, if that's the case - // we want to treat things as win32 anyway so the host can open the argument + // open the argument with sensible-browser which is pre-installed + // In WSL, set the default browser using, for example, + // export BROWSER="/mnt/c/Program Files (x86)/Google/Chrome/Application/chrome.exe" + // or + // export BROWSER="/mnt/c/Program Files (x86)/Microsoft/Edge/Application/msedge.exe" + // To permanently set the default browser, add the appropriate entry to your shell's + // RC file, e.g. .bashrc or .zshrc. if (platform === 'linux' && os.release().toLowerCase().includes('microsoft')) { - platform = 'win32' + platform = 'wsl' + if (!process.env.BROWSER) { + return Promise.reject( + new Error('Set the BROWSER environment variable to your desired browser.')) + } } let command = options.command @@ -146,6 +156,8 @@ const open = (_args, opts = {}, extra = {}) => { // accidentally interpret the first arg as the title, we stick an empty // string immediately after the start command command = 'start ""' + } else if (platform === 'wsl') { + command = 'sensible-browser' } else if (platform === 'darwin') { command = 'open' } else { diff --git a/deps/npm/node_modules/@npmcli/promise-spawn/package.json b/deps/npm/node_modules/@npmcli/promise-spawn/package.json index 9914063f85156d..f5fb026be50e85 100644 --- a/deps/npm/node_modules/@npmcli/promise-spawn/package.json +++ b/deps/npm/node_modules/@npmcli/promise-spawn/package.json @@ -1,6 +1,6 @@ { "name": "@npmcli/promise-spawn", - "version": "8.0.1", + "version": "8.0.2", "files": [ "bin/", "lib/" @@ -33,7 +33,7 @@ }, "devDependencies": { "@npmcli/eslint-config": "^5.0.0", - "@npmcli/template-oss": "4.23.3", + "@npmcli/template-oss": "4.23.4", "spawk": "^1.7.1", "tap": "^16.0.1" }, @@ -42,7 +42,7 @@ }, "templateOSS": { "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.23.3", + "version": "4.23.4", "publish": true }, "dependencies": { diff --git a/deps/npm/node_modules/@npmcli/run-script/package.json b/deps/npm/node_modules/@npmcli/run-script/package.json index e5fd2fef62e5ce..38a2ac9f87772b 100644 --- a/deps/npm/node_modules/@npmcli/run-script/package.json +++ b/deps/npm/node_modules/@npmcli/run-script/package.json @@ -1,6 +1,6 @@ { "name": "@npmcli/run-script", - "version": "9.0.1", + "version": "9.0.2", "description": "Run a lifecycle script for a package (descendant of npm-lifecycle)", "author": "GitHub Inc.", "license": "ISC", @@ -16,7 +16,7 @@ }, "devDependencies": { "@npmcli/eslint-config": "^5.0.0", - "@npmcli/template-oss": "4.23.3", + "@npmcli/template-oss": "4.23.4", "spawk": "^1.8.1", "tap": "^16.0.1" }, @@ -24,7 +24,7 @@ "@npmcli/node-gyp": "^4.0.0", "@npmcli/package-json": "^6.0.0", "@npmcli/promise-spawn": "^8.0.0", - "node-gyp": "^10.0.0", + "node-gyp": "^11.0.0", "proc-log": "^5.0.0", "which": "^5.0.0" }, @@ -42,7 +42,7 @@ }, "templateOSS": { "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.23.3", + "version": "4.23.4", "publish": "true" }, "tap": { diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/agent/lib/agents.js b/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/agent/lib/agents.js deleted file mode 100644 index c541b93001517e..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/agent/lib/agents.js +++ /dev/null @@ -1,206 +0,0 @@ -'use strict' - -const net = require('net') -const tls = require('tls') -const { once } = require('events') -const timers = require('timers/promises') -const { normalizeOptions, cacheOptions } = require('./options') -const { getProxy, getProxyAgent, proxyCache } = require('./proxy.js') -const Errors = require('./errors.js') -const { Agent: AgentBase } = require('agent-base') - -module.exports = class Agent extends AgentBase { - #options - #timeouts - #proxy - #noProxy - #ProxyAgent - - constructor (options = {}) { - const { timeouts, proxy, noProxy, ...normalizedOptions } = normalizeOptions(options) - - super(normalizedOptions) - - this.#options = normalizedOptions - this.#timeouts = timeouts - - if (proxy) { - this.#proxy = new URL(proxy) - this.#noProxy = noProxy - this.#ProxyAgent = getProxyAgent(proxy) - } - } - - get proxy () { - return this.#proxy ? { url: this.#proxy } : {} - } - - #getProxy (options) { - if (!this.#proxy) { - return - } - - const proxy = getProxy(`${options.protocol}//${options.host}:${options.port}`, { - proxy: this.#proxy, - noProxy: this.#noProxy, - }) - - if (!proxy) { - return - } - - const cacheKey = cacheOptions({ - ...options, - ...this.#options, - timeouts: this.#timeouts, - proxy, - }) - - if (proxyCache.has(cacheKey)) { - return proxyCache.get(cacheKey) - } - - let ProxyAgent = this.#ProxyAgent - if (Array.isArray(ProxyAgent)) { - ProxyAgent = this.isSecureEndpoint(options) ? ProxyAgent[1] : ProxyAgent[0] - } - - const proxyAgent = new ProxyAgent(proxy, { - ...this.#options, - socketOptions: { family: this.#options.family }, - }) - proxyCache.set(cacheKey, proxyAgent) - - return proxyAgent - } - - // takes an array of promises and races them against the connection timeout - // which will throw the necessary error if it is hit. This will return the - // result of the promise race. - async #timeoutConnection ({ promises, options, timeout }, ac = new AbortController()) { - if (timeout) { - const connectionTimeout = timers.setTimeout(timeout, null, { signal: ac.signal }) - .then(() => { - throw new Errors.ConnectionTimeoutError(`${options.host}:${options.port}`) - }).catch((err) => { - if (err.name === 'AbortError') { - return - } - throw err - }) - promises.push(connectionTimeout) - } - - let result - try { - result = await Promise.race(promises) - ac.abort() - } catch (err) { - ac.abort() - throw err - } - return result - } - - async connect (request, options) { - // if the connection does not have its own lookup function - // set, then use the one from our options - options.lookup ??= this.#options.lookup - - let socket - let timeout = this.#timeouts.connection - const isSecureEndpoint = this.isSecureEndpoint(options) - - const proxy = this.#getProxy(options) - if (proxy) { - // some of the proxies will wait for the socket to fully connect before - // returning so we have to await this while also racing it against the - // connection timeout. - const start = Date.now() - socket = await this.#timeoutConnection({ - options, - timeout, - promises: [proxy.connect(request, options)], - }) - // see how much time proxy.connect took and subtract it from - // the timeout - if (timeout) { - timeout = timeout - (Date.now() - start) - } - } else { - socket = (isSecureEndpoint ? tls : net).connect(options) - } - - socket.setKeepAlive(this.keepAlive, this.keepAliveMsecs) - socket.setNoDelay(this.keepAlive) - - const abortController = new AbortController() - const { signal } = abortController - - const connectPromise = socket[isSecureEndpoint ? 'secureConnecting' : 'connecting'] - ? once(socket, isSecureEndpoint ? 'secureConnect' : 'connect', { signal }) - : Promise.resolve() - - await this.#timeoutConnection({ - options, - timeout, - promises: [ - connectPromise, - once(socket, 'error', { signal }).then((err) => { - throw err[0] - }), - ], - }, abortController) - - if (this.#timeouts.idle) { - socket.setTimeout(this.#timeouts.idle, () => { - socket.destroy(new Errors.IdleTimeoutError(`${options.host}:${options.port}`)) - }) - } - - return socket - } - - addRequest (request, options) { - const proxy = this.#getProxy(options) - // it would be better to call proxy.addRequest here but this causes the - // http-proxy-agent to call its super.addRequest which causes the request - // to be added to the agent twice. since we only support 3 agents - // currently (see the required agents in proxy.js) we have manually - // checked that the only public methods we need to call are called in the - // next block. this could change in the future and presumably we would get - // failing tests until we have properly called the necessary methods on - // each of our proxy agents - if (proxy?.setRequestProps) { - proxy.setRequestProps(request, options) - } - - request.setHeader('connection', this.keepAlive ? 'keep-alive' : 'close') - - if (this.#timeouts.response) { - let responseTimeout - request.once('finish', () => { - setTimeout(() => { - request.destroy(new Errors.ResponseTimeoutError(request, this.#proxy)) - }, this.#timeouts.response) - }) - request.once('response', () => { - clearTimeout(responseTimeout) - }) - } - - if (this.#timeouts.transfer) { - let transferTimeout - request.once('response', (res) => { - setTimeout(() => { - res.destroy(new Errors.TransferTimeoutError(request, this.#proxy)) - }, this.#timeouts.transfer) - res.once('close', () => { - clearTimeout(transferTimeout) - }) - }) - } - - return super.addRequest(request, options) - } -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/agent/lib/dns.js b/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/agent/lib/dns.js deleted file mode 100644 index 3c6946c566d736..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/agent/lib/dns.js +++ /dev/null @@ -1,53 +0,0 @@ -'use strict' - -const { LRUCache } = require('lru-cache') -const dns = require('dns') - -// this is a factory so that each request can have its own opts (i.e. ttl) -// while still sharing the cache across all requests -const cache = new LRUCache({ max: 50 }) - -const getOptions = ({ - family = 0, - hints = dns.ADDRCONFIG, - all = false, - verbatim = undefined, - ttl = 5 * 60 * 1000, - lookup = dns.lookup, -}) => ({ - // hints and lookup are returned since both are top level properties to (net|tls).connect - hints, - lookup: (hostname, ...args) => { - const callback = args.pop() // callback is always last arg - const lookupOptions = args[0] ?? {} - - const options = { - family, - hints, - all, - verbatim, - ...(typeof lookupOptions === 'number' ? { family: lookupOptions } : lookupOptions), - } - - const key = JSON.stringify({ hostname, ...options }) - - if (cache.has(key)) { - const cached = cache.get(key) - return process.nextTick(callback, null, ...cached) - } - - lookup(hostname, options, (err, ...result) => { - if (err) { - return callback(err) - } - - cache.set(key, result, { ttl }) - return callback(null, ...result) - }) - }, -}) - -module.exports = { - cache, - getOptions, -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/agent/lib/errors.js b/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/agent/lib/errors.js deleted file mode 100644 index 70475aec8eb357..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/agent/lib/errors.js +++ /dev/null @@ -1,61 +0,0 @@ -'use strict' - -class InvalidProxyProtocolError extends Error { - constructor (url) { - super(`Invalid protocol \`${url.protocol}\` connecting to proxy \`${url.host}\``) - this.code = 'EINVALIDPROXY' - this.proxy = url - } -} - -class ConnectionTimeoutError extends Error { - constructor (host) { - super(`Timeout connecting to host \`${host}\``) - this.code = 'ECONNECTIONTIMEOUT' - this.host = host - } -} - -class IdleTimeoutError extends Error { - constructor (host) { - super(`Idle timeout reached for host \`${host}\``) - this.code = 'EIDLETIMEOUT' - this.host = host - } -} - -class ResponseTimeoutError extends Error { - constructor (request, proxy) { - let msg = 'Response timeout ' - if (proxy) { - msg += `from proxy \`${proxy.host}\` ` - } - msg += `connecting to host \`${request.host}\`` - super(msg) - this.code = 'ERESPONSETIMEOUT' - this.proxy = proxy - this.request = request - } -} - -class TransferTimeoutError extends Error { - constructor (request, proxy) { - let msg = 'Transfer timeout ' - if (proxy) { - msg += `from proxy \`${proxy.host}\` ` - } - msg += `for \`${request.host}\`` - super(msg) - this.code = 'ETRANSFERTIMEOUT' - this.proxy = proxy - this.request = request - } -} - -module.exports = { - InvalidProxyProtocolError, - ConnectionTimeoutError, - IdleTimeoutError, - ResponseTimeoutError, - TransferTimeoutError, -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/agent/lib/index.js b/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/agent/lib/index.js deleted file mode 100644 index b33d6eaef07a21..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/agent/lib/index.js +++ /dev/null @@ -1,56 +0,0 @@ -'use strict' - -const { LRUCache } = require('lru-cache') -const { normalizeOptions, cacheOptions } = require('./options') -const { getProxy, proxyCache } = require('./proxy.js') -const dns = require('./dns.js') -const Agent = require('./agents.js') - -const agentCache = new LRUCache({ max: 20 }) - -const getAgent = (url, { agent, proxy, noProxy, ...options } = {}) => { - // false has meaning so this can't be a simple truthiness check - if (agent != null) { - return agent - } - - url = new URL(url) - - const proxyForUrl = getProxy(url, { proxy, noProxy }) - const normalizedOptions = { - ...normalizeOptions(options), - proxy: proxyForUrl, - } - - const cacheKey = cacheOptions({ - ...normalizedOptions, - secureEndpoint: url.protocol === 'https:', - }) - - if (agentCache.has(cacheKey)) { - return agentCache.get(cacheKey) - } - - const newAgent = new Agent(normalizedOptions) - agentCache.set(cacheKey, newAgent) - - return newAgent -} - -module.exports = { - getAgent, - Agent, - // these are exported for backwards compatability - HttpAgent: Agent, - HttpsAgent: Agent, - cache: { - proxy: proxyCache, - agent: agentCache, - dns: dns.cache, - clear: () => { - proxyCache.clear() - agentCache.clear() - dns.cache.clear() - }, - }, -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/agent/lib/options.js b/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/agent/lib/options.js deleted file mode 100644 index 0bf53f725f0846..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/agent/lib/options.js +++ /dev/null @@ -1,86 +0,0 @@ -'use strict' - -const dns = require('./dns') - -const normalizeOptions = (opts) => { - const family = parseInt(opts.family ?? '0', 10) - const keepAlive = opts.keepAlive ?? true - - const normalized = { - // nodejs http agent options. these are all the defaults - // but kept here to increase the likelihood of cache hits - // https://nodejs.org/api/http.html#new-agentoptions - keepAliveMsecs: keepAlive ? 1000 : undefined, - maxSockets: opts.maxSockets ?? 15, - maxTotalSockets: Infinity, - maxFreeSockets: keepAlive ? 256 : undefined, - scheduling: 'fifo', - // then spread the rest of the options - ...opts, - // we already set these to their defaults that we want - family, - keepAlive, - // our custom timeout options - timeouts: { - // the standard timeout option is mapped to our idle timeout - // and then deleted below - idle: opts.timeout ?? 0, - connection: 0, - response: 0, - transfer: 0, - ...opts.timeouts, - }, - // get the dns options that go at the top level of socket connection - ...dns.getOptions({ family, ...opts.dns }), - } - - // remove timeout since we already used it to set our own idle timeout - delete normalized.timeout - - return normalized -} - -const createKey = (obj) => { - let key = '' - const sorted = Object.entries(obj).sort((a, b) => a[0] - b[0]) - for (let [k, v] of sorted) { - if (v == null) { - v = 'null' - } else if (v instanceof URL) { - v = v.toString() - } else if (typeof v === 'object') { - v = createKey(v) - } - key += `${k}:${v}:` - } - return key -} - -const cacheOptions = ({ secureEndpoint, ...options }) => createKey({ - secureEndpoint: !!secureEndpoint, - // socket connect options - family: options.family, - hints: options.hints, - localAddress: options.localAddress, - // tls specific connect options - strictSsl: secureEndpoint ? !!options.rejectUnauthorized : false, - ca: secureEndpoint ? options.ca : null, - cert: secureEndpoint ? options.cert : null, - key: secureEndpoint ? options.key : null, - // http agent options - keepAlive: options.keepAlive, - keepAliveMsecs: options.keepAliveMsecs, - maxSockets: options.maxSockets, - maxTotalSockets: options.maxTotalSockets, - maxFreeSockets: options.maxFreeSockets, - scheduling: options.scheduling, - // timeout options - timeouts: options.timeouts, - // proxy - proxy: options.proxy, -}) - -module.exports = { - normalizeOptions, - cacheOptions, -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/agent/lib/proxy.js b/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/agent/lib/proxy.js deleted file mode 100644 index 6272e929e57bcf..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/agent/lib/proxy.js +++ /dev/null @@ -1,88 +0,0 @@ -'use strict' - -const { HttpProxyAgent } = require('http-proxy-agent') -const { HttpsProxyAgent } = require('https-proxy-agent') -const { SocksProxyAgent } = require('socks-proxy-agent') -const { LRUCache } = require('lru-cache') -const { InvalidProxyProtocolError } = require('./errors.js') - -const PROXY_CACHE = new LRUCache({ max: 20 }) - -const SOCKS_PROTOCOLS = new Set(SocksProxyAgent.protocols) - -const PROXY_ENV_KEYS = new Set(['https_proxy', 'http_proxy', 'proxy', 'no_proxy']) - -const PROXY_ENV = Object.entries(process.env).reduce((acc, [key, value]) => { - key = key.toLowerCase() - if (PROXY_ENV_KEYS.has(key)) { - acc[key] = value - } - return acc -}, {}) - -const getProxyAgent = (url) => { - url = new URL(url) - - const protocol = url.protocol.slice(0, -1) - if (SOCKS_PROTOCOLS.has(protocol)) { - return SocksProxyAgent - } - if (protocol === 'https' || protocol === 'http') { - return [HttpProxyAgent, HttpsProxyAgent] - } - - throw new InvalidProxyProtocolError(url) -} - -const isNoProxy = (url, noProxy) => { - if (typeof noProxy === 'string') { - noProxy = noProxy.split(',').map((p) => p.trim()).filter(Boolean) - } - - if (!noProxy || !noProxy.length) { - return false - } - - const hostSegments = url.hostname.split('.').reverse() - - return noProxy.some((no) => { - const noSegments = no.split('.').filter(Boolean).reverse() - if (!noSegments.length) { - return false - } - - for (let i = 0; i < noSegments.length; i++) { - if (hostSegments[i] !== noSegments[i]) { - return false - } - } - - return true - }) -} - -const getProxy = (url, { proxy, noProxy }) => { - url = new URL(url) - - if (!proxy) { - proxy = url.protocol === 'https:' - ? PROXY_ENV.https_proxy - : PROXY_ENV.https_proxy || PROXY_ENV.http_proxy || PROXY_ENV.proxy - } - - if (!noProxy) { - noProxy = PROXY_ENV.no_proxy - } - - if (!proxy || isNoProxy(url, noProxy)) { - return null - } - - return new URL(proxy) -} - -module.exports = { - getProxyAgent, - getProxy, - proxyCache: PROXY_CACHE, -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/agent/package.json b/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/agent/package.json deleted file mode 100644 index ef5b4e3228cc46..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/agent/package.json +++ /dev/null @@ -1,60 +0,0 @@ -{ - "name": "@npmcli/agent", - "version": "2.2.2", - "description": "the http/https agent used by the npm cli", - "main": "lib/index.js", - "scripts": { - "gencerts": "bash scripts/create-cert.sh", - "test": "tap", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "postlint": "template-oss-check", - "template-oss-apply": "template-oss-apply --force", - "lintfix": "npm run lint -- --fix", - "snap": "tap", - "posttest": "npm run lint" - }, - "author": "GitHub Inc.", - "license": "ISC", - "bugs": { - "url": "https://github.com/npm/agent/issues" - }, - "homepage": "https://github.com/npm/agent#readme", - "files": [ - "bin/", - "lib/" - ], - "engines": { - "node": "^16.14.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.21.3", - "publish": "true" - }, - "dependencies": { - "agent-base": "^7.1.0", - "http-proxy-agent": "^7.0.0", - "https-proxy-agent": "^7.0.1", - "lru-cache": "^10.0.1", - "socks-proxy-agent": "^8.0.3" - }, - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.21.3", - "minipass-fetch": "^3.0.3", - "nock": "^13.2.7", - "semver": "^7.5.4", - "simple-socks": "^3.1.0", - "tap": "^16.3.0" - }, - "repository": { - "type": "git", - "url": "https://github.com/npm/agent.git" - }, - "tap": { - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - } -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/LICENSE.md b/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/LICENSE.md deleted file mode 100644 index 5fc208ff122e08..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/LICENSE.md +++ /dev/null @@ -1,20 +0,0 @@ - - -ISC License - -Copyright npm, Inc. - -Permission to use, copy, modify, and/or distribute this -software for any purpose with or without fee is hereby -granted, provided that the above copyright notice and this -permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND NPM DISCLAIMS ALL -WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO -EVENT SHALL NPM BE LIABLE FOR ANY SPECIAL, DIRECT, -INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER -TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE -USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/common/get-options.js b/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/common/get-options.js deleted file mode 100644 index cb5982f79077ac..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/common/get-options.js +++ /dev/null @@ -1,20 +0,0 @@ -// given an input that may or may not be an object, return an object that has -// a copy of every defined property listed in 'copy'. if the input is not an -// object, assign it to the property named by 'wrap' -const getOptions = (input, { copy, wrap }) => { - const result = {} - - if (input && typeof input === 'object') { - for (const prop of copy) { - if (input[prop] !== undefined) { - result[prop] = input[prop] - } - } - } else { - result[wrap] = input - } - - return result -} - -module.exports = getOptions diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/common/node.js b/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/common/node.js deleted file mode 100644 index 4d13bc037359d7..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/common/node.js +++ /dev/null @@ -1,9 +0,0 @@ -const semver = require('semver') - -const satisfies = (range) => { - return semver.satisfies(process.version, range, { includePrerelease: true }) -} - -module.exports = { - satisfies, -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/cp/LICENSE b/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/cp/LICENSE deleted file mode 100644 index 93546dfb7655bf..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/cp/LICENSE +++ /dev/null @@ -1,15 +0,0 @@ -(The MIT License) - -Copyright (c) 2011-2017 JP Richardson - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files -(the 'Software'), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, - merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE -WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS -OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/cp/errors.js b/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/cp/errors.js deleted file mode 100644 index 1cd1e05d0c533d..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/cp/errors.js +++ /dev/null @@ -1,129 +0,0 @@ -'use strict' -const { inspect } = require('util') - -// adapted from node's internal/errors -// https://github.com/nodejs/node/blob/c8a04049/lib/internal/errors.js - -// close copy of node's internal SystemError class. -class SystemError { - constructor (code, prefix, context) { - // XXX context.code is undefined in all constructors used in cp/polyfill - // that may be a bug copied from node, maybe the constructor should use - // `code` not `errno`? nodejs/node#41104 - let message = `${prefix}: ${context.syscall} returned ` + - `${context.code} (${context.message})` - - if (context.path !== undefined) { - message += ` ${context.path}` - } - if (context.dest !== undefined) { - message += ` => ${context.dest}` - } - - this.code = code - Object.defineProperties(this, { - name: { - value: 'SystemError', - enumerable: false, - writable: true, - configurable: true, - }, - message: { - value: message, - enumerable: false, - writable: true, - configurable: true, - }, - info: { - value: context, - enumerable: true, - configurable: true, - writable: false, - }, - errno: { - get () { - return context.errno - }, - set (value) { - context.errno = value - }, - enumerable: true, - configurable: true, - }, - syscall: { - get () { - return context.syscall - }, - set (value) { - context.syscall = value - }, - enumerable: true, - configurable: true, - }, - }) - - if (context.path !== undefined) { - Object.defineProperty(this, 'path', { - get () { - return context.path - }, - set (value) { - context.path = value - }, - enumerable: true, - configurable: true, - }) - } - - if (context.dest !== undefined) { - Object.defineProperty(this, 'dest', { - get () { - return context.dest - }, - set (value) { - context.dest = value - }, - enumerable: true, - configurable: true, - }) - } - } - - toString () { - return `${this.name} [${this.code}]: ${this.message}` - } - - [Symbol.for('nodejs.util.inspect.custom')] (_recurseTimes, ctx) { - return inspect(this, { - ...ctx, - getters: true, - customInspect: false, - }) - } -} - -function E (code, message) { - module.exports[code] = class NodeError extends SystemError { - constructor (ctx) { - super(code, message, ctx) - } - } -} - -E('ERR_FS_CP_DIR_TO_NON_DIR', 'Cannot overwrite directory with non-directory') -E('ERR_FS_CP_EEXIST', 'Target already exists') -E('ERR_FS_CP_EINVAL', 'Invalid src or dest') -E('ERR_FS_CP_FIFO_PIPE', 'Cannot copy a FIFO pipe') -E('ERR_FS_CP_NON_DIR_TO_DIR', 'Cannot overwrite non-directory with directory') -E('ERR_FS_CP_SOCKET', 'Cannot copy a socket file') -E('ERR_FS_CP_SYMLINK_TO_SUBDIRECTORY', 'Cannot overwrite symlink in subdirectory of self') -E('ERR_FS_CP_UNKNOWN', 'Cannot copy an unknown file type') -E('ERR_FS_EISDIR', 'Path is a directory') - -module.exports.ERR_INVALID_ARG_TYPE = class ERR_INVALID_ARG_TYPE extends Error { - constructor (name, expected, actual) { - super() - this.code = 'ERR_INVALID_ARG_TYPE' - this.message = `The ${name} argument must be ${expected}. Received ${typeof actual}` - } -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/cp/index.js b/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/cp/index.js deleted file mode 100644 index 972ce7aa12abef..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/cp/index.js +++ /dev/null @@ -1,22 +0,0 @@ -const fs = require('fs/promises') -const getOptions = require('../common/get-options.js') -const node = require('../common/node.js') -const polyfill = require('./polyfill.js') - -// node 16.7.0 added fs.cp -const useNative = node.satisfies('>=16.7.0') - -const cp = async (src, dest, opts) => { - const options = getOptions(opts, { - copy: ['dereference', 'errorOnExist', 'filter', 'force', 'preserveTimestamps', 'recursive'], - }) - - // the polyfill is tested separately from this module, no need to hack - // process.version to try to trigger it just for coverage - // istanbul ignore next - return useNative - ? fs.cp(src, dest, options) - : polyfill(src, dest, options) -} - -module.exports = cp diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/cp/polyfill.js b/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/cp/polyfill.js deleted file mode 100644 index 80eb10de971918..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/cp/polyfill.js +++ /dev/null @@ -1,428 +0,0 @@ -// this file is a modified version of the code in node 17.2.0 -// which is, in turn, a modified version of the fs-extra module on npm -// node core changes: -// - Use of the assert module has been replaced with core's error system. -// - All code related to the glob dependency has been removed. -// - Bring your own custom fs module is not currently supported. -// - Some basic code cleanup. -// changes here: -// - remove all callback related code -// - drop sync support -// - change assertions back to non-internal methods (see options.js) -// - throws ENOTDIR when rmdir gets an ENOENT for a path that exists in Windows -'use strict' - -const { - ERR_FS_CP_DIR_TO_NON_DIR, - ERR_FS_CP_EEXIST, - ERR_FS_CP_EINVAL, - ERR_FS_CP_FIFO_PIPE, - ERR_FS_CP_NON_DIR_TO_DIR, - ERR_FS_CP_SOCKET, - ERR_FS_CP_SYMLINK_TO_SUBDIRECTORY, - ERR_FS_CP_UNKNOWN, - ERR_FS_EISDIR, - ERR_INVALID_ARG_TYPE, -} = require('./errors.js') -const { - constants: { - errno: { - EEXIST, - EISDIR, - EINVAL, - ENOTDIR, - }, - }, -} = require('os') -const { - chmod, - copyFile, - lstat, - mkdir, - readdir, - readlink, - stat, - symlink, - unlink, - utimes, -} = require('fs/promises') -const { - dirname, - isAbsolute, - join, - parse, - resolve, - sep, - toNamespacedPath, -} = require('path') -const { fileURLToPath } = require('url') - -const defaultOptions = { - dereference: false, - errorOnExist: false, - filter: undefined, - force: true, - preserveTimestamps: false, - recursive: false, -} - -async function cp (src, dest, opts) { - if (opts != null && typeof opts !== 'object') { - throw new ERR_INVALID_ARG_TYPE('options', ['Object'], opts) - } - return cpFn( - toNamespacedPath(getValidatedPath(src)), - toNamespacedPath(getValidatedPath(dest)), - { ...defaultOptions, ...opts }) -} - -function getValidatedPath (fileURLOrPath) { - const path = fileURLOrPath != null && fileURLOrPath.href - && fileURLOrPath.origin - ? fileURLToPath(fileURLOrPath) - : fileURLOrPath - return path -} - -async function cpFn (src, dest, opts) { - // Warn about using preserveTimestamps on 32-bit node - // istanbul ignore next - if (opts.preserveTimestamps && process.arch === 'ia32') { - const warning = 'Using the preserveTimestamps option in 32-bit ' + - 'node is not recommended' - process.emitWarning(warning, 'TimestampPrecisionWarning') - } - const stats = await checkPaths(src, dest, opts) - const { srcStat, destStat } = stats - await checkParentPaths(src, srcStat, dest) - if (opts.filter) { - return handleFilter(checkParentDir, destStat, src, dest, opts) - } - return checkParentDir(destStat, src, dest, opts) -} - -async function checkPaths (src, dest, opts) { - const { 0: srcStat, 1: destStat } = await getStats(src, dest, opts) - if (destStat) { - if (areIdentical(srcStat, destStat)) { - throw new ERR_FS_CP_EINVAL({ - message: 'src and dest cannot be the same', - path: dest, - syscall: 'cp', - errno: EINVAL, - }) - } - if (srcStat.isDirectory() && !destStat.isDirectory()) { - throw new ERR_FS_CP_DIR_TO_NON_DIR({ - message: `cannot overwrite directory ${src} ` + - `with non-directory ${dest}`, - path: dest, - syscall: 'cp', - errno: EISDIR, - }) - } - if (!srcStat.isDirectory() && destStat.isDirectory()) { - throw new ERR_FS_CP_NON_DIR_TO_DIR({ - message: `cannot overwrite non-directory ${src} ` + - `with directory ${dest}`, - path: dest, - syscall: 'cp', - errno: ENOTDIR, - }) - } - } - - if (srcStat.isDirectory() && isSrcSubdir(src, dest)) { - throw new ERR_FS_CP_EINVAL({ - message: `cannot copy ${src} to a subdirectory of self ${dest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - }) - } - return { srcStat, destStat } -} - -function areIdentical (srcStat, destStat) { - return destStat.ino && destStat.dev && destStat.ino === srcStat.ino && - destStat.dev === srcStat.dev -} - -function getStats (src, dest, opts) { - const statFunc = opts.dereference ? - (file) => stat(file, { bigint: true }) : - (file) => lstat(file, { bigint: true }) - return Promise.all([ - statFunc(src), - statFunc(dest).catch((err) => { - // istanbul ignore next: unsure how to cover. - if (err.code === 'ENOENT') { - return null - } - // istanbul ignore next: unsure how to cover. - throw err - }), - ]) -} - -async function checkParentDir (destStat, src, dest, opts) { - const destParent = dirname(dest) - const dirExists = await pathExists(destParent) - if (dirExists) { - return getStatsForCopy(destStat, src, dest, opts) - } - await mkdir(destParent, { recursive: true }) - return getStatsForCopy(destStat, src, dest, opts) -} - -function pathExists (dest) { - return stat(dest).then( - () => true, - // istanbul ignore next: not sure when this would occur - (err) => (err.code === 'ENOENT' ? false : Promise.reject(err))) -} - -// Recursively check if dest parent is a subdirectory of src. -// It works for all file types including symlinks since it -// checks the src and dest inodes. It starts from the deepest -// parent and stops once it reaches the src parent or the root path. -async function checkParentPaths (src, srcStat, dest) { - const srcParent = resolve(dirname(src)) - const destParent = resolve(dirname(dest)) - if (destParent === srcParent || destParent === parse(destParent).root) { - return - } - let destStat - try { - destStat = await stat(destParent, { bigint: true }) - } catch (err) { - // istanbul ignore else: not sure when this would occur - if (err.code === 'ENOENT') { - return - } - // istanbul ignore next: not sure when this would occur - throw err - } - if (areIdentical(srcStat, destStat)) { - throw new ERR_FS_CP_EINVAL({ - message: `cannot copy ${src} to a subdirectory of self ${dest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - }) - } - return checkParentPaths(src, srcStat, destParent) -} - -const normalizePathToArray = (path) => - resolve(path).split(sep).filter(Boolean) - -// Return true if dest is a subdir of src, otherwise false. -// It only checks the path strings. -function isSrcSubdir (src, dest) { - const srcArr = normalizePathToArray(src) - const destArr = normalizePathToArray(dest) - return srcArr.every((cur, i) => destArr[i] === cur) -} - -async function handleFilter (onInclude, destStat, src, dest, opts, cb) { - const include = await opts.filter(src, dest) - if (include) { - return onInclude(destStat, src, dest, opts, cb) - } -} - -function startCopy (destStat, src, dest, opts) { - if (opts.filter) { - return handleFilter(getStatsForCopy, destStat, src, dest, opts) - } - return getStatsForCopy(destStat, src, dest, opts) -} - -async function getStatsForCopy (destStat, src, dest, opts) { - const statFn = opts.dereference ? stat : lstat - const srcStat = await statFn(src) - // istanbul ignore else: can't portably test FIFO - if (srcStat.isDirectory() && opts.recursive) { - return onDir(srcStat, destStat, src, dest, opts) - } else if (srcStat.isDirectory()) { - throw new ERR_FS_EISDIR({ - message: `${src} is a directory (not copied)`, - path: src, - syscall: 'cp', - errno: EINVAL, - }) - } else if (srcStat.isFile() || - srcStat.isCharacterDevice() || - srcStat.isBlockDevice()) { - return onFile(srcStat, destStat, src, dest, opts) - } else if (srcStat.isSymbolicLink()) { - return onLink(destStat, src, dest) - } else if (srcStat.isSocket()) { - throw new ERR_FS_CP_SOCKET({ - message: `cannot copy a socket file: ${dest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - }) - } else if (srcStat.isFIFO()) { - throw new ERR_FS_CP_FIFO_PIPE({ - message: `cannot copy a FIFO pipe: ${dest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - }) - } - // istanbul ignore next: should be unreachable - throw new ERR_FS_CP_UNKNOWN({ - message: `cannot copy an unknown file type: ${dest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - }) -} - -function onFile (srcStat, destStat, src, dest, opts) { - if (!destStat) { - return _copyFile(srcStat, src, dest, opts) - } - return mayCopyFile(srcStat, src, dest, opts) -} - -async function mayCopyFile (srcStat, src, dest, opts) { - if (opts.force) { - await unlink(dest) - return _copyFile(srcStat, src, dest, opts) - } else if (opts.errorOnExist) { - throw new ERR_FS_CP_EEXIST({ - message: `${dest} already exists`, - path: dest, - syscall: 'cp', - errno: EEXIST, - }) - } -} - -async function _copyFile (srcStat, src, dest, opts) { - await copyFile(src, dest) - if (opts.preserveTimestamps) { - return handleTimestampsAndMode(srcStat.mode, src, dest) - } - return setDestMode(dest, srcStat.mode) -} - -async function handleTimestampsAndMode (srcMode, src, dest) { - // Make sure the file is writable before setting the timestamp - // otherwise open fails with EPERM when invoked with 'r+' - // (through utimes call) - if (fileIsNotWritable(srcMode)) { - await makeFileWritable(dest, srcMode) - return setDestTimestampsAndMode(srcMode, src, dest) - } - return setDestTimestampsAndMode(srcMode, src, dest) -} - -function fileIsNotWritable (srcMode) { - return (srcMode & 0o200) === 0 -} - -function makeFileWritable (dest, srcMode) { - return setDestMode(dest, srcMode | 0o200) -} - -async function setDestTimestampsAndMode (srcMode, src, dest) { - await setDestTimestamps(src, dest) - return setDestMode(dest, srcMode) -} - -function setDestMode (dest, srcMode) { - return chmod(dest, srcMode) -} - -async function setDestTimestamps (src, dest) { - // The initial srcStat.atime cannot be trusted - // because it is modified by the read(2) system call - // (See https://nodejs.org/api/fs.html#fs_stat_time_values) - const updatedSrcStat = await stat(src) - return utimes(dest, updatedSrcStat.atime, updatedSrcStat.mtime) -} - -function onDir (srcStat, destStat, src, dest, opts) { - if (!destStat) { - return mkDirAndCopy(srcStat.mode, src, dest, opts) - } - return copyDir(src, dest, opts) -} - -async function mkDirAndCopy (srcMode, src, dest, opts) { - await mkdir(dest) - await copyDir(src, dest, opts) - return setDestMode(dest, srcMode) -} - -async function copyDir (src, dest, opts) { - const dir = await readdir(src) - for (let i = 0; i < dir.length; i++) { - const item = dir[i] - const srcItem = join(src, item) - const destItem = join(dest, item) - const { destStat } = await checkPaths(srcItem, destItem, opts) - await startCopy(destStat, srcItem, destItem, opts) - } -} - -async function onLink (destStat, src, dest) { - let resolvedSrc = await readlink(src) - if (!isAbsolute(resolvedSrc)) { - resolvedSrc = resolve(dirname(src), resolvedSrc) - } - if (!destStat) { - return symlink(resolvedSrc, dest) - } - let resolvedDest - try { - resolvedDest = await readlink(dest) - } catch (err) { - // Dest exists and is a regular file or directory, - // Windows may throw UNKNOWN error. If dest already exists, - // fs throws error anyway, so no need to guard against it here. - // istanbul ignore next: can only test on windows - if (err.code === 'EINVAL' || err.code === 'UNKNOWN') { - return symlink(resolvedSrc, dest) - } - // istanbul ignore next: should not be possible - throw err - } - if (!isAbsolute(resolvedDest)) { - resolvedDest = resolve(dirname(dest), resolvedDest) - } - if (isSrcSubdir(resolvedSrc, resolvedDest)) { - throw new ERR_FS_CP_EINVAL({ - message: `cannot copy ${resolvedSrc} to a subdirectory of self ` + - `${resolvedDest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - }) - } - // Do not copy if src is a subdir of dest since unlinking - // dest in this case would result in removing src contents - // and therefore a broken symlink would be created. - const srcStat = await stat(src) - if (srcStat.isDirectory() && isSrcSubdir(resolvedDest, resolvedSrc)) { - throw new ERR_FS_CP_SYMLINK_TO_SUBDIRECTORY({ - message: `cannot overwrite ${resolvedDest} with ${resolvedSrc}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - }) - } - return copyLink(resolvedSrc, dest) -} - -async function copyLink (resolvedSrc, dest) { - await unlink(dest) - return symlink(resolvedSrc, dest) -} - -module.exports = cp diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/index.js b/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/index.js deleted file mode 100644 index 81c746304cc428..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/index.js +++ /dev/null @@ -1,13 +0,0 @@ -'use strict' - -const cp = require('./cp/index.js') -const withTempDir = require('./with-temp-dir.js') -const readdirScoped = require('./readdir-scoped.js') -const moveFile = require('./move-file.js') - -module.exports = { - cp, - withTempDir, - readdirScoped, - moveFile, -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/move-file.js b/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/move-file.js deleted file mode 100644 index d56e06d384659a..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/move-file.js +++ /dev/null @@ -1,78 +0,0 @@ -const { dirname, join, resolve, relative, isAbsolute } = require('path') -const fs = require('fs/promises') - -const pathExists = async path => { - try { - await fs.access(path) - return true - } catch (er) { - return er.code !== 'ENOENT' - } -} - -const moveFile = async (source, destination, options = {}, root = true, symlinks = []) => { - if (!source || !destination) { - throw new TypeError('`source` and `destination` file required') - } - - options = { - overwrite: true, - ...options, - } - - if (!options.overwrite && await pathExists(destination)) { - throw new Error(`The destination file exists: ${destination}`) - } - - await fs.mkdir(dirname(destination), { recursive: true }) - - try { - await fs.rename(source, destination) - } catch (error) { - if (error.code === 'EXDEV' || error.code === 'EPERM') { - const sourceStat = await fs.lstat(source) - if (sourceStat.isDirectory()) { - const files = await fs.readdir(source) - await Promise.all(files.map((file) => - moveFile(join(source, file), join(destination, file), options, false, symlinks) - )) - } else if (sourceStat.isSymbolicLink()) { - symlinks.push({ source, destination }) - } else { - await fs.copyFile(source, destination) - } - } else { - throw error - } - } - - if (root) { - await Promise.all(symlinks.map(async ({ source: symSource, destination: symDestination }) => { - let target = await fs.readlink(symSource) - // junction symlinks in windows will be absolute paths, so we need to - // make sure they point to the symlink destination - if (isAbsolute(target)) { - target = resolve(symDestination, relative(symSource, target)) - } - // try to determine what the actual file is so we can create the correct - // type of symlink in windows - let targetStat = 'file' - try { - targetStat = await fs.stat(resolve(dirname(symSource), target)) - if (targetStat.isDirectory()) { - targetStat = 'junction' - } - } catch { - // targetStat remains 'file' - } - await fs.symlink( - target, - symDestination, - targetStat - ) - })) - await fs.rm(source, { recursive: true, force: true }) - } -} - -module.exports = moveFile diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/readdir-scoped.js b/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/readdir-scoped.js deleted file mode 100644 index cd601dfbe7486b..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/readdir-scoped.js +++ /dev/null @@ -1,20 +0,0 @@ -const { readdir } = require('fs/promises') -const { join } = require('path') - -const readdirScoped = async (dir) => { - const results = [] - - for (const item of await readdir(dir)) { - if (item.startsWith('@')) { - for (const scopedItem of await readdir(join(dir, item))) { - results.push(join(item, scopedItem)) - } - } else { - results.push(item) - } - } - - return results -} - -module.exports = readdirScoped diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/with-temp-dir.js b/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/with-temp-dir.js deleted file mode 100644 index 0738ac4f29e1be..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/lib/with-temp-dir.js +++ /dev/null @@ -1,39 +0,0 @@ -const { join, sep } = require('path') - -const getOptions = require('./common/get-options.js') -const { mkdir, mkdtemp, rm } = require('fs/promises') - -// create a temp directory, ensure its permissions match its parent, then call -// the supplied function passing it the path to the directory. clean up after -// the function finishes, whether it throws or not -const withTempDir = async (root, fn, opts) => { - const options = getOptions(opts, { - copy: ['tmpPrefix'], - }) - // create the directory - await mkdir(root, { recursive: true }) - - const target = await mkdtemp(join(`${root}${sep}`, options.tmpPrefix || '')) - let err - let result - - try { - result = await fn(target) - } catch (_err) { - err = _err - } - - try { - await rm(target, { force: true, recursive: true }) - } catch { - // ignore errors - } - - if (err) { - throw err - } - - return result -} - -module.exports = withTempDir diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/package.json b/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/package.json deleted file mode 100644 index 5261a11b78000e..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/@npmcli/fs/package.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "name": "@npmcli/fs", - "version": "3.1.1", - "description": "filesystem utilities for the npm cli", - "main": "lib/index.js", - "files": [ - "bin/", - "lib/" - ], - "scripts": { - "snap": "tap", - "test": "tap", - "npmclilint": "npmcli-lint", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "lintfix": "npm run lint -- --fix", - "posttest": "npm run lint", - "postsnap": "npm run lintfix --", - "postlint": "template-oss-check", - "template-oss-apply": "template-oss-apply --force" - }, - "repository": { - "type": "git", - "url": "git+https://github.com/npm/fs.git" - }, - "keywords": [ - "npm", - "oss" - ], - "author": "GitHub Inc.", - "license": "ISC", - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.22.0", - "tap": "^16.0.1" - }, - "dependencies": { - "semver": "^7.3.5" - }, - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.22.0" - }, - "tap": { - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - } -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/LICENSE.md b/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/LICENSE.md deleted file mode 100644 index 8d28acf866d932..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/LICENSE.md +++ /dev/null @@ -1,16 +0,0 @@ -ISC License - -Copyright (c) npm, Inc. - -Permission to use, copy, modify, and/or distribute this software for -any purpose with or without fee is hereby granted, provided that the -above copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE COPYRIGHT HOLDER DISCLAIMS -ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE -COPYRIGHT HOLDER BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR -CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE -USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/content/path.js b/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/content/path.js deleted file mode 100644 index ad5a76a4f73f26..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/content/path.js +++ /dev/null @@ -1,29 +0,0 @@ -'use strict' - -const contentVer = require('../../package.json')['cache-version'].content -const hashToSegments = require('../util/hash-to-segments') -const path = require('path') -const ssri = require('ssri') - -// Current format of content file path: -// -// sha512-BaSE64Hex= -> -// ~/.my-cache/content-v2/sha512/ba/da/55deadbeefc0ffee -// -module.exports = contentPath - -function contentPath (cache, integrity) { - const sri = ssri.parse(integrity, { single: true }) - // contentPath is the *strongest* algo given - return path.join( - contentDir(cache), - sri.algorithm, - ...hashToSegments(sri.hexDigest()) - ) -} - -module.exports.contentDir = contentDir - -function contentDir (cache) { - return path.join(cache, `content-v${contentVer}`) -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/content/read.js b/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/content/read.js deleted file mode 100644 index 5f6192c3cec566..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/content/read.js +++ /dev/null @@ -1,165 +0,0 @@ -'use strict' - -const fs = require('fs/promises') -const fsm = require('fs-minipass') -const ssri = require('ssri') -const contentPath = require('./path') -const Pipeline = require('minipass-pipeline') - -module.exports = read - -const MAX_SINGLE_READ_SIZE = 64 * 1024 * 1024 -async function read (cache, integrity, opts = {}) { - const { size } = opts - const { stat, cpath, sri } = await withContentSri(cache, integrity, async (cpath, sri) => { - // get size - const stat = size ? { size } : await fs.stat(cpath) - return { stat, cpath, sri } - }) - - if (stat.size > MAX_SINGLE_READ_SIZE) { - return readPipeline(cpath, stat.size, sri, new Pipeline()).concat() - } - - const data = await fs.readFile(cpath, { encoding: null }) - - if (stat.size !== data.length) { - throw sizeError(stat.size, data.length) - } - - if (!ssri.checkData(data, sri)) { - throw integrityError(sri, cpath) - } - - return data -} - -const readPipeline = (cpath, size, sri, stream) => { - stream.push( - new fsm.ReadStream(cpath, { - size, - readSize: MAX_SINGLE_READ_SIZE, - }), - ssri.integrityStream({ - integrity: sri, - size, - }) - ) - return stream -} - -module.exports.stream = readStream -module.exports.readStream = readStream - -function readStream (cache, integrity, opts = {}) { - const { size } = opts - const stream = new Pipeline() - // Set all this up to run on the stream and then just return the stream - Promise.resolve().then(async () => { - const { stat, cpath, sri } = await withContentSri(cache, integrity, async (cpath, sri) => { - // get size - const stat = size ? { size } : await fs.stat(cpath) - return { stat, cpath, sri } - }) - - return readPipeline(cpath, stat.size, sri, stream) - }).catch(err => stream.emit('error', err)) - - return stream -} - -module.exports.copy = copy - -function copy (cache, integrity, dest) { - return withContentSri(cache, integrity, (cpath) => { - return fs.copyFile(cpath, dest) - }) -} - -module.exports.hasContent = hasContent - -async function hasContent (cache, integrity) { - if (!integrity) { - return false - } - - try { - return await withContentSri(cache, integrity, async (cpath, sri) => { - const stat = await fs.stat(cpath) - return { size: stat.size, sri, stat } - }) - } catch (err) { - if (err.code === 'ENOENT') { - return false - } - - if (err.code === 'EPERM') { - /* istanbul ignore else */ - if (process.platform !== 'win32') { - throw err - } else { - return false - } - } - } -} - -async function withContentSri (cache, integrity, fn) { - const sri = ssri.parse(integrity) - // If `integrity` has multiple entries, pick the first digest - // with available local data. - const algo = sri.pickAlgorithm() - const digests = sri[algo] - - if (digests.length <= 1) { - const cpath = contentPath(cache, digests[0]) - return fn(cpath, digests[0]) - } else { - // Can't use race here because a generic error can happen before - // a ENOENT error, and can happen before a valid result - const results = await Promise.all(digests.map(async (meta) => { - try { - return await withContentSri(cache, meta, fn) - } catch (err) { - if (err.code === 'ENOENT') { - return Object.assign( - new Error('No matching content found for ' + sri.toString()), - { code: 'ENOENT' } - ) - } - return err - } - })) - // Return the first non error if it is found - const result = results.find((r) => !(r instanceof Error)) - if (result) { - return result - } - - // Throw the No matching content found error - const enoentError = results.find((r) => r.code === 'ENOENT') - if (enoentError) { - throw enoentError - } - - // Throw generic error - throw results.find((r) => r instanceof Error) - } -} - -function sizeError (expected, found) { - /* eslint-disable-next-line max-len */ - const err = new Error(`Bad data size: expected inserted data to be ${expected} bytes, but got ${found} instead`) - err.expected = expected - err.found = found - err.code = 'EBADSIZE' - return err -} - -function integrityError (sri, path) { - const err = new Error(`Integrity verification failed for ${sri} (${path})`) - err.code = 'EINTEGRITY' - err.sri = sri - err.path = path - return err -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/content/rm.js b/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/content/rm.js deleted file mode 100644 index ce58d679e4cb25..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/content/rm.js +++ /dev/null @@ -1,18 +0,0 @@ -'use strict' - -const fs = require('fs/promises') -const contentPath = require('./path') -const { hasContent } = require('./read') - -module.exports = rm - -async function rm (cache, integrity) { - const content = await hasContent(cache, integrity) - // ~pretty~ sure we can't end up with a content lacking sri, but be safe - if (content && content.sri) { - await fs.rm(contentPath(cache, content.sri), { recursive: true, force: true }) - return true - } else { - return false - } -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/content/write.js b/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/content/write.js deleted file mode 100644 index e7187abca8788a..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/content/write.js +++ /dev/null @@ -1,206 +0,0 @@ -'use strict' - -const events = require('events') - -const contentPath = require('./path') -const fs = require('fs/promises') -const { moveFile } = require('@npmcli/fs') -const { Minipass } = require('minipass') -const Pipeline = require('minipass-pipeline') -const Flush = require('minipass-flush') -const path = require('path') -const ssri = require('ssri') -const uniqueFilename = require('unique-filename') -const fsm = require('fs-minipass') - -module.exports = write - -// Cache of move operations in process so we don't duplicate -const moveOperations = new Map() - -async function write (cache, data, opts = {}) { - const { algorithms, size, integrity } = opts - - if (typeof size === 'number' && data.length !== size) { - throw sizeError(size, data.length) - } - - const sri = ssri.fromData(data, algorithms ? { algorithms } : {}) - if (integrity && !ssri.checkData(data, integrity, opts)) { - throw checksumError(integrity, sri) - } - - for (const algo in sri) { - const tmp = await makeTmp(cache, opts) - const hash = sri[algo].toString() - try { - await fs.writeFile(tmp.target, data, { flag: 'wx' }) - await moveToDestination(tmp, cache, hash, opts) - } finally { - if (!tmp.moved) { - await fs.rm(tmp.target, { recursive: true, force: true }) - } - } - } - return { integrity: sri, size: data.length } -} - -module.exports.stream = writeStream - -// writes proxied to the 'inputStream' that is passed to the Promise -// 'end' is deferred until content is handled. -class CacacheWriteStream extends Flush { - constructor (cache, opts) { - super() - this.opts = opts - this.cache = cache - this.inputStream = new Minipass() - this.inputStream.on('error', er => this.emit('error', er)) - this.inputStream.on('drain', () => this.emit('drain')) - this.handleContentP = null - } - - write (chunk, encoding, cb) { - if (!this.handleContentP) { - this.handleContentP = handleContent( - this.inputStream, - this.cache, - this.opts - ) - this.handleContentP.catch(error => this.emit('error', error)) - } - return this.inputStream.write(chunk, encoding, cb) - } - - flush (cb) { - this.inputStream.end(() => { - if (!this.handleContentP) { - const e = new Error('Cache input stream was empty') - e.code = 'ENODATA' - // empty streams are probably emitting end right away. - // defer this one tick by rejecting a promise on it. - return Promise.reject(e).catch(cb) - } - // eslint-disable-next-line promise/catch-or-return - this.handleContentP.then( - (res) => { - res.integrity && this.emit('integrity', res.integrity) - // eslint-disable-next-line promise/always-return - res.size !== null && this.emit('size', res.size) - cb() - }, - (er) => cb(er) - ) - }) - } -} - -function writeStream (cache, opts = {}) { - return new CacacheWriteStream(cache, opts) -} - -async function handleContent (inputStream, cache, opts) { - const tmp = await makeTmp(cache, opts) - try { - const res = await pipeToTmp(inputStream, cache, tmp.target, opts) - await moveToDestination( - tmp, - cache, - res.integrity, - opts - ) - return res - } finally { - if (!tmp.moved) { - await fs.rm(tmp.target, { recursive: true, force: true }) - } - } -} - -async function pipeToTmp (inputStream, cache, tmpTarget, opts) { - const outStream = new fsm.WriteStream(tmpTarget, { - flags: 'wx', - }) - - if (opts.integrityEmitter) { - // we need to create these all simultaneously since they can fire in any order - const [integrity, size] = await Promise.all([ - events.once(opts.integrityEmitter, 'integrity').then(res => res[0]), - events.once(opts.integrityEmitter, 'size').then(res => res[0]), - new Pipeline(inputStream, outStream).promise(), - ]) - return { integrity, size } - } - - let integrity - let size - const hashStream = ssri.integrityStream({ - integrity: opts.integrity, - algorithms: opts.algorithms, - size: opts.size, - }) - hashStream.on('integrity', i => { - integrity = i - }) - hashStream.on('size', s => { - size = s - }) - - const pipeline = new Pipeline(inputStream, hashStream, outStream) - await pipeline.promise() - return { integrity, size } -} - -async function makeTmp (cache, opts) { - const tmpTarget = uniqueFilename(path.join(cache, 'tmp'), opts.tmpPrefix) - await fs.mkdir(path.dirname(tmpTarget), { recursive: true }) - return { - target: tmpTarget, - moved: false, - } -} - -async function moveToDestination (tmp, cache, sri) { - const destination = contentPath(cache, sri) - const destDir = path.dirname(destination) - if (moveOperations.has(destination)) { - return moveOperations.get(destination) - } - moveOperations.set( - destination, - fs.mkdir(destDir, { recursive: true }) - .then(async () => { - await moveFile(tmp.target, destination, { overwrite: false }) - tmp.moved = true - return tmp.moved - }) - .catch(err => { - if (!err.message.startsWith('The destination file exists')) { - throw Object.assign(err, { code: 'EEXIST' }) - } - }).finally(() => { - moveOperations.delete(destination) - }) - - ) - return moveOperations.get(destination) -} - -function sizeError (expected, found) { - /* eslint-disable-next-line max-len */ - const err = new Error(`Bad data size: expected inserted data to be ${expected} bytes, but got ${found} instead`) - err.expected = expected - err.found = found - err.code = 'EBADSIZE' - return err -} - -function checksumError (expected, found) { - const err = new Error(`Integrity check failed: - Wanted: ${expected} - Found: ${found}`) - err.code = 'EINTEGRITY' - err.expected = expected - err.found = found - return err -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/entry-index.js b/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/entry-index.js deleted file mode 100644 index 89c28f2f257d48..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/entry-index.js +++ /dev/null @@ -1,336 +0,0 @@ -'use strict' - -const crypto = require('crypto') -const { - appendFile, - mkdir, - readFile, - readdir, - rm, - writeFile, -} = require('fs/promises') -const { Minipass } = require('minipass') -const path = require('path') -const ssri = require('ssri') -const uniqueFilename = require('unique-filename') - -const contentPath = require('./content/path') -const hashToSegments = require('./util/hash-to-segments') -const indexV = require('../package.json')['cache-version'].index -const { moveFile } = require('@npmcli/fs') - -const pMap = require('p-map') -const lsStreamConcurrency = 5 - -module.exports.NotFoundError = class NotFoundError extends Error { - constructor (cache, key) { - super(`No cache entry for ${key} found in ${cache}`) - this.code = 'ENOENT' - this.cache = cache - this.key = key - } -} - -module.exports.compact = compact - -async function compact (cache, key, matchFn, opts = {}) { - const bucket = bucketPath(cache, key) - const entries = await bucketEntries(bucket) - const newEntries = [] - // we loop backwards because the bottom-most result is the newest - // since we add new entries with appendFile - for (let i = entries.length - 1; i >= 0; --i) { - const entry = entries[i] - // a null integrity could mean either a delete was appended - // or the user has simply stored an index that does not map - // to any content. we determine if the user wants to keep the - // null integrity based on the validateEntry function passed in options. - // if the integrity is null and no validateEntry is provided, we break - // as we consider the null integrity to be a deletion of everything - // that came before it. - if (entry.integrity === null && !opts.validateEntry) { - break - } - - // if this entry is valid, and it is either the first entry or - // the newEntries array doesn't already include an entry that - // matches this one based on the provided matchFn, then we add - // it to the beginning of our list - if ((!opts.validateEntry || opts.validateEntry(entry) === true) && - (newEntries.length === 0 || - !newEntries.find((oldEntry) => matchFn(oldEntry, entry)))) { - newEntries.unshift(entry) - } - } - - const newIndex = '\n' + newEntries.map((entry) => { - const stringified = JSON.stringify(entry) - const hash = hashEntry(stringified) - return `${hash}\t${stringified}` - }).join('\n') - - const setup = async () => { - const target = uniqueFilename(path.join(cache, 'tmp'), opts.tmpPrefix) - await mkdir(path.dirname(target), { recursive: true }) - return { - target, - moved: false, - } - } - - const teardown = async (tmp) => { - if (!tmp.moved) { - return rm(tmp.target, { recursive: true, force: true }) - } - } - - const write = async (tmp) => { - await writeFile(tmp.target, newIndex, { flag: 'wx' }) - await mkdir(path.dirname(bucket), { recursive: true }) - // we use @npmcli/move-file directly here because we - // want to overwrite the existing file - await moveFile(tmp.target, bucket) - tmp.moved = true - } - - // write the file atomically - const tmp = await setup() - try { - await write(tmp) - } finally { - await teardown(tmp) - } - - // we reverse the list we generated such that the newest - // entries come first in order to make looping through them easier - // the true passed to formatEntry tells it to keep null - // integrity values, if they made it this far it's because - // validateEntry returned true, and as such we should return it - return newEntries.reverse().map((entry) => formatEntry(cache, entry, true)) -} - -module.exports.insert = insert - -async function insert (cache, key, integrity, opts = {}) { - const { metadata, size, time } = opts - const bucket = bucketPath(cache, key) - const entry = { - key, - integrity: integrity && ssri.stringify(integrity), - time: time || Date.now(), - size, - metadata, - } - try { - await mkdir(path.dirname(bucket), { recursive: true }) - const stringified = JSON.stringify(entry) - // NOTE - Cleverness ahoy! - // - // This works because it's tremendously unlikely for an entry to corrupt - // another while still preserving the string length of the JSON in - // question. So, we just slap the length in there and verify it on read. - // - // Thanks to @isaacs for the whiteboarding session that ended up with - // this. - await appendFile(bucket, `\n${hashEntry(stringified)}\t${stringified}`) - } catch (err) { - if (err.code === 'ENOENT') { - return undefined - } - - throw err - } - return formatEntry(cache, entry) -} - -module.exports.find = find - -async function find (cache, key) { - const bucket = bucketPath(cache, key) - try { - const entries = await bucketEntries(bucket) - return entries.reduce((latest, next) => { - if (next && next.key === key) { - return formatEntry(cache, next) - } else { - return latest - } - }, null) - } catch (err) { - if (err.code === 'ENOENT') { - return null - } else { - throw err - } - } -} - -module.exports.delete = del - -function del (cache, key, opts = {}) { - if (!opts.removeFully) { - return insert(cache, key, null, opts) - } - - const bucket = bucketPath(cache, key) - return rm(bucket, { recursive: true, force: true }) -} - -module.exports.lsStream = lsStream - -function lsStream (cache) { - const indexDir = bucketDir(cache) - const stream = new Minipass({ objectMode: true }) - - // Set all this up to run on the stream and then just return the stream - Promise.resolve().then(async () => { - const buckets = await readdirOrEmpty(indexDir) - await pMap(buckets, async (bucket) => { - const bucketPath = path.join(indexDir, bucket) - const subbuckets = await readdirOrEmpty(bucketPath) - await pMap(subbuckets, async (subbucket) => { - const subbucketPath = path.join(bucketPath, subbucket) - - // "/cachename//./*" - const subbucketEntries = await readdirOrEmpty(subbucketPath) - await pMap(subbucketEntries, async (entry) => { - const entryPath = path.join(subbucketPath, entry) - try { - const entries = await bucketEntries(entryPath) - // using a Map here prevents duplicate keys from showing up - // twice, I guess? - const reduced = entries.reduce((acc, entry) => { - acc.set(entry.key, entry) - return acc - }, new Map()) - // reduced is a map of key => entry - for (const entry of reduced.values()) { - const formatted = formatEntry(cache, entry) - if (formatted) { - stream.write(formatted) - } - } - } catch (err) { - if (err.code === 'ENOENT') { - return undefined - } - throw err - } - }, - { concurrency: lsStreamConcurrency }) - }, - { concurrency: lsStreamConcurrency }) - }, - { concurrency: lsStreamConcurrency }) - stream.end() - return stream - }).catch(err => stream.emit('error', err)) - - return stream -} - -module.exports.ls = ls - -async function ls (cache) { - const entries = await lsStream(cache).collect() - return entries.reduce((acc, xs) => { - acc[xs.key] = xs - return acc - }, {}) -} - -module.exports.bucketEntries = bucketEntries - -async function bucketEntries (bucket, filter) { - const data = await readFile(bucket, 'utf8') - return _bucketEntries(data, filter) -} - -function _bucketEntries (data) { - const entries = [] - data.split('\n').forEach((entry) => { - if (!entry) { - return - } - - const pieces = entry.split('\t') - if (!pieces[1] || hashEntry(pieces[1]) !== pieces[0]) { - // Hash is no good! Corruption or malice? Doesn't matter! - // EJECT EJECT - return - } - let obj - try { - obj = JSON.parse(pieces[1]) - } catch (_) { - // eslint-ignore-next-line no-empty-block - } - // coverage disabled here, no need to test with an entry that parses to something falsey - // istanbul ignore else - if (obj) { - entries.push(obj) - } - }) - return entries -} - -module.exports.bucketDir = bucketDir - -function bucketDir (cache) { - return path.join(cache, `index-v${indexV}`) -} - -module.exports.bucketPath = bucketPath - -function bucketPath (cache, key) { - const hashed = hashKey(key) - return path.join.apply( - path, - [bucketDir(cache)].concat(hashToSegments(hashed)) - ) -} - -module.exports.hashKey = hashKey - -function hashKey (key) { - return hash(key, 'sha256') -} - -module.exports.hashEntry = hashEntry - -function hashEntry (str) { - return hash(str, 'sha1') -} - -function hash (str, digest) { - return crypto - .createHash(digest) - .update(str) - .digest('hex') -} - -function formatEntry (cache, entry, keepAll) { - // Treat null digests as deletions. They'll shadow any previous entries. - if (!entry.integrity && !keepAll) { - return null - } - - return { - key: entry.key, - integrity: entry.integrity, - path: entry.integrity ? contentPath(cache, entry.integrity) : undefined, - size: entry.size, - time: entry.time, - metadata: entry.metadata, - } -} - -function readdirOrEmpty (dir) { - return readdir(dir).catch((err) => { - if (err.code === 'ENOENT' || err.code === 'ENOTDIR') { - return [] - } - - throw err - }) -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/get.js b/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/get.js deleted file mode 100644 index 80ec206c7ecaaa..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/get.js +++ /dev/null @@ -1,170 +0,0 @@ -'use strict' - -const Collect = require('minipass-collect') -const { Minipass } = require('minipass') -const Pipeline = require('minipass-pipeline') - -const index = require('./entry-index') -const memo = require('./memoization') -const read = require('./content/read') - -async function getData (cache, key, opts = {}) { - const { integrity, memoize, size } = opts - const memoized = memo.get(cache, key, opts) - if (memoized && memoize !== false) { - return { - metadata: memoized.entry.metadata, - data: memoized.data, - integrity: memoized.entry.integrity, - size: memoized.entry.size, - } - } - - const entry = await index.find(cache, key, opts) - if (!entry) { - throw new index.NotFoundError(cache, key) - } - const data = await read(cache, entry.integrity, { integrity, size }) - if (memoize) { - memo.put(cache, entry, data, opts) - } - - return { - data, - metadata: entry.metadata, - size: entry.size, - integrity: entry.integrity, - } -} -module.exports = getData - -async function getDataByDigest (cache, key, opts = {}) { - const { integrity, memoize, size } = opts - const memoized = memo.get.byDigest(cache, key, opts) - if (memoized && memoize !== false) { - return memoized - } - - const res = await read(cache, key, { integrity, size }) - if (memoize) { - memo.put.byDigest(cache, key, res, opts) - } - return res -} -module.exports.byDigest = getDataByDigest - -const getMemoizedStream = (memoized) => { - const stream = new Minipass() - stream.on('newListener', function (ev, cb) { - ev === 'metadata' && cb(memoized.entry.metadata) - ev === 'integrity' && cb(memoized.entry.integrity) - ev === 'size' && cb(memoized.entry.size) - }) - stream.end(memoized.data) - return stream -} - -function getStream (cache, key, opts = {}) { - const { memoize, size } = opts - const memoized = memo.get(cache, key, opts) - if (memoized && memoize !== false) { - return getMemoizedStream(memoized) - } - - const stream = new Pipeline() - // Set all this up to run on the stream and then just return the stream - Promise.resolve().then(async () => { - const entry = await index.find(cache, key) - if (!entry) { - throw new index.NotFoundError(cache, key) - } - - stream.emit('metadata', entry.metadata) - stream.emit('integrity', entry.integrity) - stream.emit('size', entry.size) - stream.on('newListener', function (ev, cb) { - ev === 'metadata' && cb(entry.metadata) - ev === 'integrity' && cb(entry.integrity) - ev === 'size' && cb(entry.size) - }) - - const src = read.readStream( - cache, - entry.integrity, - { ...opts, size: typeof size !== 'number' ? entry.size : size } - ) - - if (memoize) { - const memoStream = new Collect.PassThrough() - memoStream.on('collect', data => memo.put(cache, entry, data, opts)) - stream.unshift(memoStream) - } - stream.unshift(src) - return stream - }).catch((err) => stream.emit('error', err)) - - return stream -} - -module.exports.stream = getStream - -function getStreamDigest (cache, integrity, opts = {}) { - const { memoize } = opts - const memoized = memo.get.byDigest(cache, integrity, opts) - if (memoized && memoize !== false) { - const stream = new Minipass() - stream.end(memoized) - return stream - } else { - const stream = read.readStream(cache, integrity, opts) - if (!memoize) { - return stream - } - - const memoStream = new Collect.PassThrough() - memoStream.on('collect', data => memo.put.byDigest( - cache, - integrity, - data, - opts - )) - return new Pipeline(stream, memoStream) - } -} - -module.exports.stream.byDigest = getStreamDigest - -function info (cache, key, opts = {}) { - const { memoize } = opts - const memoized = memo.get(cache, key, opts) - if (memoized && memoize !== false) { - return Promise.resolve(memoized.entry) - } else { - return index.find(cache, key) - } -} -module.exports.info = info - -async function copy (cache, key, dest, opts = {}) { - const entry = await index.find(cache, key, opts) - if (!entry) { - throw new index.NotFoundError(cache, key) - } - await read.copy(cache, entry.integrity, dest, opts) - return { - metadata: entry.metadata, - size: entry.size, - integrity: entry.integrity, - } -} - -module.exports.copy = copy - -async function copyByDigest (cache, key, dest, opts = {}) { - await read.copy(cache, key, dest, opts) - return key -} - -module.exports.copy.byDigest = copyByDigest - -module.exports.hasContent = read.hasContent diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/index.js b/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/index.js deleted file mode 100644 index c9b0da5f3a271b..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/index.js +++ /dev/null @@ -1,42 +0,0 @@ -'use strict' - -const get = require('./get.js') -const put = require('./put.js') -const rm = require('./rm.js') -const verify = require('./verify.js') -const { clearMemoized } = require('./memoization.js') -const tmp = require('./util/tmp.js') -const index = require('./entry-index.js') - -module.exports.index = {} -module.exports.index.compact = index.compact -module.exports.index.insert = index.insert - -module.exports.ls = index.ls -module.exports.ls.stream = index.lsStream - -module.exports.get = get -module.exports.get.byDigest = get.byDigest -module.exports.get.stream = get.stream -module.exports.get.stream.byDigest = get.stream.byDigest -module.exports.get.copy = get.copy -module.exports.get.copy.byDigest = get.copy.byDigest -module.exports.get.info = get.info -module.exports.get.hasContent = get.hasContent - -module.exports.put = put -module.exports.put.stream = put.stream - -module.exports.rm = rm.entry -module.exports.rm.all = rm.all -module.exports.rm.entry = module.exports.rm -module.exports.rm.content = rm.content - -module.exports.clearMemoized = clearMemoized - -module.exports.tmp = {} -module.exports.tmp.mkdir = tmp.mkdir -module.exports.tmp.withTmp = tmp.withTmp - -module.exports.verify = verify -module.exports.verify.lastRun = verify.lastRun diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/memoization.js b/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/memoization.js deleted file mode 100644 index 2ecc60912e4563..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/memoization.js +++ /dev/null @@ -1,72 +0,0 @@ -'use strict' - -const { LRUCache } = require('lru-cache') - -const MEMOIZED = new LRUCache({ - max: 500, - maxSize: 50 * 1024 * 1024, // 50MB - ttl: 3 * 60 * 1000, // 3 minutes - sizeCalculation: (entry, key) => key.startsWith('key:') ? entry.data.length : entry.length, -}) - -module.exports.clearMemoized = clearMemoized - -function clearMemoized () { - const old = {} - MEMOIZED.forEach((v, k) => { - old[k] = v - }) - MEMOIZED.clear() - return old -} - -module.exports.put = put - -function put (cache, entry, data, opts) { - pickMem(opts).set(`key:${cache}:${entry.key}`, { entry, data }) - putDigest(cache, entry.integrity, data, opts) -} - -module.exports.put.byDigest = putDigest - -function putDigest (cache, integrity, data, opts) { - pickMem(opts).set(`digest:${cache}:${integrity}`, data) -} - -module.exports.get = get - -function get (cache, key, opts) { - return pickMem(opts).get(`key:${cache}:${key}`) -} - -module.exports.get.byDigest = getDigest - -function getDigest (cache, integrity, opts) { - return pickMem(opts).get(`digest:${cache}:${integrity}`) -} - -class ObjProxy { - constructor (obj) { - this.obj = obj - } - - get (key) { - return this.obj[key] - } - - set (key, val) { - this.obj[key] = val - } -} - -function pickMem (opts) { - if (!opts || !opts.memoize) { - return MEMOIZED - } else if (opts.memoize.get && opts.memoize.set) { - return opts.memoize - } else if (typeof opts.memoize === 'object') { - return new ObjProxy(opts.memoize) - } else { - return MEMOIZED - } -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/put.js b/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/put.js deleted file mode 100644 index 9fc932d5f6dec5..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/put.js +++ /dev/null @@ -1,80 +0,0 @@ -'use strict' - -const index = require('./entry-index') -const memo = require('./memoization') -const write = require('./content/write') -const Flush = require('minipass-flush') -const { PassThrough } = require('minipass-collect') -const Pipeline = require('minipass-pipeline') - -const putOpts = (opts) => ({ - algorithms: ['sha512'], - ...opts, -}) - -module.exports = putData - -async function putData (cache, key, data, opts = {}) { - const { memoize } = opts - opts = putOpts(opts) - const res = await write(cache, data, opts) - const entry = await index.insert(cache, key, res.integrity, { ...opts, size: res.size }) - if (memoize) { - memo.put(cache, entry, data, opts) - } - - return res.integrity -} - -module.exports.stream = putStream - -function putStream (cache, key, opts = {}) { - const { memoize } = opts - opts = putOpts(opts) - let integrity - let size - let error - - let memoData - const pipeline = new Pipeline() - // first item in the pipeline is the memoizer, because we need - // that to end first and get the collected data. - if (memoize) { - const memoizer = new PassThrough().on('collect', data => { - memoData = data - }) - pipeline.push(memoizer) - } - - // contentStream is a write-only, not a passthrough - // no data comes out of it. - const contentStream = write.stream(cache, opts) - .on('integrity', (int) => { - integrity = int - }) - .on('size', (s) => { - size = s - }) - .on('error', (err) => { - error = err - }) - - pipeline.push(contentStream) - - // last but not least, we write the index and emit hash and size, - // and memoize if we're doing that - pipeline.push(new Flush({ - async flush () { - if (!error) { - const entry = await index.insert(cache, key, integrity, { ...opts, size }) - if (memoize && memoData) { - memo.put(cache, entry, memoData, opts) - } - pipeline.emit('integrity', integrity) - pipeline.emit('size', size) - } - }, - })) - - return pipeline -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/rm.js b/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/rm.js deleted file mode 100644 index a94760c7cf2430..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/rm.js +++ /dev/null @@ -1,31 +0,0 @@ -'use strict' - -const { rm } = require('fs/promises') -const glob = require('./util/glob.js') -const index = require('./entry-index') -const memo = require('./memoization') -const path = require('path') -const rmContent = require('./content/rm') - -module.exports = entry -module.exports.entry = entry - -function entry (cache, key, opts) { - memo.clearMemoized() - return index.delete(cache, key, opts) -} - -module.exports.content = content - -function content (cache, integrity) { - memo.clearMemoized() - return rmContent(cache, integrity) -} - -module.exports.all = all - -async function all (cache) { - memo.clearMemoized() - const paths = await glob(path.join(cache, '*(content-*|index-*)'), { silent: true, nosort: true }) - return Promise.all(paths.map((p) => rm(p, { recursive: true, force: true }))) -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/util/glob.js b/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/util/glob.js deleted file mode 100644 index 8500c1c16a429f..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/util/glob.js +++ /dev/null @@ -1,7 +0,0 @@ -'use strict' - -const { glob } = require('glob') -const path = require('path') - -const globify = (pattern) => pattern.split(path.win32.sep).join(path.posix.sep) -module.exports = (path, options) => glob(globify(path), options) diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/util/hash-to-segments.js b/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/util/hash-to-segments.js deleted file mode 100644 index 445599b5038088..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/util/hash-to-segments.js +++ /dev/null @@ -1,7 +0,0 @@ -'use strict' - -module.exports = hashToSegments - -function hashToSegments (hash) { - return [hash.slice(0, 2), hash.slice(2, 4), hash.slice(4)] -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/util/tmp.js b/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/util/tmp.js deleted file mode 100644 index 0bf5302136ebeb..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/util/tmp.js +++ /dev/null @@ -1,26 +0,0 @@ -'use strict' - -const { withTempDir } = require('@npmcli/fs') -const fs = require('fs/promises') -const path = require('path') - -module.exports.mkdir = mktmpdir - -async function mktmpdir (cache, opts = {}) { - const { tmpPrefix } = opts - const tmpDir = path.join(cache, 'tmp') - await fs.mkdir(tmpDir, { recursive: true, owner: 'inherit' }) - // do not use path.join(), it drops the trailing / if tmpPrefix is unset - const target = `${tmpDir}${path.sep}${tmpPrefix || ''}` - return fs.mkdtemp(target, { owner: 'inherit' }) -} - -module.exports.withTmp = withTmp - -function withTmp (cache, opts, cb) { - if (!cb) { - cb = opts - opts = {} - } - return withTempDir(path.join(cache, 'tmp'), cb, opts) -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/verify.js b/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/verify.js deleted file mode 100644 index d7423da1295b68..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/lib/verify.js +++ /dev/null @@ -1,257 +0,0 @@ -'use strict' - -const { - mkdir, - readFile, - rm, - stat, - truncate, - writeFile, -} = require('fs/promises') -const pMap = require('p-map') -const contentPath = require('./content/path') -const fsm = require('fs-minipass') -const glob = require('./util/glob.js') -const index = require('./entry-index') -const path = require('path') -const ssri = require('ssri') - -const hasOwnProperty = (obj, key) => - Object.prototype.hasOwnProperty.call(obj, key) - -const verifyOpts = (opts) => ({ - concurrency: 20, - log: { silly () {} }, - ...opts, -}) - -module.exports = verify - -async function verify (cache, opts) { - opts = verifyOpts(opts) - opts.log.silly('verify', 'verifying cache at', cache) - - const steps = [ - markStartTime, - fixPerms, - garbageCollect, - rebuildIndex, - cleanTmp, - writeVerifile, - markEndTime, - ] - - const stats = {} - for (const step of steps) { - const label = step.name - const start = new Date() - const s = await step(cache, opts) - if (s) { - Object.keys(s).forEach((k) => { - stats[k] = s[k] - }) - } - const end = new Date() - if (!stats.runTime) { - stats.runTime = {} - } - stats.runTime[label] = end - start - } - stats.runTime.total = stats.endTime - stats.startTime - opts.log.silly( - 'verify', - 'verification finished for', - cache, - 'in', - `${stats.runTime.total}ms` - ) - return stats -} - -async function markStartTime () { - return { startTime: new Date() } -} - -async function markEndTime () { - return { endTime: new Date() } -} - -async function fixPerms (cache, opts) { - opts.log.silly('verify', 'fixing cache permissions') - await mkdir(cache, { recursive: true }) - return null -} - -// Implements a naive mark-and-sweep tracing garbage collector. -// -// The algorithm is basically as follows: -// 1. Read (and filter) all index entries ("pointers") -// 2. Mark each integrity value as "live" -// 3. Read entire filesystem tree in `content-vX/` dir -// 4. If content is live, verify its checksum and delete it if it fails -// 5. If content is not marked as live, rm it. -// -async function garbageCollect (cache, opts) { - opts.log.silly('verify', 'garbage collecting content') - const indexStream = index.lsStream(cache) - const liveContent = new Set() - indexStream.on('data', (entry) => { - if (opts.filter && !opts.filter(entry)) { - return - } - - // integrity is stringified, re-parse it so we can get each hash - const integrity = ssri.parse(entry.integrity) - for (const algo in integrity) { - liveContent.add(integrity[algo].toString()) - } - }) - await new Promise((resolve, reject) => { - indexStream.on('end', resolve).on('error', reject) - }) - const contentDir = contentPath.contentDir(cache) - const files = await glob(path.join(contentDir, '**'), { - follow: false, - nodir: true, - nosort: true, - }) - const stats = { - verifiedContent: 0, - reclaimedCount: 0, - reclaimedSize: 0, - badContentCount: 0, - keptSize: 0, - } - await pMap( - files, - async (f) => { - const split = f.split(/[/\\]/) - const digest = split.slice(split.length - 3).join('') - const algo = split[split.length - 4] - const integrity = ssri.fromHex(digest, algo) - if (liveContent.has(integrity.toString())) { - const info = await verifyContent(f, integrity) - if (!info.valid) { - stats.reclaimedCount++ - stats.badContentCount++ - stats.reclaimedSize += info.size - } else { - stats.verifiedContent++ - stats.keptSize += info.size - } - } else { - // No entries refer to this content. We can delete. - stats.reclaimedCount++ - const s = await stat(f) - await rm(f, { recursive: true, force: true }) - stats.reclaimedSize += s.size - } - return stats - }, - { concurrency: opts.concurrency } - ) - return stats -} - -async function verifyContent (filepath, sri) { - const contentInfo = {} - try { - const { size } = await stat(filepath) - contentInfo.size = size - contentInfo.valid = true - await ssri.checkStream(new fsm.ReadStream(filepath), sri) - } catch (err) { - if (err.code === 'ENOENT') { - return { size: 0, valid: false } - } - if (err.code !== 'EINTEGRITY') { - throw err - } - - await rm(filepath, { recursive: true, force: true }) - contentInfo.valid = false - } - return contentInfo -} - -async function rebuildIndex (cache, opts) { - opts.log.silly('verify', 'rebuilding index') - const entries = await index.ls(cache) - const stats = { - missingContent: 0, - rejectedEntries: 0, - totalEntries: 0, - } - const buckets = {} - for (const k in entries) { - /* istanbul ignore else */ - if (hasOwnProperty(entries, k)) { - const hashed = index.hashKey(k) - const entry = entries[k] - const excluded = opts.filter && !opts.filter(entry) - excluded && stats.rejectedEntries++ - if (buckets[hashed] && !excluded) { - buckets[hashed].push(entry) - } else if (buckets[hashed] && excluded) { - // skip - } else if (excluded) { - buckets[hashed] = [] - buckets[hashed]._path = index.bucketPath(cache, k) - } else { - buckets[hashed] = [entry] - buckets[hashed]._path = index.bucketPath(cache, k) - } - } - } - await pMap( - Object.keys(buckets), - (key) => { - return rebuildBucket(cache, buckets[key], stats, opts) - }, - { concurrency: opts.concurrency } - ) - return stats -} - -async function rebuildBucket (cache, bucket, stats) { - await truncate(bucket._path) - // This needs to be serialized because cacache explicitly - // lets very racy bucket conflicts clobber each other. - for (const entry of bucket) { - const content = contentPath(cache, entry.integrity) - try { - await stat(content) - await index.insert(cache, entry.key, entry.integrity, { - metadata: entry.metadata, - size: entry.size, - time: entry.time, - }) - stats.totalEntries++ - } catch (err) { - if (err.code === 'ENOENT') { - stats.rejectedEntries++ - stats.missingContent++ - } else { - throw err - } - } - } -} - -function cleanTmp (cache, opts) { - opts.log.silly('verify', 'cleaning tmp directory') - return rm(path.join(cache, 'tmp'), { recursive: true, force: true }) -} - -async function writeVerifile (cache, opts) { - const verifile = path.join(cache, '_lastverified') - opts.log.silly('verify', 'writing verifile to ' + verifile) - return writeFile(verifile, `${Date.now()}`) -} - -module.exports.lastRun = lastRun - -async function lastRun (cache) { - const data = await readFile(path.join(cache, '_lastverified'), { encoding: 'utf8' }) - return new Date(+data) -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/package.json b/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/package.json deleted file mode 100644 index 6e6219158ed759..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/cacache/package.json +++ /dev/null @@ -1,82 +0,0 @@ -{ - "name": "cacache", - "version": "18.0.4", - "cache-version": { - "content": "2", - "index": "5" - }, - "description": "Fast, fault-tolerant, cross-platform, disk-based, data-agnostic, content-addressable cache.", - "main": "lib/index.js", - "files": [ - "bin/", - "lib/" - ], - "scripts": { - "test": "tap", - "snap": "tap", - "coverage": "tap", - "test-docker": "docker run -it --rm --name pacotest -v \"$PWD\":/tmp -w /tmp node:latest npm test", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "npmclilint": "npmcli-lint", - "lintfix": "npm run lint -- --fix", - "postsnap": "npm run lintfix --", - "postlint": "template-oss-check", - "posttest": "npm run lint", - "template-oss-apply": "template-oss-apply --force" - }, - "repository": { - "type": "git", - "url": "git+https://github.com/npm/cacache.git" - }, - "keywords": [ - "cache", - "caching", - "content-addressable", - "sri", - "sri hash", - "subresource integrity", - "cache", - "storage", - "store", - "file store", - "filesystem", - "disk cache", - "disk storage" - ], - "license": "ISC", - "dependencies": { - "@npmcli/fs": "^3.1.0", - "fs-minipass": "^3.0.0", - "glob": "^10.2.2", - "lru-cache": "^10.0.1", - "minipass": "^7.0.3", - "minipass-collect": "^2.0.1", - "minipass-flush": "^1.0.5", - "minipass-pipeline": "^1.2.4", - "p-map": "^4.0.0", - "ssri": "^10.0.0", - "tar": "^6.1.11", - "unique-filename": "^3.0.0" - }, - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.22.0", - "tap": "^16.0.0" - }, - "engines": { - "node": "^16.14.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "windowsCI": false, - "version": "4.22.0", - "publish": "true" - }, - "author": "GitHub Inc.", - "tap": { - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - } -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/LICENSE b/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/LICENSE deleted file mode 100644 index 1808eb2844231c..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/LICENSE +++ /dev/null @@ -1,16 +0,0 @@ -ISC License - -Copyright 2017-2022 (c) npm, Inc. - -Permission to use, copy, modify, and/or distribute this software for -any purpose with or without fee is hereby granted, provided that the -above copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE COPYRIGHT HOLDER DISCLAIMS -ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE -COPYRIGHT HOLDER BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR -CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE -USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/cache/entry.js b/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/cache/entry.js deleted file mode 100644 index bfcfacbcc95e18..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/cache/entry.js +++ /dev/null @@ -1,471 +0,0 @@ -const { Request, Response } = require('minipass-fetch') -const { Minipass } = require('minipass') -const MinipassFlush = require('minipass-flush') -const cacache = require('cacache') -const url = require('url') - -const CachingMinipassPipeline = require('../pipeline.js') -const CachePolicy = require('./policy.js') -const cacheKey = require('./key.js') -const remote = require('../remote.js') - -const hasOwnProperty = (obj, prop) => Object.prototype.hasOwnProperty.call(obj, prop) - -// allow list for request headers that will be written to the cache index -// note: we will also store any request headers -// that are named in a response's vary header -const KEEP_REQUEST_HEADERS = [ - 'accept-charset', - 'accept-encoding', - 'accept-language', - 'accept', - 'cache-control', -] - -// allow list for response headers that will be written to the cache index -// note: we must not store the real response's age header, or when we load -// a cache policy based on the metadata it will think the cached response -// is always stale -const KEEP_RESPONSE_HEADERS = [ - 'cache-control', - 'content-encoding', - 'content-language', - 'content-type', - 'date', - 'etag', - 'expires', - 'last-modified', - 'link', - 'location', - 'pragma', - 'vary', -] - -// return an object containing all metadata to be written to the index -const getMetadata = (request, response, options) => { - const metadata = { - time: Date.now(), - url: request.url, - reqHeaders: {}, - resHeaders: {}, - - // options on which we must match the request and vary the response - options: { - compress: options.compress != null ? options.compress : request.compress, - }, - } - - // only save the status if it's not a 200 or 304 - if (response.status !== 200 && response.status !== 304) { - metadata.status = response.status - } - - for (const name of KEEP_REQUEST_HEADERS) { - if (request.headers.has(name)) { - metadata.reqHeaders[name] = request.headers.get(name) - } - } - - // if the request's host header differs from the host in the url - // we need to keep it, otherwise it's just noise and we ignore it - const host = request.headers.get('host') - const parsedUrl = new url.URL(request.url) - if (host && parsedUrl.host !== host) { - metadata.reqHeaders.host = host - } - - // if the response has a vary header, make sure - // we store the relevant request headers too - if (response.headers.has('vary')) { - const vary = response.headers.get('vary') - // a vary of "*" means every header causes a different response. - // in that scenario, we do not include any additional headers - // as the freshness check will always fail anyway and we don't - // want to bloat the cache indexes - if (vary !== '*') { - // copy any other request headers that will vary the response - const varyHeaders = vary.trim().toLowerCase().split(/\s*,\s*/) - for (const name of varyHeaders) { - if (request.headers.has(name)) { - metadata.reqHeaders[name] = request.headers.get(name) - } - } - } - } - - for (const name of KEEP_RESPONSE_HEADERS) { - if (response.headers.has(name)) { - metadata.resHeaders[name] = response.headers.get(name) - } - } - - for (const name of options.cacheAdditionalHeaders) { - if (response.headers.has(name)) { - metadata.resHeaders[name] = response.headers.get(name) - } - } - - return metadata -} - -// symbols used to hide objects that may be lazily evaluated in a getter -const _request = Symbol('request') -const _response = Symbol('response') -const _policy = Symbol('policy') - -class CacheEntry { - constructor ({ entry, request, response, options }) { - if (entry) { - this.key = entry.key - this.entry = entry - // previous versions of this module didn't write an explicit timestamp in - // the metadata, so fall back to the entry's timestamp. we can't use the - // entry timestamp to determine staleness because cacache will update it - // when it verifies its data - this.entry.metadata.time = this.entry.metadata.time || this.entry.time - } else { - this.key = cacheKey(request) - } - - this.options = options - - // these properties are behind getters that lazily evaluate - this[_request] = request - this[_response] = response - this[_policy] = null - } - - // returns a CacheEntry instance that satisfies the given request - // or undefined if no existing entry satisfies - static async find (request, options) { - try { - // compacts the index and returns an array of unique entries - var matches = await cacache.index.compact(options.cachePath, cacheKey(request), (A, B) => { - const entryA = new CacheEntry({ entry: A, options }) - const entryB = new CacheEntry({ entry: B, options }) - return entryA.policy.satisfies(entryB.request) - }, { - validateEntry: (entry) => { - // clean out entries with a buggy content-encoding value - if (entry.metadata && - entry.metadata.resHeaders && - entry.metadata.resHeaders['content-encoding'] === null) { - return false - } - - // if an integrity is null, it needs to have a status specified - if (entry.integrity === null) { - return !!(entry.metadata && entry.metadata.status) - } - - return true - }, - }) - } catch (err) { - // if the compact request fails, ignore the error and return - return - } - - // a cache mode of 'reload' means to behave as though we have no cache - // on the way to the network. return undefined to allow cacheFetch to - // create a brand new request no matter what. - if (options.cache === 'reload') { - return - } - - // find the specific entry that satisfies the request - let match - for (const entry of matches) { - const _entry = new CacheEntry({ - entry, - options, - }) - - if (_entry.policy.satisfies(request)) { - match = _entry - break - } - } - - return match - } - - // if the user made a PUT/POST/PATCH then we invalidate our - // cache for the same url by deleting the index entirely - static async invalidate (request, options) { - const key = cacheKey(request) - try { - await cacache.rm.entry(options.cachePath, key, { removeFully: true }) - } catch (err) { - // ignore errors - } - } - - get request () { - if (!this[_request]) { - this[_request] = new Request(this.entry.metadata.url, { - method: 'GET', - headers: this.entry.metadata.reqHeaders, - ...this.entry.metadata.options, - }) - } - - return this[_request] - } - - get response () { - if (!this[_response]) { - this[_response] = new Response(null, { - url: this.entry.metadata.url, - counter: this.options.counter, - status: this.entry.metadata.status || 200, - headers: { - ...this.entry.metadata.resHeaders, - 'content-length': this.entry.size, - }, - }) - } - - return this[_response] - } - - get policy () { - if (!this[_policy]) { - this[_policy] = new CachePolicy({ - entry: this.entry, - request: this.request, - response: this.response, - options: this.options, - }) - } - - return this[_policy] - } - - // wraps the response in a pipeline that stores the data - // in the cache while the user consumes it - async store (status) { - // if we got a status other than 200, 301, or 308, - // or the CachePolicy forbid storage, append the - // cache status header and return it untouched - if ( - this.request.method !== 'GET' || - ![200, 301, 308].includes(this.response.status) || - !this.policy.storable() - ) { - this.response.headers.set('x-local-cache-status', 'skip') - return this.response - } - - const size = this.response.headers.get('content-length') - const cacheOpts = { - algorithms: this.options.algorithms, - metadata: getMetadata(this.request, this.response, this.options), - size, - integrity: this.options.integrity, - integrityEmitter: this.response.body.hasIntegrityEmitter && this.response.body, - } - - let body = null - // we only set a body if the status is a 200, redirects are - // stored as metadata only - if (this.response.status === 200) { - let cacheWriteResolve, cacheWriteReject - const cacheWritePromise = new Promise((resolve, reject) => { - cacheWriteResolve = resolve - cacheWriteReject = reject - }).catch((err) => { - body.emit('error', err) - }) - - body = new CachingMinipassPipeline({ events: ['integrity', 'size'] }, new MinipassFlush({ - flush () { - return cacheWritePromise - }, - })) - // this is always true since if we aren't reusing the one from the remote fetch, we - // are using the one from cacache - body.hasIntegrityEmitter = true - - const onResume = () => { - const tee = new Minipass() - const cacheStream = cacache.put.stream(this.options.cachePath, this.key, cacheOpts) - // re-emit the integrity and size events on our new response body so they can be reused - cacheStream.on('integrity', i => body.emit('integrity', i)) - cacheStream.on('size', s => body.emit('size', s)) - // stick a flag on here so downstream users will know if they can expect integrity events - tee.pipe(cacheStream) - // TODO if the cache write fails, log a warning but return the response anyway - // eslint-disable-next-line promise/catch-or-return - cacheStream.promise().then(cacheWriteResolve, cacheWriteReject) - body.unshift(tee) - body.unshift(this.response.body) - } - - body.once('resume', onResume) - body.once('end', () => body.removeListener('resume', onResume)) - } else { - await cacache.index.insert(this.options.cachePath, this.key, null, cacheOpts) - } - - // note: we do not set the x-local-cache-hash header because we do not know - // the hash value until after the write to the cache completes, which doesn't - // happen until after the response has been sent and it's too late to write - // the header anyway - this.response.headers.set('x-local-cache', encodeURIComponent(this.options.cachePath)) - this.response.headers.set('x-local-cache-key', encodeURIComponent(this.key)) - this.response.headers.set('x-local-cache-mode', 'stream') - this.response.headers.set('x-local-cache-status', status) - this.response.headers.set('x-local-cache-time', new Date().toISOString()) - const newResponse = new Response(body, { - url: this.response.url, - status: this.response.status, - headers: this.response.headers, - counter: this.options.counter, - }) - return newResponse - } - - // use the cached data to create a response and return it - async respond (method, options, status) { - let response - if (method === 'HEAD' || [301, 308].includes(this.response.status)) { - // if the request is a HEAD, or the response is a redirect, - // then the metadata in the entry already includes everything - // we need to build a response - response = this.response - } else { - // we're responding with a full cached response, so create a body - // that reads from cacache and attach it to a new Response - const body = new Minipass() - const headers = { ...this.policy.responseHeaders() } - - const onResume = () => { - const cacheStream = cacache.get.stream.byDigest( - this.options.cachePath, this.entry.integrity, { memoize: this.options.memoize } - ) - cacheStream.on('error', async (err) => { - cacheStream.pause() - if (err.code === 'EINTEGRITY') { - await cacache.rm.content( - this.options.cachePath, this.entry.integrity, { memoize: this.options.memoize } - ) - } - if (err.code === 'ENOENT' || err.code === 'EINTEGRITY') { - await CacheEntry.invalidate(this.request, this.options) - } - body.emit('error', err) - cacheStream.resume() - }) - // emit the integrity and size events based on our metadata so we're consistent - body.emit('integrity', this.entry.integrity) - body.emit('size', Number(headers['content-length'])) - cacheStream.pipe(body) - } - - body.once('resume', onResume) - body.once('end', () => body.removeListener('resume', onResume)) - response = new Response(body, { - url: this.entry.metadata.url, - counter: options.counter, - status: 200, - headers, - }) - } - - response.headers.set('x-local-cache', encodeURIComponent(this.options.cachePath)) - response.headers.set('x-local-cache-hash', encodeURIComponent(this.entry.integrity)) - response.headers.set('x-local-cache-key', encodeURIComponent(this.key)) - response.headers.set('x-local-cache-mode', 'stream') - response.headers.set('x-local-cache-status', status) - response.headers.set('x-local-cache-time', new Date(this.entry.metadata.time).toUTCString()) - return response - } - - // use the provided request along with this cache entry to - // revalidate the stored response. returns a response, either - // from the cache or from the update - async revalidate (request, options) { - const revalidateRequest = new Request(request, { - headers: this.policy.revalidationHeaders(request), - }) - - try { - // NOTE: be sure to remove the headers property from the - // user supplied options, since we have already defined - // them on the new request object. if they're still in the - // options then those will overwrite the ones from the policy - var response = await remote(revalidateRequest, { - ...options, - headers: undefined, - }) - } catch (err) { - // if the network fetch fails, return the stale - // cached response unless it has a cache-control - // of 'must-revalidate' - if (!this.policy.mustRevalidate) { - return this.respond(request.method, options, 'stale') - } - - throw err - } - - if (this.policy.revalidated(revalidateRequest, response)) { - // we got a 304, write a new index to the cache and respond from cache - const metadata = getMetadata(request, response, options) - // 304 responses do not include headers that are specific to the response data - // since they do not include a body, so we copy values for headers that were - // in the old cache entry to the new one, if the new metadata does not already - // include that header - for (const name of KEEP_RESPONSE_HEADERS) { - if ( - !hasOwnProperty(metadata.resHeaders, name) && - hasOwnProperty(this.entry.metadata.resHeaders, name) - ) { - metadata.resHeaders[name] = this.entry.metadata.resHeaders[name] - } - } - - for (const name of options.cacheAdditionalHeaders) { - const inMeta = hasOwnProperty(metadata.resHeaders, name) - const inEntry = hasOwnProperty(this.entry.metadata.resHeaders, name) - const inPolicy = hasOwnProperty(this.policy.response.headers, name) - - // if the header is in the existing entry, but it is not in the metadata - // then we need to write it to the metadata as this will refresh the on-disk cache - if (!inMeta && inEntry) { - metadata.resHeaders[name] = this.entry.metadata.resHeaders[name] - } - // if the header is in the metadata, but not in the policy, then we need to set - // it in the policy so that it's included in the immediate response. future - // responses will load a new cache entry, so we don't need to change that - if (!inPolicy && inMeta) { - this.policy.response.headers[name] = metadata.resHeaders[name] - } - } - - try { - await cacache.index.insert(options.cachePath, this.key, this.entry.integrity, { - size: this.entry.size, - metadata, - }) - } catch (err) { - // if updating the cache index fails, we ignore it and - // respond anyway - } - return this.respond(request.method, options, 'revalidated') - } - - // if we got a modified response, create a new entry based on it - const newEntry = new CacheEntry({ - request, - response, - options, - }) - - // respond with the new entry while writing it to the cache - return newEntry.store('updated') - } -} - -module.exports = CacheEntry diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/cache/errors.js b/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/cache/errors.js deleted file mode 100644 index 67a66573bebe66..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/cache/errors.js +++ /dev/null @@ -1,11 +0,0 @@ -class NotCachedError extends Error { - constructor (url) { - /* eslint-disable-next-line max-len */ - super(`request to ${url} failed: cache mode is 'only-if-cached' but no cached response is available.`) - this.code = 'ENOTCACHED' - } -} - -module.exports = { - NotCachedError, -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/cache/index.js b/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/cache/index.js deleted file mode 100644 index 0de49d23fb9336..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/cache/index.js +++ /dev/null @@ -1,49 +0,0 @@ -const { NotCachedError } = require('./errors.js') -const CacheEntry = require('./entry.js') -const remote = require('../remote.js') - -// do whatever is necessary to get a Response and return it -const cacheFetch = async (request, options) => { - // try to find a cached entry that satisfies this request - const entry = await CacheEntry.find(request, options) - if (!entry) { - // no cached result, if the cache mode is 'only-if-cached' that's a failure - if (options.cache === 'only-if-cached') { - throw new NotCachedError(request.url) - } - - // otherwise, we make a request, store it and return it - const response = await remote(request, options) - const newEntry = new CacheEntry({ request, response, options }) - return newEntry.store('miss') - } - - // we have a cached response that satisfies this request, however if the cache - // mode is 'no-cache' then we send the revalidation request no matter what - if (options.cache === 'no-cache') { - return entry.revalidate(request, options) - } - - // if the cached entry is not stale, or if the cache mode is 'force-cache' or - // 'only-if-cached' we can respond with the cached entry. set the status - // based on the result of needsRevalidation and respond - const _needsRevalidation = entry.policy.needsRevalidation(request) - if (options.cache === 'force-cache' || - options.cache === 'only-if-cached' || - !_needsRevalidation) { - return entry.respond(request.method, options, _needsRevalidation ? 'stale' : 'hit') - } - - // if we got here, the cache entry is stale so revalidate it - return entry.revalidate(request, options) -} - -cacheFetch.invalidate = async (request, options) => { - if (!options.cachePath) { - return - } - - return CacheEntry.invalidate(request, options) -} - -module.exports = cacheFetch diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/cache/key.js b/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/cache/key.js deleted file mode 100644 index f7684d562b7fae..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/cache/key.js +++ /dev/null @@ -1,17 +0,0 @@ -const { URL, format } = require('url') - -// options passed to url.format() when generating a key -const formatOptions = { - auth: false, - fragment: false, - search: true, - unicode: false, -} - -// returns a string to be used as the cache key for the Request -const cacheKey = (request) => { - const parsed = new URL(request.url) - return `make-fetch-happen:request-cache:${format(parsed, formatOptions)}` -} - -module.exports = cacheKey diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/cache/policy.js b/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/cache/policy.js deleted file mode 100644 index ada3c8600dae92..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/cache/policy.js +++ /dev/null @@ -1,161 +0,0 @@ -const CacheSemantics = require('http-cache-semantics') -const Negotiator = require('negotiator') -const ssri = require('ssri') - -// options passed to http-cache-semantics constructor -const policyOptions = { - shared: false, - ignoreCargoCult: true, -} - -// a fake empty response, used when only testing the -// request for storability -const emptyResponse = { status: 200, headers: {} } - -// returns a plain object representation of the Request -const requestObject = (request) => { - const _obj = { - method: request.method, - url: request.url, - headers: {}, - compress: request.compress, - } - - request.headers.forEach((value, key) => { - _obj.headers[key] = value - }) - - return _obj -} - -// returns a plain object representation of the Response -const responseObject = (response) => { - const _obj = { - status: response.status, - headers: {}, - } - - response.headers.forEach((value, key) => { - _obj.headers[key] = value - }) - - return _obj -} - -class CachePolicy { - constructor ({ entry, request, response, options }) { - this.entry = entry - this.request = requestObject(request) - this.response = responseObject(response) - this.options = options - this.policy = new CacheSemantics(this.request, this.response, policyOptions) - - if (this.entry) { - // if we have an entry, copy the timestamp to the _responseTime - // this is necessary because the CacheSemantics constructor forces - // the value to Date.now() which means a policy created from a - // cache entry is likely to always identify itself as stale - this.policy._responseTime = this.entry.metadata.time - } - } - - // static method to quickly determine if a request alone is storable - static storable (request, options) { - // no cachePath means no caching - if (!options.cachePath) { - return false - } - - // user explicitly asked not to cache - if (options.cache === 'no-store') { - return false - } - - // we only cache GET and HEAD requests - if (!['GET', 'HEAD'].includes(request.method)) { - return false - } - - // otherwise, let http-cache-semantics make the decision - // based on the request's headers - const policy = new CacheSemantics(requestObject(request), emptyResponse, policyOptions) - return policy.storable() - } - - // returns true if the policy satisfies the request - satisfies (request) { - const _req = requestObject(request) - if (this.request.headers.host !== _req.headers.host) { - return false - } - - if (this.request.compress !== _req.compress) { - return false - } - - const negotiatorA = new Negotiator(this.request) - const negotiatorB = new Negotiator(_req) - - if (JSON.stringify(negotiatorA.mediaTypes()) !== JSON.stringify(negotiatorB.mediaTypes())) { - return false - } - - if (JSON.stringify(negotiatorA.languages()) !== JSON.stringify(negotiatorB.languages())) { - return false - } - - if (JSON.stringify(negotiatorA.encodings()) !== JSON.stringify(negotiatorB.encodings())) { - return false - } - - if (this.options.integrity) { - return ssri.parse(this.options.integrity).match(this.entry.integrity) - } - - return true - } - - // returns true if the request and response allow caching - storable () { - return this.policy.storable() - } - - // NOTE: this is a hack to avoid parsing the cache-control - // header ourselves, it returns true if the response's - // cache-control contains must-revalidate - get mustRevalidate () { - return !!this.policy._rescc['must-revalidate'] - } - - // returns true if the cached response requires revalidation - // for the given request - needsRevalidation (request) { - const _req = requestObject(request) - // force method to GET because we only cache GETs - // but can serve a HEAD from a cached GET - _req.method = 'GET' - return !this.policy.satisfiesWithoutRevalidation(_req) - } - - responseHeaders () { - return this.policy.responseHeaders() - } - - // returns a new object containing the appropriate headers - // to send a revalidation request - revalidationHeaders (request) { - const _req = requestObject(request) - return this.policy.revalidationHeaders(_req) - } - - // returns true if the request/response was revalidated - // successfully. returns false if a new response was received - revalidated (request, response) { - const _req = requestObject(request) - const _res = responseObject(response) - const policy = this.policy.revalidatedPolicy(_req, _res) - return !policy.modified - } -} - -module.exports = CachePolicy diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/fetch.js b/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/fetch.js deleted file mode 100644 index 233ba67e165502..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/fetch.js +++ /dev/null @@ -1,118 +0,0 @@ -'use strict' - -const { FetchError, Request, isRedirect } = require('minipass-fetch') -const url = require('url') - -const CachePolicy = require('./cache/policy.js') -const cache = require('./cache/index.js') -const remote = require('./remote.js') - -// given a Request, a Response and user options -// return true if the response is a redirect that -// can be followed. we throw errors that will result -// in the fetch being rejected if the redirect is -// possible but invalid for some reason -const canFollowRedirect = (request, response, options) => { - if (!isRedirect(response.status)) { - return false - } - - if (options.redirect === 'manual') { - return false - } - - if (options.redirect === 'error') { - throw new FetchError(`redirect mode is set to error: ${request.url}`, - 'no-redirect', { code: 'ENOREDIRECT' }) - } - - if (!response.headers.has('location')) { - throw new FetchError(`redirect location header missing for: ${request.url}`, - 'no-location', { code: 'EINVALIDREDIRECT' }) - } - - if (request.counter >= request.follow) { - throw new FetchError(`maximum redirect reached at: ${request.url}`, - 'max-redirect', { code: 'EMAXREDIRECT' }) - } - - return true -} - -// given a Request, a Response, and the user's options return an object -// with a new Request and a new options object that will be used for -// following the redirect -const getRedirect = (request, response, options) => { - const _opts = { ...options } - const location = response.headers.get('location') - const redirectUrl = new url.URL(location, /^https?:/.test(location) ? undefined : request.url) - // Comment below is used under the following license: - /** - * @license - * Copyright (c) 2010-2012 Mikeal Rogers - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language - * governing permissions and limitations under the License. - */ - - // Remove authorization if changing hostnames (but not if just - // changing ports or protocols). This matches the behavior of request: - // https://github.com/request/request/blob/b12a6245/lib/redirect.js#L134-L138 - if (new url.URL(request.url).hostname !== redirectUrl.hostname) { - request.headers.delete('authorization') - request.headers.delete('cookie') - } - - // for POST request with 301/302 response, or any request with 303 response, - // use GET when following redirect - if ( - response.status === 303 || - (request.method === 'POST' && [301, 302].includes(response.status)) - ) { - _opts.method = 'GET' - _opts.body = null - request.headers.delete('content-length') - } - - _opts.headers = {} - request.headers.forEach((value, key) => { - _opts.headers[key] = value - }) - - _opts.counter = ++request.counter - const redirectReq = new Request(url.format(redirectUrl), _opts) - return { - request: redirectReq, - options: _opts, - } -} - -const fetch = async (request, options) => { - const response = CachePolicy.storable(request, options) - ? await cache(request, options) - : await remote(request, options) - - // if the request wasn't a GET or HEAD, and the response - // status is between 200 and 399 inclusive, invalidate the - // request url - if (!['GET', 'HEAD'].includes(request.method) && - response.status >= 200 && - response.status <= 399) { - await cache.invalidate(request, options) - } - - if (!canFollowRedirect(request, response, options)) { - return response - } - - const redirect = getRedirect(request, response, options) - return fetch(redirect.request, redirect.options) -} - -module.exports = fetch diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/index.js b/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/index.js deleted file mode 100644 index 2f12e8e1b61131..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/index.js +++ /dev/null @@ -1,41 +0,0 @@ -const { FetchError, Headers, Request, Response } = require('minipass-fetch') - -const configureOptions = require('./options.js') -const fetch = require('./fetch.js') - -const makeFetchHappen = (url, opts) => { - const options = configureOptions(opts) - - const request = new Request(url, options) - return fetch(request, options) -} - -makeFetchHappen.defaults = (defaultUrl, defaultOptions = {}, wrappedFetch = makeFetchHappen) => { - if (typeof defaultUrl === 'object') { - defaultOptions = defaultUrl - defaultUrl = null - } - - const defaultedFetch = (url, options = {}) => { - const finalUrl = url || defaultUrl - const finalOptions = { - ...defaultOptions, - ...options, - headers: { - ...defaultOptions.headers, - ...options.headers, - }, - } - return wrappedFetch(finalUrl, finalOptions) - } - - defaultedFetch.defaults = (defaultUrl1, defaultOptions1 = {}) => - makeFetchHappen.defaults(defaultUrl1, defaultOptions1, defaultedFetch) - return defaultedFetch -} - -module.exports = makeFetchHappen -module.exports.FetchError = FetchError -module.exports.Headers = Headers -module.exports.Request = Request -module.exports.Response = Response diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/options.js b/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/options.js deleted file mode 100644 index f77511279f831d..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/options.js +++ /dev/null @@ -1,54 +0,0 @@ -const dns = require('dns') - -const conditionalHeaders = [ - 'if-modified-since', - 'if-none-match', - 'if-unmodified-since', - 'if-match', - 'if-range', -] - -const configureOptions = (opts) => { - const { strictSSL, ...options } = { ...opts } - options.method = options.method ? options.method.toUpperCase() : 'GET' - options.rejectUnauthorized = strictSSL !== false - - if (!options.retry) { - options.retry = { retries: 0 } - } else if (typeof options.retry === 'string') { - const retries = parseInt(options.retry, 10) - if (isFinite(retries)) { - options.retry = { retries } - } else { - options.retry = { retries: 0 } - } - } else if (typeof options.retry === 'number') { - options.retry = { retries: options.retry } - } else { - options.retry = { retries: 0, ...options.retry } - } - - options.dns = { ttl: 5 * 60 * 1000, lookup: dns.lookup, ...options.dns } - - options.cache = options.cache || 'default' - if (options.cache === 'default') { - const hasConditionalHeader = Object.keys(options.headers || {}).some((name) => { - return conditionalHeaders.includes(name.toLowerCase()) - }) - if (hasConditionalHeader) { - options.cache = 'no-store' - } - } - - options.cacheAdditionalHeaders = options.cacheAdditionalHeaders || [] - - // cacheManager is deprecated, but if it's set and - // cachePath is not we should copy it to the new field - if (options.cacheManager && !options.cachePath) { - options.cachePath = options.cacheManager - } - - return options -} - -module.exports = configureOptions diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/pipeline.js b/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/pipeline.js deleted file mode 100644 index b1d221b2d0ce31..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/pipeline.js +++ /dev/null @@ -1,41 +0,0 @@ -'use strict' - -const MinipassPipeline = require('minipass-pipeline') - -class CachingMinipassPipeline extends MinipassPipeline { - #events = [] - #data = new Map() - - constructor (opts, ...streams) { - // CRITICAL: do NOT pass the streams to the call to super(), this will start - // the flow of data and potentially cause the events we need to catch to emit - // before we've finished our own setup. instead we call super() with no args, - // finish our setup, and then push the streams into ourselves to start the - // data flow - super() - this.#events = opts.events - - /* istanbul ignore next - coverage disabled because this is pointless to test here */ - if (streams.length) { - this.push(...streams) - } - } - - on (event, handler) { - if (this.#events.includes(event) && this.#data.has(event)) { - return handler(...this.#data.get(event)) - } - - return super.on(event, handler) - } - - emit (event, ...data) { - if (this.#events.includes(event)) { - this.#data.set(event, data) - } - - return super.emit(event, ...data) - } -} - -module.exports = CachingMinipassPipeline diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/remote.js b/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/remote.js deleted file mode 100644 index 8554564074de6e..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/lib/remote.js +++ /dev/null @@ -1,131 +0,0 @@ -const { Minipass } = require('minipass') -const fetch = require('minipass-fetch') -const promiseRetry = require('promise-retry') -const ssri = require('ssri') -const { log } = require('proc-log') - -const CachingMinipassPipeline = require('./pipeline.js') -const { getAgent } = require('@npmcli/agent') -const pkg = require('../package.json') - -const USER_AGENT = `${pkg.name}/${pkg.version} (+https://npm.im/${pkg.name})` - -const RETRY_ERRORS = [ - 'ECONNRESET', // remote socket closed on us - 'ECONNREFUSED', // remote host refused to open connection - 'EADDRINUSE', // failed to bind to a local port (proxy?) - 'ETIMEDOUT', // someone in the transaction is WAY TOO SLOW - // from @npmcli/agent - 'ECONNECTIONTIMEOUT', - 'EIDLETIMEOUT', - 'ERESPONSETIMEOUT', - 'ETRANSFERTIMEOUT', - // Known codes we do NOT retry on: - // ENOTFOUND (getaddrinfo failure. Either bad hostname, or offline) - // EINVALIDPROXY // invalid protocol from @npmcli/agent - // EINVALIDRESPONSE // invalid status code from @npmcli/agent -] - -const RETRY_TYPES = [ - 'request-timeout', -] - -// make a request directly to the remote source, -// retrying certain classes of errors as well as -// following redirects (through the cache if necessary) -// and verifying response integrity -const remoteFetch = (request, options) => { - const agent = getAgent(request.url, options) - if (!request.headers.has('connection')) { - request.headers.set('connection', agent ? 'keep-alive' : 'close') - } - - if (!request.headers.has('user-agent')) { - request.headers.set('user-agent', USER_AGENT) - } - - // keep our own options since we're overriding the agent - // and the redirect mode - const _opts = { - ...options, - agent, - redirect: 'manual', - } - - return promiseRetry(async (retryHandler, attemptNum) => { - const req = new fetch.Request(request, _opts) - try { - let res = await fetch(req, _opts) - if (_opts.integrity && res.status === 200) { - // we got a 200 response and the user has specified an expected - // integrity value, so wrap the response in an ssri stream to verify it - const integrityStream = ssri.integrityStream({ - algorithms: _opts.algorithms, - integrity: _opts.integrity, - size: _opts.size, - }) - const pipeline = new CachingMinipassPipeline({ - events: ['integrity', 'size'], - }, res.body, integrityStream) - // we also propagate the integrity and size events out to the pipeline so we can use - // this new response body as an integrityEmitter for cacache - integrityStream.on('integrity', i => pipeline.emit('integrity', i)) - integrityStream.on('size', s => pipeline.emit('size', s)) - res = new fetch.Response(pipeline, res) - // set an explicit flag so we know if our response body will emit integrity and size - res.body.hasIntegrityEmitter = true - } - - res.headers.set('x-fetch-attempts', attemptNum) - - // do not retry POST requests, or requests with a streaming body - // do retry requests with a 408, 420, 429 or 500+ status in the response - const isStream = Minipass.isStream(req.body) - const isRetriable = req.method !== 'POST' && - !isStream && - ([408, 420, 429].includes(res.status) || res.status >= 500) - - if (isRetriable) { - if (typeof options.onRetry === 'function') { - options.onRetry(res) - } - - /* eslint-disable-next-line max-len */ - log.http('fetch', `${req.method} ${req.url} attempt ${attemptNum} failed with ${res.status}`) - return retryHandler(res) - } - - return res - } catch (err) { - const code = (err.code === 'EPROMISERETRY') - ? err.retried.code - : err.code - - // err.retried will be the thing that was thrown from above - // if it's a response, we just got a bad status code and we - // can re-throw to allow the retry - const isRetryError = err.retried instanceof fetch.Response || - (RETRY_ERRORS.includes(code) && RETRY_TYPES.includes(err.type)) - - if (req.method === 'POST' || isRetryError) { - throw err - } - - if (typeof options.onRetry === 'function') { - options.onRetry(err) - } - - log.http('fetch', `${req.method} ${req.url} attempt ${attemptNum} failed with ${err.code}`) - return retryHandler(err) - } - }, options.retry).catch((err) => { - // don't reject for http errors, just return them - if (err.status >= 400 && err.type !== 'system') { - return err - } - - throw err - }) -} - -module.exports = remoteFetch diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/package.json b/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/package.json deleted file mode 100644 index 7adb4d1e7f9719..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/make-fetch-happen/package.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "name": "make-fetch-happen", - "version": "13.0.1", - "description": "Opinionated, caching, retrying fetch client", - "main": "lib/index.js", - "files": [ - "bin/", - "lib/" - ], - "scripts": { - "test": "tap", - "posttest": "npm run lint", - "eslint": "eslint", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "lintfix": "npm run lint -- --fix", - "postlint": "template-oss-check", - "snap": "tap", - "template-oss-apply": "template-oss-apply --force" - }, - "repository": { - "type": "git", - "url": "https://github.com/npm/make-fetch-happen.git" - }, - "keywords": [ - "http", - "request", - "fetch", - "mean girls", - "caching", - "cache", - "subresource integrity" - ], - "author": "GitHub Inc.", - "license": "ISC", - "dependencies": { - "@npmcli/agent": "^2.0.0", - "cacache": "^18.0.0", - "http-cache-semantics": "^4.1.1", - "is-lambda": "^1.0.1", - "minipass": "^7.0.2", - "minipass-fetch": "^3.0.0", - "minipass-flush": "^1.0.5", - "minipass-pipeline": "^1.2.4", - "negotiator": "^0.6.3", - "proc-log": "^4.2.0", - "promise-retry": "^2.0.1", - "ssri": "^10.0.0" - }, - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.21.4", - "nock": "^13.2.4", - "safe-buffer": "^5.2.1", - "standard-version": "^9.3.2", - "tap": "^16.0.0" - }, - "engines": { - "node": "^16.14.0 || >=18.0.0" - }, - "tap": { - "color": 1, - "files": "test/*.js", - "check-coverage": true, - "timeout": 60, - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.21.4", - "publish": "true" - } -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/LICENSE b/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/LICENSE deleted file mode 100644 index 3c3410cdc12ee3..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/LICENSE +++ /dev/null @@ -1,28 +0,0 @@ -The MIT License (MIT) - -Copyright (c) Isaac Z. Schlueter and Contributors -Copyright (c) 2016 David Frank - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - ---- - -Note: This is a derivative work based on "node-fetch" by David Frank, -modified and distributed under the terms of the MIT license above. -https://github.com/bitinn/node-fetch diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/lib/abort-error.js b/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/lib/abort-error.js deleted file mode 100644 index b18f643269e375..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/lib/abort-error.js +++ /dev/null @@ -1,17 +0,0 @@ -'use strict' -class AbortError extends Error { - constructor (message) { - super(message) - this.code = 'FETCH_ABORTED' - this.type = 'aborted' - Error.captureStackTrace(this, this.constructor) - } - - get name () { - return 'AbortError' - } - - // don't allow name to be overridden, but don't throw either - set name (s) {} -} -module.exports = AbortError diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/lib/blob.js b/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/lib/blob.js deleted file mode 100644 index 121b1730102e72..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/lib/blob.js +++ /dev/null @@ -1,97 +0,0 @@ -'use strict' -const { Minipass } = require('minipass') -const TYPE = Symbol('type') -const BUFFER = Symbol('buffer') - -class Blob { - constructor (blobParts, options) { - this[TYPE] = '' - - const buffers = [] - let size = 0 - - if (blobParts) { - const a = blobParts - const length = Number(a.length) - for (let i = 0; i < length; i++) { - const element = a[i] - const buffer = element instanceof Buffer ? element - : ArrayBuffer.isView(element) - ? Buffer.from(element.buffer, element.byteOffset, element.byteLength) - : element instanceof ArrayBuffer ? Buffer.from(element) - : element instanceof Blob ? element[BUFFER] - : typeof element === 'string' ? Buffer.from(element) - : Buffer.from(String(element)) - size += buffer.length - buffers.push(buffer) - } - } - - this[BUFFER] = Buffer.concat(buffers, size) - - const type = options && options.type !== undefined - && String(options.type).toLowerCase() - if (type && !/[^\u0020-\u007E]/.test(type)) { - this[TYPE] = type - } - } - - get size () { - return this[BUFFER].length - } - - get type () { - return this[TYPE] - } - - text () { - return Promise.resolve(this[BUFFER].toString()) - } - - arrayBuffer () { - const buf = this[BUFFER] - const off = buf.byteOffset - const len = buf.byteLength - const ab = buf.buffer.slice(off, off + len) - return Promise.resolve(ab) - } - - stream () { - return new Minipass().end(this[BUFFER]) - } - - slice (start, end, type) { - const size = this.size - const relativeStart = start === undefined ? 0 - : start < 0 ? Math.max(size + start, 0) - : Math.min(start, size) - const relativeEnd = end === undefined ? size - : end < 0 ? Math.max(size + end, 0) - : Math.min(end, size) - const span = Math.max(relativeEnd - relativeStart, 0) - - const buffer = this[BUFFER] - const slicedBuffer = buffer.slice( - relativeStart, - relativeStart + span - ) - const blob = new Blob([], { type }) - blob[BUFFER] = slicedBuffer - return blob - } - - get [Symbol.toStringTag] () { - return 'Blob' - } - - static get BUFFER () { - return BUFFER - } -} - -Object.defineProperties(Blob.prototype, { - size: { enumerable: true }, - type: { enumerable: true }, -}) - -module.exports = Blob diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/lib/body.js b/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/lib/body.js deleted file mode 100644 index 62286bd1de0d91..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/lib/body.js +++ /dev/null @@ -1,350 +0,0 @@ -'use strict' -const { Minipass } = require('minipass') -const MinipassSized = require('minipass-sized') - -const Blob = require('./blob.js') -const { BUFFER } = Blob -const FetchError = require('./fetch-error.js') - -// optional dependency on 'encoding' -let convert -try { - convert = require('encoding').convert -} catch (e) { - // defer error until textConverted is called -} - -const INTERNALS = Symbol('Body internals') -const CONSUME_BODY = Symbol('consumeBody') - -class Body { - constructor (bodyArg, options = {}) { - const { size = 0, timeout = 0 } = options - const body = bodyArg === undefined || bodyArg === null ? null - : isURLSearchParams(bodyArg) ? Buffer.from(bodyArg.toString()) - : isBlob(bodyArg) ? bodyArg - : Buffer.isBuffer(bodyArg) ? bodyArg - : Object.prototype.toString.call(bodyArg) === '[object ArrayBuffer]' - ? Buffer.from(bodyArg) - : ArrayBuffer.isView(bodyArg) - ? Buffer.from(bodyArg.buffer, bodyArg.byteOffset, bodyArg.byteLength) - : Minipass.isStream(bodyArg) ? bodyArg - : Buffer.from(String(bodyArg)) - - this[INTERNALS] = { - body, - disturbed: false, - error: null, - } - - this.size = size - this.timeout = timeout - - if (Minipass.isStream(body)) { - body.on('error', er => { - const error = er.name === 'AbortError' ? er - : new FetchError(`Invalid response while trying to fetch ${ - this.url}: ${er.message}`, 'system', er) - this[INTERNALS].error = error - }) - } - } - - get body () { - return this[INTERNALS].body - } - - get bodyUsed () { - return this[INTERNALS].disturbed - } - - arrayBuffer () { - return this[CONSUME_BODY]().then(buf => - buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength)) - } - - blob () { - const ct = this.headers && this.headers.get('content-type') || '' - return this[CONSUME_BODY]().then(buf => Object.assign( - new Blob([], { type: ct.toLowerCase() }), - { [BUFFER]: buf } - )) - } - - async json () { - const buf = await this[CONSUME_BODY]() - try { - return JSON.parse(buf.toString()) - } catch (er) { - throw new FetchError( - `invalid json response body at ${this.url} reason: ${er.message}`, - 'invalid-json' - ) - } - } - - text () { - return this[CONSUME_BODY]().then(buf => buf.toString()) - } - - buffer () { - return this[CONSUME_BODY]() - } - - textConverted () { - return this[CONSUME_BODY]().then(buf => convertBody(buf, this.headers)) - } - - [CONSUME_BODY] () { - if (this[INTERNALS].disturbed) { - return Promise.reject(new TypeError(`body used already for: ${ - this.url}`)) - } - - this[INTERNALS].disturbed = true - - if (this[INTERNALS].error) { - return Promise.reject(this[INTERNALS].error) - } - - // body is null - if (this.body === null) { - return Promise.resolve(Buffer.alloc(0)) - } - - if (Buffer.isBuffer(this.body)) { - return Promise.resolve(this.body) - } - - const upstream = isBlob(this.body) ? this.body.stream() : this.body - - /* istanbul ignore if: should never happen */ - if (!Minipass.isStream(upstream)) { - return Promise.resolve(Buffer.alloc(0)) - } - - const stream = this.size && upstream instanceof MinipassSized ? upstream - : !this.size && upstream instanceof Minipass && - !(upstream instanceof MinipassSized) ? upstream - : this.size ? new MinipassSized({ size: this.size }) - : new Minipass() - - // allow timeout on slow response body, but only if the stream is still writable. this - // makes the timeout center on the socket stream from lib/index.js rather than the - // intermediary minipass stream we create to receive the data - const resTimeout = this.timeout && stream.writable ? setTimeout(() => { - stream.emit('error', new FetchError( - `Response timeout while trying to fetch ${ - this.url} (over ${this.timeout}ms)`, 'body-timeout')) - }, this.timeout) : null - - // do not keep the process open just for this timeout, even - // though we expect it'll get cleared eventually. - if (resTimeout && resTimeout.unref) { - resTimeout.unref() - } - - // do the pipe in the promise, because the pipe() can send too much - // data through right away and upset the MP Sized object - return new Promise((resolve) => { - // if the stream is some other kind of stream, then pipe through a MP - // so we can collect it more easily. - if (stream !== upstream) { - upstream.on('error', er => stream.emit('error', er)) - upstream.pipe(stream) - } - resolve() - }).then(() => stream.concat()).then(buf => { - clearTimeout(resTimeout) - return buf - }).catch(er => { - clearTimeout(resTimeout) - // request was aborted, reject with this Error - if (er.name === 'AbortError' || er.name === 'FetchError') { - throw er - } else if (er.name === 'RangeError') { - throw new FetchError(`Could not create Buffer from response body for ${ - this.url}: ${er.message}`, 'system', er) - } else { - // other errors, such as incorrect content-encoding or content-length - throw new FetchError(`Invalid response body while trying to fetch ${ - this.url}: ${er.message}`, 'system', er) - } - }) - } - - static clone (instance) { - if (instance.bodyUsed) { - throw new Error('cannot clone body after it is used') - } - - const body = instance.body - - // check that body is a stream and not form-data object - // NB: can't clone the form-data object without having it as a dependency - if (Minipass.isStream(body) && typeof body.getBoundary !== 'function') { - // create a dedicated tee stream so that we don't lose data - // potentially sitting in the body stream's buffer by writing it - // immediately to p1 and not having it for p2. - const tee = new Minipass() - const p1 = new Minipass() - const p2 = new Minipass() - tee.on('error', er => { - p1.emit('error', er) - p2.emit('error', er) - }) - body.on('error', er => tee.emit('error', er)) - tee.pipe(p1) - tee.pipe(p2) - body.pipe(tee) - // set instance body to one fork, return the other - instance[INTERNALS].body = p1 - return p2 - } else { - return instance.body - } - } - - static extractContentType (body) { - return body === null || body === undefined ? null - : typeof body === 'string' ? 'text/plain;charset=UTF-8' - : isURLSearchParams(body) - ? 'application/x-www-form-urlencoded;charset=UTF-8' - : isBlob(body) ? body.type || null - : Buffer.isBuffer(body) ? null - : Object.prototype.toString.call(body) === '[object ArrayBuffer]' ? null - : ArrayBuffer.isView(body) ? null - : typeof body.getBoundary === 'function' - ? `multipart/form-data;boundary=${body.getBoundary()}` - : Minipass.isStream(body) ? null - : 'text/plain;charset=UTF-8' - } - - static getTotalBytes (instance) { - const { body } = instance - return (body === null || body === undefined) ? 0 - : isBlob(body) ? body.size - : Buffer.isBuffer(body) ? body.length - : body && typeof body.getLengthSync === 'function' && ( - // detect form data input from form-data module - body._lengthRetrievers && - /* istanbul ignore next */ body._lengthRetrievers.length === 0 || // 1.x - body.hasKnownLength && body.hasKnownLength()) // 2.x - ? body.getLengthSync() - : null - } - - static writeToStream (dest, instance) { - const { body } = instance - - if (body === null || body === undefined) { - dest.end() - } else if (Buffer.isBuffer(body) || typeof body === 'string') { - dest.end(body) - } else { - // body is stream or blob - const stream = isBlob(body) ? body.stream() : body - stream.on('error', er => dest.emit('error', er)).pipe(dest) - } - - return dest - } -} - -Object.defineProperties(Body.prototype, { - body: { enumerable: true }, - bodyUsed: { enumerable: true }, - arrayBuffer: { enumerable: true }, - blob: { enumerable: true }, - json: { enumerable: true }, - text: { enumerable: true }, -}) - -const isURLSearchParams = obj => - // Duck-typing as a necessary condition. - (typeof obj !== 'object' || - typeof obj.append !== 'function' || - typeof obj.delete !== 'function' || - typeof obj.get !== 'function' || - typeof obj.getAll !== 'function' || - typeof obj.has !== 'function' || - typeof obj.set !== 'function') ? false - // Brand-checking and more duck-typing as optional condition. - : obj.constructor.name === 'URLSearchParams' || - Object.prototype.toString.call(obj) === '[object URLSearchParams]' || - typeof obj.sort === 'function' - -const isBlob = obj => - typeof obj === 'object' && - typeof obj.arrayBuffer === 'function' && - typeof obj.type === 'string' && - typeof obj.stream === 'function' && - typeof obj.constructor === 'function' && - typeof obj.constructor.name === 'string' && - /^(Blob|File)$/.test(obj.constructor.name) && - /^(Blob|File)$/.test(obj[Symbol.toStringTag]) - -const convertBody = (buffer, headers) => { - /* istanbul ignore if */ - if (typeof convert !== 'function') { - throw new Error('The package `encoding` must be installed to use the textConverted() function') - } - - const ct = headers && headers.get('content-type') - let charset = 'utf-8' - let res - - // header - if (ct) { - res = /charset=([^;]*)/i.exec(ct) - } - - // no charset in content type, peek at response body for at most 1024 bytes - const str = buffer.slice(0, 1024).toString() - - // html5 - if (!res && str) { - res = / this.expect - ? 'max-size' : type - this.message = message - Error.captureStackTrace(this, this.constructor) - } - - get name () { - return 'FetchError' - } - - // don't allow name to be overwritten - set name (n) {} - - get [Symbol.toStringTag] () { - return 'FetchError' - } -} -module.exports = FetchError diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/lib/headers.js b/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/lib/headers.js deleted file mode 100644 index dd6e854d5ba399..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/lib/headers.js +++ /dev/null @@ -1,267 +0,0 @@ -'use strict' -const invalidTokenRegex = /[^^_`a-zA-Z\-0-9!#$%&'*+.|~]/ -const invalidHeaderCharRegex = /[^\t\x20-\x7e\x80-\xff]/ - -const validateName = name => { - name = `${name}` - if (invalidTokenRegex.test(name) || name === '') { - throw new TypeError(`${name} is not a legal HTTP header name`) - } -} - -const validateValue = value => { - value = `${value}` - if (invalidHeaderCharRegex.test(value)) { - throw new TypeError(`${value} is not a legal HTTP header value`) - } -} - -const find = (map, name) => { - name = name.toLowerCase() - for (const key in map) { - if (key.toLowerCase() === name) { - return key - } - } - return undefined -} - -const MAP = Symbol('map') -class Headers { - constructor (init = undefined) { - this[MAP] = Object.create(null) - if (init instanceof Headers) { - const rawHeaders = init.raw() - const headerNames = Object.keys(rawHeaders) - for (const headerName of headerNames) { - for (const value of rawHeaders[headerName]) { - this.append(headerName, value) - } - } - return - } - - // no-op - if (init === undefined || init === null) { - return - } - - if (typeof init === 'object') { - const method = init[Symbol.iterator] - if (method !== null && method !== undefined) { - if (typeof method !== 'function') { - throw new TypeError('Header pairs must be iterable') - } - - // sequence> - // Note: per spec we have to first exhaust the lists then process them - const pairs = [] - for (const pair of init) { - if (typeof pair !== 'object' || - typeof pair[Symbol.iterator] !== 'function') { - throw new TypeError('Each header pair must be iterable') - } - const arrPair = Array.from(pair) - if (arrPair.length !== 2) { - throw new TypeError('Each header pair must be a name/value tuple') - } - pairs.push(arrPair) - } - - for (const pair of pairs) { - this.append(pair[0], pair[1]) - } - } else { - // record - for (const key of Object.keys(init)) { - this.append(key, init[key]) - } - } - } else { - throw new TypeError('Provided initializer must be an object') - } - } - - get (name) { - name = `${name}` - validateName(name) - const key = find(this[MAP], name) - if (key === undefined) { - return null - } - - return this[MAP][key].join(', ') - } - - forEach (callback, thisArg = undefined) { - let pairs = getHeaders(this) - for (let i = 0; i < pairs.length; i++) { - const [name, value] = pairs[i] - callback.call(thisArg, value, name, this) - // refresh in case the callback added more headers - pairs = getHeaders(this) - } - } - - set (name, value) { - name = `${name}` - value = `${value}` - validateName(name) - validateValue(value) - const key = find(this[MAP], name) - this[MAP][key !== undefined ? key : name] = [value] - } - - append (name, value) { - name = `${name}` - value = `${value}` - validateName(name) - validateValue(value) - const key = find(this[MAP], name) - if (key !== undefined) { - this[MAP][key].push(value) - } else { - this[MAP][name] = [value] - } - } - - has (name) { - name = `${name}` - validateName(name) - return find(this[MAP], name) !== undefined - } - - delete (name) { - name = `${name}` - validateName(name) - const key = find(this[MAP], name) - if (key !== undefined) { - delete this[MAP][key] - } - } - - raw () { - return this[MAP] - } - - keys () { - return new HeadersIterator(this, 'key') - } - - values () { - return new HeadersIterator(this, 'value') - } - - [Symbol.iterator] () { - return new HeadersIterator(this, 'key+value') - } - - entries () { - return new HeadersIterator(this, 'key+value') - } - - get [Symbol.toStringTag] () { - return 'Headers' - } - - static exportNodeCompatibleHeaders (headers) { - const obj = Object.assign(Object.create(null), headers[MAP]) - - // http.request() only supports string as Host header. This hack makes - // specifying custom Host header possible. - const hostHeaderKey = find(headers[MAP], 'Host') - if (hostHeaderKey !== undefined) { - obj[hostHeaderKey] = obj[hostHeaderKey][0] - } - - return obj - } - - static createHeadersLenient (obj) { - const headers = new Headers() - for (const name of Object.keys(obj)) { - if (invalidTokenRegex.test(name)) { - continue - } - - if (Array.isArray(obj[name])) { - for (const val of obj[name]) { - if (invalidHeaderCharRegex.test(val)) { - continue - } - - if (headers[MAP][name] === undefined) { - headers[MAP][name] = [val] - } else { - headers[MAP][name].push(val) - } - } - } else if (!invalidHeaderCharRegex.test(obj[name])) { - headers[MAP][name] = [obj[name]] - } - } - return headers - } -} - -Object.defineProperties(Headers.prototype, { - get: { enumerable: true }, - forEach: { enumerable: true }, - set: { enumerable: true }, - append: { enumerable: true }, - has: { enumerable: true }, - delete: { enumerable: true }, - keys: { enumerable: true }, - values: { enumerable: true }, - entries: { enumerable: true }, -}) - -const getHeaders = (headers, kind = 'key+value') => - Object.keys(headers[MAP]).sort().map( - kind === 'key' ? k => k.toLowerCase() - : kind === 'value' ? k => headers[MAP][k].join(', ') - : k => [k.toLowerCase(), headers[MAP][k].join(', ')] - ) - -const INTERNAL = Symbol('internal') - -class HeadersIterator { - constructor (target, kind) { - this[INTERNAL] = { - target, - kind, - index: 0, - } - } - - get [Symbol.toStringTag] () { - return 'HeadersIterator' - } - - next () { - /* istanbul ignore if: should be impossible */ - if (!this || Object.getPrototypeOf(this) !== HeadersIterator.prototype) { - throw new TypeError('Value of `this` is not a HeadersIterator') - } - - const { target, kind, index } = this[INTERNAL] - const values = getHeaders(target, kind) - const len = values.length - if (index >= len) { - return { - value: undefined, - done: true, - } - } - - this[INTERNAL].index++ - - return { value: values[index], done: false } - } -} - -// manually extend because 'extends' requires a ctor -Object.setPrototypeOf(HeadersIterator.prototype, - Object.getPrototypeOf(Object.getPrototypeOf([][Symbol.iterator]()))) - -module.exports = Headers diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/lib/index.js b/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/lib/index.js deleted file mode 100644 index da402161670e65..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/lib/index.js +++ /dev/null @@ -1,377 +0,0 @@ -'use strict' -const { URL } = require('url') -const http = require('http') -const https = require('https') -const zlib = require('minizlib') -const { Minipass } = require('minipass') - -const Body = require('./body.js') -const { writeToStream, getTotalBytes } = Body -const Response = require('./response.js') -const Headers = require('./headers.js') -const { createHeadersLenient } = Headers -const Request = require('./request.js') -const { getNodeRequestOptions } = Request -const FetchError = require('./fetch-error.js') -const AbortError = require('./abort-error.js') - -// XXX this should really be split up and unit-ized for easier testing -// and better DRY implementation of data/http request aborting -const fetch = async (url, opts) => { - if (/^data:/.test(url)) { - const request = new Request(url, opts) - // delay 1 promise tick so that the consumer can abort right away - return Promise.resolve().then(() => new Promise((resolve, reject) => { - let type, data - try { - const { pathname, search } = new URL(url) - const split = pathname.split(',') - if (split.length < 2) { - throw new Error('invalid data: URI') - } - const mime = split.shift() - const base64 = /;base64$/.test(mime) - type = base64 ? mime.slice(0, -1 * ';base64'.length) : mime - const rawData = decodeURIComponent(split.join(',') + search) - data = base64 ? Buffer.from(rawData, 'base64') : Buffer.from(rawData) - } catch (er) { - return reject(new FetchError(`[${request.method}] ${ - request.url} invalid URL, ${er.message}`, 'system', er)) - } - - const { signal } = request - if (signal && signal.aborted) { - return reject(new AbortError('The user aborted a request.')) - } - - const headers = { 'Content-Length': data.length } - if (type) { - headers['Content-Type'] = type - } - return resolve(new Response(data, { headers })) - })) - } - - return new Promise((resolve, reject) => { - // build request object - const request = new Request(url, opts) - let options - try { - options = getNodeRequestOptions(request) - } catch (er) { - return reject(er) - } - - const send = (options.protocol === 'https:' ? https : http).request - const { signal } = request - let response = null - const abort = () => { - const error = new AbortError('The user aborted a request.') - reject(error) - if (Minipass.isStream(request.body) && - typeof request.body.destroy === 'function') { - request.body.destroy(error) - } - if (response && response.body) { - response.body.emit('error', error) - } - } - - if (signal && signal.aborted) { - return abort() - } - - const abortAndFinalize = () => { - abort() - finalize() - } - - const finalize = () => { - req.abort() - if (signal) { - signal.removeEventListener('abort', abortAndFinalize) - } - clearTimeout(reqTimeout) - } - - // send request - const req = send(options) - - if (signal) { - signal.addEventListener('abort', abortAndFinalize) - } - - let reqTimeout = null - if (request.timeout) { - req.once('socket', () => { - reqTimeout = setTimeout(() => { - reject(new FetchError(`network timeout at: ${ - request.url}`, 'request-timeout')) - finalize() - }, request.timeout) - }) - } - - req.on('error', er => { - // if a 'response' event is emitted before the 'error' event, then by the - // time this handler is run it's too late to reject the Promise for the - // response. instead, we forward the error event to the response stream - // so that the error will surface to the user when they try to consume - // the body. this is done as a side effect of aborting the request except - // for in windows, where we must forward the event manually, otherwise - // there is no longer a ref'd socket attached to the request and the - // stream never ends so the event loop runs out of work and the process - // exits without warning. - // coverage skipped here due to the difficulty in testing - // istanbul ignore next - if (req.res) { - req.res.emit('error', er) - } - reject(new FetchError(`request to ${request.url} failed, reason: ${ - er.message}`, 'system', er)) - finalize() - }) - - req.on('response', res => { - clearTimeout(reqTimeout) - - const headers = createHeadersLenient(res.headers) - - // HTTP fetch step 5 - if (fetch.isRedirect(res.statusCode)) { - // HTTP fetch step 5.2 - const location = headers.get('Location') - - // HTTP fetch step 5.3 - let locationURL = null - try { - locationURL = location === null ? null : new URL(location, request.url).toString() - } catch { - // error here can only be invalid URL in Location: header - // do not throw when options.redirect == manual - // let the user extract the errorneous redirect URL - if (request.redirect !== 'manual') { - /* eslint-disable-next-line max-len */ - reject(new FetchError(`uri requested responds with an invalid redirect URL: ${location}`, 'invalid-redirect')) - finalize() - return - } - } - - // HTTP fetch step 5.5 - if (request.redirect === 'error') { - reject(new FetchError('uri requested responds with a redirect, ' + - `redirect mode is set to error: ${request.url}`, 'no-redirect')) - finalize() - return - } else if (request.redirect === 'manual') { - // node-fetch-specific step: make manual redirect a bit easier to - // use by setting the Location header value to the resolved URL. - if (locationURL !== null) { - // handle corrupted header - try { - headers.set('Location', locationURL) - } catch (err) { - /* istanbul ignore next: nodejs server prevent invalid - response headers, we can't test this through normal - request */ - reject(err) - } - } - } else if (request.redirect === 'follow' && locationURL !== null) { - // HTTP-redirect fetch step 5 - if (request.counter >= request.follow) { - reject(new FetchError(`maximum redirect reached at: ${ - request.url}`, 'max-redirect')) - finalize() - return - } - - // HTTP-redirect fetch step 9 - if (res.statusCode !== 303 && - request.body && - getTotalBytes(request) === null) { - reject(new FetchError( - 'Cannot follow redirect with body being a readable stream', - 'unsupported-redirect' - )) - finalize() - return - } - - // Update host due to redirection - request.headers.set('host', (new URL(locationURL)).host) - - // HTTP-redirect fetch step 6 (counter increment) - // Create a new Request object. - const requestOpts = { - headers: new Headers(request.headers), - follow: request.follow, - counter: request.counter + 1, - agent: request.agent, - compress: request.compress, - method: request.method, - body: request.body, - signal: request.signal, - timeout: request.timeout, - } - - // if the redirect is to a new hostname, strip the authorization and cookie headers - const parsedOriginal = new URL(request.url) - const parsedRedirect = new URL(locationURL) - if (parsedOriginal.hostname !== parsedRedirect.hostname) { - requestOpts.headers.delete('authorization') - requestOpts.headers.delete('cookie') - } - - // HTTP-redirect fetch step 11 - if (res.statusCode === 303 || ( - (res.statusCode === 301 || res.statusCode === 302) && - request.method === 'POST' - )) { - requestOpts.method = 'GET' - requestOpts.body = undefined - requestOpts.headers.delete('content-length') - } - - // HTTP-redirect fetch step 15 - resolve(fetch(new Request(locationURL, requestOpts))) - finalize() - return - } - } // end if(isRedirect) - - // prepare response - res.once('end', () => - signal && signal.removeEventListener('abort', abortAndFinalize)) - - const body = new Minipass() - // if an error occurs, either on the response stream itself, on one of the - // decoder streams, or a response length timeout from the Body class, we - // forward the error through to our internal body stream. If we see an - // error event on that, we call finalize to abort the request and ensure - // we don't leave a socket believing a request is in flight. - // this is difficult to test, so lacks specific coverage. - body.on('error', finalize) - // exceedingly rare that the stream would have an error, - // but just in case we proxy it to the stream in use. - res.on('error', /* istanbul ignore next */ er => body.emit('error', er)) - res.on('data', (chunk) => body.write(chunk)) - res.on('end', () => body.end()) - - const responseOptions = { - url: request.url, - status: res.statusCode, - statusText: res.statusMessage, - headers: headers, - size: request.size, - timeout: request.timeout, - counter: request.counter, - trailer: new Promise(resolveTrailer => - res.on('end', () => resolveTrailer(createHeadersLenient(res.trailers)))), - } - - // HTTP-network fetch step 12.1.1.3 - const codings = headers.get('Content-Encoding') - - // HTTP-network fetch step 12.1.1.4: handle content codings - - // in following scenarios we ignore compression support - // 1. compression support is disabled - // 2. HEAD request - // 3. no Content-Encoding header - // 4. no content response (204) - // 5. content not modified response (304) - if (!request.compress || - request.method === 'HEAD' || - codings === null || - res.statusCode === 204 || - res.statusCode === 304) { - response = new Response(body, responseOptions) - resolve(response) - return - } - - // Be less strict when decoding compressed responses, since sometimes - // servers send slightly invalid responses that are still accepted - // by common browsers. - // Always using Z_SYNC_FLUSH is what cURL does. - const zlibOptions = { - flush: zlib.constants.Z_SYNC_FLUSH, - finishFlush: zlib.constants.Z_SYNC_FLUSH, - } - - // for gzip - if (codings === 'gzip' || codings === 'x-gzip') { - const unzip = new zlib.Gunzip(zlibOptions) - response = new Response( - // exceedingly rare that the stream would have an error, - // but just in case we proxy it to the stream in use. - body.on('error', /* istanbul ignore next */ er => unzip.emit('error', er)).pipe(unzip), - responseOptions - ) - resolve(response) - return - } - - // for deflate - if (codings === 'deflate' || codings === 'x-deflate') { - // handle the infamous raw deflate response from old servers - // a hack for old IIS and Apache servers - const raw = res.pipe(new Minipass()) - raw.once('data', chunk => { - // see http://stackoverflow.com/questions/37519828 - const decoder = (chunk[0] & 0x0F) === 0x08 - ? new zlib.Inflate() - : new zlib.InflateRaw() - // exceedingly rare that the stream would have an error, - // but just in case we proxy it to the stream in use. - body.on('error', /* istanbul ignore next */ er => decoder.emit('error', er)).pipe(decoder) - response = new Response(decoder, responseOptions) - resolve(response) - }) - return - } - - // for br - if (codings === 'br') { - // ignoring coverage so tests don't have to fake support (or lack of) for brotli - // istanbul ignore next - try { - var decoder = new zlib.BrotliDecompress() - } catch (err) { - reject(err) - finalize() - return - } - // exceedingly rare that the stream would have an error, - // but just in case we proxy it to the stream in use. - body.on('error', /* istanbul ignore next */ er => decoder.emit('error', er)).pipe(decoder) - response = new Response(decoder, responseOptions) - resolve(response) - return - } - - // otherwise, use response as-is - response = new Response(body, responseOptions) - resolve(response) - }) - - writeToStream(req, request) - }) -} - -module.exports = fetch - -fetch.isRedirect = code => - code === 301 || - code === 302 || - code === 303 || - code === 307 || - code === 308 - -fetch.Headers = Headers -fetch.Request = Request -fetch.Response = Response -fetch.FetchError = FetchError -fetch.AbortError = AbortError diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/lib/request.js b/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/lib/request.js deleted file mode 100644 index 054439e6699107..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/lib/request.js +++ /dev/null @@ -1,282 +0,0 @@ -'use strict' -const { URL } = require('url') -const { Minipass } = require('minipass') -const Headers = require('./headers.js') -const { exportNodeCompatibleHeaders } = Headers -const Body = require('./body.js') -const { clone, extractContentType, getTotalBytes } = Body - -const version = require('../package.json').version -const defaultUserAgent = - `minipass-fetch/${version} (+https://github.com/isaacs/minipass-fetch)` - -const INTERNALS = Symbol('Request internals') - -const isRequest = input => - typeof input === 'object' && typeof input[INTERNALS] === 'object' - -const isAbortSignal = signal => { - const proto = ( - signal - && typeof signal === 'object' - && Object.getPrototypeOf(signal) - ) - return !!(proto && proto.constructor.name === 'AbortSignal') -} - -class Request extends Body { - constructor (input, init = {}) { - const parsedURL = isRequest(input) ? new URL(input.url) - : input && input.href ? new URL(input.href) - : new URL(`${input}`) - - if (isRequest(input)) { - init = { ...input[INTERNALS], ...init } - } else if (!input || typeof input === 'string') { - input = {} - } - - const method = (init.method || input.method || 'GET').toUpperCase() - const isGETHEAD = method === 'GET' || method === 'HEAD' - - if ((init.body !== null && init.body !== undefined || - isRequest(input) && input.body !== null) && isGETHEAD) { - throw new TypeError('Request with GET/HEAD method cannot have body') - } - - const inputBody = init.body !== null && init.body !== undefined ? init.body - : isRequest(input) && input.body !== null ? clone(input) - : null - - super(inputBody, { - timeout: init.timeout || input.timeout || 0, - size: init.size || input.size || 0, - }) - - const headers = new Headers(init.headers || input.headers || {}) - - if (inputBody !== null && inputBody !== undefined && - !headers.has('Content-Type')) { - const contentType = extractContentType(inputBody) - if (contentType) { - headers.append('Content-Type', contentType) - } - } - - const signal = 'signal' in init ? init.signal - : null - - if (signal !== null && signal !== undefined && !isAbortSignal(signal)) { - throw new TypeError('Expected signal must be an instanceof AbortSignal') - } - - // TLS specific options that are handled by node - const { - ca, - cert, - ciphers, - clientCertEngine, - crl, - dhparam, - ecdhCurve, - family, - honorCipherOrder, - key, - passphrase, - pfx, - rejectUnauthorized = process.env.NODE_TLS_REJECT_UNAUTHORIZED !== '0', - secureOptions, - secureProtocol, - servername, - sessionIdContext, - } = init - - this[INTERNALS] = { - method, - redirect: init.redirect || input.redirect || 'follow', - headers, - parsedURL, - signal, - ca, - cert, - ciphers, - clientCertEngine, - crl, - dhparam, - ecdhCurve, - family, - honorCipherOrder, - key, - passphrase, - pfx, - rejectUnauthorized, - secureOptions, - secureProtocol, - servername, - sessionIdContext, - } - - // node-fetch-only options - this.follow = init.follow !== undefined ? init.follow - : input.follow !== undefined ? input.follow - : 20 - this.compress = init.compress !== undefined ? init.compress - : input.compress !== undefined ? input.compress - : true - this.counter = init.counter || input.counter || 0 - this.agent = init.agent || input.agent - } - - get method () { - return this[INTERNALS].method - } - - get url () { - return this[INTERNALS].parsedURL.toString() - } - - get headers () { - return this[INTERNALS].headers - } - - get redirect () { - return this[INTERNALS].redirect - } - - get signal () { - return this[INTERNALS].signal - } - - clone () { - return new Request(this) - } - - get [Symbol.toStringTag] () { - return 'Request' - } - - static getNodeRequestOptions (request) { - const parsedURL = request[INTERNALS].parsedURL - const headers = new Headers(request[INTERNALS].headers) - - // fetch step 1.3 - if (!headers.has('Accept')) { - headers.set('Accept', '*/*') - } - - // Basic fetch - if (!/^https?:$/.test(parsedURL.protocol)) { - throw new TypeError('Only HTTP(S) protocols are supported') - } - - if (request.signal && - Minipass.isStream(request.body) && - typeof request.body.destroy !== 'function') { - throw new Error( - 'Cancellation of streamed requests with AbortSignal is not supported') - } - - // HTTP-network-or-cache fetch steps 2.4-2.7 - const contentLengthValue = - (request.body === null || request.body === undefined) && - /^(POST|PUT)$/i.test(request.method) ? '0' - : request.body !== null && request.body !== undefined - ? getTotalBytes(request) - : null - - if (contentLengthValue) { - headers.set('Content-Length', contentLengthValue + '') - } - - // HTTP-network-or-cache fetch step 2.11 - if (!headers.has('User-Agent')) { - headers.set('User-Agent', defaultUserAgent) - } - - // HTTP-network-or-cache fetch step 2.15 - if (request.compress && !headers.has('Accept-Encoding')) { - headers.set('Accept-Encoding', 'gzip,deflate') - } - - const agent = typeof request.agent === 'function' - ? request.agent(parsedURL) - : request.agent - - if (!headers.has('Connection') && !agent) { - headers.set('Connection', 'close') - } - - // TLS specific options that are handled by node - const { - ca, - cert, - ciphers, - clientCertEngine, - crl, - dhparam, - ecdhCurve, - family, - honorCipherOrder, - key, - passphrase, - pfx, - rejectUnauthorized, - secureOptions, - secureProtocol, - servername, - sessionIdContext, - } = request[INTERNALS] - - // HTTP-network fetch step 4.2 - // chunked encoding is handled by Node.js - - // we cannot spread parsedURL directly, so we have to read each property one-by-one - // and map them to the equivalent https?.request() method options - const urlProps = { - auth: parsedURL.username || parsedURL.password - ? `${parsedURL.username}:${parsedURL.password}` - : '', - host: parsedURL.host, - hostname: parsedURL.hostname, - path: `${parsedURL.pathname}${parsedURL.search}`, - port: parsedURL.port, - protocol: parsedURL.protocol, - } - - return { - ...urlProps, - method: request.method, - headers: exportNodeCompatibleHeaders(headers), - agent, - ca, - cert, - ciphers, - clientCertEngine, - crl, - dhparam, - ecdhCurve, - family, - honorCipherOrder, - key, - passphrase, - pfx, - rejectUnauthorized, - secureOptions, - secureProtocol, - servername, - sessionIdContext, - timeout: request.timeout, - } - } -} - -module.exports = Request - -Object.defineProperties(Request.prototype, { - method: { enumerable: true }, - url: { enumerable: true }, - headers: { enumerable: true }, - redirect: { enumerable: true }, - clone: { enumerable: true }, - signal: { enumerable: true }, -}) diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/lib/response.js b/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/lib/response.js deleted file mode 100644 index 54cb52db3594a7..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/lib/response.js +++ /dev/null @@ -1,90 +0,0 @@ -'use strict' -const http = require('http') -const { STATUS_CODES } = http - -const Headers = require('./headers.js') -const Body = require('./body.js') -const { clone, extractContentType } = Body - -const INTERNALS = Symbol('Response internals') - -class Response extends Body { - constructor (body = null, opts = {}) { - super(body, opts) - - const status = opts.status || 200 - const headers = new Headers(opts.headers) - - if (body !== null && body !== undefined && !headers.has('Content-Type')) { - const contentType = extractContentType(body) - if (contentType) { - headers.append('Content-Type', contentType) - } - } - - this[INTERNALS] = { - url: opts.url, - status, - statusText: opts.statusText || STATUS_CODES[status], - headers, - counter: opts.counter, - trailer: Promise.resolve(opts.trailer || new Headers()), - } - } - - get trailer () { - return this[INTERNALS].trailer - } - - get url () { - return this[INTERNALS].url || '' - } - - get status () { - return this[INTERNALS].status - } - - get ok () { - return this[INTERNALS].status >= 200 && this[INTERNALS].status < 300 - } - - get redirected () { - return this[INTERNALS].counter > 0 - } - - get statusText () { - return this[INTERNALS].statusText - } - - get headers () { - return this[INTERNALS].headers - } - - clone () { - return new Response(clone(this), { - url: this.url, - status: this.status, - statusText: this.statusText, - headers: this.headers, - ok: this.ok, - redirected: this.redirected, - trailer: this.trailer, - }) - } - - get [Symbol.toStringTag] () { - return 'Response' - } -} - -module.exports = Response - -Object.defineProperties(Response.prototype, { - url: { enumerable: true }, - status: { enumerable: true }, - ok: { enumerable: true }, - redirected: { enumerable: true }, - statusText: { enumerable: true }, - headers: { enumerable: true }, - clone: { enumerable: true }, -}) diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/package.json b/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/package.json deleted file mode 100644 index d491a7fba126d0..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/minipass-fetch/package.json +++ /dev/null @@ -1,69 +0,0 @@ -{ - "name": "minipass-fetch", - "version": "3.0.5", - "description": "An implementation of window.fetch in Node.js using Minipass streams", - "license": "MIT", - "main": "lib/index.js", - "scripts": { - "test:tls-fixtures": "./test/fixtures/tls/setup.sh", - "test": "tap", - "snap": "tap", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "postlint": "template-oss-check", - "lintfix": "npm run lint -- --fix", - "posttest": "npm run lint", - "template-oss-apply": "template-oss-apply --force" - }, - "tap": { - "coverage-map": "map.js", - "check-coverage": true, - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - }, - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.22.0", - "@ungap/url-search-params": "^0.2.2", - "abort-controller": "^3.0.0", - "abortcontroller-polyfill": "~1.7.3", - "encoding": "^0.1.13", - "form-data": "^4.0.0", - "nock": "^13.2.4", - "parted": "^0.1.1", - "string-to-arraybuffer": "^1.0.2", - "tap": "^16.0.0" - }, - "dependencies": { - "minipass": "^7.0.3", - "minipass-sized": "^1.0.3", - "minizlib": "^2.1.2" - }, - "optionalDependencies": { - "encoding": "^0.1.13" - }, - "repository": { - "type": "git", - "url": "git+https://github.com/npm/minipass-fetch.git" - }, - "keywords": [ - "fetch", - "minipass", - "node-fetch", - "window.fetch" - ], - "files": [ - "bin/", - "lib/" - ], - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - }, - "author": "GitHub Inc.", - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.22.0", - "publish": "true" - } -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/proc-log/LICENSE b/deps/npm/node_modules/@sigstore/sign/node_modules/proc-log/LICENSE deleted file mode 100644 index 83837797202b70..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/proc-log/LICENSE +++ /dev/null @@ -1,15 +0,0 @@ -The ISC License - -Copyright (c) GitHub, Inc. - -Permission to use, copy, modify, and/or distribute this software for any -purpose with or without fee is hereby granted, provided that the above -copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR -IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/proc-log/lib/index.js b/deps/npm/node_modules/@sigstore/sign/node_modules/proc-log/lib/index.js deleted file mode 100644 index 86d90861078dab..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/proc-log/lib/index.js +++ /dev/null @@ -1,153 +0,0 @@ -const META = Symbol('proc-log.meta') -module.exports = { - META: META, - output: { - LEVELS: [ - 'standard', - 'error', - 'buffer', - 'flush', - ], - KEYS: { - standard: 'standard', - error: 'error', - buffer: 'buffer', - flush: 'flush', - }, - standard: function (...args) { - return process.emit('output', 'standard', ...args) - }, - error: function (...args) { - return process.emit('output', 'error', ...args) - }, - buffer: function (...args) { - return process.emit('output', 'buffer', ...args) - }, - flush: function (...args) { - return process.emit('output', 'flush', ...args) - }, - }, - log: { - LEVELS: [ - 'notice', - 'error', - 'warn', - 'info', - 'verbose', - 'http', - 'silly', - 'timing', - 'pause', - 'resume', - ], - KEYS: { - notice: 'notice', - error: 'error', - warn: 'warn', - info: 'info', - verbose: 'verbose', - http: 'http', - silly: 'silly', - timing: 'timing', - pause: 'pause', - resume: 'resume', - }, - error: function (...args) { - return process.emit('log', 'error', ...args) - }, - notice: function (...args) { - return process.emit('log', 'notice', ...args) - }, - warn: function (...args) { - return process.emit('log', 'warn', ...args) - }, - info: function (...args) { - return process.emit('log', 'info', ...args) - }, - verbose: function (...args) { - return process.emit('log', 'verbose', ...args) - }, - http: function (...args) { - return process.emit('log', 'http', ...args) - }, - silly: function (...args) { - return process.emit('log', 'silly', ...args) - }, - timing: function (...args) { - return process.emit('log', 'timing', ...args) - }, - pause: function () { - return process.emit('log', 'pause') - }, - resume: function () { - return process.emit('log', 'resume') - }, - }, - time: { - LEVELS: [ - 'start', - 'end', - ], - KEYS: { - start: 'start', - end: 'end', - }, - start: function (name, fn) { - process.emit('time', 'start', name) - function end () { - return process.emit('time', 'end', name) - } - if (typeof fn === 'function') { - const res = fn() - if (res && res.finally) { - return res.finally(end) - } - end() - return res - } - return end - }, - end: function (name) { - return process.emit('time', 'end', name) - }, - }, - input: { - LEVELS: [ - 'start', - 'end', - 'read', - ], - KEYS: { - start: 'start', - end: 'end', - read: 'read', - }, - start: function (fn) { - process.emit('input', 'start') - function end () { - return process.emit('input', 'end') - } - if (typeof fn === 'function') { - const res = fn() - if (res && res.finally) { - return res.finally(end) - } - end() - return res - } - return end - }, - end: function () { - return process.emit('input', 'end') - }, - read: function (...args) { - let resolve, reject - const promise = new Promise((_resolve, _reject) => { - resolve = _resolve - reject = _reject - }) - process.emit('input', 'read', resolve, reject, ...args) - return promise - }, - }, -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/proc-log/package.json b/deps/npm/node_modules/@sigstore/sign/node_modules/proc-log/package.json deleted file mode 100644 index 4ab89102ecc9b5..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/proc-log/package.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "name": "proc-log", - "version": "4.2.0", - "files": [ - "bin/", - "lib/" - ], - "main": "lib/index.js", - "description": "just emit 'log' events on the process object", - "repository": { - "type": "git", - "url": "https://github.com/npm/proc-log.git" - }, - "author": "GitHub Inc.", - "license": "ISC", - "scripts": { - "test": "tap", - "snap": "tap", - "posttest": "npm run lint", - "postsnap": "eslint index.js test/*.js --fix", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "postlint": "template-oss-check", - "lintfix": "npm run lint -- --fix", - "template-oss-apply": "template-oss-apply --force" - }, - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.21.3", - "tap": "^16.0.1" - }, - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.21.3", - "publish": true - }, - "tap": { - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - } -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/ssri/LICENSE.md b/deps/npm/node_modules/@sigstore/sign/node_modules/ssri/LICENSE.md deleted file mode 100644 index e335388869f50f..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/ssri/LICENSE.md +++ /dev/null @@ -1,16 +0,0 @@ -ISC License - -Copyright 2021 (c) npm, Inc. - -Permission to use, copy, modify, and/or distribute this software for -any purpose with or without fee is hereby granted, provided that the -above copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE COPYRIGHT HOLDER DISCLAIMS -ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE -COPYRIGHT HOLDER BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR -CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE -USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/ssri/lib/index.js b/deps/npm/node_modules/@sigstore/sign/node_modules/ssri/lib/index.js deleted file mode 100644 index 7d749ed480fb98..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/ssri/lib/index.js +++ /dev/null @@ -1,580 +0,0 @@ -'use strict' - -const crypto = require('crypto') -const { Minipass } = require('minipass') - -const SPEC_ALGORITHMS = ['sha512', 'sha384', 'sha256'] -const DEFAULT_ALGORITHMS = ['sha512'] - -// TODO: this should really be a hardcoded list of algorithms we support, -// rather than [a-z0-9]. -const BASE64_REGEX = /^[a-z0-9+/]+(?:=?=?)$/i -const SRI_REGEX = /^([a-z0-9]+)-([^?]+)([?\S*]*)$/ -const STRICT_SRI_REGEX = /^([a-z0-9]+)-([A-Za-z0-9+/=]{44,88})(\?[\x21-\x7E]*)?$/ -const VCHAR_REGEX = /^[\x21-\x7E]+$/ - -const getOptString = options => options?.length ? `?${options.join('?')}` : '' - -class IntegrityStream extends Minipass { - #emittedIntegrity - #emittedSize - #emittedVerified - - constructor (opts) { - super() - this.size = 0 - this.opts = opts - - // may be overridden later, but set now for class consistency - this.#getOptions() - - // options used for calculating stream. can't be changed. - if (opts?.algorithms) { - this.algorithms = [...opts.algorithms] - } else { - this.algorithms = [...DEFAULT_ALGORITHMS] - } - if (this.algorithm !== null && !this.algorithms.includes(this.algorithm)) { - this.algorithms.push(this.algorithm) - } - - this.hashes = this.algorithms.map(crypto.createHash) - } - - #getOptions () { - // For verification - this.sri = this.opts?.integrity ? parse(this.opts?.integrity, this.opts) : null - this.expectedSize = this.opts?.size - - if (!this.sri) { - this.algorithm = null - } else if (this.sri.isHash) { - this.goodSri = true - this.algorithm = this.sri.algorithm - } else { - this.goodSri = !this.sri.isEmpty() - this.algorithm = this.sri.pickAlgorithm(this.opts) - } - - this.digests = this.goodSri ? this.sri[this.algorithm] : null - this.optString = getOptString(this.opts?.options) - } - - on (ev, handler) { - if (ev === 'size' && this.#emittedSize) { - return handler(this.#emittedSize) - } - - if (ev === 'integrity' && this.#emittedIntegrity) { - return handler(this.#emittedIntegrity) - } - - if (ev === 'verified' && this.#emittedVerified) { - return handler(this.#emittedVerified) - } - - return super.on(ev, handler) - } - - emit (ev, data) { - if (ev === 'end') { - this.#onEnd() - } - return super.emit(ev, data) - } - - write (data) { - this.size += data.length - this.hashes.forEach(h => h.update(data)) - return super.write(data) - } - - #onEnd () { - if (!this.goodSri) { - this.#getOptions() - } - const newSri = parse(this.hashes.map((h, i) => { - return `${this.algorithms[i]}-${h.digest('base64')}${this.optString}` - }).join(' '), this.opts) - // Integrity verification mode - const match = this.goodSri && newSri.match(this.sri, this.opts) - if (typeof this.expectedSize === 'number' && this.size !== this.expectedSize) { - /* eslint-disable-next-line max-len */ - const err = new Error(`stream size mismatch when checking ${this.sri}.\n Wanted: ${this.expectedSize}\n Found: ${this.size}`) - err.code = 'EBADSIZE' - err.found = this.size - err.expected = this.expectedSize - err.sri = this.sri - this.emit('error', err) - } else if (this.sri && !match) { - /* eslint-disable-next-line max-len */ - const err = new Error(`${this.sri} integrity checksum failed when using ${this.algorithm}: wanted ${this.digests} but got ${newSri}. (${this.size} bytes)`) - err.code = 'EINTEGRITY' - err.found = newSri - err.expected = this.digests - err.algorithm = this.algorithm - err.sri = this.sri - this.emit('error', err) - } else { - this.#emittedSize = this.size - this.emit('size', this.size) - this.#emittedIntegrity = newSri - this.emit('integrity', newSri) - if (match) { - this.#emittedVerified = match - this.emit('verified', match) - } - } - } -} - -class Hash { - get isHash () { - return true - } - - constructor (hash, opts) { - const strict = opts?.strict - this.source = hash.trim() - - // set default values so that we make V8 happy to - // always see a familiar object template. - this.digest = '' - this.algorithm = '' - this.options = [] - - // 3.1. Integrity metadata (called "Hash" by ssri) - // https://w3c.github.io/webappsec-subresource-integrity/#integrity-metadata-description - const match = this.source.match( - strict - ? STRICT_SRI_REGEX - : SRI_REGEX - ) - if (!match) { - return - } - if (strict && !SPEC_ALGORITHMS.includes(match[1])) { - return - } - this.algorithm = match[1] - this.digest = match[2] - - const rawOpts = match[3] - if (rawOpts) { - this.options = rawOpts.slice(1).split('?') - } - } - - hexDigest () { - return this.digest && Buffer.from(this.digest, 'base64').toString('hex') - } - - toJSON () { - return this.toString() - } - - match (integrity, opts) { - const other = parse(integrity, opts) - if (!other) { - return false - } - if (other.isIntegrity) { - const algo = other.pickAlgorithm(opts, [this.algorithm]) - - if (!algo) { - return false - } - - const foundHash = other[algo].find(hash => hash.digest === this.digest) - - if (foundHash) { - return foundHash - } - - return false - } - return other.digest === this.digest ? other : false - } - - toString (opts) { - if (opts?.strict) { - // Strict mode enforces the standard as close to the foot of the - // letter as it can. - if (!( - // The spec has very restricted productions for algorithms. - // https://www.w3.org/TR/CSP2/#source-list-syntax - SPEC_ALGORITHMS.includes(this.algorithm) && - // Usually, if someone insists on using a "different" base64, we - // leave it as-is, since there's multiple standards, and the - // specified is not a URL-safe variant. - // https://www.w3.org/TR/CSP2/#base64_value - this.digest.match(BASE64_REGEX) && - // Option syntax is strictly visual chars. - // https://w3c.github.io/webappsec-subresource-integrity/#grammardef-option-expression - // https://tools.ietf.org/html/rfc5234#appendix-B.1 - this.options.every(opt => opt.match(VCHAR_REGEX)) - )) { - return '' - } - } - return `${this.algorithm}-${this.digest}${getOptString(this.options)}` - } -} - -function integrityHashToString (toString, sep, opts, hashes) { - const toStringIsNotEmpty = toString !== '' - - let shouldAddFirstSep = false - let complement = '' - - const lastIndex = hashes.length - 1 - - for (let i = 0; i < lastIndex; i++) { - const hashString = Hash.prototype.toString.call(hashes[i], opts) - - if (hashString) { - shouldAddFirstSep = true - - complement += hashString - complement += sep - } - } - - const finalHashString = Hash.prototype.toString.call(hashes[lastIndex], opts) - - if (finalHashString) { - shouldAddFirstSep = true - complement += finalHashString - } - - if (toStringIsNotEmpty && shouldAddFirstSep) { - return toString + sep + complement - } - - return toString + complement -} - -class Integrity { - get isIntegrity () { - return true - } - - toJSON () { - return this.toString() - } - - isEmpty () { - return Object.keys(this).length === 0 - } - - toString (opts) { - let sep = opts?.sep || ' ' - let toString = '' - - if (opts?.strict) { - // Entries must be separated by whitespace, according to spec. - sep = sep.replace(/\S+/g, ' ') - - for (const hash of SPEC_ALGORITHMS) { - if (this[hash]) { - toString = integrityHashToString(toString, sep, opts, this[hash]) - } - } - } else { - for (const hash of Object.keys(this)) { - toString = integrityHashToString(toString, sep, opts, this[hash]) - } - } - - return toString - } - - concat (integrity, opts) { - const other = typeof integrity === 'string' - ? integrity - : stringify(integrity, opts) - return parse(`${this.toString(opts)} ${other}`, opts) - } - - hexDigest () { - return parse(this, { single: true }).hexDigest() - } - - // add additional hashes to an integrity value, but prevent - // *changing* an existing integrity hash. - merge (integrity, opts) { - const other = parse(integrity, opts) - for (const algo in other) { - if (this[algo]) { - if (!this[algo].find(hash => - other[algo].find(otherhash => - hash.digest === otherhash.digest))) { - throw new Error('hashes do not match, cannot update integrity') - } - } else { - this[algo] = other[algo] - } - } - } - - match (integrity, opts) { - const other = parse(integrity, opts) - if (!other) { - return false - } - const algo = other.pickAlgorithm(opts, Object.keys(this)) - return ( - !!algo && - this[algo] && - other[algo] && - this[algo].find(hash => - other[algo].find(otherhash => - hash.digest === otherhash.digest - ) - ) - ) || false - } - - // Pick the highest priority algorithm present, optionally also limited to a - // set of hashes found in another integrity. When limiting it may return - // nothing. - pickAlgorithm (opts, hashes) { - const pickAlgorithm = opts?.pickAlgorithm || getPrioritizedHash - const keys = Object.keys(this).filter(k => { - if (hashes?.length) { - return hashes.includes(k) - } - return true - }) - if (keys.length) { - return keys.reduce((acc, algo) => pickAlgorithm(acc, algo) || acc) - } - // no intersection between this and hashes, - return null - } -} - -module.exports.parse = parse -function parse (sri, opts) { - if (!sri) { - return null - } - if (typeof sri === 'string') { - return _parse(sri, opts) - } else if (sri.algorithm && sri.digest) { - const fullSri = new Integrity() - fullSri[sri.algorithm] = [sri] - return _parse(stringify(fullSri, opts), opts) - } else { - return _parse(stringify(sri, opts), opts) - } -} - -function _parse (integrity, opts) { - // 3.4.3. Parse metadata - // https://w3c.github.io/webappsec-subresource-integrity/#parse-metadata - if (opts?.single) { - return new Hash(integrity, opts) - } - const hashes = integrity.trim().split(/\s+/).reduce((acc, string) => { - const hash = new Hash(string, opts) - if (hash.algorithm && hash.digest) { - const algo = hash.algorithm - if (!acc[algo]) { - acc[algo] = [] - } - acc[algo].push(hash) - } - return acc - }, new Integrity()) - return hashes.isEmpty() ? null : hashes -} - -module.exports.stringify = stringify -function stringify (obj, opts) { - if (obj.algorithm && obj.digest) { - return Hash.prototype.toString.call(obj, opts) - } else if (typeof obj === 'string') { - return stringify(parse(obj, opts), opts) - } else { - return Integrity.prototype.toString.call(obj, opts) - } -} - -module.exports.fromHex = fromHex -function fromHex (hexDigest, algorithm, opts) { - const optString = getOptString(opts?.options) - return parse( - `${algorithm}-${ - Buffer.from(hexDigest, 'hex').toString('base64') - }${optString}`, opts - ) -} - -module.exports.fromData = fromData -function fromData (data, opts) { - const algorithms = opts?.algorithms || [...DEFAULT_ALGORITHMS] - const optString = getOptString(opts?.options) - return algorithms.reduce((acc, algo) => { - const digest = crypto.createHash(algo).update(data).digest('base64') - const hash = new Hash( - `${algo}-${digest}${optString}`, - opts - ) - /* istanbul ignore else - it would be VERY strange if the string we - * just calculated with an algo did not have an algo or digest. - */ - if (hash.algorithm && hash.digest) { - const hashAlgo = hash.algorithm - if (!acc[hashAlgo]) { - acc[hashAlgo] = [] - } - acc[hashAlgo].push(hash) - } - return acc - }, new Integrity()) -} - -module.exports.fromStream = fromStream -function fromStream (stream, opts) { - const istream = integrityStream(opts) - return new Promise((resolve, reject) => { - stream.pipe(istream) - stream.on('error', reject) - istream.on('error', reject) - let sri - istream.on('integrity', s => { - sri = s - }) - istream.on('end', () => resolve(sri)) - istream.resume() - }) -} - -module.exports.checkData = checkData -function checkData (data, sri, opts) { - sri = parse(sri, opts) - if (!sri || !Object.keys(sri).length) { - if (opts?.error) { - throw Object.assign( - new Error('No valid integrity hashes to check against'), { - code: 'EINTEGRITY', - } - ) - } else { - return false - } - } - const algorithm = sri.pickAlgorithm(opts) - const digest = crypto.createHash(algorithm).update(data).digest('base64') - const newSri = parse({ algorithm, digest }) - const match = newSri.match(sri, opts) - opts = opts || {} - if (match || !(opts.error)) { - return match - } else if (typeof opts.size === 'number' && (data.length !== opts.size)) { - /* eslint-disable-next-line max-len */ - const err = new Error(`data size mismatch when checking ${sri}.\n Wanted: ${opts.size}\n Found: ${data.length}`) - err.code = 'EBADSIZE' - err.found = data.length - err.expected = opts.size - err.sri = sri - throw err - } else { - /* eslint-disable-next-line max-len */ - const err = new Error(`Integrity checksum failed when using ${algorithm}: Wanted ${sri}, but got ${newSri}. (${data.length} bytes)`) - err.code = 'EINTEGRITY' - err.found = newSri - err.expected = sri - err.algorithm = algorithm - err.sri = sri - throw err - } -} - -module.exports.checkStream = checkStream -function checkStream (stream, sri, opts) { - opts = opts || Object.create(null) - opts.integrity = sri - sri = parse(sri, opts) - if (!sri || !Object.keys(sri).length) { - return Promise.reject(Object.assign( - new Error('No valid integrity hashes to check against'), { - code: 'EINTEGRITY', - } - )) - } - const checker = integrityStream(opts) - return new Promise((resolve, reject) => { - stream.pipe(checker) - stream.on('error', reject) - checker.on('error', reject) - let verified - checker.on('verified', s => { - verified = s - }) - checker.on('end', () => resolve(verified)) - checker.resume() - }) -} - -module.exports.integrityStream = integrityStream -function integrityStream (opts = Object.create(null)) { - return new IntegrityStream(opts) -} - -module.exports.create = createIntegrity -function createIntegrity (opts) { - const algorithms = opts?.algorithms || [...DEFAULT_ALGORITHMS] - const optString = getOptString(opts?.options) - - const hashes = algorithms.map(crypto.createHash) - - return { - update: function (chunk, enc) { - hashes.forEach(h => h.update(chunk, enc)) - return this - }, - digest: function () { - const integrity = algorithms.reduce((acc, algo) => { - const digest = hashes.shift().digest('base64') - const hash = new Hash( - `${algo}-${digest}${optString}`, - opts - ) - /* istanbul ignore else - it would be VERY strange if the hash we - * just calculated with an algo did not have an algo or digest. - */ - if (hash.algorithm && hash.digest) { - const hashAlgo = hash.algorithm - if (!acc[hashAlgo]) { - acc[hashAlgo] = [] - } - acc[hashAlgo].push(hash) - } - return acc - }, new Integrity()) - - return integrity - }, - } -} - -const NODE_HASHES = crypto.getHashes() - -// This is a Best Effort™ at a reasonable priority for hash algos -const DEFAULT_PRIORITY = [ - 'md5', 'whirlpool', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512', - // TODO - it's unclear _which_ of these Node will actually use as its name - // for the algorithm, so we guesswork it based on the OpenSSL names. - 'sha3', - 'sha3-256', 'sha3-384', 'sha3-512', - 'sha3_256', 'sha3_384', 'sha3_512', -].filter(algo => NODE_HASHES.includes(algo)) - -function getPrioritizedHash (algo1, algo2) { - /* eslint-disable-next-line max-len */ - return DEFAULT_PRIORITY.indexOf(algo1.toLowerCase()) >= DEFAULT_PRIORITY.indexOf(algo2.toLowerCase()) - ? algo1 - : algo2 -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/ssri/package.json b/deps/npm/node_modules/@sigstore/sign/node_modules/ssri/package.json deleted file mode 100644 index 28395414e4643c..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/ssri/package.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "name": "ssri", - "version": "10.0.6", - "description": "Standard Subresource Integrity library -- parses, serializes, generates, and verifies integrity metadata according to the SRI spec.", - "main": "lib/index.js", - "files": [ - "bin/", - "lib/" - ], - "scripts": { - "prerelease": "npm t", - "postrelease": "npm publish", - "posttest": "npm run lint", - "test": "tap", - "coverage": "tap", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "postlint": "template-oss-check", - "template-oss-apply": "template-oss-apply --force", - "lintfix": "npm run lint -- --fix", - "snap": "tap" - }, - "tap": { - "check-coverage": true, - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - }, - "repository": { - "type": "git", - "url": "git+https://github.com/npm/ssri.git" - }, - "keywords": [ - "w3c", - "web", - "security", - "integrity", - "checksum", - "hashing", - "subresource integrity", - "sri", - "sri hash", - "sri string", - "sri generator", - "html" - ], - "author": "GitHub Inc.", - "license": "ISC", - "dependencies": { - "minipass": "^7.0.3" - }, - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.22.0", - "tap": "^16.0.1" - }, - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.22.0", - "publish": "true" - } -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/unique-filename/LICENSE b/deps/npm/node_modules/@sigstore/sign/node_modules/unique-filename/LICENSE deleted file mode 100644 index 69619c125ea7ef..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/unique-filename/LICENSE +++ /dev/null @@ -1,5 +0,0 @@ -Copyright npm, Inc - -Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/unique-filename/lib/index.js b/deps/npm/node_modules/@sigstore/sign/node_modules/unique-filename/lib/index.js deleted file mode 100644 index d067d2e709809a..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/unique-filename/lib/index.js +++ /dev/null @@ -1,7 +0,0 @@ -var path = require('path') - -var uniqueSlug = require('unique-slug') - -module.exports = function (filepath, prefix, uniq) { - return path.join(filepath, (prefix ? prefix + '-' : '') + uniqueSlug(uniq)) -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/unique-filename/package.json b/deps/npm/node_modules/@sigstore/sign/node_modules/unique-filename/package.json deleted file mode 100644 index b2fbf0666489a6..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/unique-filename/package.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "name": "unique-filename", - "version": "3.0.0", - "description": "Generate a unique filename for use in temporary directories or caches.", - "main": "lib/index.js", - "scripts": { - "test": "tap", - "lint": "eslint \"**/*.js\"", - "postlint": "template-oss-check", - "template-oss-apply": "template-oss-apply --force", - "lintfix": "npm run lint -- --fix", - "snap": "tap", - "posttest": "npm run lint" - }, - "repository": { - "type": "git", - "url": "https://github.com/npm/unique-filename.git" - }, - "keywords": [], - "author": "GitHub Inc.", - "license": "ISC", - "bugs": { - "url": "https://github.com/iarna/unique-filename/issues" - }, - "homepage": "https://github.com/iarna/unique-filename", - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.5.1", - "tap": "^16.3.0" - }, - "dependencies": { - "unique-slug": "^4.0.0" - }, - "files": [ - "bin/", - "lib/" - ], - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.5.1" - }, - "tap": { - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - } -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/unique-slug/LICENSE b/deps/npm/node_modules/@sigstore/sign/node_modules/unique-slug/LICENSE deleted file mode 100644 index 7953647e7760b8..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/unique-slug/LICENSE +++ /dev/null @@ -1,15 +0,0 @@ -The ISC License - -Copyright npm, Inc - -Permission to use, copy, modify, and/or distribute this software for any -purpose with or without fee is hereby granted, provided that the above -copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR -IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/unique-slug/lib/index.js b/deps/npm/node_modules/@sigstore/sign/node_modules/unique-slug/lib/index.js deleted file mode 100644 index 1bac84d95d7307..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/unique-slug/lib/index.js +++ /dev/null @@ -1,11 +0,0 @@ -'use strict' -var MurmurHash3 = require('imurmurhash') - -module.exports = function (uniq) { - if (uniq) { - var hash = new MurmurHash3(uniq) - return ('00000000' + hash.result().toString(16)).slice(-8) - } else { - return (Math.random().toString(16) + '0000000').slice(2, 10) - } -} diff --git a/deps/npm/node_modules/@sigstore/sign/node_modules/unique-slug/package.json b/deps/npm/node_modules/@sigstore/sign/node_modules/unique-slug/package.json deleted file mode 100644 index 33732cdbb42859..00000000000000 --- a/deps/npm/node_modules/@sigstore/sign/node_modules/unique-slug/package.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "name": "unique-slug", - "version": "4.0.0", - "description": "Generate a unique character string suitible for use in files and URLs.", - "main": "lib/index.js", - "scripts": { - "test": "tap", - "lint": "eslint \"**/*.js\"", - "postlint": "template-oss-check", - "template-oss-apply": "template-oss-apply --force", - "lintfix": "npm run lint -- --fix", - "snap": "tap", - "posttest": "npm run lint" - }, - "keywords": [], - "author": "GitHub Inc.", - "license": "ISC", - "devDependencies": { - "@npmcli/eslint-config": "^3.1.0", - "@npmcli/template-oss": "4.5.1", - "tap": "^16.3.0" - }, - "repository": { - "type": "git", - "url": "https://github.com/npm/unique-slug.git" - }, - "dependencies": { - "imurmurhash": "^0.1.4" - }, - "files": [ - "bin/", - "lib/" - ], - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.5.1" - }, - "tap": { - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - } -} diff --git a/deps/npm/node_modules/@sigstore/tuf/dist/appdata.js b/deps/npm/node_modules/@sigstore/tuf/dist/appdata.js index c9a8ee92b531eb..06a8143e70da2f 100644 --- a/deps/npm/node_modules/@sigstore/tuf/dist/appdata.js +++ b/deps/npm/node_modules/@sigstore/tuf/dist/appdata.js @@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); -exports.appDataPath = void 0; +exports.appDataPath = appDataPath; /* Copyright 2023 The Sigstore Authors. @@ -41,4 +41,3 @@ function appDataPath(name) { } } } -exports.appDataPath = appDataPath; diff --git a/deps/npm/node_modules/@sigstore/tuf/dist/client.js b/deps/npm/node_modules/@sigstore/tuf/dist/client.js index 2019c1fd30f886..328f49e40dbbd7 100644 --- a/deps/npm/node_modules/@sigstore/tuf/dist/client.js +++ b/deps/npm/node_modules/@sigstore/tuf/dist/client.js @@ -79,7 +79,6 @@ function seedCache({ cachePath, mirrorURL, tufRootPath, forceInit, }) { fs_1.default.copyFileSync(tufRootPath, cachedRootPath); } else { - /* eslint-disable @typescript-eslint/no-var-requires */ const seeds = require('../seeds.json'); const repoSeed = seeds[mirrorURL]; if (!repoSeed) { diff --git a/deps/npm/node_modules/@sigstore/tuf/dist/index.js b/deps/npm/node_modules/@sigstore/tuf/dist/index.js index 678c81d45d21ed..2af5de93ec5d2f 100644 --- a/deps/npm/node_modules/@sigstore/tuf/dist/index.js +++ b/deps/npm/node_modules/@sigstore/tuf/dist/index.js @@ -1,6 +1,8 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.TUFError = exports.initTUF = exports.getTrustedRoot = exports.DEFAULT_MIRROR_URL = void 0; +exports.TUFError = exports.DEFAULT_MIRROR_URL = void 0; +exports.getTrustedRoot = getTrustedRoot; +exports.initTUF = initTUF; /* Copyright 2023 The Sigstore Authors. @@ -31,14 +33,12 @@ options = {}) { const trustedRoot = await client.getTarget(TRUSTED_ROOT_TARGET); return protobuf_specs_1.TrustedRoot.fromJSON(JSON.parse(trustedRoot)); } -exports.getTrustedRoot = getTrustedRoot; async function initTUF( /* istanbul ignore next */ options = {}) { const client = createClient(options); return client.refresh().then(() => client); } -exports.initTUF = initTUF; // Create a TUF client with default options function createClient(options) { /* istanbul ignore next */ diff --git a/deps/npm/node_modules/@sigstore/tuf/dist/target.js b/deps/npm/node_modules/@sigstore/tuf/dist/target.js index 29eaf99a7e721c..5c6675bdfbf5fe 100644 --- a/deps/npm/node_modules/@sigstore/tuf/dist/target.js +++ b/deps/npm/node_modules/@sigstore/tuf/dist/target.js @@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); -exports.readTarget = void 0; +exports.readTarget = readTarget; /* Copyright 2023 The Sigstore Authors. @@ -39,7 +39,6 @@ async function readTarget(tuf, targetPath) { }); }); } -exports.readTarget = readTarget; // Returns the local path to the specified target. If the target is not yet // cached locally, the provided TUF Updater will be used to download and // cache the target. diff --git a/deps/npm/node_modules/@sigstore/tuf/package.json b/deps/npm/node_modules/@sigstore/tuf/package.json index b7fd34ac9674eb..808689dfddf92f 100644 --- a/deps/npm/node_modules/@sigstore/tuf/package.json +++ b/deps/npm/node_modules/@sigstore/tuf/package.json @@ -1,6 +1,6 @@ { "name": "@sigstore/tuf", - "version": "2.3.4", + "version": "3.0.0", "description": "Client for the Sigstore TUF repository", "main": "dist/index.js", "types": "dist/index.d.ts", @@ -28,14 +28,14 @@ }, "devDependencies": { "@sigstore/jest": "^0.0.0", - "@tufjs/repo-mock": "^2.0.1", + "@tufjs/repo-mock": "^3.0.1", "@types/make-fetch-happen": "^10.0.4" }, "dependencies": { "@sigstore/protobuf-specs": "^0.3.2", - "tuf-js": "^2.2.1" + "tuf-js": "^3.0.1" }, "engines": { - "node": "^16.14.0 || >=18.0.0" + "node": "^18.17.0 || >=20.5.0" } } diff --git a/deps/npm/node_modules/@sigstore/tuf/seeds.json b/deps/npm/node_modules/@sigstore/tuf/seeds.json index e8d97d5fa7a672..d1d3c6b5c46040 100644 --- a/deps/npm/node_modules/@sigstore/tuf/seeds.json +++ b/deps/npm/node_modules/@sigstore/tuf/seeds.json @@ -1 +1 @@ -{"https://tuf-repo-cdn.sigstore.dev":{"root.json":"{
	"signed": {
		"_type": "root",
		"spec_version": "1.0",
		"version": 9,
		"expires": "2024-09-12T06:53:10Z",
		"keys": {
			"1e1d65ce98b10addad4764febf7dda2d0436b3d3a3893579c0dddaea20e54849": {
				"keytype": "ecdsa",
				"scheme": "ecdsa-sha2-nistp256",
				"keyid_hash_algorithms": [
					"sha256",
					"sha512"
				],
				"keyval": {
					"public": "-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEzBzVOmHCPojMVLSI364WiiV8NPrD\n6IgRxVliskz/v+y3JER5mcVGcONliDcWMC5J2lfHmjPNPhb4H7xm8LzfSA==\n-----END PUBLIC KEY-----\n"
				}
			},
			"230e212616274a4195cdc28e9fce782c20e6c720f1a811b40f98228376bdd3ac": {
				"keytype": "ecdsa",
				"scheme": "ecdsa-sha2-nistp256",
				"keyid_hash_algorithms": [
					"sha256",
					"sha512"
				],
				"keyval": {
					"public": "-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAELrWvNt94v4R085ELeeCMxHp7PldF\n0/T1GxukUh2ODuggLGJE0pc1e8CSBf6CS91Fwo9FUOuRsjBUld+VqSyCdQ==\n-----END PUBLIC KEY-----\n"
				}
			},
			"3c344aa068fd4cc4e87dc50b612c02431fbc771e95003993683a2b0bf260cf0e": {
				"keytype": "ecdsa",
				"scheme": "ecdsa-sha2-nistp256",
				"keyid_hash_algorithms": [
					"sha256",
					"sha512"
				],
				"keyval": {
					"public": "-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEy8XKsmhBYDI8Jc0GwzBxeKax0cm5\nSTKEU65HPFunUn41sT8pi0FjM4IkHz/YUmwmLUO0Wt7lxhj6BkLIK4qYAw==\n-----END PUBLIC KEY-----\n"
				}
			},
			"923bb39e60dd6fa2c31e6ea55473aa93b64dd4e53e16fbe42f6a207d3f97de2d": {
				"keytype": "ecdsa",
				"scheme": "ecdsa-sha2-nistp256",
				"keyid_hash_algorithms": [
					"sha256",
					"sha512"
				],
				"keyval": {
					"public": "-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEWRiGr5+j+3J5SsH+Ztr5nE2H2wO7\nBV+nO3s93gLca18qTOzHY1oWyAGDykMSsGTUBSt9D+An0KfKsD2mfSM42Q==\n-----END PUBLIC KEY-----\n"
				}
			},
			"e2f59acb9488519407e18cbfc9329510be03c04aca9929d2f0301343fec85523": {
				"keytype": "ecdsa",
				"scheme": "ecdsa-sha2-nistp256",
				"keyid_hash_algorithms": [
					"sha256",
					"sha512"
				],
				"keyval": {
					"public": "-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEinikSsAQmYkNeH5eYq/CnIzLaacO\nxlSaawQDOwqKy/tCqxq5xxPSJc21K4WIhs9GyOkKfzueY3GILzcMJZ4cWw==\n-----END PUBLIC KEY-----\n"
				}
			},
			"ec81669734e017996c5b85f3d02c3de1dd4637a152019fe1af125d2f9368b95e": {
				"keytype": "ecdsa",
				"scheme": "ecdsa-sha2-nistp256",
				"keyid_hash_algorithms": [
					"sha256",
					"sha512"
				],
				"keyval": {
					"public": "-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEEXsz3SZXFb8jMV42j6pJlyjbjR8K\nN3Bwocexq6LMIb5qsWKOQvLN16NUefLc4HswOoumRsVVaajSpQS6fobkRw==\n-----END PUBLIC KEY-----\n"
				}
			},
			"fdfa83a07b5a83589b87ded41f77f39d232ad91f7cce52868dacd06ba089849f": {
				"keytype": "ecdsa",
				"scheme": "ecdsa-sha2-nistp256",
				"keyid_hash_algorithms": [
					"sha256",
					"sha512"
				],
				"keyval": {
					"public": "-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE0ghrh92Lw1Yr3idGV5WqCtMDB8Cx\n+D8hdC4w2ZLNIplVRoVGLskYa3gheMyOjiJ8kPi15aQ2//7P+oj7UvJPGw==\n-----END PUBLIC KEY-----\n"
				}
			}
		},
		"roles": {
			"root": {
				"keyids": [
					"3c344aa068fd4cc4e87dc50b612c02431fbc771e95003993683a2b0bf260cf0e",
					"ec81669734e017996c5b85f3d02c3de1dd4637a152019fe1af125d2f9368b95e",
					"1e1d65ce98b10addad4764febf7dda2d0436b3d3a3893579c0dddaea20e54849",
					"e2f59acb9488519407e18cbfc9329510be03c04aca9929d2f0301343fec85523",
					"fdfa83a07b5a83589b87ded41f77f39d232ad91f7cce52868dacd06ba089849f"
				],
				"threshold": 3
			},
			"snapshot": {
				"keyids": [
					"230e212616274a4195cdc28e9fce782c20e6c720f1a811b40f98228376bdd3ac"
				],
				"threshold": 1
			},
			"targets": {
				"keyids": [
					"3c344aa068fd4cc4e87dc50b612c02431fbc771e95003993683a2b0bf260cf0e",
					"ec81669734e017996c5b85f3d02c3de1dd4637a152019fe1af125d2f9368b95e",
					"1e1d65ce98b10addad4764febf7dda2d0436b3d3a3893579c0dddaea20e54849",
					"e2f59acb9488519407e18cbfc9329510be03c04aca9929d2f0301343fec85523",
					"fdfa83a07b5a83589b87ded41f77f39d232ad91f7cce52868dacd06ba089849f"
				],
				"threshold": 3
			},
			"timestamp": {
				"keyids": [
					"923bb39e60dd6fa2c31e6ea55473aa93b64dd4e53e16fbe42f6a207d3f97de2d"
				],
				"threshold": 1
			}
		},
		"consistent_snapshot": true
	},
	"signatures": [
		{
			"keyid": "ff51e17fcf253119b7033f6f57512631da4a0969442afcf9fc8b141c7f2be99c",
			"sig": "30450221008b78f894c3cfed3bd486379c4e0e0dfb3e7dd8cbc4d5598d2818eea1ba3c7550022029d3d06e89d04d37849985dc46c0e10dc5b1fc68dc70af1ec9910303a1f3ee2f"
		},
		{
			"keyid": "25a0eb450fd3ee2bd79218c963dce3f1cc6118badf251bf149f0bd07d5cabe99",
			"sig": "30450221009e6b90b935e09b837a90d4402eaa27d5ea26eb7891948ba0ed7090841248f436022003dc2251c4d4a7999b91e9ad0868765ae09ac7269279f2a7899bafef7a2d9260"
		},
		{
			"keyid": "f5312f542c21273d9485a49394386c4575804770667f2ddb59b3bf0669fddd2f",
			"sig": "30440220099e907dcf90b7b6e109fd1d6e442006fccbb48894aaaff47ab824b03fb35d0d02202aa0a06c21a4233f37900a48bc8777d3b47f59e3a38616ce631a04df57f96736"
		},
		{
			"keyid": "3c344aa068fd4cc4e87dc50b612c02431fbc771e95003993683a2b0bf260cf0e",
			"sig": "30450221008b78f894c3cfed3bd486379c4e0e0dfb3e7dd8cbc4d5598d2818eea1ba3c7550022029d3d06e89d04d37849985dc46c0e10dc5b1fc68dc70af1ec9910303a1f3ee2f"
		},
		{
			"keyid": "ec81669734e017996c5b85f3d02c3de1dd4637a152019fe1af125d2f9368b95e",
			"sig": "30450221009e6b90b935e09b837a90d4402eaa27d5ea26eb7891948ba0ed7090841248f436022003dc2251c4d4a7999b91e9ad0868765ae09ac7269279f2a7899bafef7a2d9260"
		},
		{
			"keyid": "e2f59acb9488519407e18cbfc9329510be03c04aca9929d2f0301343fec85523",
			"sig": "304502200e5613b901e0f3e08eceabddc73f98b50ddf892e998d0b369c6e3d451ac48875022100940cf92d1f43ee2e5cdbb22572bb52925ed3863a688f7ffdd4bd2e2e56f028b3"
		},
		{
			"keyid": "2e61cd0cbf4a8f45809bda9f7f78c0d33ad11842ff94ae340873e2664dc843de",
			"sig": "304502202cff44f2215d7a47b28b8f5f580c2cfbbd1bfcfcbbe78de323045b2c0badc5e9022100c743949eb3f4ea5a4b9ae27ac6eddea1f0ff9bfd004f8a9a9d18c6e4142b6e75"
		},
		{
			"keyid": "1e1d65ce98b10addad4764febf7dda2d0436b3d3a3893579c0dddaea20e54849",
			"sig": "30440220099e907dcf90b7b6e109fd1d6e442006fccbb48894aaaff47ab824b03fb35d0d02202aa0a06c21a4233f37900a48bc8777d3b47f59e3a38616ce631a04df57f96736"
		},
		{
			"keyid": "fdfa83a07b5a83589b87ded41f77f39d232ad91f7cce52868dacd06ba089849f",
			"sig": "304502202cff44f2215d7a47b28b8f5f580c2cfbbd1bfcfcbbe78de323045b2c0badc5e9022100c743949eb3f4ea5a4b9ae27ac6eddea1f0ff9bfd004f8a9a9d18c6e4142b6e75"
		},
		{
			"keyid": "7f7513b25429a64473e10ce3ad2f3da372bbdd14b65d07bbaf547e7c8bbbe62b",
			"sig": "304502200e5613b901e0f3e08eceabddc73f98b50ddf892e998d0b369c6e3d451ac48875022100940cf92d1f43ee2e5cdbb22572bb52925ed3863a688f7ffdd4bd2e2e56f028b3"
		}
	]
}","targets":{"trusted_root.json":"{
  "mediaType": "application/vnd.dev.sigstore.trustedroot+json;version=0.1",
  "tlogs": [
    {
      "baseUrl": "https://rekor.sigstore.dev",
      "hashAlgorithm": "SHA2_256",
      "publicKey": {
        "rawBytes": "MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE2G2Y+2tabdTV5BcGiBIx0a9fAFwrkBbmLSGtks4L3qX6yYY0zufBnhC8Ur/iy55GhWP/9A/bY2LhC30M9+RYtw==",
        "keyDetails": "PKIX_ECDSA_P256_SHA_256",
        "validFor": {
          "start": "2021-01-12T11:53:27.000Z"
        }
      },
      "logId": {
        "keyId": "wNI9atQGlz+VWfO6LRygH4QUfY/8W4RFwiT5i5WRgB0="
      }
    }
  ],
  "certificateAuthorities": [
    {
      "subject": {
        "organization": "sigstore.dev",
        "commonName": "sigstore"
      },
      "uri": "https://fulcio.sigstore.dev",
      "certChain": {
        "certificates": [
          {
            "rawBytes": "MIIB+DCCAX6gAwIBAgITNVkDZoCiofPDsy7dfm6geLbuhzAKBggqhkjOPQQDAzAqMRUwEwYDVQQKEwxzaWdzdG9yZS5kZXYxETAPBgNVBAMTCHNpZ3N0b3JlMB4XDTIxMDMwNzAzMjAyOVoXDTMxMDIyMzAzMjAyOVowKjEVMBMGA1UEChMMc2lnc3RvcmUuZGV2MREwDwYDVQQDEwhzaWdzdG9yZTB2MBAGByqGSM49AgEGBSuBBAAiA2IABLSyA7Ii5k+pNO8ZEWY0ylemWDowOkNa3kL+GZE5Z5GWehL9/A9bRNA3RbrsZ5i0JcastaRL7Sp5fp/jD5dxqc/UdTVnlvS16an+2Yfswe/QuLolRUCrcOE2+2iA5+tzd6NmMGQwDgYDVR0PAQH/BAQDAgEGMBIGA1UdEwEB/wQIMAYBAf8CAQEwHQYDVR0OBBYEFMjFHQBBmiQpMlEk6w2uSu1KBtPsMB8GA1UdIwQYMBaAFMjFHQBBmiQpMlEk6w2uSu1KBtPsMAoGCCqGSM49BAMDA2gAMGUCMH8liWJfMui6vXXBhjDgY4MwslmN/TJxVe/83WrFomwmNf056y1X48F9c4m3a3ozXAIxAKjRay5/aj/jsKKGIkmQatjI8uupHr/+CxFvaJWmpYqNkLDGRU+9orzh5hI2RrcuaQ=="
          }
        ]
      },
      "validFor": {
        "start": "2021-03-07T03:20:29.000Z",
        "end": "2022-12-31T23:59:59.999Z"
      }
    },
    {
      "subject": {
        "organization": "sigstore.dev",
        "commonName": "sigstore"
      },
      "uri": "https://fulcio.sigstore.dev",
      "certChain": {
        "certificates": [
          {
            "rawBytes": "MIICGjCCAaGgAwIBAgIUALnViVfnU0brJasmRkHrn/UnfaQwCgYIKoZIzj0EAwMwKjEVMBMGA1UEChMMc2lnc3RvcmUuZGV2MREwDwYDVQQDEwhzaWdzdG9yZTAeFw0yMjA0MTMyMDA2MTVaFw0zMTEwMDUxMzU2NThaMDcxFTATBgNVBAoTDHNpZ3N0b3JlLmRldjEeMBwGA1UEAxMVc2lnc3RvcmUtaW50ZXJtZWRpYXRlMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAE8RVS/ysH+NOvuDZyPIZtilgUF9NlarYpAd9HP1vBBH1U5CV77LSS7s0ZiH4nE7Hv7ptS6LvvR/STk798LVgMzLlJ4HeIfF3tHSaexLcYpSASr1kS0N/RgBJz/9jWCiXno3sweTAOBgNVHQ8BAf8EBAMCAQYwEwYDVR0lBAwwCgYIKwYBBQUHAwMwEgYDVR0TAQH/BAgwBgEB/wIBADAdBgNVHQ4EFgQU39Ppz1YkEZb5qNjpKFWixi4YZD8wHwYDVR0jBBgwFoAUWMAeX5FFpWapesyQoZMi0CrFxfowCgYIKoZIzj0EAwMDZwAwZAIwPCsQK4DYiZYDPIaDi5HFKnfxXx6ASSVmERfsynYBiX2X6SJRnZU84/9DZdnFvvxmAjBOt6QpBlc4J/0DxvkTCqpclvziL6BCCPnjdlIB3Pu3BxsPmygUY7Ii2zbdCdliiow="
          },
          {
            "rawBytes": "MIIB9zCCAXygAwIBAgIUALZNAPFdxHPwjeDloDwyYChAO/4wCgYIKoZIzj0EAwMwKjEVMBMGA1UEChMMc2lnc3RvcmUuZGV2MREwDwYDVQQDEwhzaWdzdG9yZTAeFw0yMTEwMDcxMzU2NTlaFw0zMTEwMDUxMzU2NThaMCoxFTATBgNVBAoTDHNpZ3N0b3JlLmRldjERMA8GA1UEAxMIc2lnc3RvcmUwdjAQBgcqhkjOPQIBBgUrgQQAIgNiAAT7XeFT4rb3PQGwS4IajtLk3/OlnpgangaBclYpsYBr5i+4ynB07ceb3LP0OIOZdxexX69c5iVuyJRQ+Hz05yi+UF3uBWAlHpiS5sh0+H2GHE7SXrk1EC5m1Tr19L9gg92jYzBhMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBRYwB5fkUWlZql6zJChkyLQKsXF+jAfBgNVHSMEGDAWgBRYwB5fkUWlZql6zJChkyLQKsXF+jAKBggqhkjOPQQDAwNpADBmAjEAj1nHeXZp+13NWBNa+EDsDP8G1WWg1tCMWP/WHPqpaVo0jhsweNFZgSs0eE7wYI4qAjEA2WB9ot98sIkoF3vZYdd3/VtWB5b9TNMea7Ix/stJ5TfcLLeABLE4BNJOsQ4vnBHJ"
          }
        ]
      },
      "validFor": {
        "start": "2022-04-13T20:06:15.000Z"
      }
    }
  ],
  "ctlogs": [
    {
      "baseUrl": "https://ctfe.sigstore.dev/test",
      "hashAlgorithm": "SHA2_256",
      "publicKey": {
        "rawBytes": "MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEbfwR+RJudXscgRBRpKX1XFDy3PyudDxz/SfnRi1fT8ekpfBd2O1uoz7jr3Z8nKzxA69EUQ+eFCFI3zeubPWU7w==",
        "keyDetails": "PKIX_ECDSA_P256_SHA_256",
        "validFor": {
          "start": "2021-03-14T00:00:00.000Z",
          "end": "2022-10-31T23:59:59.999Z"
        }
      },
      "logId": {
        "keyId": "CGCS8ChS/2hF0dFrJ4ScRWcYrBY9wzjSbea8IgY2b3I="
      }
    },
    {
      "baseUrl": "https://ctfe.sigstore.dev/2022",
      "hashAlgorithm": "SHA2_256",
      "publicKey": {
        "rawBytes": "MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEiPSlFi0CmFTfEjCUqF9HuCEcYXNKAaYalIJmBZ8yyezPjTqhxrKBpMnaocVtLJBI1eM3uXnQzQGAJdJ4gs9Fyw==",
        "keyDetails": "PKIX_ECDSA_P256_SHA_256",
        "validFor": {
          "start": "2022-10-20T00:00:00.000Z"
        }
      },
      "logId": {
        "keyId": "3T0wasbHETJjGR4cmWc3AqJKXrjePK3/h4pygC8p7o4="
      }
    }
  ],
  "timestampAuthorities": [
    {
      "subject": {
        "organization": "GitHub, Inc.",
        "commonName": "Internal Services Root"
      },
      "certChain": {
        "certificates": [
          {
            "rawBytes": "MIIB3DCCAWKgAwIBAgIUchkNsH36Xa04b1LqIc+qr9DVecMwCgYIKoZIzj0EAwMwMjEVMBMGA1UEChMMR2l0SHViLCBJbmMuMRkwFwYDVQQDExBUU0EgaW50ZXJtZWRpYXRlMB4XDTIzMDQxNDAwMDAwMFoXDTI0MDQxMzAwMDAwMFowMjEVMBMGA1UEChMMR2l0SHViLCBJbmMuMRkwFwYDVQQDExBUU0EgVGltZXN0YW1waW5nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEUD5ZNbSqYMd6r8qpOOEX9ibGnZT9GsuXOhr/f8U9FJugBGExKYp40OULS0erjZW7xV9xV52NnJf5OeDq4e5ZKqNWMFQwDgYDVR0PAQH/BAQDAgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMIMAwGA1UdEwEB/wQCMAAwHwYDVR0jBBgwFoAUaW1RudOgVt0leqY0WKYbuPr47wAwCgYIKoZIzj0EAwMDaAAwZQIwbUH9HvD4ejCZJOWQnqAlkqURllvu9M8+VqLbiRK+zSfZCZwsiljRn8MQQRSkXEE5AjEAg+VxqtojfVfu8DhzzhCx9GKETbJHb19iV72mMKUbDAFmzZ6bQ8b54Zb8tidy5aWe"
          },
          {
            "rawBytes": "MIICEDCCAZWgAwIBAgIUX8ZO5QXP7vN4dMQ5e9sU3nub8OgwCgYIKoZIzj0EAwMwODEVMBMGA1UEChMMR2l0SHViLCBJbmMuMR8wHQYDVQQDExZJbnRlcm5hbCBTZXJ2aWNlcyBSb290MB4XDTIzMDQxNDAwMDAwMFoXDTI4MDQxMjAwMDAwMFowMjEVMBMGA1UEChMMR2l0SHViLCBJbmMuMRkwFwYDVQQDExBUU0EgaW50ZXJtZWRpYXRlMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEvMLY/dTVbvIJYANAuszEwJnQE1llftynyMKIMhh48HmqbVr5ygybzsLRLVKbBWOdZ21aeJz+gZiytZetqcyF9WlER5NEMf6JV7ZNojQpxHq4RHGoGSceQv/qvTiZxEDKo2YwZDAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB/wIBADAdBgNVHQ4EFgQUaW1RudOgVt0leqY0WKYbuPr47wAwHwYDVR0jBBgwFoAU9NYYlobnAG4c0/qjxyH/lq/wz+QwCgYIKoZIzj0EAwMDaQAwZgIxAK1B185ygCrIYFlIs3GjswjnwSMG6LY8woLVdakKDZxVa8f8cqMs1DhcxJ0+09w95QIxAO+tBzZk7vjUJ9iJgD4R6ZWTxQWKqNm74jO99o+o9sv4FI/SZTZTFyMn0IJEHdNmyA=="
          },
          {
            "rawBytes": "MIIB9DCCAXqgAwIBAgIUa/JAkdUjK4JUwsqtaiRJGWhqLSowCgYIKoZIzj0EAwMwODEVMBMGA1UEChMMR2l0SHViLCBJbmMuMR8wHQYDVQQDExZJbnRlcm5hbCBTZXJ2aWNlcyBSb290MB4XDTIzMDQxNDAwMDAwMFoXDTMzMDQxMTAwMDAwMFowODEVMBMGA1UEChMMR2l0SHViLCBJbmMuMR8wHQYDVQQDExZJbnRlcm5hbCBTZXJ2aWNlcyBSb290MHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEf9jFAXxz4kx68AHRMOkFBhflDcMTvzaXz4x/FCcXjJ/1qEKon/qPIGnaURskDtyNbNDOpeJTDDFqt48iMPrnzpx6IZwqemfUJN4xBEZfza+pYt/iyod+9tZr20RRWSv/o0UwQzAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB/wIBAjAdBgNVHQ4EFgQU9NYYlobnAG4c0/qjxyH/lq/wz+QwCgYIKoZIzj0EAwMDaAAwZQIxALZLZ8BgRXzKxLMMN9VIlO+e4hrBnNBgF7tz7Hnrowv2NetZErIACKFymBlvWDvtMAIwZO+ki6ssQ1bsZo98O8mEAf2NZ7iiCgDDU0Vwjeco6zyeh0zBTs9/7gV6AHNQ53xD"
          }
        ]
      },
      "validFor": {
        "start": "2023-04-14T00:00:00.000Z"
      }
    }
  ]
}
","registry.npmjs.org%2Fkeys.json":"ewogICAgImtleXMiOiBbCiAgICAgICAgewogICAgICAgICAgICAia2V5SWQiOiAiU0hBMjU2OmpsM2J3c3d1ODBQampva0NnaDBvMnc1YzJVNExoUUFFNTdnajljejFrekEiLAogICAgICAgICAgICAia2V5VXNhZ2UiOiAibnBtOnNpZ25hdHVyZXMiLAogICAgICAgICAgICAicHVibGljS2V5IjogewogICAgICAgICAgICAgICAgInJhd0J5dGVzIjogIk1Ga3dFd1lIS29aSXpqMENBUVlJS29aSXpqMERBUWNEUWdBRTFPbGIzek1BRkZ4WEtIaUlrUU81Y0ozWWhsNWk2VVBwK0lodXRlQkpidUhjQTVVb2dLbzBFV3RsV3dXNktTYUtvVE5FWUw3SmxDUWlWbmtoQmt0VWdnPT0iLAogICAgICAgICAgICAgICAgImtleURldGFpbHMiOiAiUEtJWF9FQ0RTQV9QMjU2X1NIQV8yNTYiLAogICAgICAgICAgICAgICAgInZhbGlkRm9yIjogewogICAgICAgICAgICAgICAgICAgICJzdGFydCI6ICIxOTk5LTAxLTAxVDAwOjAwOjAwLjAwMFoiCiAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICAgImtleUlkIjogIlNIQTI1NjpqbDNid3N3dTgwUGpqb2tDZ2gwbzJ3NWMyVTRMaFFBRTU3Z2o5Y3oxa3pBIiwKICAgICAgICAgICAgImtleVVzYWdlIjogIm5wbTphdHRlc3RhdGlvbnMiLAogICAgICAgICAgICAicHVibGljS2V5IjogewogICAgICAgICAgICAgICAgInJhd0J5dGVzIjogIk1Ga3dFd1lIS29aSXpqMENBUVlJS29aSXpqMERBUWNEUWdBRTFPbGIzek1BRkZ4WEtIaUlrUU81Y0ozWWhsNWk2VVBwK0lodXRlQkpidUhjQTVVb2dLbzBFV3RsV3dXNktTYUtvVE5FWUw3SmxDUWlWbmtoQmt0VWdnPT0iLAogICAgICAgICAgICAgICAgImtleURldGFpbHMiOiAiUEtJWF9FQ0RTQV9QMjU2X1NIQV8yNTYiLAogICAgICAgICAgICAgICAgInZhbGlkRm9yIjogewogICAgICAgICAgICAgICAgICAgICJzdGFydCI6ICIyMDIyLTEyLTAxVDAwOjAwOjAwLjAwMFoiCiAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICB9CiAgICBdCn0K"}}} +{"https://tuf-repo-cdn.sigstore.dev":{"root.json":"{
 "signatures": [
  {
   "keyid": "6f260089d5923daf20166ca657c543af618346ab971884a99962b01988bbe0c3",
   "sig": "30460221008ab1f6f17d4f9e6d7dcf1c88912b6b53cc10388644ae1f09bc37a082cd06003e022100e145ef4c7b782d4e8107b53437e669d0476892ce999903ae33d14448366996e7"
  },
  {
   "keyid": "e71a54d543835ba86adad9460379c7641fb8726d164ea766801a1c522aba7ea2",
   "sig": "3045022100c768b2f86da99569019c160a081da54ae36c34c0a3120d3cb69b53b7d113758e02204f671518f617b20d46537fae6c3b63bae8913f4f1962156105cc4f019ac35c6a"
  },
  {
   "keyid": "22f4caec6d8e6f9555af66b3d4c3cb06a3bb23fdc7e39c916c61f462e6f52b06",
   "sig": "3045022100b4434e6995d368d23e74759acd0cb9013c83a5d3511f0f997ec54c456ae4350a022015b0e265d182d2b61dc74e155d98b3c3fbe564ba05286aa14c8df02c9b756516"
  },
  {
   "keyid": "61643838125b440b40db6942f5cb5a31c0dc04368316eb2aaa58b95904a58222",
   "sig": "304502210082c58411d989eb9f861410857d42381590ec9424dbdaa51e78ed13515431904e0220118185da6a6c2947131c17797e2bb7620ce26e5f301d1ceac5f2a7e58f9dcf2e"
  },
  {
   "keyid": "a687e5bf4fab82b0ee58d46e05c9535145a2c9afb458f43d42b45ca0fdce2a70",
   "sig": "3046022100c78513854cae9c32eaa6b88e18912f48006c2757a258f917312caba75948eb9e022100d9e1b4ce0adfe9fd2e2148d7fa27a2f40ba1122bd69da7612d8d1776b013c91d"
  },
  {
   "keyid": "fdfa83a07b5a83589b87ded41f77f39d232ad91f7cce52868dacd06ba089849f",
   "sig": "3045022056483a2d5d9ea9cec6e11eadfb33c484b614298faca15acf1c431b11ed7f734c022100d0c1d726af92a87e4e66459ca5adf38a05b44e1f94318423f954bae8bca5bb2e"
  },
  {
   "keyid": "e2f59acb9488519407e18cbfc9329510be03c04aca9929d2f0301343fec85523",
   "sig": "3046022100d004de88024c32dc5653a9f4843cfc5215427048ad9600d2cf9c969e6edff3d2022100d9ebb798f5fc66af10899dece014a8628ccf3c5402cd4a4270207472f8f6e712"
  },
  {
   "keyid": "3c344aa068fd4cc4e87dc50b612c02431fbc771e95003993683a2b0bf260cf0e",
   "sig": "3046022100b7b09996c45ca2d4b05603e56baefa29718a0b71147cf8c6e66349baa61477df022100c4da80c717b4fa7bba0fd5c72da8a0499358b01358b2309f41d1456ea1e7e1d9"
  },
  {
   "keyid": "ec81669734e017996c5b85f3d02c3de1dd4637a152019fe1af125d2f9368b95e",
   "sig": "3046022100be9782c30744e411a82fa85b5138d601ce148bc19258aec64e7ec24478f38812022100caef63dcaf1a4b9a500d3bd0e3f164ec18f1b63d7a9460d9acab1066db0f016d"
  },
  {
   "keyid": "1e1d65ce98b10addad4764febf7dda2d0436b3d3a3893579c0dddaea20e54849",
   "sig": "30450220746ec3f8534ce55531d0d01ff64964ef440d1e7d2c4c142409b8e9769f1ada6f022100e3b929fcd93ea18feaa0825887a7210489879a66780c07a83f4bd46e2f09ab3b"
  }
 ],
 "signed": {
  "_type": "root",
  "consistent_snapshot": true,
  "expires": "2025-02-19T08:04:32Z",
  "keys": {
   "22f4caec6d8e6f9555af66b3d4c3cb06a3bb23fdc7e39c916c61f462e6f52b06": {
    "keyid_hash_algorithms": [
     "sha256",
     "sha512"
    ],
    "keytype": "ecdsa",
    "keyval": {
     "public": "-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEzBzVOmHCPojMVLSI364WiiV8NPrD\n6IgRxVliskz/v+y3JER5mcVGcONliDcWMC5J2lfHmjPNPhb4H7xm8LzfSA==\n-----END PUBLIC KEY-----\n"
    },
    "scheme": "ecdsa-sha2-nistp256",
    "x-tuf-on-ci-keyowner": "@santiagotorres"
   },
   "61643838125b440b40db6942f5cb5a31c0dc04368316eb2aaa58b95904a58222": {
    "keyid_hash_algorithms": [
     "sha256",
     "sha512"
    ],
    "keytype": "ecdsa",
    "keyval": {
     "public": "-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEinikSsAQmYkNeH5eYq/CnIzLaacO\nxlSaawQDOwqKy/tCqxq5xxPSJc21K4WIhs9GyOkKfzueY3GILzcMJZ4cWw==\n-----END PUBLIC KEY-----\n"
    },
    "scheme": "ecdsa-sha2-nistp256",
    "x-tuf-on-ci-keyowner": "@bobcallaway"
   },
   "6f260089d5923daf20166ca657c543af618346ab971884a99962b01988bbe0c3": {
    "keyid_hash_algorithms": [
     "sha256",
     "sha512"
    ],
    "keytype": "ecdsa",
    "keyval": {
     "public": "-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEy8XKsmhBYDI8Jc0GwzBxeKax0cm5\nSTKEU65HPFunUn41sT8pi0FjM4IkHz/YUmwmLUO0Wt7lxhj6BkLIK4qYAw==\n-----END PUBLIC KEY-----\n"
    },
    "scheme": "ecdsa-sha2-nistp256",
    "x-tuf-on-ci-keyowner": "@dlorenc"
   },
   "7247f0dbad85b147e1863bade761243cc785dcb7aa410e7105dd3d2b61a36d2c": {
    "keyid_hash_algorithms": [
     "sha256",
     "sha512"
    ],
    "keytype": "ecdsa",
    "keyval": {
     "public": "-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEWRiGr5+j+3J5SsH+Ztr5nE2H2wO7\nBV+nO3s93gLca18qTOzHY1oWyAGDykMSsGTUBSt9D+An0KfKsD2mfSM42Q==\n-----END PUBLIC KEY-----\n"
    },
    "scheme": "ecdsa-sha2-nistp256",
    "x-tuf-on-ci-online-uri": "gcpkms://projects/sigstore-root-signing/locations/global/keyRings/root/cryptoKeys/timestamp"
   },
   "a687e5bf4fab82b0ee58d46e05c9535145a2c9afb458f43d42b45ca0fdce2a70": {
    "keyid_hash_algorithms": [
     "sha256",
     "sha512"
    ],
    "keytype": "ecdsa",
    "keyval": {
     "public": "-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE0ghrh92Lw1Yr3idGV5WqCtMDB8Cx\n+D8hdC4w2ZLNIplVRoVGLskYa3gheMyOjiJ8kPi15aQ2//7P+oj7UvJPGw==\n-----END PUBLIC KEY-----\n"
    },
    "scheme": "ecdsa-sha2-nistp256",
    "x-tuf-on-ci-keyowner": "@joshuagl"
   },
   "e71a54d543835ba86adad9460379c7641fb8726d164ea766801a1c522aba7ea2": {
    "keyid_hash_algorithms": [
     "sha256",
     "sha512"
    ],
    "keytype": "ecdsa",
    "keyval": {
     "public": "-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEEXsz3SZXFb8jMV42j6pJlyjbjR8K\nN3Bwocexq6LMIb5qsWKOQvLN16NUefLc4HswOoumRsVVaajSpQS6fobkRw==\n-----END PUBLIC KEY-----\n"
    },
    "scheme": "ecdsa-sha2-nistp256",
    "x-tuf-on-ci-keyowner": "@mnm678"
   }
  },
  "roles": {
   "root": {
    "keyids": [
     "6f260089d5923daf20166ca657c543af618346ab971884a99962b01988bbe0c3",
     "e71a54d543835ba86adad9460379c7641fb8726d164ea766801a1c522aba7ea2",
     "22f4caec6d8e6f9555af66b3d4c3cb06a3bb23fdc7e39c916c61f462e6f52b06",
     "61643838125b440b40db6942f5cb5a31c0dc04368316eb2aaa58b95904a58222",
     "a687e5bf4fab82b0ee58d46e05c9535145a2c9afb458f43d42b45ca0fdce2a70"
    ],
    "threshold": 3
   },
   "snapshot": {
    "keyids": [
     "7247f0dbad85b147e1863bade761243cc785dcb7aa410e7105dd3d2b61a36d2c"
    ],
    "threshold": 1,
    "x-tuf-on-ci-expiry-period": 3650,
    "x-tuf-on-ci-signing-period": 365
   },
   "targets": {
    "keyids": [
     "6f260089d5923daf20166ca657c543af618346ab971884a99962b01988bbe0c3",
     "e71a54d543835ba86adad9460379c7641fb8726d164ea766801a1c522aba7ea2",
     "22f4caec6d8e6f9555af66b3d4c3cb06a3bb23fdc7e39c916c61f462e6f52b06",
     "61643838125b440b40db6942f5cb5a31c0dc04368316eb2aaa58b95904a58222",
     "a687e5bf4fab82b0ee58d46e05c9535145a2c9afb458f43d42b45ca0fdce2a70"
    ],
    "threshold": 3
   },
   "timestamp": {
    "keyids": [
     "7247f0dbad85b147e1863bade761243cc785dcb7aa410e7105dd3d2b61a36d2c"
    ],
    "threshold": 1,
    "x-tuf-on-ci-expiry-period": 7,
    "x-tuf-on-ci-signing-period": 4
   }
  },
  "spec_version": "1.0",
  "version": 10,
  "x-tuf-on-ci-expiry-period": 182,
  "x-tuf-on-ci-signing-period": 31
 }
}","targets":{"trusted_root.json":"{
  "mediaType": "application/vnd.dev.sigstore.trustedroot+json;version=0.1",
  "tlogs": [
    {
      "baseUrl": "https://rekor.sigstore.dev",
      "hashAlgorithm": "SHA2_256",
      "publicKey": {
        "rawBytes": "MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE2G2Y+2tabdTV5BcGiBIx0a9fAFwrkBbmLSGtks4L3qX6yYY0zufBnhC8Ur/iy55GhWP/9A/bY2LhC30M9+RYtw==",
        "keyDetails": "PKIX_ECDSA_P256_SHA_256",
        "validFor": {
          "start": "2021-01-12T11:53:27.000Z"
        }
      },
      "logId": {
        "keyId": "wNI9atQGlz+VWfO6LRygH4QUfY/8W4RFwiT5i5WRgB0="
      }
    }
  ],
  "certificateAuthorities": [
    {
      "subject": {
        "organization": "sigstore.dev",
        "commonName": "sigstore"
      },
      "uri": "https://fulcio.sigstore.dev",
      "certChain": {
        "certificates": [
          {
            "rawBytes": "MIIB+DCCAX6gAwIBAgITNVkDZoCiofPDsy7dfm6geLbuhzAKBggqhkjOPQQDAzAqMRUwEwYDVQQKEwxzaWdzdG9yZS5kZXYxETAPBgNVBAMTCHNpZ3N0b3JlMB4XDTIxMDMwNzAzMjAyOVoXDTMxMDIyMzAzMjAyOVowKjEVMBMGA1UEChMMc2lnc3RvcmUuZGV2MREwDwYDVQQDEwhzaWdzdG9yZTB2MBAGByqGSM49AgEGBSuBBAAiA2IABLSyA7Ii5k+pNO8ZEWY0ylemWDowOkNa3kL+GZE5Z5GWehL9/A9bRNA3RbrsZ5i0JcastaRL7Sp5fp/jD5dxqc/UdTVnlvS16an+2Yfswe/QuLolRUCrcOE2+2iA5+tzd6NmMGQwDgYDVR0PAQH/BAQDAgEGMBIGA1UdEwEB/wQIMAYBAf8CAQEwHQYDVR0OBBYEFMjFHQBBmiQpMlEk6w2uSu1KBtPsMB8GA1UdIwQYMBaAFMjFHQBBmiQpMlEk6w2uSu1KBtPsMAoGCCqGSM49BAMDA2gAMGUCMH8liWJfMui6vXXBhjDgY4MwslmN/TJxVe/83WrFomwmNf056y1X48F9c4m3a3ozXAIxAKjRay5/aj/jsKKGIkmQatjI8uupHr/+CxFvaJWmpYqNkLDGRU+9orzh5hI2RrcuaQ=="
          }
        ]
      },
      "validFor": {
        "start": "2021-03-07T03:20:29.000Z",
        "end": "2022-12-31T23:59:59.999Z"
      }
    },
    {
      "subject": {
        "organization": "sigstore.dev",
        "commonName": "sigstore"
      },
      "uri": "https://fulcio.sigstore.dev",
      "certChain": {
        "certificates": [
          {
            "rawBytes": "MIICGjCCAaGgAwIBAgIUALnViVfnU0brJasmRkHrn/UnfaQwCgYIKoZIzj0EAwMwKjEVMBMGA1UEChMMc2lnc3RvcmUuZGV2MREwDwYDVQQDEwhzaWdzdG9yZTAeFw0yMjA0MTMyMDA2MTVaFw0zMTEwMDUxMzU2NThaMDcxFTATBgNVBAoTDHNpZ3N0b3JlLmRldjEeMBwGA1UEAxMVc2lnc3RvcmUtaW50ZXJtZWRpYXRlMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAE8RVS/ysH+NOvuDZyPIZtilgUF9NlarYpAd9HP1vBBH1U5CV77LSS7s0ZiH4nE7Hv7ptS6LvvR/STk798LVgMzLlJ4HeIfF3tHSaexLcYpSASr1kS0N/RgBJz/9jWCiXno3sweTAOBgNVHQ8BAf8EBAMCAQYwEwYDVR0lBAwwCgYIKwYBBQUHAwMwEgYDVR0TAQH/BAgwBgEB/wIBADAdBgNVHQ4EFgQU39Ppz1YkEZb5qNjpKFWixi4YZD8wHwYDVR0jBBgwFoAUWMAeX5FFpWapesyQoZMi0CrFxfowCgYIKoZIzj0EAwMDZwAwZAIwPCsQK4DYiZYDPIaDi5HFKnfxXx6ASSVmERfsynYBiX2X6SJRnZU84/9DZdnFvvxmAjBOt6QpBlc4J/0DxvkTCqpclvziL6BCCPnjdlIB3Pu3BxsPmygUY7Ii2zbdCdliiow="
          },
          {
            "rawBytes": "MIIB9zCCAXygAwIBAgIUALZNAPFdxHPwjeDloDwyYChAO/4wCgYIKoZIzj0EAwMwKjEVMBMGA1UEChMMc2lnc3RvcmUuZGV2MREwDwYDVQQDEwhzaWdzdG9yZTAeFw0yMTEwMDcxMzU2NTlaFw0zMTEwMDUxMzU2NThaMCoxFTATBgNVBAoTDHNpZ3N0b3JlLmRldjERMA8GA1UEAxMIc2lnc3RvcmUwdjAQBgcqhkjOPQIBBgUrgQQAIgNiAAT7XeFT4rb3PQGwS4IajtLk3/OlnpgangaBclYpsYBr5i+4ynB07ceb3LP0OIOZdxexX69c5iVuyJRQ+Hz05yi+UF3uBWAlHpiS5sh0+H2GHE7SXrk1EC5m1Tr19L9gg92jYzBhMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBRYwB5fkUWlZql6zJChkyLQKsXF+jAfBgNVHSMEGDAWgBRYwB5fkUWlZql6zJChkyLQKsXF+jAKBggqhkjOPQQDAwNpADBmAjEAj1nHeXZp+13NWBNa+EDsDP8G1WWg1tCMWP/WHPqpaVo0jhsweNFZgSs0eE7wYI4qAjEA2WB9ot98sIkoF3vZYdd3/VtWB5b9TNMea7Ix/stJ5TfcLLeABLE4BNJOsQ4vnBHJ"
          }
        ]
      },
      "validFor": {
        "start": "2022-04-13T20:06:15.000Z"
      }
    }
  ],
  "ctlogs": [
    {
      "baseUrl": "https://ctfe.sigstore.dev/test",
      "hashAlgorithm": "SHA2_256",
      "publicKey": {
        "rawBytes": "MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEbfwR+RJudXscgRBRpKX1XFDy3PyudDxz/SfnRi1fT8ekpfBd2O1uoz7jr3Z8nKzxA69EUQ+eFCFI3zeubPWU7w==",
        "keyDetails": "PKIX_ECDSA_P256_SHA_256",
        "validFor": {
          "start": "2021-03-14T00:00:00.000Z",
          "end": "2022-10-31T23:59:59.999Z"
        }
      },
      "logId": {
        "keyId": "CGCS8ChS/2hF0dFrJ4ScRWcYrBY9wzjSbea8IgY2b3I="
      }
    },
    {
      "baseUrl": "https://ctfe.sigstore.dev/2022",
      "hashAlgorithm": "SHA2_256",
      "publicKey": {
        "rawBytes": "MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEiPSlFi0CmFTfEjCUqF9HuCEcYXNKAaYalIJmBZ8yyezPjTqhxrKBpMnaocVtLJBI1eM3uXnQzQGAJdJ4gs9Fyw==",
        "keyDetails": "PKIX_ECDSA_P256_SHA_256",
        "validFor": {
          "start": "2022-10-20T00:00:00.000Z"
        }
      },
      "logId": {
        "keyId": "3T0wasbHETJjGR4cmWc3AqJKXrjePK3/h4pygC8p7o4="
      }
    }
  ],
  "timestampAuthorities": [
    {
      "subject": {
        "organization": "GitHub, Inc.",
        "commonName": "Internal Services Root"
      },
      "certChain": {
        "certificates": [
          {
            "rawBytes": "MIIB3DCCAWKgAwIBAgIUchkNsH36Xa04b1LqIc+qr9DVecMwCgYIKoZIzj0EAwMwMjEVMBMGA1UEChMMR2l0SHViLCBJbmMuMRkwFwYDVQQDExBUU0EgaW50ZXJtZWRpYXRlMB4XDTIzMDQxNDAwMDAwMFoXDTI0MDQxMzAwMDAwMFowMjEVMBMGA1UEChMMR2l0SHViLCBJbmMuMRkwFwYDVQQDExBUU0EgVGltZXN0YW1waW5nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEUD5ZNbSqYMd6r8qpOOEX9ibGnZT9GsuXOhr/f8U9FJugBGExKYp40OULS0erjZW7xV9xV52NnJf5OeDq4e5ZKqNWMFQwDgYDVR0PAQH/BAQDAgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMIMAwGA1UdEwEB/wQCMAAwHwYDVR0jBBgwFoAUaW1RudOgVt0leqY0WKYbuPr47wAwCgYIKoZIzj0EAwMDaAAwZQIwbUH9HvD4ejCZJOWQnqAlkqURllvu9M8+VqLbiRK+zSfZCZwsiljRn8MQQRSkXEE5AjEAg+VxqtojfVfu8DhzzhCx9GKETbJHb19iV72mMKUbDAFmzZ6bQ8b54Zb8tidy5aWe"
          },
          {
            "rawBytes": "MIICEDCCAZWgAwIBAgIUX8ZO5QXP7vN4dMQ5e9sU3nub8OgwCgYIKoZIzj0EAwMwODEVMBMGA1UEChMMR2l0SHViLCBJbmMuMR8wHQYDVQQDExZJbnRlcm5hbCBTZXJ2aWNlcyBSb290MB4XDTIzMDQxNDAwMDAwMFoXDTI4MDQxMjAwMDAwMFowMjEVMBMGA1UEChMMR2l0SHViLCBJbmMuMRkwFwYDVQQDExBUU0EgaW50ZXJtZWRpYXRlMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEvMLY/dTVbvIJYANAuszEwJnQE1llftynyMKIMhh48HmqbVr5ygybzsLRLVKbBWOdZ21aeJz+gZiytZetqcyF9WlER5NEMf6JV7ZNojQpxHq4RHGoGSceQv/qvTiZxEDKo2YwZDAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB/wIBADAdBgNVHQ4EFgQUaW1RudOgVt0leqY0WKYbuPr47wAwHwYDVR0jBBgwFoAU9NYYlobnAG4c0/qjxyH/lq/wz+QwCgYIKoZIzj0EAwMDaQAwZgIxAK1B185ygCrIYFlIs3GjswjnwSMG6LY8woLVdakKDZxVa8f8cqMs1DhcxJ0+09w95QIxAO+tBzZk7vjUJ9iJgD4R6ZWTxQWKqNm74jO99o+o9sv4FI/SZTZTFyMn0IJEHdNmyA=="
          },
          {
            "rawBytes": "MIIB9DCCAXqgAwIBAgIUa/JAkdUjK4JUwsqtaiRJGWhqLSowCgYIKoZIzj0EAwMwODEVMBMGA1UEChMMR2l0SHViLCBJbmMuMR8wHQYDVQQDExZJbnRlcm5hbCBTZXJ2aWNlcyBSb290MB4XDTIzMDQxNDAwMDAwMFoXDTMzMDQxMTAwMDAwMFowODEVMBMGA1UEChMMR2l0SHViLCBJbmMuMR8wHQYDVQQDExZJbnRlcm5hbCBTZXJ2aWNlcyBSb290MHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEf9jFAXxz4kx68AHRMOkFBhflDcMTvzaXz4x/FCcXjJ/1qEKon/qPIGnaURskDtyNbNDOpeJTDDFqt48iMPrnzpx6IZwqemfUJN4xBEZfza+pYt/iyod+9tZr20RRWSv/o0UwQzAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB/wIBAjAdBgNVHQ4EFgQU9NYYlobnAG4c0/qjxyH/lq/wz+QwCgYIKoZIzj0EAwMDaAAwZQIxALZLZ8BgRXzKxLMMN9VIlO+e4hrBnNBgF7tz7Hnrowv2NetZErIACKFymBlvWDvtMAIwZO+ki6ssQ1bsZo98O8mEAf2NZ7iiCgDDU0Vwjeco6zyeh0zBTs9/7gV6AHNQ53xD"
          }
        ]
      },
      "validFor": {
        "start": "2023-04-14T00:00:00.000Z"
      }
    }
  ]
}
","registry.npmjs.org%2Fkeys.json":"ewogICAgImtleXMiOiBbCiAgICAgICAgewogICAgICAgICAgICAia2V5SWQiOiAiU0hBMjU2OmpsM2J3c3d1ODBQampva0NnaDBvMnc1YzJVNExoUUFFNTdnajljejFrekEiLAogICAgICAgICAgICAia2V5VXNhZ2UiOiAibnBtOnNpZ25hdHVyZXMiLAogICAgICAgICAgICAicHVibGljS2V5IjogewogICAgICAgICAgICAgICAgInJhd0J5dGVzIjogIk1Ga3dFd1lIS29aSXpqMENBUVlJS29aSXpqMERBUWNEUWdBRTFPbGIzek1BRkZ4WEtIaUlrUU81Y0ozWWhsNWk2VVBwK0lodXRlQkpidUhjQTVVb2dLbzBFV3RsV3dXNktTYUtvVE5FWUw3SmxDUWlWbmtoQmt0VWdnPT0iLAogICAgICAgICAgICAgICAgImtleURldGFpbHMiOiAiUEtJWF9FQ0RTQV9QMjU2X1NIQV8yNTYiLAogICAgICAgICAgICAgICAgInZhbGlkRm9yIjogewogICAgICAgICAgICAgICAgICAgICJzdGFydCI6ICIxOTk5LTAxLTAxVDAwOjAwOjAwLjAwMFoiCiAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICAgImtleUlkIjogIlNIQTI1NjpqbDNid3N3dTgwUGpqb2tDZ2gwbzJ3NWMyVTRMaFFBRTU3Z2o5Y3oxa3pBIiwKICAgICAgICAgICAgImtleVVzYWdlIjogIm5wbTphdHRlc3RhdGlvbnMiLAogICAgICAgICAgICAicHVibGljS2V5IjogewogICAgICAgICAgICAgICAgInJhd0J5dGVzIjogIk1Ga3dFd1lIS29aSXpqMENBUVlJS29aSXpqMERBUWNEUWdBRTFPbGIzek1BRkZ4WEtIaUlrUU81Y0ozWWhsNWk2VVBwK0lodXRlQkpidUhjQTVVb2dLbzBFV3RsV3dXNktTYUtvVE5FWUw3SmxDUWlWbmtoQmt0VWdnPT0iLAogICAgICAgICAgICAgICAgImtleURldGFpbHMiOiAiUEtJWF9FQ0RTQV9QMjU2X1NIQV8yNTYiLAogICAgICAgICAgICAgICAgInZhbGlkRm9yIjogewogICAgICAgICAgICAgICAgICAgICJzdGFydCI6ICIyMDIyLTEyLTAxVDAwOjAwOjAwLjAwMFoiCiAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICB9CiAgICBdCn0K"}}} diff --git a/deps/npm/node_modules/ci-info/index.js b/deps/npm/node_modules/ci-info/index.js index 47907264581eb1..9eba6940c4147e 100644 --- a/deps/npm/node_modules/ci-info/index.js +++ b/deps/npm/node_modules/ci-info/index.js @@ -13,6 +13,7 @@ Object.defineProperty(exports, '_vendors', { exports.name = null exports.isPR = null +exports.id = null vendors.forEach(function (vendor) { const envs = Array.isArray(vendor.env) ? vendor.env : [vendor.env] @@ -27,45 +28,23 @@ vendors.forEach(function (vendor) { } exports.name = vendor.name - - switch (typeof vendor.pr) { - case 'string': - // "pr": "CIRRUS_PR" - exports.isPR = !!env[vendor.pr] - break - case 'object': - if ('env' in vendor.pr) { - // "pr": { "env": "BUILDKITE_PULL_REQUEST", "ne": "false" } - exports.isPR = vendor.pr.env in env && env[vendor.pr.env] !== vendor.pr.ne - } else if ('any' in vendor.pr) { - // "pr": { "any": ["ghprbPullId", "CHANGE_ID"] } - exports.isPR = vendor.pr.any.some(function (key) { - return !!env[key] - }) - } else { - // "pr": { "DRONE_BUILD_EVENT": "pull_request" } - exports.isPR = checkEnv(vendor.pr) - } - break - default: - // PR detection not supported for this vendor - exports.isPR = null - } + exports.isPR = checkPR(vendor) + exports.id = vendor.constant }) exports.isCI = !!( env.CI !== 'false' && // Bypass all checks if CI env is explicitly set to 'false' (env.BUILD_ID || // Jenkins, Cloudbees - env.BUILD_NUMBER || // Jenkins, TeamCity - env.CI || // Travis CI, CircleCI, Cirrus CI, Gitlab CI, Appveyor, CodeShip, dsari - env.CI_APP_ID || // Appflow - env.CI_BUILD_ID || // Appflow - env.CI_BUILD_NUMBER || // Appflow - env.CI_NAME || // Codeship and others - env.CONTINUOUS_INTEGRATION || // Travis CI, Cirrus CI - env.RUN_ID || // TaskCluster, dsari - exports.name || - false) + env.BUILD_NUMBER || // Jenkins, TeamCity + env.CI || // Travis CI, CircleCI, Cirrus CI, Gitlab CI, Appveyor, CodeShip, dsari + env.CI_APP_ID || // Appflow + env.CI_BUILD_ID || // Appflow + env.CI_BUILD_NUMBER || // Appflow + env.CI_NAME || // Codeship and others + env.CONTINUOUS_INTEGRATION || // Travis CI, Cirrus CI + env.RUN_ID || // TaskCluster, dsari + exports.name || + false) ) function checkEnv (obj) { @@ -79,12 +58,45 @@ function checkEnv (obj) { return env[obj.env] && env[obj.env].includes(obj.includes) // } } + if ('any' in obj) { return obj.any.some(function (k) { return !!env[k] }) } + return Object.keys(obj).every(function (k) { return env[k] === obj[k] }) } + +function checkPR (vendor) { + switch (typeof vendor.pr) { + case 'string': + // "pr": "CIRRUS_PR" + return !!env[vendor.pr] + case 'object': + if ('env' in vendor.pr) { + if ('any' in vendor.pr) { + // "pr": { "env": "CODEBUILD_WEBHOOK_EVENT", "any": ["PULL_REQUEST_CREATED", "PULL_REQUEST_UPDATED"] } + return vendor.pr.any.some(function (key) { + return env[vendor.pr.env] === key + }) + } else { + // "pr": { "env": "BUILDKITE_PULL_REQUEST", "ne": "false" } + return vendor.pr.env in env && env[vendor.pr.env] !== vendor.pr.ne + } + } else if ('any' in vendor.pr) { + // "pr": { "any": ["ghprbPullId", "CHANGE_ID"] } + return vendor.pr.any.some(function (key) { + return !!env[key] + }) + } else { + // "pr": { "DRONE_BUILD_EVENT": "pull_request" } + return checkEnv(vendor.pr) + } + default: + // PR detection not supported for this vendor + return null + } +} diff --git a/deps/npm/node_modules/ci-info/package.json b/deps/npm/node_modules/ci-info/package.json index 3c6b9e4adac8e8..156329d2ce379c 100644 --- a/deps/npm/node_modules/ci-info/package.json +++ b/deps/npm/node_modules/ci-info/package.json @@ -1,6 +1,6 @@ { "name": "ci-info", - "version": "4.0.0", + "version": "4.1.0", "description": "Get details about the current Continuous Integration environment", "main": "index.js", "typings": "index.d.ts", @@ -9,6 +9,18 @@ "repository": "https://github.com/watson/ci-info.git", "bugs": "https://github.com/watson/ci-info/issues", "homepage": "https://github.com/watson/ci-info", + "contributors": [ + { + "name": "Sibiraj", + "url": "https://github.com/sibiraj-s" + } + ], + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/sibiraj-s" + } + ], "keywords": [ "ci", "continuous", @@ -22,22 +34,16 @@ "index.d.ts", "CHANGELOG.md" ], - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/sibiraj-s" - } - ], "scripts": { "lint:fix": "standard --fix", "test": "standard && node test.js", - "prepare": "husky install" + "prepare": "husky install || true" }, "devDependencies": { "clear-module": "^4.1.2", - "husky": "^8.0.3", - "standard": "^17.1.0", - "tape": "^5.7.0" + "husky": "^9.1.6", + "standard": "^17.1.2", + "tape": "^5.9.0" }, "engines": { "node": ">=8" diff --git a/deps/npm/node_modules/ci-info/vendors.json b/deps/npm/node_modules/ci-info/vendors.json index 6b65e3f9b541f8..64d5924d1a557e 100644 --- a/deps/npm/node_modules/ci-info/vendors.json +++ b/deps/npm/node_modules/ci-info/vendors.json @@ -8,7 +8,11 @@ { "name": "Appcircle", "constant": "APPCIRCLE", - "env": "AC_APPCIRCLE" + "env": "AC_APPCIRCLE", + "pr": { + "env": "AC_GIT_PR", + "ne": "false" + } }, { "name": "AppVeyor", @@ -19,7 +23,15 @@ { "name": "AWS CodeBuild", "constant": "CODEBUILD", - "env": "CODEBUILD_BUILD_ARN" + "env": "CODEBUILD_BUILD_ARN", + "pr": { + "env": "CODEBUILD_WEBHOOK_EVENT", + "any": [ + "PULL_REQUEST_CREATED", + "PULL_REQUEST_UPDATED", + "PULL_REQUEST_REOPENED" + ] + } }, { "name": "Azure Pipelines", diff --git a/deps/npm/node_modules/cross-spawn/lib/enoent.js b/deps/npm/node_modules/cross-spawn/lib/enoent.js index 14df9b623d0a20..da33471369c23f 100644 --- a/deps/npm/node_modules/cross-spawn/lib/enoent.js +++ b/deps/npm/node_modules/cross-spawn/lib/enoent.js @@ -24,7 +24,7 @@ function hookChildProcess(cp, parsed) { // the command exists and emit an "error" instead // See https://github.com/IndigoUnited/node-cross-spawn/issues/16 if (name === 'exit') { - const err = verifyENOENT(arg1, parsed, 'spawn'); + const err = verifyENOENT(arg1, parsed); if (err) { return originalEmit.call(cp, 'error', err); diff --git a/deps/npm/node_modules/cross-spawn/lib/util/escape.js b/deps/npm/node_modules/cross-spawn/lib/util/escape.js index b0bb84c3a14092..7bf2905cd035ad 100644 --- a/deps/npm/node_modules/cross-spawn/lib/util/escape.js +++ b/deps/npm/node_modules/cross-spawn/lib/util/escape.js @@ -15,15 +15,17 @@ function escapeArgument(arg, doubleEscapeMetaChars) { arg = `${arg}`; // Algorithm below is based on https://qntm.org/cmd + // It's slightly altered to disable JS backtracking to avoid hanging on specially crafted input + // Please see https://github.com/moxystudio/node-cross-spawn/pull/160 for more information // Sequence of backslashes followed by a double quote: // double up all the backslashes and escape the double quote - arg = arg.replace(/(\\*)"/g, '$1$1\\"'); + arg = arg.replace(/(?=(\\+?)?)\1"/g, '$1$1\\"'); // Sequence of backslashes followed by the end of the string // (which will become a double quote later): // double up all the backslashes - arg = arg.replace(/(\\*)$/, '$1$1'); + arg = arg.replace(/(?=(\\+?)?)\1$/, '$1$1'); // All other backslashes occur literally diff --git a/deps/npm/node_modules/cross-spawn/package.json b/deps/npm/node_modules/cross-spawn/package.json index 232ff97e04b213..24b2eb4c9900cf 100644 --- a/deps/npm/node_modules/cross-spawn/package.json +++ b/deps/npm/node_modules/cross-spawn/package.json @@ -1,6 +1,6 @@ { "name": "cross-spawn", - "version": "7.0.3", + "version": "7.0.6", "description": "Cross platform child_process#spawn and child_process#spawnSync", "keywords": [ "spawn", @@ -65,7 +65,7 @@ "lint-staged": "^9.2.5", "mkdirp": "^0.5.1", "rimraf": "^3.0.0", - "standard-version": "^7.0.0" + "standard-version": "^9.5.0" }, "engines": { "node": ">= 8" diff --git a/deps/npm/node_modules/debug/node_modules/ms/index.js b/deps/npm/node_modules/debug/node_modules/ms/index.js deleted file mode 100644 index c4498bcc212589..00000000000000 --- a/deps/npm/node_modules/debug/node_modules/ms/index.js +++ /dev/null @@ -1,162 +0,0 @@ -/** - * Helpers. - */ - -var s = 1000; -var m = s * 60; -var h = m * 60; -var d = h * 24; -var w = d * 7; -var y = d * 365.25; - -/** - * Parse or format the given `val`. - * - * Options: - * - * - `long` verbose formatting [false] - * - * @param {String|Number} val - * @param {Object} [options] - * @throws {Error} throw an error if val is not a non-empty string or a number - * @return {String|Number} - * @api public - */ - -module.exports = function(val, options) { - options = options || {}; - var type = typeof val; - if (type === 'string' && val.length > 0) { - return parse(val); - } else if (type === 'number' && isFinite(val)) { - return options.long ? fmtLong(val) : fmtShort(val); - } - throw new Error( - 'val is not a non-empty string or a valid number. val=' + - JSON.stringify(val) - ); -}; - -/** - * Parse the given `str` and return milliseconds. - * - * @param {String} str - * @return {Number} - * @api private - */ - -function parse(str) { - str = String(str); - if (str.length > 100) { - return; - } - var match = /^(-?(?:\d+)?\.?\d+) *(milliseconds?|msecs?|ms|seconds?|secs?|s|minutes?|mins?|m|hours?|hrs?|h|days?|d|weeks?|w|years?|yrs?|y)?$/i.exec( - str - ); - if (!match) { - return; - } - var n = parseFloat(match[1]); - var type = (match[2] || 'ms').toLowerCase(); - switch (type) { - case 'years': - case 'year': - case 'yrs': - case 'yr': - case 'y': - return n * y; - case 'weeks': - case 'week': - case 'w': - return n * w; - case 'days': - case 'day': - case 'd': - return n * d; - case 'hours': - case 'hour': - case 'hrs': - case 'hr': - case 'h': - return n * h; - case 'minutes': - case 'minute': - case 'mins': - case 'min': - case 'm': - return n * m; - case 'seconds': - case 'second': - case 'secs': - case 'sec': - case 's': - return n * s; - case 'milliseconds': - case 'millisecond': - case 'msecs': - case 'msec': - case 'ms': - return n; - default: - return undefined; - } -} - -/** - * Short format for `ms`. - * - * @param {Number} ms - * @return {String} - * @api private - */ - -function fmtShort(ms) { - var msAbs = Math.abs(ms); - if (msAbs >= d) { - return Math.round(ms / d) + 'd'; - } - if (msAbs >= h) { - return Math.round(ms / h) + 'h'; - } - if (msAbs >= m) { - return Math.round(ms / m) + 'm'; - } - if (msAbs >= s) { - return Math.round(ms / s) + 's'; - } - return ms + 'ms'; -} - -/** - * Long format for `ms`. - * - * @param {Number} ms - * @return {String} - * @api private - */ - -function fmtLong(ms) { - var msAbs = Math.abs(ms); - if (msAbs >= d) { - return plural(ms, msAbs, d, 'day'); - } - if (msAbs >= h) { - return plural(ms, msAbs, h, 'hour'); - } - if (msAbs >= m) { - return plural(ms, msAbs, m, 'minute'); - } - if (msAbs >= s) { - return plural(ms, msAbs, s, 'second'); - } - return ms + ' ms'; -} - -/** - * Pluralization helper. - */ - -function plural(ms, msAbs, n, name) { - var isPlural = msAbs >= n * 1.5; - return Math.round(ms / n) + ' ' + name + (isPlural ? 's' : ''); -} diff --git a/deps/npm/node_modules/debug/node_modules/ms/license.md b/deps/npm/node_modules/debug/node_modules/ms/license.md deleted file mode 100644 index 69b61253a38926..00000000000000 --- a/deps/npm/node_modules/debug/node_modules/ms/license.md +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2016 Zeit, Inc. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/deps/npm/node_modules/debug/node_modules/ms/package.json b/deps/npm/node_modules/debug/node_modules/ms/package.json deleted file mode 100644 index eea666e1fb03d6..00000000000000 --- a/deps/npm/node_modules/debug/node_modules/ms/package.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "name": "ms", - "version": "2.1.2", - "description": "Tiny millisecond conversion utility", - "repository": "zeit/ms", - "main": "./index", - "files": [ - "index.js" - ], - "scripts": { - "precommit": "lint-staged", - "lint": "eslint lib/* bin/*", - "test": "mocha tests.js" - }, - "eslintConfig": { - "extends": "eslint:recommended", - "env": { - "node": true, - "es6": true - } - }, - "lint-staged": { - "*.js": [ - "npm run lint", - "prettier --single-quote --write", - "git add" - ] - }, - "license": "MIT", - "devDependencies": { - "eslint": "4.12.1", - "expect.js": "0.3.1", - "husky": "0.14.3", - "lint-staged": "5.0.0", - "mocha": "4.0.1" - } -} diff --git a/deps/npm/node_modules/debug/package.json b/deps/npm/node_modules/debug/package.json index 8eea05520554eb..2f782eb9aef450 100644 --- a/deps/npm/node_modules/debug/package.json +++ b/deps/npm/node_modules/debug/package.json @@ -1,6 +1,6 @@ { "name": "debug", - "version": "4.3.6", + "version": "4.3.7", "repository": { "type": "git", "url": "git://github.com/debug-js/debug.git" @@ -31,7 +31,7 @@ "test:coverage": "cat ./coverage/lcov.info | coveralls" }, "dependencies": { - "ms": "2.1.2" + "ms": "^2.1.3" }, "devDependencies": { "brfs": "^2.0.1", diff --git a/deps/npm/node_modules/hosted-git-info/lib/hosts.js b/deps/npm/node_modules/hosted-git-info/lib/hosts.js index 9a08efd1b2d7e9..2a88e95927772a 100644 --- a/deps/npm/node_modules/hosted-git-info/lib/hosts.js +++ b/deps/npm/node_modules/hosted-git-info/lib/hosts.js @@ -4,7 +4,11 @@ const maybeJoin = (...args) => args.every(arg => arg) ? args.join('') : '' const maybeEncode = (arg) => arg ? encodeURIComponent(arg) : '' -const formatHashFragment = (f) => f.toLowerCase().replace(/^\W+|\/|\W+$/g, '').replace(/\W+/g, '-') +const formatHashFragment = (f) => f.toLowerCase() + .replace(/^\W+/g, '') // strip leading non-characters + .replace(/(? diff --git a/deps/npm/node_modules/hosted-git-info/package.json b/deps/npm/node_modules/hosted-git-info/package.json index 3bb8bcd1f49825..78356159af7723 100644 --- a/deps/npm/node_modules/hosted-git-info/package.json +++ b/deps/npm/node_modules/hosted-git-info/package.json @@ -1,6 +1,6 @@ { "name": "hosted-git-info", - "version": "8.0.0", + "version": "8.0.2", "description": "Provides metadata and conversions from repository urls for GitHub, Bitbucket and GitLab", "main": "./lib/index.js", "repository": { @@ -35,7 +35,7 @@ }, "devDependencies": { "@npmcli/eslint-config": "^5.0.0", - "@npmcli/template-oss": "4.23.3", + "@npmcli/template-oss": "4.23.4", "tap": "^16.0.1" }, "files": [ @@ -55,7 +55,7 @@ }, "templateOSS": { "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.23.3", + "version": "4.23.4", "publish": "true" } } diff --git a/deps/npm/node_modules/init-package-json/lib/default-input.js b/deps/npm/node_modules/init-package-json/lib/default-input.js index 490e83c139887f..0a01bfa05fa45c 100644 --- a/deps/npm/node_modules/init-package-json/lib/default-input.js +++ b/deps/npm/node_modules/init-package-json/lib/default-input.js @@ -199,16 +199,17 @@ if (!package.scripts) { if (!package.repository) { exports.repository = async () => { - const gconf = await fs.readFile('.git/config', 'utf8').catch(() => '') + const gitConfigPath = path.resolve(dirname, '.git', 'config') + const gconf = await fs.readFile(gitConfigPath, 'utf8').catch(() => '') const lines = gconf.split(/\r?\n/) let url const i = lines.indexOf('[remote "origin"]') if (i !== -1) { - url = gconf[i + 1] + url = lines[i + 1] if (!url.match(/^\s*url =/)) { - url = gconf[i + 2] + url = lines[i + 2] } if (!url.match(/^\s*url =/)) { url = null diff --git a/deps/npm/node_modules/init-package-json/package.json b/deps/npm/node_modules/init-package-json/package.json index 21021ab701244b..d1de96476a9f14 100644 --- a/deps/npm/node_modules/init-package-json/package.json +++ b/deps/npm/node_modules/init-package-json/package.json @@ -1,6 +1,6 @@ { "name": "init-package-json", - "version": "7.0.1", + "version": "7.0.2", "main": "lib/init-package-json.js", "scripts": { "test": "tap", diff --git a/deps/npm/node_modules/is-lambda/index.js b/deps/npm/node_modules/is-lambda/index.js deleted file mode 100644 index b245ab1c68df1a..00000000000000 --- a/deps/npm/node_modules/is-lambda/index.js +++ /dev/null @@ -1,6 +0,0 @@ -'use strict' - -module.exports = !!( - (process.env.LAMBDA_TASK_ROOT && process.env.AWS_EXECUTION_ENV) || - false -) diff --git a/deps/npm/node_modules/is-lambda/package.json b/deps/npm/node_modules/is-lambda/package.json deleted file mode 100644 index d8550898b4e4d9..00000000000000 --- a/deps/npm/node_modules/is-lambda/package.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "name": "is-lambda", - "version": "1.0.1", - "description": "Detect if your code is running on an AWS Lambda server", - "main": "index.js", - "dependencies": {}, - "devDependencies": { - "clear-require": "^1.0.1", - "standard": "^10.0.2" - }, - "scripts": { - "test": "standard && node test.js" - }, - "repository": { - "type": "git", - "url": "https://github.com/watson/is-lambda.git" - }, - "keywords": [ - "aws", - "hosting", - "hosted", - "lambda", - "detect" - ], - "author": "Thomas Watson Steen (https://twitter.com/wa7son)", - "license": "MIT", - "bugs": { - "url": "https://github.com/watson/is-lambda/issues" - }, - "homepage": "https://github.com/watson/is-lambda", - "coordinates": [ - 37.3859955, - -122.0838831 - ] -} diff --git a/deps/npm/node_modules/is-lambda/test.js b/deps/npm/node_modules/is-lambda/test.js deleted file mode 100644 index e8e73257ad4ad7..00000000000000 --- a/deps/npm/node_modules/is-lambda/test.js +++ /dev/null @@ -1,16 +0,0 @@ -'use strict' - -var assert = require('assert') -var clearRequire = require('clear-require') - -process.env.AWS_EXECUTION_ENV = 'AWS_Lambda_nodejs6.10' -process.env.LAMBDA_TASK_ROOT = '/var/task' - -var isCI = require('./') -assert(isCI) - -delete process.env.AWS_EXECUTION_ENV - -clearRequire('./') -isCI = require('./') -assert(!isCI) diff --git a/deps/npm/node_modules/libnpmpublish/lib/publish.js b/deps/npm/node_modules/libnpmpublish/lib/publish.js index 93d546efb5f0e1..2bcab4f3ba304e 100644 --- a/deps/npm/node_modules/libnpmpublish/lib/publish.js +++ b/deps/npm/node_modules/libnpmpublish/lib/publish.js @@ -137,7 +137,7 @@ const buildMetadata = async (registry, manifest, tarballData, spec, opts) => { if (provenance === true) { await ensureProvenanceGeneration(registry, spec, opts) - provenanceBundle = await generateProvenance([subject], opts) + provenanceBundle = await generateProvenance([subject], { legacyCompatibility: true, ...opts }) /* eslint-disable-next-line max-len */ log.notice('publish', `Signed provenance statement with source and build information from ${ciInfo.name}`) diff --git a/deps/npm/node_modules/libnpmpublish/package.json b/deps/npm/node_modules/libnpmpublish/package.json index f63d50f4e7b9c1..594f5041480b4a 100644 --- a/deps/npm/node_modules/libnpmpublish/package.json +++ b/deps/npm/node_modules/libnpmpublish/package.json @@ -1,6 +1,6 @@ { "name": "libnpmpublish", - "version": "10.0.0", + "version": "10.0.1", "description": "Programmatic API for the bits behind npm publish and unpublish", "author": "GitHub Inc.", "main": "lib/index.js", @@ -45,7 +45,7 @@ "npm-registry-fetch": "^18.0.1", "proc-log": "^5.0.0", "semver": "^7.3.7", - "sigstore": "^2.2.0", + "sigstore": "^3.0.0", "ssri": "^12.0.0" }, "engines": { diff --git a/deps/npm/node_modules/make-fetch-happen/lib/options.js b/deps/npm/node_modules/make-fetch-happen/lib/options.js index f77511279f831d..db51cc63248176 100644 --- a/deps/npm/node_modules/make-fetch-happen/lib/options.js +++ b/deps/npm/node_modules/make-fetch-happen/lib/options.js @@ -11,7 +11,12 @@ const conditionalHeaders = [ const configureOptions = (opts) => { const { strictSSL, ...options } = { ...opts } options.method = options.method ? options.method.toUpperCase() : 'GET' - options.rejectUnauthorized = strictSSL !== false + + if (strictSSL === undefined || strictSSL === null) { + options.rejectUnauthorized = process.env.NODE_TLS_REJECT_UNAUTHORIZED !== '0' + } else { + options.rejectUnauthorized = strictSSL !== false + } if (!options.retry) { options.retry = { retries: 0 } diff --git a/deps/npm/node_modules/make-fetch-happen/lib/remote.js b/deps/npm/node_modules/make-fetch-happen/lib/remote.js index 8554564074de6e..1d640e5380baaf 100644 --- a/deps/npm/node_modules/make-fetch-happen/lib/remote.js +++ b/deps/npm/node_modules/make-fetch-happen/lib/remote.js @@ -35,7 +35,8 @@ const RETRY_TYPES = [ // following redirects (through the cache if necessary) // and verifying response integrity const remoteFetch = (request, options) => { - const agent = getAgent(request.url, options) + // options.signal is intended for the fetch itself, not the agent. Attaching it to the agent will re-use that signal across multiple requests, which prevents any connections beyond the first one. + const agent = getAgent(request.url, { ...options, signal: undefined }) if (!request.headers.has('connection')) { request.headers.set('connection', agent ? 'keep-alive' : 'close') } diff --git a/deps/npm/node_modules/negotiator/HISTORY.md b/deps/npm/node_modules/make-fetch-happen/node_modules/negotiator/HISTORY.md similarity index 95% rename from deps/npm/node_modules/negotiator/HISTORY.md rename to deps/npm/node_modules/make-fetch-happen/node_modules/negotiator/HISTORY.md index a9a544914c43bb..63d537d3f68114 100644 --- a/deps/npm/node_modules/negotiator/HISTORY.md +++ b/deps/npm/node_modules/make-fetch-happen/node_modules/negotiator/HISTORY.md @@ -1,3 +1,9 @@ +1.0.0 / 2024-08-31 +================== + + * Drop support for node <18 + * Added an option preferred encodings array #59 + 0.6.3 / 2022-01-22 ================== diff --git a/deps/npm/node_modules/negotiator/LICENSE b/deps/npm/node_modules/make-fetch-happen/node_modules/negotiator/LICENSE similarity index 100% rename from deps/npm/node_modules/negotiator/LICENSE rename to deps/npm/node_modules/make-fetch-happen/node_modules/negotiator/LICENSE diff --git a/deps/npm/node_modules/negotiator/index.js b/deps/npm/node_modules/make-fetch-happen/node_modules/negotiator/index.js similarity index 90% rename from deps/npm/node_modules/negotiator/index.js rename to deps/npm/node_modules/make-fetch-happen/node_modules/negotiator/index.js index 4788264b16c9f2..4f51315d6af4bd 100644 --- a/deps/npm/node_modules/negotiator/index.js +++ b/deps/npm/node_modules/make-fetch-happen/node_modules/negotiator/index.js @@ -44,13 +44,14 @@ Negotiator.prototype.charsets = function charsets(available) { return preferredCharsets(this.request.headers['accept-charset'], available); }; -Negotiator.prototype.encoding = function encoding(available) { - var set = this.encodings(available); +Negotiator.prototype.encoding = function encoding(available, opts) { + var set = this.encodings(available, opts); return set && set[0]; }; -Negotiator.prototype.encodings = function encodings(available) { - return preferredEncodings(this.request.headers['accept-encoding'], available); +Negotiator.prototype.encodings = function encodings(available, options) { + var opts = options || {}; + return preferredEncodings(this.request.headers['accept-encoding'], available, opts.preferred); }; Negotiator.prototype.language = function language(available) { diff --git a/deps/npm/node_modules/negotiator/lib/charset.js b/deps/npm/node_modules/make-fetch-happen/node_modules/negotiator/lib/charset.js similarity index 100% rename from deps/npm/node_modules/negotiator/lib/charset.js rename to deps/npm/node_modules/make-fetch-happen/node_modules/negotiator/lib/charset.js diff --git a/deps/npm/node_modules/negotiator/lib/encoding.js b/deps/npm/node_modules/make-fetch-happen/node_modules/negotiator/lib/encoding.js similarity index 79% rename from deps/npm/node_modules/negotiator/lib/encoding.js rename to deps/npm/node_modules/make-fetch-happen/node_modules/negotiator/lib/encoding.js index 8432cd77b8a969..9ebb633d677433 100644 --- a/deps/npm/node_modules/negotiator/lib/encoding.js +++ b/deps/npm/node_modules/make-fetch-happen/node_modules/negotiator/lib/encoding.js @@ -96,7 +96,7 @@ function parseEncoding(str, i) { */ function getEncodingPriority(encoding, accepted, index) { - var priority = {o: -1, q: 0, s: 0}; + var priority = {encoding: encoding, o: -1, q: 0, s: 0}; for (var i = 0; i < accepted.length; i++) { var spec = specify(encoding, accepted[i], index); @@ -123,6 +123,7 @@ function specify(encoding, spec, index) { } return { + encoding: encoding, i: index, o: spec.i, q: spec.q, @@ -135,14 +136,34 @@ function specify(encoding, spec, index) { * @public */ -function preferredEncodings(accept, provided) { +function preferredEncodings(accept, provided, preferred) { var accepts = parseAcceptEncoding(accept || ''); + var comparator = preferred ? function comparator (a, b) { + if (a.q !== b.q) { + return b.q - a.q // higher quality first + } + + var aPreferred = preferred.indexOf(a.encoding) + var bPreferred = preferred.indexOf(b.encoding) + + if (aPreferred === -1 && bPreferred === -1) { + // consider the original specifity/order + return (b.s - a.s) || (a.o - b.o) || (a.i - b.i) + } + + if (aPreferred !== -1 && bPreferred !== -1) { + return aPreferred - bPreferred // consider the preferred order + } + + return aPreferred === -1 ? 1 : -1 // preferred first + } : compareSpecs; + if (!provided) { // sorted list of all encodings return accepts .filter(isQuality) - .sort(compareSpecs) + .sort(comparator) .map(getFullEncoding); } @@ -151,7 +172,7 @@ function preferredEncodings(accept, provided) { }); // sorted list of accepted encodings - return priorities.filter(isQuality).sort(compareSpecs).map(function getEncoding(priority) { + return priorities.filter(isQuality).sort(comparator).map(function getEncoding(priority) { return provided[priorities.indexOf(priority)]; }); } @@ -162,7 +183,7 @@ function preferredEncodings(accept, provided) { */ function compareSpecs(a, b) { - return (b.q - a.q) || (b.s - a.s) || (a.o - b.o) || (a.i - b.i) || 0; + return (b.q - a.q) || (b.s - a.s) || (a.o - b.o) || (a.i - b.i); } /** diff --git a/deps/npm/node_modules/negotiator/lib/language.js b/deps/npm/node_modules/make-fetch-happen/node_modules/negotiator/lib/language.js similarity index 100% rename from deps/npm/node_modules/negotiator/lib/language.js rename to deps/npm/node_modules/make-fetch-happen/node_modules/negotiator/lib/language.js diff --git a/deps/npm/node_modules/negotiator/lib/mediaType.js b/deps/npm/node_modules/make-fetch-happen/node_modules/negotiator/lib/mediaType.js similarity index 98% rename from deps/npm/node_modules/negotiator/lib/mediaType.js rename to deps/npm/node_modules/make-fetch-happen/node_modules/negotiator/lib/mediaType.js index 67309dd75f1b62..8e402ea88394c0 100644 --- a/deps/npm/node_modules/negotiator/lib/mediaType.js +++ b/deps/npm/node_modules/make-fetch-happen/node_modules/negotiator/lib/mediaType.js @@ -69,7 +69,7 @@ function parseMediaType(str, i) { // get the value, unwrapping quotes var value = val && val[0] === '"' && val[val.length - 1] === '"' - ? val.substr(1, val.length - 2) + ? val.slice(1, -1) : val; if (key === 'q') { @@ -238,8 +238,8 @@ function splitKeyValuePair(str) { if (index === -1) { key = str; } else { - key = str.substr(0, index); - val = str.substr(index + 1); + key = str.slice(0, index); + val = str.slice(index + 1); } return [key, val]; diff --git a/deps/npm/node_modules/negotiator/package.json b/deps/npm/node_modules/make-fetch-happen/node_modules/negotiator/package.json similarity index 89% rename from deps/npm/node_modules/negotiator/package.json rename to deps/npm/node_modules/make-fetch-happen/node_modules/negotiator/package.json index 297635f6d34177..e4bdc1ef4f7481 100644 --- a/deps/npm/node_modules/negotiator/package.json +++ b/deps/npm/node_modules/make-fetch-happen/node_modules/negotiator/package.json @@ -1,7 +1,7 @@ { "name": "negotiator", "description": "HTTP content negotiation", - "version": "0.6.3", + "version": "1.0.0", "contributors": [ "Douglas Christopher Wilson ", "Federico Romero ", @@ -36,6 +36,7 @@ "scripts": { "lint": "eslint .", "test": "mocha --reporter spec --check-leaks --bail test/", + "test:debug": "mocha --reporter spec --check-leaks --inspect --inspect-brk test/", "test-ci": "nyc --reporter=lcov --reporter=text npm test", "test-cov": "nyc --reporter=html --reporter=text npm test" } diff --git a/deps/npm/node_modules/make-fetch-happen/package.json b/deps/npm/node_modules/make-fetch-happen/package.json index 0868ff6d7efa54..054fe841f13b73 100644 --- a/deps/npm/node_modules/make-fetch-happen/package.json +++ b/deps/npm/node_modules/make-fetch-happen/package.json @@ -1,6 +1,6 @@ { "name": "make-fetch-happen", - "version": "14.0.1", + "version": "14.0.3", "description": "Opinionated, caching, retrying fetch client", "main": "lib/index.js", "files": [ @@ -40,14 +40,14 @@ "minipass-fetch": "^4.0.0", "minipass-flush": "^1.0.5", "minipass-pipeline": "^1.2.4", - "negotiator": "^0.6.3", + "negotiator": "^1.0.0", "proc-log": "^5.0.0", "promise-retry": "^2.0.1", "ssri": "^12.0.0" }, "devDependencies": { "@npmcli/eslint-config": "^5.0.0", - "@npmcli/template-oss": "4.23.3", + "@npmcli/template-oss": "4.23.4", "nock": "^13.2.4", "safe-buffer": "^5.2.1", "standard-version": "^9.3.2", @@ -68,7 +68,7 @@ }, "templateOSS": { "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.23.3", + "version": "4.23.4", "publish": "true" } } diff --git a/deps/npm/node_modules/node-gyp/.release-please-manifest.json b/deps/npm/node_modules/node-gyp/.release-please-manifest.json index 01db3293b948bd..26a3463a2e0bb3 100644 --- a/deps/npm/node_modules/node-gyp/.release-please-manifest.json +++ b/deps/npm/node_modules/node-gyp/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "10.2.0" + ".": "11.0.0" } diff --git a/deps/npm/node_modules/node-gyp/CHANGELOG.md b/deps/npm/node_modules/node-gyp/CHANGELOG.md index 1d4e4185320bd6..8374a920b7caaa 100644 --- a/deps/npm/node_modules/node-gyp/CHANGELOG.md +++ b/deps/npm/node_modules/node-gyp/CHANGELOG.md @@ -1,5 +1,46 @@ # Changelog +## [11.0.0](https://github.com/nodejs/node-gyp/compare/v10.3.1...v11.0.0) (2024-12-03) + + +### ⚠ BREAKING CHANGES + +* drop node 16 support ([#3102](https://github.com/nodejs/node-gyp/issues/3102)) + +### Features + +* drop node 16 support ([#3102](https://github.com/nodejs/node-gyp/issues/3102)) ([0e6b6f8](https://github.com/nodejs/node-gyp/commit/0e6b6f8bea615cf031d76ecff9102a38e5474c72)) + + +### Miscellaneous + +* migrate from standard to neostandard ([#3103](https://github.com/nodejs/node-gyp/issues/3103)) ([a130178](https://github.com/nodejs/node-gyp/commit/a13017807d0ae7da8fa076b0bcf23153af7c60a6)) + +## [10.3.1](https://github.com/nodejs/node-gyp/compare/v10.3.0...v10.3.1) (2024-12-02) + + +### Miscellaneous + +* fix npm-publish dependencies and add provenance ([#3099](https://github.com/nodejs/node-gyp/issues/3099)) ([6dded88](https://github.com/nodejs/node-gyp/commit/6dded88065872a32f44114e60731ba4b701ec057)) + +## [10.3.0](https://github.com/nodejs/node-gyp/compare/v10.2.0...v10.3.0) (2024-11-29) + + +### Features + +* prohibit compiling with ClangCL on Windows ([#3098](https://github.com/nodejs/node-gyp/issues/3098)) ([88260bf](https://github.com/nodejs/node-gyp/commit/88260bf86aeb4c39959b78104a5edc3dc88d3aef)) + + +### Bug Fixes + +* **ci:** use correct release-please-action domain after organization url was changed ([#3032](https://github.com/nodejs/node-gyp/issues/3032)) ([d1ed3d4](https://github.com/nodejs/node-gyp/commit/d1ed3d4dc3a53b8ccab4093d002e43945bbece0e)) + + +### Miscellaneous + +* add links to Code of Conduct from root file ([#2196](https://github.com/nodejs/node-gyp/issues/2196)) ([d22e2eb](https://github.com/nodejs/node-gyp/commit/d22e2eb080807c6290533a67249c343a7605a989)) +* publish to npm with release-please ([#3051](https://github.com/nodejs/node-gyp/issues/3051)) ([8319847](https://github.com/nodejs/node-gyp/commit/831984736393a3ea8417efec5255f95d53a70785)) + ## [10.2.0](https://github.com/nodejs/node-gyp/compare/v10.1.0...v10.2.0) (2024-07-09) diff --git a/deps/npm/node_modules/node-gyp/CODE_OF_CONDUCT.md b/deps/npm/node_modules/node-gyp/CODE_OF_CONDUCT.md new file mode 100644 index 00000000000000..4c211405596cb4 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/CODE_OF_CONDUCT.md @@ -0,0 +1,4 @@ +# Code of Conduct + +* [Node.js Code of Conduct](https://github.com/nodejs/admin/blob/master/CODE_OF_CONDUCT.md) +* [Node.js Moderation Policy](https://github.com/nodejs/admin/blob/master/Moderation-Policy.md) diff --git a/deps/npm/node_modules/node-gyp/eslint.config.js b/deps/npm/node_modules/node-gyp/eslint.config.js new file mode 100644 index 00000000000000..5212dc93d5304d --- /dev/null +++ b/deps/npm/node_modules/node-gyp/eslint.config.js @@ -0,0 +1,3 @@ +'use strict' + +module.exports = require('neostandard')({}) diff --git a/deps/npm/node_modules/node-gyp/lib/create-config-gypi.js b/deps/npm/node_modules/node-gyp/lib/create-config-gypi.js index d598dea6e2e7fa..01a820e9f2f312 100644 --- a/deps/npm/node_modules/node-gyp/lib/create-config-gypi.js +++ b/deps/npm/node_modules/node-gyp/lib/create-config-gypi.js @@ -96,6 +96,9 @@ async function getCurrentConfigGypi ({ gyp, nodeDir, vsInfo, python }) { } } variables.msbuild_path = vsInfo.msBuild + if (config.variables.clang === 1) { + config.variables.clang = 0 + } } // loop through the rest of the opts and add the unknown ones as variables. diff --git a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/agent/lib/agents.js b/deps/npm/node_modules/node-gyp/node_modules/@npmcli/agent/lib/agents.js deleted file mode 100644 index c541b93001517e..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/agent/lib/agents.js +++ /dev/null @@ -1,206 +0,0 @@ -'use strict' - -const net = require('net') -const tls = require('tls') -const { once } = require('events') -const timers = require('timers/promises') -const { normalizeOptions, cacheOptions } = require('./options') -const { getProxy, getProxyAgent, proxyCache } = require('./proxy.js') -const Errors = require('./errors.js') -const { Agent: AgentBase } = require('agent-base') - -module.exports = class Agent extends AgentBase { - #options - #timeouts - #proxy - #noProxy - #ProxyAgent - - constructor (options = {}) { - const { timeouts, proxy, noProxy, ...normalizedOptions } = normalizeOptions(options) - - super(normalizedOptions) - - this.#options = normalizedOptions - this.#timeouts = timeouts - - if (proxy) { - this.#proxy = new URL(proxy) - this.#noProxy = noProxy - this.#ProxyAgent = getProxyAgent(proxy) - } - } - - get proxy () { - return this.#proxy ? { url: this.#proxy } : {} - } - - #getProxy (options) { - if (!this.#proxy) { - return - } - - const proxy = getProxy(`${options.protocol}//${options.host}:${options.port}`, { - proxy: this.#proxy, - noProxy: this.#noProxy, - }) - - if (!proxy) { - return - } - - const cacheKey = cacheOptions({ - ...options, - ...this.#options, - timeouts: this.#timeouts, - proxy, - }) - - if (proxyCache.has(cacheKey)) { - return proxyCache.get(cacheKey) - } - - let ProxyAgent = this.#ProxyAgent - if (Array.isArray(ProxyAgent)) { - ProxyAgent = this.isSecureEndpoint(options) ? ProxyAgent[1] : ProxyAgent[0] - } - - const proxyAgent = new ProxyAgent(proxy, { - ...this.#options, - socketOptions: { family: this.#options.family }, - }) - proxyCache.set(cacheKey, proxyAgent) - - return proxyAgent - } - - // takes an array of promises and races them against the connection timeout - // which will throw the necessary error if it is hit. This will return the - // result of the promise race. - async #timeoutConnection ({ promises, options, timeout }, ac = new AbortController()) { - if (timeout) { - const connectionTimeout = timers.setTimeout(timeout, null, { signal: ac.signal }) - .then(() => { - throw new Errors.ConnectionTimeoutError(`${options.host}:${options.port}`) - }).catch((err) => { - if (err.name === 'AbortError') { - return - } - throw err - }) - promises.push(connectionTimeout) - } - - let result - try { - result = await Promise.race(promises) - ac.abort() - } catch (err) { - ac.abort() - throw err - } - return result - } - - async connect (request, options) { - // if the connection does not have its own lookup function - // set, then use the one from our options - options.lookup ??= this.#options.lookup - - let socket - let timeout = this.#timeouts.connection - const isSecureEndpoint = this.isSecureEndpoint(options) - - const proxy = this.#getProxy(options) - if (proxy) { - // some of the proxies will wait for the socket to fully connect before - // returning so we have to await this while also racing it against the - // connection timeout. - const start = Date.now() - socket = await this.#timeoutConnection({ - options, - timeout, - promises: [proxy.connect(request, options)], - }) - // see how much time proxy.connect took and subtract it from - // the timeout - if (timeout) { - timeout = timeout - (Date.now() - start) - } - } else { - socket = (isSecureEndpoint ? tls : net).connect(options) - } - - socket.setKeepAlive(this.keepAlive, this.keepAliveMsecs) - socket.setNoDelay(this.keepAlive) - - const abortController = new AbortController() - const { signal } = abortController - - const connectPromise = socket[isSecureEndpoint ? 'secureConnecting' : 'connecting'] - ? once(socket, isSecureEndpoint ? 'secureConnect' : 'connect', { signal }) - : Promise.resolve() - - await this.#timeoutConnection({ - options, - timeout, - promises: [ - connectPromise, - once(socket, 'error', { signal }).then((err) => { - throw err[0] - }), - ], - }, abortController) - - if (this.#timeouts.idle) { - socket.setTimeout(this.#timeouts.idle, () => { - socket.destroy(new Errors.IdleTimeoutError(`${options.host}:${options.port}`)) - }) - } - - return socket - } - - addRequest (request, options) { - const proxy = this.#getProxy(options) - // it would be better to call proxy.addRequest here but this causes the - // http-proxy-agent to call its super.addRequest which causes the request - // to be added to the agent twice. since we only support 3 agents - // currently (see the required agents in proxy.js) we have manually - // checked that the only public methods we need to call are called in the - // next block. this could change in the future and presumably we would get - // failing tests until we have properly called the necessary methods on - // each of our proxy agents - if (proxy?.setRequestProps) { - proxy.setRequestProps(request, options) - } - - request.setHeader('connection', this.keepAlive ? 'keep-alive' : 'close') - - if (this.#timeouts.response) { - let responseTimeout - request.once('finish', () => { - setTimeout(() => { - request.destroy(new Errors.ResponseTimeoutError(request, this.#proxy)) - }, this.#timeouts.response) - }) - request.once('response', () => { - clearTimeout(responseTimeout) - }) - } - - if (this.#timeouts.transfer) { - let transferTimeout - request.once('response', (res) => { - setTimeout(() => { - res.destroy(new Errors.TransferTimeoutError(request, this.#proxy)) - }, this.#timeouts.transfer) - res.once('close', () => { - clearTimeout(transferTimeout) - }) - }) - } - - return super.addRequest(request, options) - } -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/agent/lib/dns.js b/deps/npm/node_modules/node-gyp/node_modules/@npmcli/agent/lib/dns.js deleted file mode 100644 index 3c6946c566d736..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/agent/lib/dns.js +++ /dev/null @@ -1,53 +0,0 @@ -'use strict' - -const { LRUCache } = require('lru-cache') -const dns = require('dns') - -// this is a factory so that each request can have its own opts (i.e. ttl) -// while still sharing the cache across all requests -const cache = new LRUCache({ max: 50 }) - -const getOptions = ({ - family = 0, - hints = dns.ADDRCONFIG, - all = false, - verbatim = undefined, - ttl = 5 * 60 * 1000, - lookup = dns.lookup, -}) => ({ - // hints and lookup are returned since both are top level properties to (net|tls).connect - hints, - lookup: (hostname, ...args) => { - const callback = args.pop() // callback is always last arg - const lookupOptions = args[0] ?? {} - - const options = { - family, - hints, - all, - verbatim, - ...(typeof lookupOptions === 'number' ? { family: lookupOptions } : lookupOptions), - } - - const key = JSON.stringify({ hostname, ...options }) - - if (cache.has(key)) { - const cached = cache.get(key) - return process.nextTick(callback, null, ...cached) - } - - lookup(hostname, options, (err, ...result) => { - if (err) { - return callback(err) - } - - cache.set(key, result, { ttl }) - return callback(null, ...result) - }) - }, -}) - -module.exports = { - cache, - getOptions, -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/agent/lib/errors.js b/deps/npm/node_modules/node-gyp/node_modules/@npmcli/agent/lib/errors.js deleted file mode 100644 index 70475aec8eb357..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/agent/lib/errors.js +++ /dev/null @@ -1,61 +0,0 @@ -'use strict' - -class InvalidProxyProtocolError extends Error { - constructor (url) { - super(`Invalid protocol \`${url.protocol}\` connecting to proxy \`${url.host}\``) - this.code = 'EINVALIDPROXY' - this.proxy = url - } -} - -class ConnectionTimeoutError extends Error { - constructor (host) { - super(`Timeout connecting to host \`${host}\``) - this.code = 'ECONNECTIONTIMEOUT' - this.host = host - } -} - -class IdleTimeoutError extends Error { - constructor (host) { - super(`Idle timeout reached for host \`${host}\``) - this.code = 'EIDLETIMEOUT' - this.host = host - } -} - -class ResponseTimeoutError extends Error { - constructor (request, proxy) { - let msg = 'Response timeout ' - if (proxy) { - msg += `from proxy \`${proxy.host}\` ` - } - msg += `connecting to host \`${request.host}\`` - super(msg) - this.code = 'ERESPONSETIMEOUT' - this.proxy = proxy - this.request = request - } -} - -class TransferTimeoutError extends Error { - constructor (request, proxy) { - let msg = 'Transfer timeout ' - if (proxy) { - msg += `from proxy \`${proxy.host}\` ` - } - msg += `for \`${request.host}\`` - super(msg) - this.code = 'ETRANSFERTIMEOUT' - this.proxy = proxy - this.request = request - } -} - -module.exports = { - InvalidProxyProtocolError, - ConnectionTimeoutError, - IdleTimeoutError, - ResponseTimeoutError, - TransferTimeoutError, -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/agent/lib/index.js b/deps/npm/node_modules/node-gyp/node_modules/@npmcli/agent/lib/index.js deleted file mode 100644 index b33d6eaef07a21..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/agent/lib/index.js +++ /dev/null @@ -1,56 +0,0 @@ -'use strict' - -const { LRUCache } = require('lru-cache') -const { normalizeOptions, cacheOptions } = require('./options') -const { getProxy, proxyCache } = require('./proxy.js') -const dns = require('./dns.js') -const Agent = require('./agents.js') - -const agentCache = new LRUCache({ max: 20 }) - -const getAgent = (url, { agent, proxy, noProxy, ...options } = {}) => { - // false has meaning so this can't be a simple truthiness check - if (agent != null) { - return agent - } - - url = new URL(url) - - const proxyForUrl = getProxy(url, { proxy, noProxy }) - const normalizedOptions = { - ...normalizeOptions(options), - proxy: proxyForUrl, - } - - const cacheKey = cacheOptions({ - ...normalizedOptions, - secureEndpoint: url.protocol === 'https:', - }) - - if (agentCache.has(cacheKey)) { - return agentCache.get(cacheKey) - } - - const newAgent = new Agent(normalizedOptions) - agentCache.set(cacheKey, newAgent) - - return newAgent -} - -module.exports = { - getAgent, - Agent, - // these are exported for backwards compatability - HttpAgent: Agent, - HttpsAgent: Agent, - cache: { - proxy: proxyCache, - agent: agentCache, - dns: dns.cache, - clear: () => { - proxyCache.clear() - agentCache.clear() - dns.cache.clear() - }, - }, -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/agent/lib/options.js b/deps/npm/node_modules/node-gyp/node_modules/@npmcli/agent/lib/options.js deleted file mode 100644 index 0bf53f725f0846..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/agent/lib/options.js +++ /dev/null @@ -1,86 +0,0 @@ -'use strict' - -const dns = require('./dns') - -const normalizeOptions = (opts) => { - const family = parseInt(opts.family ?? '0', 10) - const keepAlive = opts.keepAlive ?? true - - const normalized = { - // nodejs http agent options. these are all the defaults - // but kept here to increase the likelihood of cache hits - // https://nodejs.org/api/http.html#new-agentoptions - keepAliveMsecs: keepAlive ? 1000 : undefined, - maxSockets: opts.maxSockets ?? 15, - maxTotalSockets: Infinity, - maxFreeSockets: keepAlive ? 256 : undefined, - scheduling: 'fifo', - // then spread the rest of the options - ...opts, - // we already set these to their defaults that we want - family, - keepAlive, - // our custom timeout options - timeouts: { - // the standard timeout option is mapped to our idle timeout - // and then deleted below - idle: opts.timeout ?? 0, - connection: 0, - response: 0, - transfer: 0, - ...opts.timeouts, - }, - // get the dns options that go at the top level of socket connection - ...dns.getOptions({ family, ...opts.dns }), - } - - // remove timeout since we already used it to set our own idle timeout - delete normalized.timeout - - return normalized -} - -const createKey = (obj) => { - let key = '' - const sorted = Object.entries(obj).sort((a, b) => a[0] - b[0]) - for (let [k, v] of sorted) { - if (v == null) { - v = 'null' - } else if (v instanceof URL) { - v = v.toString() - } else if (typeof v === 'object') { - v = createKey(v) - } - key += `${k}:${v}:` - } - return key -} - -const cacheOptions = ({ secureEndpoint, ...options }) => createKey({ - secureEndpoint: !!secureEndpoint, - // socket connect options - family: options.family, - hints: options.hints, - localAddress: options.localAddress, - // tls specific connect options - strictSsl: secureEndpoint ? !!options.rejectUnauthorized : false, - ca: secureEndpoint ? options.ca : null, - cert: secureEndpoint ? options.cert : null, - key: secureEndpoint ? options.key : null, - // http agent options - keepAlive: options.keepAlive, - keepAliveMsecs: options.keepAliveMsecs, - maxSockets: options.maxSockets, - maxTotalSockets: options.maxTotalSockets, - maxFreeSockets: options.maxFreeSockets, - scheduling: options.scheduling, - // timeout options - timeouts: options.timeouts, - // proxy - proxy: options.proxy, -}) - -module.exports = { - normalizeOptions, - cacheOptions, -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/agent/lib/proxy.js b/deps/npm/node_modules/node-gyp/node_modules/@npmcli/agent/lib/proxy.js deleted file mode 100644 index 6272e929e57bcf..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/agent/lib/proxy.js +++ /dev/null @@ -1,88 +0,0 @@ -'use strict' - -const { HttpProxyAgent } = require('http-proxy-agent') -const { HttpsProxyAgent } = require('https-proxy-agent') -const { SocksProxyAgent } = require('socks-proxy-agent') -const { LRUCache } = require('lru-cache') -const { InvalidProxyProtocolError } = require('./errors.js') - -const PROXY_CACHE = new LRUCache({ max: 20 }) - -const SOCKS_PROTOCOLS = new Set(SocksProxyAgent.protocols) - -const PROXY_ENV_KEYS = new Set(['https_proxy', 'http_proxy', 'proxy', 'no_proxy']) - -const PROXY_ENV = Object.entries(process.env).reduce((acc, [key, value]) => { - key = key.toLowerCase() - if (PROXY_ENV_KEYS.has(key)) { - acc[key] = value - } - return acc -}, {}) - -const getProxyAgent = (url) => { - url = new URL(url) - - const protocol = url.protocol.slice(0, -1) - if (SOCKS_PROTOCOLS.has(protocol)) { - return SocksProxyAgent - } - if (protocol === 'https' || protocol === 'http') { - return [HttpProxyAgent, HttpsProxyAgent] - } - - throw new InvalidProxyProtocolError(url) -} - -const isNoProxy = (url, noProxy) => { - if (typeof noProxy === 'string') { - noProxy = noProxy.split(',').map((p) => p.trim()).filter(Boolean) - } - - if (!noProxy || !noProxy.length) { - return false - } - - const hostSegments = url.hostname.split('.').reverse() - - return noProxy.some((no) => { - const noSegments = no.split('.').filter(Boolean).reverse() - if (!noSegments.length) { - return false - } - - for (let i = 0; i < noSegments.length; i++) { - if (hostSegments[i] !== noSegments[i]) { - return false - } - } - - return true - }) -} - -const getProxy = (url, { proxy, noProxy }) => { - url = new URL(url) - - if (!proxy) { - proxy = url.protocol === 'https:' - ? PROXY_ENV.https_proxy - : PROXY_ENV.https_proxy || PROXY_ENV.http_proxy || PROXY_ENV.proxy - } - - if (!noProxy) { - noProxy = PROXY_ENV.no_proxy - } - - if (!proxy || isNoProxy(url, noProxy)) { - return null - } - - return new URL(proxy) -} - -module.exports = { - getProxyAgent, - getProxy, - proxyCache: PROXY_CACHE, -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/agent/package.json b/deps/npm/node_modules/node-gyp/node_modules/@npmcli/agent/package.json deleted file mode 100644 index ef5b4e3228cc46..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/agent/package.json +++ /dev/null @@ -1,60 +0,0 @@ -{ - "name": "@npmcli/agent", - "version": "2.2.2", - "description": "the http/https agent used by the npm cli", - "main": "lib/index.js", - "scripts": { - "gencerts": "bash scripts/create-cert.sh", - "test": "tap", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "postlint": "template-oss-check", - "template-oss-apply": "template-oss-apply --force", - "lintfix": "npm run lint -- --fix", - "snap": "tap", - "posttest": "npm run lint" - }, - "author": "GitHub Inc.", - "license": "ISC", - "bugs": { - "url": "https://github.com/npm/agent/issues" - }, - "homepage": "https://github.com/npm/agent#readme", - "files": [ - "bin/", - "lib/" - ], - "engines": { - "node": "^16.14.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.21.3", - "publish": "true" - }, - "dependencies": { - "agent-base": "^7.1.0", - "http-proxy-agent": "^7.0.0", - "https-proxy-agent": "^7.0.1", - "lru-cache": "^10.0.1", - "socks-proxy-agent": "^8.0.3" - }, - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.21.3", - "minipass-fetch": "^3.0.3", - "nock": "^13.2.7", - "semver": "^7.5.4", - "simple-socks": "^3.1.0", - "tap": "^16.3.0" - }, - "repository": { - "type": "git", - "url": "https://github.com/npm/agent.git" - }, - "tap": { - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - } -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/LICENSE.md b/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/LICENSE.md deleted file mode 100644 index 5fc208ff122e08..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/LICENSE.md +++ /dev/null @@ -1,20 +0,0 @@ - - -ISC License - -Copyright npm, Inc. - -Permission to use, copy, modify, and/or distribute this -software for any purpose with or without fee is hereby -granted, provided that the above copyright notice and this -permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND NPM DISCLAIMS ALL -WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO -EVENT SHALL NPM BE LIABLE FOR ANY SPECIAL, DIRECT, -INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER -TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE -USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/common/get-options.js b/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/common/get-options.js deleted file mode 100644 index cb5982f79077ac..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/common/get-options.js +++ /dev/null @@ -1,20 +0,0 @@ -// given an input that may or may not be an object, return an object that has -// a copy of every defined property listed in 'copy'. if the input is not an -// object, assign it to the property named by 'wrap' -const getOptions = (input, { copy, wrap }) => { - const result = {} - - if (input && typeof input === 'object') { - for (const prop of copy) { - if (input[prop] !== undefined) { - result[prop] = input[prop] - } - } - } else { - result[wrap] = input - } - - return result -} - -module.exports = getOptions diff --git a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/common/node.js b/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/common/node.js deleted file mode 100644 index 4d13bc037359d7..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/common/node.js +++ /dev/null @@ -1,9 +0,0 @@ -const semver = require('semver') - -const satisfies = (range) => { - return semver.satisfies(process.version, range, { includePrerelease: true }) -} - -module.exports = { - satisfies, -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/cp/LICENSE b/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/cp/LICENSE deleted file mode 100644 index 93546dfb7655bf..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/cp/LICENSE +++ /dev/null @@ -1,15 +0,0 @@ -(The MIT License) - -Copyright (c) 2011-2017 JP Richardson - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files -(the 'Software'), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, - merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE -WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS -OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/cp/errors.js b/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/cp/errors.js deleted file mode 100644 index 1cd1e05d0c533d..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/cp/errors.js +++ /dev/null @@ -1,129 +0,0 @@ -'use strict' -const { inspect } = require('util') - -// adapted from node's internal/errors -// https://github.com/nodejs/node/blob/c8a04049/lib/internal/errors.js - -// close copy of node's internal SystemError class. -class SystemError { - constructor (code, prefix, context) { - // XXX context.code is undefined in all constructors used in cp/polyfill - // that may be a bug copied from node, maybe the constructor should use - // `code` not `errno`? nodejs/node#41104 - let message = `${prefix}: ${context.syscall} returned ` + - `${context.code} (${context.message})` - - if (context.path !== undefined) { - message += ` ${context.path}` - } - if (context.dest !== undefined) { - message += ` => ${context.dest}` - } - - this.code = code - Object.defineProperties(this, { - name: { - value: 'SystemError', - enumerable: false, - writable: true, - configurable: true, - }, - message: { - value: message, - enumerable: false, - writable: true, - configurable: true, - }, - info: { - value: context, - enumerable: true, - configurable: true, - writable: false, - }, - errno: { - get () { - return context.errno - }, - set (value) { - context.errno = value - }, - enumerable: true, - configurable: true, - }, - syscall: { - get () { - return context.syscall - }, - set (value) { - context.syscall = value - }, - enumerable: true, - configurable: true, - }, - }) - - if (context.path !== undefined) { - Object.defineProperty(this, 'path', { - get () { - return context.path - }, - set (value) { - context.path = value - }, - enumerable: true, - configurable: true, - }) - } - - if (context.dest !== undefined) { - Object.defineProperty(this, 'dest', { - get () { - return context.dest - }, - set (value) { - context.dest = value - }, - enumerable: true, - configurable: true, - }) - } - } - - toString () { - return `${this.name} [${this.code}]: ${this.message}` - } - - [Symbol.for('nodejs.util.inspect.custom')] (_recurseTimes, ctx) { - return inspect(this, { - ...ctx, - getters: true, - customInspect: false, - }) - } -} - -function E (code, message) { - module.exports[code] = class NodeError extends SystemError { - constructor (ctx) { - super(code, message, ctx) - } - } -} - -E('ERR_FS_CP_DIR_TO_NON_DIR', 'Cannot overwrite directory with non-directory') -E('ERR_FS_CP_EEXIST', 'Target already exists') -E('ERR_FS_CP_EINVAL', 'Invalid src or dest') -E('ERR_FS_CP_FIFO_PIPE', 'Cannot copy a FIFO pipe') -E('ERR_FS_CP_NON_DIR_TO_DIR', 'Cannot overwrite non-directory with directory') -E('ERR_FS_CP_SOCKET', 'Cannot copy a socket file') -E('ERR_FS_CP_SYMLINK_TO_SUBDIRECTORY', 'Cannot overwrite symlink in subdirectory of self') -E('ERR_FS_CP_UNKNOWN', 'Cannot copy an unknown file type') -E('ERR_FS_EISDIR', 'Path is a directory') - -module.exports.ERR_INVALID_ARG_TYPE = class ERR_INVALID_ARG_TYPE extends Error { - constructor (name, expected, actual) { - super() - this.code = 'ERR_INVALID_ARG_TYPE' - this.message = `The ${name} argument must be ${expected}. Received ${typeof actual}` - } -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/cp/index.js b/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/cp/index.js deleted file mode 100644 index 972ce7aa12abef..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/cp/index.js +++ /dev/null @@ -1,22 +0,0 @@ -const fs = require('fs/promises') -const getOptions = require('../common/get-options.js') -const node = require('../common/node.js') -const polyfill = require('./polyfill.js') - -// node 16.7.0 added fs.cp -const useNative = node.satisfies('>=16.7.0') - -const cp = async (src, dest, opts) => { - const options = getOptions(opts, { - copy: ['dereference', 'errorOnExist', 'filter', 'force', 'preserveTimestamps', 'recursive'], - }) - - // the polyfill is tested separately from this module, no need to hack - // process.version to try to trigger it just for coverage - // istanbul ignore next - return useNative - ? fs.cp(src, dest, options) - : polyfill(src, dest, options) -} - -module.exports = cp diff --git a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/cp/polyfill.js b/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/cp/polyfill.js deleted file mode 100644 index 80eb10de971918..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/cp/polyfill.js +++ /dev/null @@ -1,428 +0,0 @@ -// this file is a modified version of the code in node 17.2.0 -// which is, in turn, a modified version of the fs-extra module on npm -// node core changes: -// - Use of the assert module has been replaced with core's error system. -// - All code related to the glob dependency has been removed. -// - Bring your own custom fs module is not currently supported. -// - Some basic code cleanup. -// changes here: -// - remove all callback related code -// - drop sync support -// - change assertions back to non-internal methods (see options.js) -// - throws ENOTDIR when rmdir gets an ENOENT for a path that exists in Windows -'use strict' - -const { - ERR_FS_CP_DIR_TO_NON_DIR, - ERR_FS_CP_EEXIST, - ERR_FS_CP_EINVAL, - ERR_FS_CP_FIFO_PIPE, - ERR_FS_CP_NON_DIR_TO_DIR, - ERR_FS_CP_SOCKET, - ERR_FS_CP_SYMLINK_TO_SUBDIRECTORY, - ERR_FS_CP_UNKNOWN, - ERR_FS_EISDIR, - ERR_INVALID_ARG_TYPE, -} = require('./errors.js') -const { - constants: { - errno: { - EEXIST, - EISDIR, - EINVAL, - ENOTDIR, - }, - }, -} = require('os') -const { - chmod, - copyFile, - lstat, - mkdir, - readdir, - readlink, - stat, - symlink, - unlink, - utimes, -} = require('fs/promises') -const { - dirname, - isAbsolute, - join, - parse, - resolve, - sep, - toNamespacedPath, -} = require('path') -const { fileURLToPath } = require('url') - -const defaultOptions = { - dereference: false, - errorOnExist: false, - filter: undefined, - force: true, - preserveTimestamps: false, - recursive: false, -} - -async function cp (src, dest, opts) { - if (opts != null && typeof opts !== 'object') { - throw new ERR_INVALID_ARG_TYPE('options', ['Object'], opts) - } - return cpFn( - toNamespacedPath(getValidatedPath(src)), - toNamespacedPath(getValidatedPath(dest)), - { ...defaultOptions, ...opts }) -} - -function getValidatedPath (fileURLOrPath) { - const path = fileURLOrPath != null && fileURLOrPath.href - && fileURLOrPath.origin - ? fileURLToPath(fileURLOrPath) - : fileURLOrPath - return path -} - -async function cpFn (src, dest, opts) { - // Warn about using preserveTimestamps on 32-bit node - // istanbul ignore next - if (opts.preserveTimestamps && process.arch === 'ia32') { - const warning = 'Using the preserveTimestamps option in 32-bit ' + - 'node is not recommended' - process.emitWarning(warning, 'TimestampPrecisionWarning') - } - const stats = await checkPaths(src, dest, opts) - const { srcStat, destStat } = stats - await checkParentPaths(src, srcStat, dest) - if (opts.filter) { - return handleFilter(checkParentDir, destStat, src, dest, opts) - } - return checkParentDir(destStat, src, dest, opts) -} - -async function checkPaths (src, dest, opts) { - const { 0: srcStat, 1: destStat } = await getStats(src, dest, opts) - if (destStat) { - if (areIdentical(srcStat, destStat)) { - throw new ERR_FS_CP_EINVAL({ - message: 'src and dest cannot be the same', - path: dest, - syscall: 'cp', - errno: EINVAL, - }) - } - if (srcStat.isDirectory() && !destStat.isDirectory()) { - throw new ERR_FS_CP_DIR_TO_NON_DIR({ - message: `cannot overwrite directory ${src} ` + - `with non-directory ${dest}`, - path: dest, - syscall: 'cp', - errno: EISDIR, - }) - } - if (!srcStat.isDirectory() && destStat.isDirectory()) { - throw new ERR_FS_CP_NON_DIR_TO_DIR({ - message: `cannot overwrite non-directory ${src} ` + - `with directory ${dest}`, - path: dest, - syscall: 'cp', - errno: ENOTDIR, - }) - } - } - - if (srcStat.isDirectory() && isSrcSubdir(src, dest)) { - throw new ERR_FS_CP_EINVAL({ - message: `cannot copy ${src} to a subdirectory of self ${dest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - }) - } - return { srcStat, destStat } -} - -function areIdentical (srcStat, destStat) { - return destStat.ino && destStat.dev && destStat.ino === srcStat.ino && - destStat.dev === srcStat.dev -} - -function getStats (src, dest, opts) { - const statFunc = opts.dereference ? - (file) => stat(file, { bigint: true }) : - (file) => lstat(file, { bigint: true }) - return Promise.all([ - statFunc(src), - statFunc(dest).catch((err) => { - // istanbul ignore next: unsure how to cover. - if (err.code === 'ENOENT') { - return null - } - // istanbul ignore next: unsure how to cover. - throw err - }), - ]) -} - -async function checkParentDir (destStat, src, dest, opts) { - const destParent = dirname(dest) - const dirExists = await pathExists(destParent) - if (dirExists) { - return getStatsForCopy(destStat, src, dest, opts) - } - await mkdir(destParent, { recursive: true }) - return getStatsForCopy(destStat, src, dest, opts) -} - -function pathExists (dest) { - return stat(dest).then( - () => true, - // istanbul ignore next: not sure when this would occur - (err) => (err.code === 'ENOENT' ? false : Promise.reject(err))) -} - -// Recursively check if dest parent is a subdirectory of src. -// It works for all file types including symlinks since it -// checks the src and dest inodes. It starts from the deepest -// parent and stops once it reaches the src parent or the root path. -async function checkParentPaths (src, srcStat, dest) { - const srcParent = resolve(dirname(src)) - const destParent = resolve(dirname(dest)) - if (destParent === srcParent || destParent === parse(destParent).root) { - return - } - let destStat - try { - destStat = await stat(destParent, { bigint: true }) - } catch (err) { - // istanbul ignore else: not sure when this would occur - if (err.code === 'ENOENT') { - return - } - // istanbul ignore next: not sure when this would occur - throw err - } - if (areIdentical(srcStat, destStat)) { - throw new ERR_FS_CP_EINVAL({ - message: `cannot copy ${src} to a subdirectory of self ${dest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - }) - } - return checkParentPaths(src, srcStat, destParent) -} - -const normalizePathToArray = (path) => - resolve(path).split(sep).filter(Boolean) - -// Return true if dest is a subdir of src, otherwise false. -// It only checks the path strings. -function isSrcSubdir (src, dest) { - const srcArr = normalizePathToArray(src) - const destArr = normalizePathToArray(dest) - return srcArr.every((cur, i) => destArr[i] === cur) -} - -async function handleFilter (onInclude, destStat, src, dest, opts, cb) { - const include = await opts.filter(src, dest) - if (include) { - return onInclude(destStat, src, dest, opts, cb) - } -} - -function startCopy (destStat, src, dest, opts) { - if (opts.filter) { - return handleFilter(getStatsForCopy, destStat, src, dest, opts) - } - return getStatsForCopy(destStat, src, dest, opts) -} - -async function getStatsForCopy (destStat, src, dest, opts) { - const statFn = opts.dereference ? stat : lstat - const srcStat = await statFn(src) - // istanbul ignore else: can't portably test FIFO - if (srcStat.isDirectory() && opts.recursive) { - return onDir(srcStat, destStat, src, dest, opts) - } else if (srcStat.isDirectory()) { - throw new ERR_FS_EISDIR({ - message: `${src} is a directory (not copied)`, - path: src, - syscall: 'cp', - errno: EINVAL, - }) - } else if (srcStat.isFile() || - srcStat.isCharacterDevice() || - srcStat.isBlockDevice()) { - return onFile(srcStat, destStat, src, dest, opts) - } else if (srcStat.isSymbolicLink()) { - return onLink(destStat, src, dest) - } else if (srcStat.isSocket()) { - throw new ERR_FS_CP_SOCKET({ - message: `cannot copy a socket file: ${dest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - }) - } else if (srcStat.isFIFO()) { - throw new ERR_FS_CP_FIFO_PIPE({ - message: `cannot copy a FIFO pipe: ${dest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - }) - } - // istanbul ignore next: should be unreachable - throw new ERR_FS_CP_UNKNOWN({ - message: `cannot copy an unknown file type: ${dest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - }) -} - -function onFile (srcStat, destStat, src, dest, opts) { - if (!destStat) { - return _copyFile(srcStat, src, dest, opts) - } - return mayCopyFile(srcStat, src, dest, opts) -} - -async function mayCopyFile (srcStat, src, dest, opts) { - if (opts.force) { - await unlink(dest) - return _copyFile(srcStat, src, dest, opts) - } else if (opts.errorOnExist) { - throw new ERR_FS_CP_EEXIST({ - message: `${dest} already exists`, - path: dest, - syscall: 'cp', - errno: EEXIST, - }) - } -} - -async function _copyFile (srcStat, src, dest, opts) { - await copyFile(src, dest) - if (opts.preserveTimestamps) { - return handleTimestampsAndMode(srcStat.mode, src, dest) - } - return setDestMode(dest, srcStat.mode) -} - -async function handleTimestampsAndMode (srcMode, src, dest) { - // Make sure the file is writable before setting the timestamp - // otherwise open fails with EPERM when invoked with 'r+' - // (through utimes call) - if (fileIsNotWritable(srcMode)) { - await makeFileWritable(dest, srcMode) - return setDestTimestampsAndMode(srcMode, src, dest) - } - return setDestTimestampsAndMode(srcMode, src, dest) -} - -function fileIsNotWritable (srcMode) { - return (srcMode & 0o200) === 0 -} - -function makeFileWritable (dest, srcMode) { - return setDestMode(dest, srcMode | 0o200) -} - -async function setDestTimestampsAndMode (srcMode, src, dest) { - await setDestTimestamps(src, dest) - return setDestMode(dest, srcMode) -} - -function setDestMode (dest, srcMode) { - return chmod(dest, srcMode) -} - -async function setDestTimestamps (src, dest) { - // The initial srcStat.atime cannot be trusted - // because it is modified by the read(2) system call - // (See https://nodejs.org/api/fs.html#fs_stat_time_values) - const updatedSrcStat = await stat(src) - return utimes(dest, updatedSrcStat.atime, updatedSrcStat.mtime) -} - -function onDir (srcStat, destStat, src, dest, opts) { - if (!destStat) { - return mkDirAndCopy(srcStat.mode, src, dest, opts) - } - return copyDir(src, dest, opts) -} - -async function mkDirAndCopy (srcMode, src, dest, opts) { - await mkdir(dest) - await copyDir(src, dest, opts) - return setDestMode(dest, srcMode) -} - -async function copyDir (src, dest, opts) { - const dir = await readdir(src) - for (let i = 0; i < dir.length; i++) { - const item = dir[i] - const srcItem = join(src, item) - const destItem = join(dest, item) - const { destStat } = await checkPaths(srcItem, destItem, opts) - await startCopy(destStat, srcItem, destItem, opts) - } -} - -async function onLink (destStat, src, dest) { - let resolvedSrc = await readlink(src) - if (!isAbsolute(resolvedSrc)) { - resolvedSrc = resolve(dirname(src), resolvedSrc) - } - if (!destStat) { - return symlink(resolvedSrc, dest) - } - let resolvedDest - try { - resolvedDest = await readlink(dest) - } catch (err) { - // Dest exists and is a regular file or directory, - // Windows may throw UNKNOWN error. If dest already exists, - // fs throws error anyway, so no need to guard against it here. - // istanbul ignore next: can only test on windows - if (err.code === 'EINVAL' || err.code === 'UNKNOWN') { - return symlink(resolvedSrc, dest) - } - // istanbul ignore next: should not be possible - throw err - } - if (!isAbsolute(resolvedDest)) { - resolvedDest = resolve(dirname(dest), resolvedDest) - } - if (isSrcSubdir(resolvedSrc, resolvedDest)) { - throw new ERR_FS_CP_EINVAL({ - message: `cannot copy ${resolvedSrc} to a subdirectory of self ` + - `${resolvedDest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - }) - } - // Do not copy if src is a subdir of dest since unlinking - // dest in this case would result in removing src contents - // and therefore a broken symlink would be created. - const srcStat = await stat(src) - if (srcStat.isDirectory() && isSrcSubdir(resolvedDest, resolvedSrc)) { - throw new ERR_FS_CP_SYMLINK_TO_SUBDIRECTORY({ - message: `cannot overwrite ${resolvedDest} with ${resolvedSrc}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - }) - } - return copyLink(resolvedSrc, dest) -} - -async function copyLink (resolvedSrc, dest) { - await unlink(dest) - return symlink(resolvedSrc, dest) -} - -module.exports = cp diff --git a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/index.js b/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/index.js deleted file mode 100644 index 81c746304cc428..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/index.js +++ /dev/null @@ -1,13 +0,0 @@ -'use strict' - -const cp = require('./cp/index.js') -const withTempDir = require('./with-temp-dir.js') -const readdirScoped = require('./readdir-scoped.js') -const moveFile = require('./move-file.js') - -module.exports = { - cp, - withTempDir, - readdirScoped, - moveFile, -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/move-file.js b/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/move-file.js deleted file mode 100644 index d56e06d384659a..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/move-file.js +++ /dev/null @@ -1,78 +0,0 @@ -const { dirname, join, resolve, relative, isAbsolute } = require('path') -const fs = require('fs/promises') - -const pathExists = async path => { - try { - await fs.access(path) - return true - } catch (er) { - return er.code !== 'ENOENT' - } -} - -const moveFile = async (source, destination, options = {}, root = true, symlinks = []) => { - if (!source || !destination) { - throw new TypeError('`source` and `destination` file required') - } - - options = { - overwrite: true, - ...options, - } - - if (!options.overwrite && await pathExists(destination)) { - throw new Error(`The destination file exists: ${destination}`) - } - - await fs.mkdir(dirname(destination), { recursive: true }) - - try { - await fs.rename(source, destination) - } catch (error) { - if (error.code === 'EXDEV' || error.code === 'EPERM') { - const sourceStat = await fs.lstat(source) - if (sourceStat.isDirectory()) { - const files = await fs.readdir(source) - await Promise.all(files.map((file) => - moveFile(join(source, file), join(destination, file), options, false, symlinks) - )) - } else if (sourceStat.isSymbolicLink()) { - symlinks.push({ source, destination }) - } else { - await fs.copyFile(source, destination) - } - } else { - throw error - } - } - - if (root) { - await Promise.all(symlinks.map(async ({ source: symSource, destination: symDestination }) => { - let target = await fs.readlink(symSource) - // junction symlinks in windows will be absolute paths, so we need to - // make sure they point to the symlink destination - if (isAbsolute(target)) { - target = resolve(symDestination, relative(symSource, target)) - } - // try to determine what the actual file is so we can create the correct - // type of symlink in windows - let targetStat = 'file' - try { - targetStat = await fs.stat(resolve(dirname(symSource), target)) - if (targetStat.isDirectory()) { - targetStat = 'junction' - } - } catch { - // targetStat remains 'file' - } - await fs.symlink( - target, - symDestination, - targetStat - ) - })) - await fs.rm(source, { recursive: true, force: true }) - } -} - -module.exports = moveFile diff --git a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/readdir-scoped.js b/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/readdir-scoped.js deleted file mode 100644 index cd601dfbe7486b..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/readdir-scoped.js +++ /dev/null @@ -1,20 +0,0 @@ -const { readdir } = require('fs/promises') -const { join } = require('path') - -const readdirScoped = async (dir) => { - const results = [] - - for (const item of await readdir(dir)) { - if (item.startsWith('@')) { - for (const scopedItem of await readdir(join(dir, item))) { - results.push(join(item, scopedItem)) - } - } else { - results.push(item) - } - } - - return results -} - -module.exports = readdirScoped diff --git a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/with-temp-dir.js b/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/with-temp-dir.js deleted file mode 100644 index 0738ac4f29e1be..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/lib/with-temp-dir.js +++ /dev/null @@ -1,39 +0,0 @@ -const { join, sep } = require('path') - -const getOptions = require('./common/get-options.js') -const { mkdir, mkdtemp, rm } = require('fs/promises') - -// create a temp directory, ensure its permissions match its parent, then call -// the supplied function passing it the path to the directory. clean up after -// the function finishes, whether it throws or not -const withTempDir = async (root, fn, opts) => { - const options = getOptions(opts, { - copy: ['tmpPrefix'], - }) - // create the directory - await mkdir(root, { recursive: true }) - - const target = await mkdtemp(join(`${root}${sep}`, options.tmpPrefix || '')) - let err - let result - - try { - result = await fn(target) - } catch (_err) { - err = _err - } - - try { - await rm(target, { force: true, recursive: true }) - } catch { - // ignore errors - } - - if (err) { - throw err - } - - return result -} - -module.exports = withTempDir diff --git a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/package.json b/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/package.json deleted file mode 100644 index 5261a11b78000e..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/@npmcli/fs/package.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "name": "@npmcli/fs", - "version": "3.1.1", - "description": "filesystem utilities for the npm cli", - "main": "lib/index.js", - "files": [ - "bin/", - "lib/" - ], - "scripts": { - "snap": "tap", - "test": "tap", - "npmclilint": "npmcli-lint", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "lintfix": "npm run lint -- --fix", - "posttest": "npm run lint", - "postsnap": "npm run lintfix --", - "postlint": "template-oss-check", - "template-oss-apply": "template-oss-apply --force" - }, - "repository": { - "type": "git", - "url": "git+https://github.com/npm/fs.git" - }, - "keywords": [ - "npm", - "oss" - ], - "author": "GitHub Inc.", - "license": "ISC", - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.22.0", - "tap": "^16.0.1" - }, - "dependencies": { - "semver": "^7.3.5" - }, - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.22.0" - }, - "tap": { - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - } -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/abbrev/LICENSE b/deps/npm/node_modules/node-gyp/node_modules/abbrev/LICENSE deleted file mode 100644 index 9bcfa9d7d8d26e..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/abbrev/LICENSE +++ /dev/null @@ -1,46 +0,0 @@ -This software is dual-licensed under the ISC and MIT licenses. -You may use this software under EITHER of the following licenses. - ----------- - -The ISC License - -Copyright (c) Isaac Z. Schlueter and Contributors - -Permission to use, copy, modify, and/or distribute this software for any -purpose with or without fee is hereby granted, provided that the above -copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR -IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - ----------- - -Copyright Isaac Z. Schlueter and Contributors -All rights reserved. - -Permission is hereby granted, free of charge, to any person -obtaining a copy of this software and associated documentation -files (the "Software"), to deal in the Software without -restriction, including without limitation the rights to use, -copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the -Software is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. diff --git a/deps/npm/node_modules/node-gyp/node_modules/abbrev/lib/index.js b/deps/npm/node_modules/node-gyp/node_modules/abbrev/lib/index.js deleted file mode 100644 index 9f48801f049c9e..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/abbrev/lib/index.js +++ /dev/null @@ -1,50 +0,0 @@ -module.exports = abbrev - -function abbrev (...args) { - let list = args.length === 1 || Array.isArray(args[0]) ? args[0] : args - - for (let i = 0, l = list.length; i < l; i++) { - list[i] = typeof list[i] === 'string' ? list[i] : String(list[i]) - } - - // sort them lexicographically, so that they're next to their nearest kin - list = list.sort(lexSort) - - // walk through each, seeing how much it has in common with the next and previous - const abbrevs = {} - let prev = '' - for (let ii = 0, ll = list.length; ii < ll; ii++) { - const current = list[ii] - const next = list[ii + 1] || '' - let nextMatches = true - let prevMatches = true - if (current === next) { - continue - } - let j = 0 - const cl = current.length - for (; j < cl; j++) { - const curChar = current.charAt(j) - nextMatches = nextMatches && curChar === next.charAt(j) - prevMatches = prevMatches && curChar === prev.charAt(j) - if (!nextMatches && !prevMatches) { - j++ - break - } - } - prev = current - if (j === cl) { - abbrevs[current] = current - continue - } - for (let a = current.slice(0, j); j <= cl; j++) { - abbrevs[a] = current - a += current.charAt(j) - } - } - return abbrevs -} - -function lexSort (a, b) { - return a === b ? 0 : a > b ? 1 : -1 -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/abbrev/package.json b/deps/npm/node_modules/node-gyp/node_modules/abbrev/package.json deleted file mode 100644 index e26400445631ad..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/abbrev/package.json +++ /dev/null @@ -1,43 +0,0 @@ -{ - "name": "abbrev", - "version": "2.0.0", - "description": "Like ruby's abbrev module, but in js", - "author": "GitHub Inc.", - "main": "lib/index.js", - "scripts": { - "test": "tap", - "lint": "eslint \"**/*.js\"", - "postlint": "template-oss-check", - "template-oss-apply": "template-oss-apply --force", - "lintfix": "npm run lint -- --fix", - "snap": "tap", - "posttest": "npm run lint" - }, - "repository": { - "type": "git", - "url": "https://github.com/npm/abbrev-js.git" - }, - "license": "ISC", - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.8.0", - "tap": "^16.3.0" - }, - "tap": { - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - }, - "files": [ - "bin/", - "lib/" - ], - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.8.0" - } -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/cacache/LICENSE.md b/deps/npm/node_modules/node-gyp/node_modules/cacache/LICENSE.md deleted file mode 100644 index 8d28acf866d932..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/cacache/LICENSE.md +++ /dev/null @@ -1,16 +0,0 @@ -ISC License - -Copyright (c) npm, Inc. - -Permission to use, copy, modify, and/or distribute this software for -any purpose with or without fee is hereby granted, provided that the -above copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE COPYRIGHT HOLDER DISCLAIMS -ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE -COPYRIGHT HOLDER BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR -CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE -USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/content/path.js b/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/content/path.js deleted file mode 100644 index ad5a76a4f73f26..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/content/path.js +++ /dev/null @@ -1,29 +0,0 @@ -'use strict' - -const contentVer = require('../../package.json')['cache-version'].content -const hashToSegments = require('../util/hash-to-segments') -const path = require('path') -const ssri = require('ssri') - -// Current format of content file path: -// -// sha512-BaSE64Hex= -> -// ~/.my-cache/content-v2/sha512/ba/da/55deadbeefc0ffee -// -module.exports = contentPath - -function contentPath (cache, integrity) { - const sri = ssri.parse(integrity, { single: true }) - // contentPath is the *strongest* algo given - return path.join( - contentDir(cache), - sri.algorithm, - ...hashToSegments(sri.hexDigest()) - ) -} - -module.exports.contentDir = contentDir - -function contentDir (cache) { - return path.join(cache, `content-v${contentVer}`) -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/content/read.js b/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/content/read.js deleted file mode 100644 index 5f6192c3cec566..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/content/read.js +++ /dev/null @@ -1,165 +0,0 @@ -'use strict' - -const fs = require('fs/promises') -const fsm = require('fs-minipass') -const ssri = require('ssri') -const contentPath = require('./path') -const Pipeline = require('minipass-pipeline') - -module.exports = read - -const MAX_SINGLE_READ_SIZE = 64 * 1024 * 1024 -async function read (cache, integrity, opts = {}) { - const { size } = opts - const { stat, cpath, sri } = await withContentSri(cache, integrity, async (cpath, sri) => { - // get size - const stat = size ? { size } : await fs.stat(cpath) - return { stat, cpath, sri } - }) - - if (stat.size > MAX_SINGLE_READ_SIZE) { - return readPipeline(cpath, stat.size, sri, new Pipeline()).concat() - } - - const data = await fs.readFile(cpath, { encoding: null }) - - if (stat.size !== data.length) { - throw sizeError(stat.size, data.length) - } - - if (!ssri.checkData(data, sri)) { - throw integrityError(sri, cpath) - } - - return data -} - -const readPipeline = (cpath, size, sri, stream) => { - stream.push( - new fsm.ReadStream(cpath, { - size, - readSize: MAX_SINGLE_READ_SIZE, - }), - ssri.integrityStream({ - integrity: sri, - size, - }) - ) - return stream -} - -module.exports.stream = readStream -module.exports.readStream = readStream - -function readStream (cache, integrity, opts = {}) { - const { size } = opts - const stream = new Pipeline() - // Set all this up to run on the stream and then just return the stream - Promise.resolve().then(async () => { - const { stat, cpath, sri } = await withContentSri(cache, integrity, async (cpath, sri) => { - // get size - const stat = size ? { size } : await fs.stat(cpath) - return { stat, cpath, sri } - }) - - return readPipeline(cpath, stat.size, sri, stream) - }).catch(err => stream.emit('error', err)) - - return stream -} - -module.exports.copy = copy - -function copy (cache, integrity, dest) { - return withContentSri(cache, integrity, (cpath) => { - return fs.copyFile(cpath, dest) - }) -} - -module.exports.hasContent = hasContent - -async function hasContent (cache, integrity) { - if (!integrity) { - return false - } - - try { - return await withContentSri(cache, integrity, async (cpath, sri) => { - const stat = await fs.stat(cpath) - return { size: stat.size, sri, stat } - }) - } catch (err) { - if (err.code === 'ENOENT') { - return false - } - - if (err.code === 'EPERM') { - /* istanbul ignore else */ - if (process.platform !== 'win32') { - throw err - } else { - return false - } - } - } -} - -async function withContentSri (cache, integrity, fn) { - const sri = ssri.parse(integrity) - // If `integrity` has multiple entries, pick the first digest - // with available local data. - const algo = sri.pickAlgorithm() - const digests = sri[algo] - - if (digests.length <= 1) { - const cpath = contentPath(cache, digests[0]) - return fn(cpath, digests[0]) - } else { - // Can't use race here because a generic error can happen before - // a ENOENT error, and can happen before a valid result - const results = await Promise.all(digests.map(async (meta) => { - try { - return await withContentSri(cache, meta, fn) - } catch (err) { - if (err.code === 'ENOENT') { - return Object.assign( - new Error('No matching content found for ' + sri.toString()), - { code: 'ENOENT' } - ) - } - return err - } - })) - // Return the first non error if it is found - const result = results.find((r) => !(r instanceof Error)) - if (result) { - return result - } - - // Throw the No matching content found error - const enoentError = results.find((r) => r.code === 'ENOENT') - if (enoentError) { - throw enoentError - } - - // Throw generic error - throw results.find((r) => r instanceof Error) - } -} - -function sizeError (expected, found) { - /* eslint-disable-next-line max-len */ - const err = new Error(`Bad data size: expected inserted data to be ${expected} bytes, but got ${found} instead`) - err.expected = expected - err.found = found - err.code = 'EBADSIZE' - return err -} - -function integrityError (sri, path) { - const err = new Error(`Integrity verification failed for ${sri} (${path})`) - err.code = 'EINTEGRITY' - err.sri = sri - err.path = path - return err -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/content/rm.js b/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/content/rm.js deleted file mode 100644 index ce58d679e4cb25..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/content/rm.js +++ /dev/null @@ -1,18 +0,0 @@ -'use strict' - -const fs = require('fs/promises') -const contentPath = require('./path') -const { hasContent } = require('./read') - -module.exports = rm - -async function rm (cache, integrity) { - const content = await hasContent(cache, integrity) - // ~pretty~ sure we can't end up with a content lacking sri, but be safe - if (content && content.sri) { - await fs.rm(contentPath(cache, content.sri), { recursive: true, force: true }) - return true - } else { - return false - } -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/content/write.js b/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/content/write.js deleted file mode 100644 index e7187abca8788a..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/content/write.js +++ /dev/null @@ -1,206 +0,0 @@ -'use strict' - -const events = require('events') - -const contentPath = require('./path') -const fs = require('fs/promises') -const { moveFile } = require('@npmcli/fs') -const { Minipass } = require('minipass') -const Pipeline = require('minipass-pipeline') -const Flush = require('minipass-flush') -const path = require('path') -const ssri = require('ssri') -const uniqueFilename = require('unique-filename') -const fsm = require('fs-minipass') - -module.exports = write - -// Cache of move operations in process so we don't duplicate -const moveOperations = new Map() - -async function write (cache, data, opts = {}) { - const { algorithms, size, integrity } = opts - - if (typeof size === 'number' && data.length !== size) { - throw sizeError(size, data.length) - } - - const sri = ssri.fromData(data, algorithms ? { algorithms } : {}) - if (integrity && !ssri.checkData(data, integrity, opts)) { - throw checksumError(integrity, sri) - } - - for (const algo in sri) { - const tmp = await makeTmp(cache, opts) - const hash = sri[algo].toString() - try { - await fs.writeFile(tmp.target, data, { flag: 'wx' }) - await moveToDestination(tmp, cache, hash, opts) - } finally { - if (!tmp.moved) { - await fs.rm(tmp.target, { recursive: true, force: true }) - } - } - } - return { integrity: sri, size: data.length } -} - -module.exports.stream = writeStream - -// writes proxied to the 'inputStream' that is passed to the Promise -// 'end' is deferred until content is handled. -class CacacheWriteStream extends Flush { - constructor (cache, opts) { - super() - this.opts = opts - this.cache = cache - this.inputStream = new Minipass() - this.inputStream.on('error', er => this.emit('error', er)) - this.inputStream.on('drain', () => this.emit('drain')) - this.handleContentP = null - } - - write (chunk, encoding, cb) { - if (!this.handleContentP) { - this.handleContentP = handleContent( - this.inputStream, - this.cache, - this.opts - ) - this.handleContentP.catch(error => this.emit('error', error)) - } - return this.inputStream.write(chunk, encoding, cb) - } - - flush (cb) { - this.inputStream.end(() => { - if (!this.handleContentP) { - const e = new Error('Cache input stream was empty') - e.code = 'ENODATA' - // empty streams are probably emitting end right away. - // defer this one tick by rejecting a promise on it. - return Promise.reject(e).catch(cb) - } - // eslint-disable-next-line promise/catch-or-return - this.handleContentP.then( - (res) => { - res.integrity && this.emit('integrity', res.integrity) - // eslint-disable-next-line promise/always-return - res.size !== null && this.emit('size', res.size) - cb() - }, - (er) => cb(er) - ) - }) - } -} - -function writeStream (cache, opts = {}) { - return new CacacheWriteStream(cache, opts) -} - -async function handleContent (inputStream, cache, opts) { - const tmp = await makeTmp(cache, opts) - try { - const res = await pipeToTmp(inputStream, cache, tmp.target, opts) - await moveToDestination( - tmp, - cache, - res.integrity, - opts - ) - return res - } finally { - if (!tmp.moved) { - await fs.rm(tmp.target, { recursive: true, force: true }) - } - } -} - -async function pipeToTmp (inputStream, cache, tmpTarget, opts) { - const outStream = new fsm.WriteStream(tmpTarget, { - flags: 'wx', - }) - - if (opts.integrityEmitter) { - // we need to create these all simultaneously since they can fire in any order - const [integrity, size] = await Promise.all([ - events.once(opts.integrityEmitter, 'integrity').then(res => res[0]), - events.once(opts.integrityEmitter, 'size').then(res => res[0]), - new Pipeline(inputStream, outStream).promise(), - ]) - return { integrity, size } - } - - let integrity - let size - const hashStream = ssri.integrityStream({ - integrity: opts.integrity, - algorithms: opts.algorithms, - size: opts.size, - }) - hashStream.on('integrity', i => { - integrity = i - }) - hashStream.on('size', s => { - size = s - }) - - const pipeline = new Pipeline(inputStream, hashStream, outStream) - await pipeline.promise() - return { integrity, size } -} - -async function makeTmp (cache, opts) { - const tmpTarget = uniqueFilename(path.join(cache, 'tmp'), opts.tmpPrefix) - await fs.mkdir(path.dirname(tmpTarget), { recursive: true }) - return { - target: tmpTarget, - moved: false, - } -} - -async function moveToDestination (tmp, cache, sri) { - const destination = contentPath(cache, sri) - const destDir = path.dirname(destination) - if (moveOperations.has(destination)) { - return moveOperations.get(destination) - } - moveOperations.set( - destination, - fs.mkdir(destDir, { recursive: true }) - .then(async () => { - await moveFile(tmp.target, destination, { overwrite: false }) - tmp.moved = true - return tmp.moved - }) - .catch(err => { - if (!err.message.startsWith('The destination file exists')) { - throw Object.assign(err, { code: 'EEXIST' }) - } - }).finally(() => { - moveOperations.delete(destination) - }) - - ) - return moveOperations.get(destination) -} - -function sizeError (expected, found) { - /* eslint-disable-next-line max-len */ - const err = new Error(`Bad data size: expected inserted data to be ${expected} bytes, but got ${found} instead`) - err.expected = expected - err.found = found - err.code = 'EBADSIZE' - return err -} - -function checksumError (expected, found) { - const err = new Error(`Integrity check failed: - Wanted: ${expected} - Found: ${found}`) - err.code = 'EINTEGRITY' - err.expected = expected - err.found = found - return err -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/entry-index.js b/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/entry-index.js deleted file mode 100644 index 89c28f2f257d48..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/entry-index.js +++ /dev/null @@ -1,336 +0,0 @@ -'use strict' - -const crypto = require('crypto') -const { - appendFile, - mkdir, - readFile, - readdir, - rm, - writeFile, -} = require('fs/promises') -const { Minipass } = require('minipass') -const path = require('path') -const ssri = require('ssri') -const uniqueFilename = require('unique-filename') - -const contentPath = require('./content/path') -const hashToSegments = require('./util/hash-to-segments') -const indexV = require('../package.json')['cache-version'].index -const { moveFile } = require('@npmcli/fs') - -const pMap = require('p-map') -const lsStreamConcurrency = 5 - -module.exports.NotFoundError = class NotFoundError extends Error { - constructor (cache, key) { - super(`No cache entry for ${key} found in ${cache}`) - this.code = 'ENOENT' - this.cache = cache - this.key = key - } -} - -module.exports.compact = compact - -async function compact (cache, key, matchFn, opts = {}) { - const bucket = bucketPath(cache, key) - const entries = await bucketEntries(bucket) - const newEntries = [] - // we loop backwards because the bottom-most result is the newest - // since we add new entries with appendFile - for (let i = entries.length - 1; i >= 0; --i) { - const entry = entries[i] - // a null integrity could mean either a delete was appended - // or the user has simply stored an index that does not map - // to any content. we determine if the user wants to keep the - // null integrity based on the validateEntry function passed in options. - // if the integrity is null and no validateEntry is provided, we break - // as we consider the null integrity to be a deletion of everything - // that came before it. - if (entry.integrity === null && !opts.validateEntry) { - break - } - - // if this entry is valid, and it is either the first entry or - // the newEntries array doesn't already include an entry that - // matches this one based on the provided matchFn, then we add - // it to the beginning of our list - if ((!opts.validateEntry || opts.validateEntry(entry) === true) && - (newEntries.length === 0 || - !newEntries.find((oldEntry) => matchFn(oldEntry, entry)))) { - newEntries.unshift(entry) - } - } - - const newIndex = '\n' + newEntries.map((entry) => { - const stringified = JSON.stringify(entry) - const hash = hashEntry(stringified) - return `${hash}\t${stringified}` - }).join('\n') - - const setup = async () => { - const target = uniqueFilename(path.join(cache, 'tmp'), opts.tmpPrefix) - await mkdir(path.dirname(target), { recursive: true }) - return { - target, - moved: false, - } - } - - const teardown = async (tmp) => { - if (!tmp.moved) { - return rm(tmp.target, { recursive: true, force: true }) - } - } - - const write = async (tmp) => { - await writeFile(tmp.target, newIndex, { flag: 'wx' }) - await mkdir(path.dirname(bucket), { recursive: true }) - // we use @npmcli/move-file directly here because we - // want to overwrite the existing file - await moveFile(tmp.target, bucket) - tmp.moved = true - } - - // write the file atomically - const tmp = await setup() - try { - await write(tmp) - } finally { - await teardown(tmp) - } - - // we reverse the list we generated such that the newest - // entries come first in order to make looping through them easier - // the true passed to formatEntry tells it to keep null - // integrity values, if they made it this far it's because - // validateEntry returned true, and as such we should return it - return newEntries.reverse().map((entry) => formatEntry(cache, entry, true)) -} - -module.exports.insert = insert - -async function insert (cache, key, integrity, opts = {}) { - const { metadata, size, time } = opts - const bucket = bucketPath(cache, key) - const entry = { - key, - integrity: integrity && ssri.stringify(integrity), - time: time || Date.now(), - size, - metadata, - } - try { - await mkdir(path.dirname(bucket), { recursive: true }) - const stringified = JSON.stringify(entry) - // NOTE - Cleverness ahoy! - // - // This works because it's tremendously unlikely for an entry to corrupt - // another while still preserving the string length of the JSON in - // question. So, we just slap the length in there and verify it on read. - // - // Thanks to @isaacs for the whiteboarding session that ended up with - // this. - await appendFile(bucket, `\n${hashEntry(stringified)}\t${stringified}`) - } catch (err) { - if (err.code === 'ENOENT') { - return undefined - } - - throw err - } - return formatEntry(cache, entry) -} - -module.exports.find = find - -async function find (cache, key) { - const bucket = bucketPath(cache, key) - try { - const entries = await bucketEntries(bucket) - return entries.reduce((latest, next) => { - if (next && next.key === key) { - return formatEntry(cache, next) - } else { - return latest - } - }, null) - } catch (err) { - if (err.code === 'ENOENT') { - return null - } else { - throw err - } - } -} - -module.exports.delete = del - -function del (cache, key, opts = {}) { - if (!opts.removeFully) { - return insert(cache, key, null, opts) - } - - const bucket = bucketPath(cache, key) - return rm(bucket, { recursive: true, force: true }) -} - -module.exports.lsStream = lsStream - -function lsStream (cache) { - const indexDir = bucketDir(cache) - const stream = new Minipass({ objectMode: true }) - - // Set all this up to run on the stream and then just return the stream - Promise.resolve().then(async () => { - const buckets = await readdirOrEmpty(indexDir) - await pMap(buckets, async (bucket) => { - const bucketPath = path.join(indexDir, bucket) - const subbuckets = await readdirOrEmpty(bucketPath) - await pMap(subbuckets, async (subbucket) => { - const subbucketPath = path.join(bucketPath, subbucket) - - // "/cachename//./*" - const subbucketEntries = await readdirOrEmpty(subbucketPath) - await pMap(subbucketEntries, async (entry) => { - const entryPath = path.join(subbucketPath, entry) - try { - const entries = await bucketEntries(entryPath) - // using a Map here prevents duplicate keys from showing up - // twice, I guess? - const reduced = entries.reduce((acc, entry) => { - acc.set(entry.key, entry) - return acc - }, new Map()) - // reduced is a map of key => entry - for (const entry of reduced.values()) { - const formatted = formatEntry(cache, entry) - if (formatted) { - stream.write(formatted) - } - } - } catch (err) { - if (err.code === 'ENOENT') { - return undefined - } - throw err - } - }, - { concurrency: lsStreamConcurrency }) - }, - { concurrency: lsStreamConcurrency }) - }, - { concurrency: lsStreamConcurrency }) - stream.end() - return stream - }).catch(err => stream.emit('error', err)) - - return stream -} - -module.exports.ls = ls - -async function ls (cache) { - const entries = await lsStream(cache).collect() - return entries.reduce((acc, xs) => { - acc[xs.key] = xs - return acc - }, {}) -} - -module.exports.bucketEntries = bucketEntries - -async function bucketEntries (bucket, filter) { - const data = await readFile(bucket, 'utf8') - return _bucketEntries(data, filter) -} - -function _bucketEntries (data) { - const entries = [] - data.split('\n').forEach((entry) => { - if (!entry) { - return - } - - const pieces = entry.split('\t') - if (!pieces[1] || hashEntry(pieces[1]) !== pieces[0]) { - // Hash is no good! Corruption or malice? Doesn't matter! - // EJECT EJECT - return - } - let obj - try { - obj = JSON.parse(pieces[1]) - } catch (_) { - // eslint-ignore-next-line no-empty-block - } - // coverage disabled here, no need to test with an entry that parses to something falsey - // istanbul ignore else - if (obj) { - entries.push(obj) - } - }) - return entries -} - -module.exports.bucketDir = bucketDir - -function bucketDir (cache) { - return path.join(cache, `index-v${indexV}`) -} - -module.exports.bucketPath = bucketPath - -function bucketPath (cache, key) { - const hashed = hashKey(key) - return path.join.apply( - path, - [bucketDir(cache)].concat(hashToSegments(hashed)) - ) -} - -module.exports.hashKey = hashKey - -function hashKey (key) { - return hash(key, 'sha256') -} - -module.exports.hashEntry = hashEntry - -function hashEntry (str) { - return hash(str, 'sha1') -} - -function hash (str, digest) { - return crypto - .createHash(digest) - .update(str) - .digest('hex') -} - -function formatEntry (cache, entry, keepAll) { - // Treat null digests as deletions. They'll shadow any previous entries. - if (!entry.integrity && !keepAll) { - return null - } - - return { - key: entry.key, - integrity: entry.integrity, - path: entry.integrity ? contentPath(cache, entry.integrity) : undefined, - size: entry.size, - time: entry.time, - metadata: entry.metadata, - } -} - -function readdirOrEmpty (dir) { - return readdir(dir).catch((err) => { - if (err.code === 'ENOENT' || err.code === 'ENOTDIR') { - return [] - } - - throw err - }) -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/get.js b/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/get.js deleted file mode 100644 index 80ec206c7ecaaa..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/get.js +++ /dev/null @@ -1,170 +0,0 @@ -'use strict' - -const Collect = require('minipass-collect') -const { Minipass } = require('minipass') -const Pipeline = require('minipass-pipeline') - -const index = require('./entry-index') -const memo = require('./memoization') -const read = require('./content/read') - -async function getData (cache, key, opts = {}) { - const { integrity, memoize, size } = opts - const memoized = memo.get(cache, key, opts) - if (memoized && memoize !== false) { - return { - metadata: memoized.entry.metadata, - data: memoized.data, - integrity: memoized.entry.integrity, - size: memoized.entry.size, - } - } - - const entry = await index.find(cache, key, opts) - if (!entry) { - throw new index.NotFoundError(cache, key) - } - const data = await read(cache, entry.integrity, { integrity, size }) - if (memoize) { - memo.put(cache, entry, data, opts) - } - - return { - data, - metadata: entry.metadata, - size: entry.size, - integrity: entry.integrity, - } -} -module.exports = getData - -async function getDataByDigest (cache, key, opts = {}) { - const { integrity, memoize, size } = opts - const memoized = memo.get.byDigest(cache, key, opts) - if (memoized && memoize !== false) { - return memoized - } - - const res = await read(cache, key, { integrity, size }) - if (memoize) { - memo.put.byDigest(cache, key, res, opts) - } - return res -} -module.exports.byDigest = getDataByDigest - -const getMemoizedStream = (memoized) => { - const stream = new Minipass() - stream.on('newListener', function (ev, cb) { - ev === 'metadata' && cb(memoized.entry.metadata) - ev === 'integrity' && cb(memoized.entry.integrity) - ev === 'size' && cb(memoized.entry.size) - }) - stream.end(memoized.data) - return stream -} - -function getStream (cache, key, opts = {}) { - const { memoize, size } = opts - const memoized = memo.get(cache, key, opts) - if (memoized && memoize !== false) { - return getMemoizedStream(memoized) - } - - const stream = new Pipeline() - // Set all this up to run on the stream and then just return the stream - Promise.resolve().then(async () => { - const entry = await index.find(cache, key) - if (!entry) { - throw new index.NotFoundError(cache, key) - } - - stream.emit('metadata', entry.metadata) - stream.emit('integrity', entry.integrity) - stream.emit('size', entry.size) - stream.on('newListener', function (ev, cb) { - ev === 'metadata' && cb(entry.metadata) - ev === 'integrity' && cb(entry.integrity) - ev === 'size' && cb(entry.size) - }) - - const src = read.readStream( - cache, - entry.integrity, - { ...opts, size: typeof size !== 'number' ? entry.size : size } - ) - - if (memoize) { - const memoStream = new Collect.PassThrough() - memoStream.on('collect', data => memo.put(cache, entry, data, opts)) - stream.unshift(memoStream) - } - stream.unshift(src) - return stream - }).catch((err) => stream.emit('error', err)) - - return stream -} - -module.exports.stream = getStream - -function getStreamDigest (cache, integrity, opts = {}) { - const { memoize } = opts - const memoized = memo.get.byDigest(cache, integrity, opts) - if (memoized && memoize !== false) { - const stream = new Minipass() - stream.end(memoized) - return stream - } else { - const stream = read.readStream(cache, integrity, opts) - if (!memoize) { - return stream - } - - const memoStream = new Collect.PassThrough() - memoStream.on('collect', data => memo.put.byDigest( - cache, - integrity, - data, - opts - )) - return new Pipeline(stream, memoStream) - } -} - -module.exports.stream.byDigest = getStreamDigest - -function info (cache, key, opts = {}) { - const { memoize } = opts - const memoized = memo.get(cache, key, opts) - if (memoized && memoize !== false) { - return Promise.resolve(memoized.entry) - } else { - return index.find(cache, key) - } -} -module.exports.info = info - -async function copy (cache, key, dest, opts = {}) { - const entry = await index.find(cache, key, opts) - if (!entry) { - throw new index.NotFoundError(cache, key) - } - await read.copy(cache, entry.integrity, dest, opts) - return { - metadata: entry.metadata, - size: entry.size, - integrity: entry.integrity, - } -} - -module.exports.copy = copy - -async function copyByDigest (cache, key, dest, opts = {}) { - await read.copy(cache, key, dest, opts) - return key -} - -module.exports.copy.byDigest = copyByDigest - -module.exports.hasContent = read.hasContent diff --git a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/index.js b/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/index.js deleted file mode 100644 index c9b0da5f3a271b..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/index.js +++ /dev/null @@ -1,42 +0,0 @@ -'use strict' - -const get = require('./get.js') -const put = require('./put.js') -const rm = require('./rm.js') -const verify = require('./verify.js') -const { clearMemoized } = require('./memoization.js') -const tmp = require('./util/tmp.js') -const index = require('./entry-index.js') - -module.exports.index = {} -module.exports.index.compact = index.compact -module.exports.index.insert = index.insert - -module.exports.ls = index.ls -module.exports.ls.stream = index.lsStream - -module.exports.get = get -module.exports.get.byDigest = get.byDigest -module.exports.get.stream = get.stream -module.exports.get.stream.byDigest = get.stream.byDigest -module.exports.get.copy = get.copy -module.exports.get.copy.byDigest = get.copy.byDigest -module.exports.get.info = get.info -module.exports.get.hasContent = get.hasContent - -module.exports.put = put -module.exports.put.stream = put.stream - -module.exports.rm = rm.entry -module.exports.rm.all = rm.all -module.exports.rm.entry = module.exports.rm -module.exports.rm.content = rm.content - -module.exports.clearMemoized = clearMemoized - -module.exports.tmp = {} -module.exports.tmp.mkdir = tmp.mkdir -module.exports.tmp.withTmp = tmp.withTmp - -module.exports.verify = verify -module.exports.verify.lastRun = verify.lastRun diff --git a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/memoization.js b/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/memoization.js deleted file mode 100644 index 2ecc60912e4563..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/memoization.js +++ /dev/null @@ -1,72 +0,0 @@ -'use strict' - -const { LRUCache } = require('lru-cache') - -const MEMOIZED = new LRUCache({ - max: 500, - maxSize: 50 * 1024 * 1024, // 50MB - ttl: 3 * 60 * 1000, // 3 minutes - sizeCalculation: (entry, key) => key.startsWith('key:') ? entry.data.length : entry.length, -}) - -module.exports.clearMemoized = clearMemoized - -function clearMemoized () { - const old = {} - MEMOIZED.forEach((v, k) => { - old[k] = v - }) - MEMOIZED.clear() - return old -} - -module.exports.put = put - -function put (cache, entry, data, opts) { - pickMem(opts).set(`key:${cache}:${entry.key}`, { entry, data }) - putDigest(cache, entry.integrity, data, opts) -} - -module.exports.put.byDigest = putDigest - -function putDigest (cache, integrity, data, opts) { - pickMem(opts).set(`digest:${cache}:${integrity}`, data) -} - -module.exports.get = get - -function get (cache, key, opts) { - return pickMem(opts).get(`key:${cache}:${key}`) -} - -module.exports.get.byDigest = getDigest - -function getDigest (cache, integrity, opts) { - return pickMem(opts).get(`digest:${cache}:${integrity}`) -} - -class ObjProxy { - constructor (obj) { - this.obj = obj - } - - get (key) { - return this.obj[key] - } - - set (key, val) { - this.obj[key] = val - } -} - -function pickMem (opts) { - if (!opts || !opts.memoize) { - return MEMOIZED - } else if (opts.memoize.get && opts.memoize.set) { - return opts.memoize - } else if (typeof opts.memoize === 'object') { - return new ObjProxy(opts.memoize) - } else { - return MEMOIZED - } -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/put.js b/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/put.js deleted file mode 100644 index 9fc932d5f6dec5..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/put.js +++ /dev/null @@ -1,80 +0,0 @@ -'use strict' - -const index = require('./entry-index') -const memo = require('./memoization') -const write = require('./content/write') -const Flush = require('minipass-flush') -const { PassThrough } = require('minipass-collect') -const Pipeline = require('minipass-pipeline') - -const putOpts = (opts) => ({ - algorithms: ['sha512'], - ...opts, -}) - -module.exports = putData - -async function putData (cache, key, data, opts = {}) { - const { memoize } = opts - opts = putOpts(opts) - const res = await write(cache, data, opts) - const entry = await index.insert(cache, key, res.integrity, { ...opts, size: res.size }) - if (memoize) { - memo.put(cache, entry, data, opts) - } - - return res.integrity -} - -module.exports.stream = putStream - -function putStream (cache, key, opts = {}) { - const { memoize } = opts - opts = putOpts(opts) - let integrity - let size - let error - - let memoData - const pipeline = new Pipeline() - // first item in the pipeline is the memoizer, because we need - // that to end first and get the collected data. - if (memoize) { - const memoizer = new PassThrough().on('collect', data => { - memoData = data - }) - pipeline.push(memoizer) - } - - // contentStream is a write-only, not a passthrough - // no data comes out of it. - const contentStream = write.stream(cache, opts) - .on('integrity', (int) => { - integrity = int - }) - .on('size', (s) => { - size = s - }) - .on('error', (err) => { - error = err - }) - - pipeline.push(contentStream) - - // last but not least, we write the index and emit hash and size, - // and memoize if we're doing that - pipeline.push(new Flush({ - async flush () { - if (!error) { - const entry = await index.insert(cache, key, integrity, { ...opts, size }) - if (memoize && memoData) { - memo.put(cache, entry, memoData, opts) - } - pipeline.emit('integrity', integrity) - pipeline.emit('size', size) - } - }, - })) - - return pipeline -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/rm.js b/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/rm.js deleted file mode 100644 index a94760c7cf2430..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/rm.js +++ /dev/null @@ -1,31 +0,0 @@ -'use strict' - -const { rm } = require('fs/promises') -const glob = require('./util/glob.js') -const index = require('./entry-index') -const memo = require('./memoization') -const path = require('path') -const rmContent = require('./content/rm') - -module.exports = entry -module.exports.entry = entry - -function entry (cache, key, opts) { - memo.clearMemoized() - return index.delete(cache, key, opts) -} - -module.exports.content = content - -function content (cache, integrity) { - memo.clearMemoized() - return rmContent(cache, integrity) -} - -module.exports.all = all - -async function all (cache) { - memo.clearMemoized() - const paths = await glob(path.join(cache, '*(content-*|index-*)'), { silent: true, nosort: true }) - return Promise.all(paths.map((p) => rm(p, { recursive: true, force: true }))) -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/util/glob.js b/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/util/glob.js deleted file mode 100644 index 8500c1c16a429f..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/util/glob.js +++ /dev/null @@ -1,7 +0,0 @@ -'use strict' - -const { glob } = require('glob') -const path = require('path') - -const globify = (pattern) => pattern.split(path.win32.sep).join(path.posix.sep) -module.exports = (path, options) => glob(globify(path), options) diff --git a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/util/hash-to-segments.js b/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/util/hash-to-segments.js deleted file mode 100644 index 445599b5038088..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/util/hash-to-segments.js +++ /dev/null @@ -1,7 +0,0 @@ -'use strict' - -module.exports = hashToSegments - -function hashToSegments (hash) { - return [hash.slice(0, 2), hash.slice(2, 4), hash.slice(4)] -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/util/tmp.js b/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/util/tmp.js deleted file mode 100644 index 0bf5302136ebeb..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/util/tmp.js +++ /dev/null @@ -1,26 +0,0 @@ -'use strict' - -const { withTempDir } = require('@npmcli/fs') -const fs = require('fs/promises') -const path = require('path') - -module.exports.mkdir = mktmpdir - -async function mktmpdir (cache, opts = {}) { - const { tmpPrefix } = opts - const tmpDir = path.join(cache, 'tmp') - await fs.mkdir(tmpDir, { recursive: true, owner: 'inherit' }) - // do not use path.join(), it drops the trailing / if tmpPrefix is unset - const target = `${tmpDir}${path.sep}${tmpPrefix || ''}` - return fs.mkdtemp(target, { owner: 'inherit' }) -} - -module.exports.withTmp = withTmp - -function withTmp (cache, opts, cb) { - if (!cb) { - cb = opts - opts = {} - } - return withTempDir(path.join(cache, 'tmp'), cb, opts) -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/verify.js b/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/verify.js deleted file mode 100644 index d7423da1295b68..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/cacache/lib/verify.js +++ /dev/null @@ -1,257 +0,0 @@ -'use strict' - -const { - mkdir, - readFile, - rm, - stat, - truncate, - writeFile, -} = require('fs/promises') -const pMap = require('p-map') -const contentPath = require('./content/path') -const fsm = require('fs-minipass') -const glob = require('./util/glob.js') -const index = require('./entry-index') -const path = require('path') -const ssri = require('ssri') - -const hasOwnProperty = (obj, key) => - Object.prototype.hasOwnProperty.call(obj, key) - -const verifyOpts = (opts) => ({ - concurrency: 20, - log: { silly () {} }, - ...opts, -}) - -module.exports = verify - -async function verify (cache, opts) { - opts = verifyOpts(opts) - opts.log.silly('verify', 'verifying cache at', cache) - - const steps = [ - markStartTime, - fixPerms, - garbageCollect, - rebuildIndex, - cleanTmp, - writeVerifile, - markEndTime, - ] - - const stats = {} - for (const step of steps) { - const label = step.name - const start = new Date() - const s = await step(cache, opts) - if (s) { - Object.keys(s).forEach((k) => { - stats[k] = s[k] - }) - } - const end = new Date() - if (!stats.runTime) { - stats.runTime = {} - } - stats.runTime[label] = end - start - } - stats.runTime.total = stats.endTime - stats.startTime - opts.log.silly( - 'verify', - 'verification finished for', - cache, - 'in', - `${stats.runTime.total}ms` - ) - return stats -} - -async function markStartTime () { - return { startTime: new Date() } -} - -async function markEndTime () { - return { endTime: new Date() } -} - -async function fixPerms (cache, opts) { - opts.log.silly('verify', 'fixing cache permissions') - await mkdir(cache, { recursive: true }) - return null -} - -// Implements a naive mark-and-sweep tracing garbage collector. -// -// The algorithm is basically as follows: -// 1. Read (and filter) all index entries ("pointers") -// 2. Mark each integrity value as "live" -// 3. Read entire filesystem tree in `content-vX/` dir -// 4. If content is live, verify its checksum and delete it if it fails -// 5. If content is not marked as live, rm it. -// -async function garbageCollect (cache, opts) { - opts.log.silly('verify', 'garbage collecting content') - const indexStream = index.lsStream(cache) - const liveContent = new Set() - indexStream.on('data', (entry) => { - if (opts.filter && !opts.filter(entry)) { - return - } - - // integrity is stringified, re-parse it so we can get each hash - const integrity = ssri.parse(entry.integrity) - for (const algo in integrity) { - liveContent.add(integrity[algo].toString()) - } - }) - await new Promise((resolve, reject) => { - indexStream.on('end', resolve).on('error', reject) - }) - const contentDir = contentPath.contentDir(cache) - const files = await glob(path.join(contentDir, '**'), { - follow: false, - nodir: true, - nosort: true, - }) - const stats = { - verifiedContent: 0, - reclaimedCount: 0, - reclaimedSize: 0, - badContentCount: 0, - keptSize: 0, - } - await pMap( - files, - async (f) => { - const split = f.split(/[/\\]/) - const digest = split.slice(split.length - 3).join('') - const algo = split[split.length - 4] - const integrity = ssri.fromHex(digest, algo) - if (liveContent.has(integrity.toString())) { - const info = await verifyContent(f, integrity) - if (!info.valid) { - stats.reclaimedCount++ - stats.badContentCount++ - stats.reclaimedSize += info.size - } else { - stats.verifiedContent++ - stats.keptSize += info.size - } - } else { - // No entries refer to this content. We can delete. - stats.reclaimedCount++ - const s = await stat(f) - await rm(f, { recursive: true, force: true }) - stats.reclaimedSize += s.size - } - return stats - }, - { concurrency: opts.concurrency } - ) - return stats -} - -async function verifyContent (filepath, sri) { - const contentInfo = {} - try { - const { size } = await stat(filepath) - contentInfo.size = size - contentInfo.valid = true - await ssri.checkStream(new fsm.ReadStream(filepath), sri) - } catch (err) { - if (err.code === 'ENOENT') { - return { size: 0, valid: false } - } - if (err.code !== 'EINTEGRITY') { - throw err - } - - await rm(filepath, { recursive: true, force: true }) - contentInfo.valid = false - } - return contentInfo -} - -async function rebuildIndex (cache, opts) { - opts.log.silly('verify', 'rebuilding index') - const entries = await index.ls(cache) - const stats = { - missingContent: 0, - rejectedEntries: 0, - totalEntries: 0, - } - const buckets = {} - for (const k in entries) { - /* istanbul ignore else */ - if (hasOwnProperty(entries, k)) { - const hashed = index.hashKey(k) - const entry = entries[k] - const excluded = opts.filter && !opts.filter(entry) - excluded && stats.rejectedEntries++ - if (buckets[hashed] && !excluded) { - buckets[hashed].push(entry) - } else if (buckets[hashed] && excluded) { - // skip - } else if (excluded) { - buckets[hashed] = [] - buckets[hashed]._path = index.bucketPath(cache, k) - } else { - buckets[hashed] = [entry] - buckets[hashed]._path = index.bucketPath(cache, k) - } - } - } - await pMap( - Object.keys(buckets), - (key) => { - return rebuildBucket(cache, buckets[key], stats, opts) - }, - { concurrency: opts.concurrency } - ) - return stats -} - -async function rebuildBucket (cache, bucket, stats) { - await truncate(bucket._path) - // This needs to be serialized because cacache explicitly - // lets very racy bucket conflicts clobber each other. - for (const entry of bucket) { - const content = contentPath(cache, entry.integrity) - try { - await stat(content) - await index.insert(cache, entry.key, entry.integrity, { - metadata: entry.metadata, - size: entry.size, - time: entry.time, - }) - stats.totalEntries++ - } catch (err) { - if (err.code === 'ENOENT') { - stats.rejectedEntries++ - stats.missingContent++ - } else { - throw err - } - } - } -} - -function cleanTmp (cache, opts) { - opts.log.silly('verify', 'cleaning tmp directory') - return rm(path.join(cache, 'tmp'), { recursive: true, force: true }) -} - -async function writeVerifile (cache, opts) { - const verifile = path.join(cache, '_lastverified') - opts.log.silly('verify', 'writing verifile to ' + verifile) - return writeFile(verifile, `${Date.now()}`) -} - -module.exports.lastRun = lastRun - -async function lastRun (cache) { - const data = await readFile(path.join(cache, '_lastverified'), { encoding: 'utf8' }) - return new Date(+data) -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/cacache/package.json b/deps/npm/node_modules/node-gyp/node_modules/cacache/package.json deleted file mode 100644 index 6e6219158ed759..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/cacache/package.json +++ /dev/null @@ -1,82 +0,0 @@ -{ - "name": "cacache", - "version": "18.0.4", - "cache-version": { - "content": "2", - "index": "5" - }, - "description": "Fast, fault-tolerant, cross-platform, disk-based, data-agnostic, content-addressable cache.", - "main": "lib/index.js", - "files": [ - "bin/", - "lib/" - ], - "scripts": { - "test": "tap", - "snap": "tap", - "coverage": "tap", - "test-docker": "docker run -it --rm --name pacotest -v \"$PWD\":/tmp -w /tmp node:latest npm test", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "npmclilint": "npmcli-lint", - "lintfix": "npm run lint -- --fix", - "postsnap": "npm run lintfix --", - "postlint": "template-oss-check", - "posttest": "npm run lint", - "template-oss-apply": "template-oss-apply --force" - }, - "repository": { - "type": "git", - "url": "git+https://github.com/npm/cacache.git" - }, - "keywords": [ - "cache", - "caching", - "content-addressable", - "sri", - "sri hash", - "subresource integrity", - "cache", - "storage", - "store", - "file store", - "filesystem", - "disk cache", - "disk storage" - ], - "license": "ISC", - "dependencies": { - "@npmcli/fs": "^3.1.0", - "fs-minipass": "^3.0.0", - "glob": "^10.2.2", - "lru-cache": "^10.0.1", - "minipass": "^7.0.3", - "minipass-collect": "^2.0.1", - "minipass-flush": "^1.0.5", - "minipass-pipeline": "^1.2.4", - "p-map": "^4.0.0", - "ssri": "^10.0.0", - "tar": "^6.1.11", - "unique-filename": "^3.0.0" - }, - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.22.0", - "tap": "^16.0.0" - }, - "engines": { - "node": "^16.14.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "windowsCI": false, - "version": "4.22.0", - "publish": "true" - }, - "author": "GitHub Inc.", - "tap": { - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - } -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/chownr/LICENSE.md b/deps/npm/node_modules/node-gyp/node_modules/chownr/LICENSE.md new file mode 100644 index 00000000000000..881248b6d7f0ca --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/chownr/LICENSE.md @@ -0,0 +1,63 @@ +All packages under `src/` are licensed according to the terms in +their respective `LICENSE` or `LICENSE.md` files. + +The remainder of this project is licensed under the Blue Oak +Model License, as follows: + +----- + +# Blue Oak Model License + +Version 1.0.0 + +## Purpose + +This license gives everyone as much permission to work with +this software as possible, while protecting contributors +from liability. + +## Acceptance + +In order to receive this license, you must agree to its +rules. The rules of this license are both obligations +under that agreement and conditions to your license. +You must not do anything with this software that triggers +a rule that you cannot or will not follow. + +## Copyright + +Each contributor licenses you to do everything with this +software that would otherwise infringe that contributor's +copyright in it. + +## Notices + +You must ensure that everyone who gets a copy of +any part of this software from you, with or without +changes, also gets the text of this license or a link to +. + +## Excuse + +If anyone notifies you in writing that you have not +complied with [Notices](#notices), you can keep your +license by taking all practical steps to comply within 30 +days after the notice. If you do not do so, your license +ends immediately. + +## Patent + +Each contributor licenses you to do everything with this +software that would otherwise infringe any patent claims +they can license or become able to license. + +## Reliability + +No contributor can revoke this license. + +## No Liability + +***As far as the law allows, this software comes as is, +without any warranty or condition, and no contributor +will be liable to anyone for any damages related to this +software or this license, under any kind of legal claim.*** diff --git a/deps/npm/node_modules/node-gyp/node_modules/chownr/dist/commonjs/index.js b/deps/npm/node_modules/node-gyp/node_modules/chownr/dist/commonjs/index.js new file mode 100644 index 00000000000000..6a7b68d5eac26e --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/chownr/dist/commonjs/index.js @@ -0,0 +1,93 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.chownrSync = exports.chownr = void 0; +const node_fs_1 = __importDefault(require("node:fs")); +const node_path_1 = __importDefault(require("node:path")); +const lchownSync = (path, uid, gid) => { + try { + return node_fs_1.default.lchownSync(path, uid, gid); + } + catch (er) { + if (er?.code !== 'ENOENT') + throw er; + } +}; +const chown = (cpath, uid, gid, cb) => { + node_fs_1.default.lchown(cpath, uid, gid, er => { + // Skip ENOENT error + cb(er && er?.code !== 'ENOENT' ? er : null); + }); +}; +const chownrKid = (p, child, uid, gid, cb) => { + if (child.isDirectory()) { + (0, exports.chownr)(node_path_1.default.resolve(p, child.name), uid, gid, (er) => { + if (er) + return cb(er); + const cpath = node_path_1.default.resolve(p, child.name); + chown(cpath, uid, gid, cb); + }); + } + else { + const cpath = node_path_1.default.resolve(p, child.name); + chown(cpath, uid, gid, cb); + } +}; +const chownr = (p, uid, gid, cb) => { + node_fs_1.default.readdir(p, { withFileTypes: true }, (er, children) => { + // any error other than ENOTDIR or ENOTSUP means it's not readable, + // or doesn't exist. give up. + if (er) { + if (er.code === 'ENOENT') + return cb(); + else if (er.code !== 'ENOTDIR' && er.code !== 'ENOTSUP') + return cb(er); + } + if (er || !children.length) + return chown(p, uid, gid, cb); + let len = children.length; + let errState = null; + const then = (er) => { + /* c8 ignore start */ + if (errState) + return; + /* c8 ignore stop */ + if (er) + return cb((errState = er)); + if (--len === 0) + return chown(p, uid, gid, cb); + }; + for (const child of children) { + chownrKid(p, child, uid, gid, then); + } + }); +}; +exports.chownr = chownr; +const chownrKidSync = (p, child, uid, gid) => { + if (child.isDirectory()) + (0, exports.chownrSync)(node_path_1.default.resolve(p, child.name), uid, gid); + lchownSync(node_path_1.default.resolve(p, child.name), uid, gid); +}; +const chownrSync = (p, uid, gid) => { + let children; + try { + children = node_fs_1.default.readdirSync(p, { withFileTypes: true }); + } + catch (er) { + const e = er; + if (e?.code === 'ENOENT') + return; + else if (e?.code === 'ENOTDIR' || e?.code === 'ENOTSUP') + return lchownSync(p, uid, gid); + else + throw e; + } + for (const child of children) { + chownrKidSync(p, child, uid, gid); + } + return lchownSync(p, uid, gid); +}; +exports.chownrSync = chownrSync; +//# sourceMappingURL=index.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/cjs/package.json b/deps/npm/node_modules/node-gyp/node_modules/chownr/dist/commonjs/package.json similarity index 100% rename from deps/npm/node_modules/node-gyp/node_modules/isexe/dist/cjs/package.json rename to deps/npm/node_modules/node-gyp/node_modules/chownr/dist/commonjs/package.json diff --git a/deps/npm/node_modules/node-gyp/node_modules/chownr/dist/esm/index.js b/deps/npm/node_modules/node-gyp/node_modules/chownr/dist/esm/index.js new file mode 100644 index 00000000000000..5c2815297a67cb --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/chownr/dist/esm/index.js @@ -0,0 +1,85 @@ +import fs from 'node:fs'; +import path from 'node:path'; +const lchownSync = (path, uid, gid) => { + try { + return fs.lchownSync(path, uid, gid); + } + catch (er) { + if (er?.code !== 'ENOENT') + throw er; + } +}; +const chown = (cpath, uid, gid, cb) => { + fs.lchown(cpath, uid, gid, er => { + // Skip ENOENT error + cb(er && er?.code !== 'ENOENT' ? er : null); + }); +}; +const chownrKid = (p, child, uid, gid, cb) => { + if (child.isDirectory()) { + chownr(path.resolve(p, child.name), uid, gid, (er) => { + if (er) + return cb(er); + const cpath = path.resolve(p, child.name); + chown(cpath, uid, gid, cb); + }); + } + else { + const cpath = path.resolve(p, child.name); + chown(cpath, uid, gid, cb); + } +}; +export const chownr = (p, uid, gid, cb) => { + fs.readdir(p, { withFileTypes: true }, (er, children) => { + // any error other than ENOTDIR or ENOTSUP means it's not readable, + // or doesn't exist. give up. + if (er) { + if (er.code === 'ENOENT') + return cb(); + else if (er.code !== 'ENOTDIR' && er.code !== 'ENOTSUP') + return cb(er); + } + if (er || !children.length) + return chown(p, uid, gid, cb); + let len = children.length; + let errState = null; + const then = (er) => { + /* c8 ignore start */ + if (errState) + return; + /* c8 ignore stop */ + if (er) + return cb((errState = er)); + if (--len === 0) + return chown(p, uid, gid, cb); + }; + for (const child of children) { + chownrKid(p, child, uid, gid, then); + } + }); +}; +const chownrKidSync = (p, child, uid, gid) => { + if (child.isDirectory()) + chownrSync(path.resolve(p, child.name), uid, gid); + lchownSync(path.resolve(p, child.name), uid, gid); +}; +export const chownrSync = (p, uid, gid) => { + let children; + try { + children = fs.readdirSync(p, { withFileTypes: true }); + } + catch (er) { + const e = er; + if (e?.code === 'ENOENT') + return; + else if (e?.code === 'ENOTDIR' || e?.code === 'ENOTSUP') + return lchownSync(p, uid, gid); + else + throw e; + } + for (const child of children) { + chownrKidSync(p, child, uid, gid); + } + return lchownSync(p, uid, gid); +}; +//# sourceMappingURL=index.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/mjs/package.json b/deps/npm/node_modules/node-gyp/node_modules/chownr/dist/esm/package.json similarity index 100% rename from deps/npm/node_modules/node-gyp/node_modules/isexe/dist/mjs/package.json rename to deps/npm/node_modules/node-gyp/node_modules/chownr/dist/esm/package.json diff --git a/deps/npm/node_modules/node-gyp/node_modules/chownr/package.json b/deps/npm/node_modules/node-gyp/node_modules/chownr/package.json new file mode 100644 index 00000000000000..09aa6b2e2e576d --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/chownr/package.json @@ -0,0 +1,69 @@ +{ + "author": "Isaac Z. Schlueter (http://blog.izs.me/)", + "name": "chownr", + "description": "like `chown -R`", + "version": "3.0.0", + "repository": { + "type": "git", + "url": "git://github.com/isaacs/chownr.git" + }, + "files": [ + "dist" + ], + "devDependencies": { + "@types/node": "^20.12.5", + "mkdirp": "^3.0.1", + "prettier": "^3.2.5", + "rimraf": "^5.0.5", + "tap": "^18.7.2", + "tshy": "^1.13.1", + "typedoc": "^0.25.12" + }, + "scripts": { + "prepare": "tshy", + "pretest": "npm run prepare", + "test": "tap", + "preversion": "npm test", + "postversion": "npm publish", + "prepublishOnly": "git push origin --follow-tags", + "format": "prettier --write . --loglevel warn", + "typedoc": "typedoc --tsconfig .tshy/esm.json ./src/*.ts" + }, + "license": "BlueOak-1.0.0", + "engines": { + "node": ">=18" + }, + "tshy": { + "exports": { + "./package.json": "./package.json", + ".": "./src/index.ts" + } + }, + "exports": { + "./package.json": "./package.json", + ".": { + "import": { + "types": "./dist/esm/index.d.ts", + "default": "./dist/esm/index.js" + }, + "require": { + "types": "./dist/commonjs/index.d.ts", + "default": "./dist/commonjs/index.js" + } + } + }, + "main": "./dist/commonjs/index.js", + "types": "./dist/commonjs/index.d.ts", + "type": "module", + "prettier": { + "semi": false, + "printWidth": 75, + "tabWidth": 2, + "useTabs": false, + "singleQuote": true, + "jsxSingleQuote": false, + "bracketSameLine": true, + "arrowParens": "avoid", + "endOfLine": "lf" + } +} diff --git a/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/cjs/index.js b/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/cjs/index.js deleted file mode 100644 index cefcb66b5c5434..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/cjs/index.js +++ /dev/null @@ -1,46 +0,0 @@ -"use strict"; -var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { - if (k2 === undefined) k2 = k; - var desc = Object.getOwnPropertyDescriptor(m, k); - if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { - desc = { enumerable: true, get: function() { return m[k]; } }; - } - Object.defineProperty(o, k2, desc); -}) : (function(o, m, k, k2) { - if (k2 === undefined) k2 = k; - o[k2] = m[k]; -})); -var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { - Object.defineProperty(o, "default", { enumerable: true, value: v }); -}) : function(o, v) { - o["default"] = v; -}); -var __importStar = (this && this.__importStar) || function (mod) { - if (mod && mod.__esModule) return mod; - var result = {}; - if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); - __setModuleDefault(result, mod); - return result; -}; -var __exportStar = (this && this.__exportStar) || function(m, exports) { - for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p); -}; -Object.defineProperty(exports, "__esModule", { value: true }); -exports.sync = exports.isexe = exports.posix = exports.win32 = void 0; -const posix = __importStar(require("./posix.js")); -exports.posix = posix; -const win32 = __importStar(require("./win32.js")); -exports.win32 = win32; -__exportStar(require("./options.js"), exports); -const platform = process.env._ISEXE_TEST_PLATFORM_ || process.platform; -const impl = platform === 'win32' ? win32 : posix; -/** - * Determine whether a path is executable on the current platform. - */ -exports.isexe = impl.isexe; -/** - * Synchronously determine whether a path is executable on the - * current platform. - */ -exports.sync = impl.sync; -//# sourceMappingURL=index.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/cjs/options.js b/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/cjs/options.js deleted file mode 100644 index 0dfad0762cc32c..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/cjs/options.js +++ /dev/null @@ -1,3 +0,0 @@ -"use strict"; -Object.defineProperty(exports, "__esModule", { value: true }); -//# sourceMappingURL=options.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/cjs/posix.js b/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/cjs/posix.js deleted file mode 100644 index 3bc5e79d7007e9..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/cjs/posix.js +++ /dev/null @@ -1,67 +0,0 @@ -"use strict"; -/** - * This is the Posix implementation of isexe, which uses the file - * mode and uid/gid values. - * - * @module - */ -Object.defineProperty(exports, "__esModule", { value: true }); -exports.sync = exports.isexe = void 0; -const fs_1 = require("fs"); -const promises_1 = require("fs/promises"); -/** - * Determine whether a path is executable according to the mode and - * current (or specified) user and group IDs. - */ -const isexe = async (path, options = {}) => { - const { ignoreErrors = false } = options; - try { - return checkStat(await (0, promises_1.stat)(path), options); - } - catch (e) { - const er = e; - if (ignoreErrors || er.code === 'EACCES') - return false; - throw er; - } -}; -exports.isexe = isexe; -/** - * Synchronously determine whether a path is executable according to - * the mode and current (or specified) user and group IDs. - */ -const sync = (path, options = {}) => { - const { ignoreErrors = false } = options; - try { - return checkStat((0, fs_1.statSync)(path), options); - } - catch (e) { - const er = e; - if (ignoreErrors || er.code === 'EACCES') - return false; - throw er; - } -}; -exports.sync = sync; -const checkStat = (stat, options) => stat.isFile() && checkMode(stat, options); -const checkMode = (stat, options) => { - const myUid = options.uid ?? process.getuid?.(); - const myGroups = options.groups ?? process.getgroups?.() ?? []; - const myGid = options.gid ?? process.getgid?.() ?? myGroups[0]; - if (myUid === undefined || myGid === undefined) { - throw new Error('cannot get uid or gid'); - } - const groups = new Set([myGid, ...myGroups]); - const mod = stat.mode; - const uid = stat.uid; - const gid = stat.gid; - const u = parseInt('100', 8); - const g = parseInt('010', 8); - const o = parseInt('001', 8); - const ug = u | g; - return !!(mod & o || - (mod & g && groups.has(gid)) || - (mod & u && uid === myUid) || - (mod & ug && myUid === 0)); -}; -//# sourceMappingURL=posix.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/cjs/win32.js b/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/cjs/win32.js deleted file mode 100644 index fa7a4d2f7d240d..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/cjs/win32.js +++ /dev/null @@ -1,62 +0,0 @@ -"use strict"; -/** - * This is the Windows implementation of isexe, which uses the file - * extension and PATHEXT setting. - * - * @module - */ -Object.defineProperty(exports, "__esModule", { value: true }); -exports.sync = exports.isexe = void 0; -const fs_1 = require("fs"); -const promises_1 = require("fs/promises"); -/** - * Determine whether a path is executable based on the file extension - * and PATHEXT environment variable (or specified pathExt option) - */ -const isexe = async (path, options = {}) => { - const { ignoreErrors = false } = options; - try { - return checkStat(await (0, promises_1.stat)(path), path, options); - } - catch (e) { - const er = e; - if (ignoreErrors || er.code === 'EACCES') - return false; - throw er; - } -}; -exports.isexe = isexe; -/** - * Synchronously determine whether a path is executable based on the file - * extension and PATHEXT environment variable (or specified pathExt option) - */ -const sync = (path, options = {}) => { - const { ignoreErrors = false } = options; - try { - return checkStat((0, fs_1.statSync)(path), path, options); - } - catch (e) { - const er = e; - if (ignoreErrors || er.code === 'EACCES') - return false; - throw er; - } -}; -exports.sync = sync; -const checkPathExt = (path, options) => { - const { pathExt = process.env.PATHEXT || '' } = options; - const peSplit = pathExt.split(';'); - if (peSplit.indexOf('') !== -1) { - return true; - } - for (let i = 0; i < peSplit.length; i++) { - const p = peSplit[i].toLowerCase(); - const ext = path.substring(path.length - p.length).toLowerCase(); - if (p && ext === p) { - return true; - } - } - return false; -}; -const checkStat = (stat, path, options) => stat.isFile() && checkPathExt(path, options); -//# sourceMappingURL=win32.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/mjs/index.js b/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/mjs/index.js deleted file mode 100644 index 1e309acd7355ec..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/mjs/index.js +++ /dev/null @@ -1,16 +0,0 @@ -import * as posix from './posix.js'; -import * as win32 from './win32.js'; -export * from './options.js'; -export { win32, posix }; -const platform = process.env._ISEXE_TEST_PLATFORM_ || process.platform; -const impl = platform === 'win32' ? win32 : posix; -/** - * Determine whether a path is executable on the current platform. - */ -export const isexe = impl.isexe; -/** - * Synchronously determine whether a path is executable on the - * current platform. - */ -export const sync = impl.sync; -//# sourceMappingURL=index.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/mjs/options.js b/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/mjs/options.js deleted file mode 100644 index e9ded40bd5b2cd..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/mjs/options.js +++ /dev/null @@ -1,2 +0,0 @@ -export {}; -//# sourceMappingURL=options.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/mjs/posix.js b/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/mjs/posix.js deleted file mode 100644 index c453776c0452f7..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/mjs/posix.js +++ /dev/null @@ -1,62 +0,0 @@ -/** - * This is the Posix implementation of isexe, which uses the file - * mode and uid/gid values. - * - * @module - */ -import { statSync } from 'fs'; -import { stat } from 'fs/promises'; -/** - * Determine whether a path is executable according to the mode and - * current (or specified) user and group IDs. - */ -export const isexe = async (path, options = {}) => { - const { ignoreErrors = false } = options; - try { - return checkStat(await stat(path), options); - } - catch (e) { - const er = e; - if (ignoreErrors || er.code === 'EACCES') - return false; - throw er; - } -}; -/** - * Synchronously determine whether a path is executable according to - * the mode and current (or specified) user and group IDs. - */ -export const sync = (path, options = {}) => { - const { ignoreErrors = false } = options; - try { - return checkStat(statSync(path), options); - } - catch (e) { - const er = e; - if (ignoreErrors || er.code === 'EACCES') - return false; - throw er; - } -}; -const checkStat = (stat, options) => stat.isFile() && checkMode(stat, options); -const checkMode = (stat, options) => { - const myUid = options.uid ?? process.getuid?.(); - const myGroups = options.groups ?? process.getgroups?.() ?? []; - const myGid = options.gid ?? process.getgid?.() ?? myGroups[0]; - if (myUid === undefined || myGid === undefined) { - throw new Error('cannot get uid or gid'); - } - const groups = new Set([myGid, ...myGroups]); - const mod = stat.mode; - const uid = stat.uid; - const gid = stat.gid; - const u = parseInt('100', 8); - const g = parseInt('010', 8); - const o = parseInt('001', 8); - const ug = u | g; - return !!(mod & o || - (mod & g && groups.has(gid)) || - (mod & u && uid === myUid) || - (mod & ug && myUid === 0)); -}; -//# sourceMappingURL=posix.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/mjs/win32.js b/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/mjs/win32.js deleted file mode 100644 index a354ee2a5115c7..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/isexe/dist/mjs/win32.js +++ /dev/null @@ -1,57 +0,0 @@ -/** - * This is the Windows implementation of isexe, which uses the file - * extension and PATHEXT setting. - * - * @module - */ -import { statSync } from 'fs'; -import { stat } from 'fs/promises'; -/** - * Determine whether a path is executable based on the file extension - * and PATHEXT environment variable (or specified pathExt option) - */ -export const isexe = async (path, options = {}) => { - const { ignoreErrors = false } = options; - try { - return checkStat(await stat(path), path, options); - } - catch (e) { - const er = e; - if (ignoreErrors || er.code === 'EACCES') - return false; - throw er; - } -}; -/** - * Synchronously determine whether a path is executable based on the file - * extension and PATHEXT environment variable (or specified pathExt option) - */ -export const sync = (path, options = {}) => { - const { ignoreErrors = false } = options; - try { - return checkStat(statSync(path), path, options); - } - catch (e) { - const er = e; - if (ignoreErrors || er.code === 'EACCES') - return false; - throw er; - } -}; -const checkPathExt = (path, options) => { - const { pathExt = process.env.PATHEXT || '' } = options; - const peSplit = pathExt.split(';'); - if (peSplit.indexOf('') !== -1) { - return true; - } - for (let i = 0; i < peSplit.length; i++) { - const p = peSplit[i].toLowerCase(); - const ext = path.substring(path.length - p.length).toLowerCase(); - if (p && ext === p) { - return true; - } - } - return false; -}; -const checkStat = (stat, path, options) => stat.isFile() && checkPathExt(path, options); -//# sourceMappingURL=win32.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/isexe/package.json b/deps/npm/node_modules/node-gyp/node_modules/isexe/package.json deleted file mode 100644 index a0e2cd04bfdbfe..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/isexe/package.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "name": "isexe", - "version": "3.1.1", - "description": "Minimal module to check if a file is executable.", - "main": "./dist/cjs/index.js", - "module": "./dist/mjs/index.js", - "types": "./dist/cjs/index.js", - "files": [ - "dist" - ], - "exports": { - ".": { - "import": { - "types": "./dist/mjs/index.d.ts", - "default": "./dist/mjs/index.js" - }, - "require": { - "types": "./dist/cjs/index.d.ts", - "default": "./dist/cjs/index.js" - } - }, - "./posix": { - "import": { - "types": "./dist/mjs/posix.d.ts", - "default": "./dist/mjs/posix.js" - }, - "require": { - "types": "./dist/cjs/posix.d.ts", - "default": "./dist/cjs/posix.js" - } - }, - "./win32": { - "import": { - "types": "./dist/mjs/win32.d.ts", - "default": "./dist/mjs/win32.js" - }, - "require": { - "types": "./dist/cjs/win32.d.ts", - "default": "./dist/cjs/win32.js" - } - }, - "./package.json": "./package.json" - }, - "devDependencies": { - "@types/node": "^20.4.5", - "@types/tap": "^15.0.8", - "c8": "^8.0.1", - "mkdirp": "^0.5.1", - "prettier": "^2.8.8", - "rimraf": "^2.5.0", - "sync-content": "^1.0.2", - "tap": "^16.3.8", - "ts-node": "^10.9.1", - "typedoc": "^0.24.8", - "typescript": "^5.1.6" - }, - "scripts": { - "preversion": "npm test", - "postversion": "npm publish", - "prepublishOnly": "git push origin --follow-tags", - "prepare": "tsc -p tsconfig/cjs.json && tsc -p tsconfig/esm.json && bash ./scripts/fixup.sh", - "pretest": "npm run prepare", - "presnap": "npm run prepare", - "test": "c8 tap", - "snap": "c8 tap", - "format": "prettier --write . --loglevel warn --ignore-path ../../.prettierignore --cache", - "typedoc": "typedoc --tsconfig tsconfig/esm.json ./src/*.ts" - }, - "author": "Isaac Z. Schlueter (http://blog.izs.me/)", - "license": "ISC", - "tap": { - "coverage": false, - "node-arg": [ - "--enable-source-maps", - "--no-warnings", - "--loader", - "ts-node/esm" - ], - "ts": false - }, - "prettier": { - "semi": false, - "printWidth": 75, - "tabWidth": 2, - "useTabs": false, - "singleQuote": true, - "jsxSingleQuote": false, - "bracketSameLine": true, - "arrowParens": "avoid", - "endOfLine": "lf" - }, - "repository": "https://github.com/isaacs/isexe", - "engines": { - "node": ">=16" - } -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/LICENSE b/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/LICENSE deleted file mode 100644 index 1808eb2844231c..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/LICENSE +++ /dev/null @@ -1,16 +0,0 @@ -ISC License - -Copyright 2017-2022 (c) npm, Inc. - -Permission to use, copy, modify, and/or distribute this software for -any purpose with or without fee is hereby granted, provided that the -above copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE COPYRIGHT HOLDER DISCLAIMS -ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE -COPYRIGHT HOLDER BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR -CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE -USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/cache/entry.js b/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/cache/entry.js deleted file mode 100644 index bfcfacbcc95e18..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/cache/entry.js +++ /dev/null @@ -1,471 +0,0 @@ -const { Request, Response } = require('minipass-fetch') -const { Minipass } = require('minipass') -const MinipassFlush = require('minipass-flush') -const cacache = require('cacache') -const url = require('url') - -const CachingMinipassPipeline = require('../pipeline.js') -const CachePolicy = require('./policy.js') -const cacheKey = require('./key.js') -const remote = require('../remote.js') - -const hasOwnProperty = (obj, prop) => Object.prototype.hasOwnProperty.call(obj, prop) - -// allow list for request headers that will be written to the cache index -// note: we will also store any request headers -// that are named in a response's vary header -const KEEP_REQUEST_HEADERS = [ - 'accept-charset', - 'accept-encoding', - 'accept-language', - 'accept', - 'cache-control', -] - -// allow list for response headers that will be written to the cache index -// note: we must not store the real response's age header, or when we load -// a cache policy based on the metadata it will think the cached response -// is always stale -const KEEP_RESPONSE_HEADERS = [ - 'cache-control', - 'content-encoding', - 'content-language', - 'content-type', - 'date', - 'etag', - 'expires', - 'last-modified', - 'link', - 'location', - 'pragma', - 'vary', -] - -// return an object containing all metadata to be written to the index -const getMetadata = (request, response, options) => { - const metadata = { - time: Date.now(), - url: request.url, - reqHeaders: {}, - resHeaders: {}, - - // options on which we must match the request and vary the response - options: { - compress: options.compress != null ? options.compress : request.compress, - }, - } - - // only save the status if it's not a 200 or 304 - if (response.status !== 200 && response.status !== 304) { - metadata.status = response.status - } - - for (const name of KEEP_REQUEST_HEADERS) { - if (request.headers.has(name)) { - metadata.reqHeaders[name] = request.headers.get(name) - } - } - - // if the request's host header differs from the host in the url - // we need to keep it, otherwise it's just noise and we ignore it - const host = request.headers.get('host') - const parsedUrl = new url.URL(request.url) - if (host && parsedUrl.host !== host) { - metadata.reqHeaders.host = host - } - - // if the response has a vary header, make sure - // we store the relevant request headers too - if (response.headers.has('vary')) { - const vary = response.headers.get('vary') - // a vary of "*" means every header causes a different response. - // in that scenario, we do not include any additional headers - // as the freshness check will always fail anyway and we don't - // want to bloat the cache indexes - if (vary !== '*') { - // copy any other request headers that will vary the response - const varyHeaders = vary.trim().toLowerCase().split(/\s*,\s*/) - for (const name of varyHeaders) { - if (request.headers.has(name)) { - metadata.reqHeaders[name] = request.headers.get(name) - } - } - } - } - - for (const name of KEEP_RESPONSE_HEADERS) { - if (response.headers.has(name)) { - metadata.resHeaders[name] = response.headers.get(name) - } - } - - for (const name of options.cacheAdditionalHeaders) { - if (response.headers.has(name)) { - metadata.resHeaders[name] = response.headers.get(name) - } - } - - return metadata -} - -// symbols used to hide objects that may be lazily evaluated in a getter -const _request = Symbol('request') -const _response = Symbol('response') -const _policy = Symbol('policy') - -class CacheEntry { - constructor ({ entry, request, response, options }) { - if (entry) { - this.key = entry.key - this.entry = entry - // previous versions of this module didn't write an explicit timestamp in - // the metadata, so fall back to the entry's timestamp. we can't use the - // entry timestamp to determine staleness because cacache will update it - // when it verifies its data - this.entry.metadata.time = this.entry.metadata.time || this.entry.time - } else { - this.key = cacheKey(request) - } - - this.options = options - - // these properties are behind getters that lazily evaluate - this[_request] = request - this[_response] = response - this[_policy] = null - } - - // returns a CacheEntry instance that satisfies the given request - // or undefined if no existing entry satisfies - static async find (request, options) { - try { - // compacts the index and returns an array of unique entries - var matches = await cacache.index.compact(options.cachePath, cacheKey(request), (A, B) => { - const entryA = new CacheEntry({ entry: A, options }) - const entryB = new CacheEntry({ entry: B, options }) - return entryA.policy.satisfies(entryB.request) - }, { - validateEntry: (entry) => { - // clean out entries with a buggy content-encoding value - if (entry.metadata && - entry.metadata.resHeaders && - entry.metadata.resHeaders['content-encoding'] === null) { - return false - } - - // if an integrity is null, it needs to have a status specified - if (entry.integrity === null) { - return !!(entry.metadata && entry.metadata.status) - } - - return true - }, - }) - } catch (err) { - // if the compact request fails, ignore the error and return - return - } - - // a cache mode of 'reload' means to behave as though we have no cache - // on the way to the network. return undefined to allow cacheFetch to - // create a brand new request no matter what. - if (options.cache === 'reload') { - return - } - - // find the specific entry that satisfies the request - let match - for (const entry of matches) { - const _entry = new CacheEntry({ - entry, - options, - }) - - if (_entry.policy.satisfies(request)) { - match = _entry - break - } - } - - return match - } - - // if the user made a PUT/POST/PATCH then we invalidate our - // cache for the same url by deleting the index entirely - static async invalidate (request, options) { - const key = cacheKey(request) - try { - await cacache.rm.entry(options.cachePath, key, { removeFully: true }) - } catch (err) { - // ignore errors - } - } - - get request () { - if (!this[_request]) { - this[_request] = new Request(this.entry.metadata.url, { - method: 'GET', - headers: this.entry.metadata.reqHeaders, - ...this.entry.metadata.options, - }) - } - - return this[_request] - } - - get response () { - if (!this[_response]) { - this[_response] = new Response(null, { - url: this.entry.metadata.url, - counter: this.options.counter, - status: this.entry.metadata.status || 200, - headers: { - ...this.entry.metadata.resHeaders, - 'content-length': this.entry.size, - }, - }) - } - - return this[_response] - } - - get policy () { - if (!this[_policy]) { - this[_policy] = new CachePolicy({ - entry: this.entry, - request: this.request, - response: this.response, - options: this.options, - }) - } - - return this[_policy] - } - - // wraps the response in a pipeline that stores the data - // in the cache while the user consumes it - async store (status) { - // if we got a status other than 200, 301, or 308, - // or the CachePolicy forbid storage, append the - // cache status header and return it untouched - if ( - this.request.method !== 'GET' || - ![200, 301, 308].includes(this.response.status) || - !this.policy.storable() - ) { - this.response.headers.set('x-local-cache-status', 'skip') - return this.response - } - - const size = this.response.headers.get('content-length') - const cacheOpts = { - algorithms: this.options.algorithms, - metadata: getMetadata(this.request, this.response, this.options), - size, - integrity: this.options.integrity, - integrityEmitter: this.response.body.hasIntegrityEmitter && this.response.body, - } - - let body = null - // we only set a body if the status is a 200, redirects are - // stored as metadata only - if (this.response.status === 200) { - let cacheWriteResolve, cacheWriteReject - const cacheWritePromise = new Promise((resolve, reject) => { - cacheWriteResolve = resolve - cacheWriteReject = reject - }).catch((err) => { - body.emit('error', err) - }) - - body = new CachingMinipassPipeline({ events: ['integrity', 'size'] }, new MinipassFlush({ - flush () { - return cacheWritePromise - }, - })) - // this is always true since if we aren't reusing the one from the remote fetch, we - // are using the one from cacache - body.hasIntegrityEmitter = true - - const onResume = () => { - const tee = new Minipass() - const cacheStream = cacache.put.stream(this.options.cachePath, this.key, cacheOpts) - // re-emit the integrity and size events on our new response body so they can be reused - cacheStream.on('integrity', i => body.emit('integrity', i)) - cacheStream.on('size', s => body.emit('size', s)) - // stick a flag on here so downstream users will know if they can expect integrity events - tee.pipe(cacheStream) - // TODO if the cache write fails, log a warning but return the response anyway - // eslint-disable-next-line promise/catch-or-return - cacheStream.promise().then(cacheWriteResolve, cacheWriteReject) - body.unshift(tee) - body.unshift(this.response.body) - } - - body.once('resume', onResume) - body.once('end', () => body.removeListener('resume', onResume)) - } else { - await cacache.index.insert(this.options.cachePath, this.key, null, cacheOpts) - } - - // note: we do not set the x-local-cache-hash header because we do not know - // the hash value until after the write to the cache completes, which doesn't - // happen until after the response has been sent and it's too late to write - // the header anyway - this.response.headers.set('x-local-cache', encodeURIComponent(this.options.cachePath)) - this.response.headers.set('x-local-cache-key', encodeURIComponent(this.key)) - this.response.headers.set('x-local-cache-mode', 'stream') - this.response.headers.set('x-local-cache-status', status) - this.response.headers.set('x-local-cache-time', new Date().toISOString()) - const newResponse = new Response(body, { - url: this.response.url, - status: this.response.status, - headers: this.response.headers, - counter: this.options.counter, - }) - return newResponse - } - - // use the cached data to create a response and return it - async respond (method, options, status) { - let response - if (method === 'HEAD' || [301, 308].includes(this.response.status)) { - // if the request is a HEAD, or the response is a redirect, - // then the metadata in the entry already includes everything - // we need to build a response - response = this.response - } else { - // we're responding with a full cached response, so create a body - // that reads from cacache and attach it to a new Response - const body = new Minipass() - const headers = { ...this.policy.responseHeaders() } - - const onResume = () => { - const cacheStream = cacache.get.stream.byDigest( - this.options.cachePath, this.entry.integrity, { memoize: this.options.memoize } - ) - cacheStream.on('error', async (err) => { - cacheStream.pause() - if (err.code === 'EINTEGRITY') { - await cacache.rm.content( - this.options.cachePath, this.entry.integrity, { memoize: this.options.memoize } - ) - } - if (err.code === 'ENOENT' || err.code === 'EINTEGRITY') { - await CacheEntry.invalidate(this.request, this.options) - } - body.emit('error', err) - cacheStream.resume() - }) - // emit the integrity and size events based on our metadata so we're consistent - body.emit('integrity', this.entry.integrity) - body.emit('size', Number(headers['content-length'])) - cacheStream.pipe(body) - } - - body.once('resume', onResume) - body.once('end', () => body.removeListener('resume', onResume)) - response = new Response(body, { - url: this.entry.metadata.url, - counter: options.counter, - status: 200, - headers, - }) - } - - response.headers.set('x-local-cache', encodeURIComponent(this.options.cachePath)) - response.headers.set('x-local-cache-hash', encodeURIComponent(this.entry.integrity)) - response.headers.set('x-local-cache-key', encodeURIComponent(this.key)) - response.headers.set('x-local-cache-mode', 'stream') - response.headers.set('x-local-cache-status', status) - response.headers.set('x-local-cache-time', new Date(this.entry.metadata.time).toUTCString()) - return response - } - - // use the provided request along with this cache entry to - // revalidate the stored response. returns a response, either - // from the cache or from the update - async revalidate (request, options) { - const revalidateRequest = new Request(request, { - headers: this.policy.revalidationHeaders(request), - }) - - try { - // NOTE: be sure to remove the headers property from the - // user supplied options, since we have already defined - // them on the new request object. if they're still in the - // options then those will overwrite the ones from the policy - var response = await remote(revalidateRequest, { - ...options, - headers: undefined, - }) - } catch (err) { - // if the network fetch fails, return the stale - // cached response unless it has a cache-control - // of 'must-revalidate' - if (!this.policy.mustRevalidate) { - return this.respond(request.method, options, 'stale') - } - - throw err - } - - if (this.policy.revalidated(revalidateRequest, response)) { - // we got a 304, write a new index to the cache and respond from cache - const metadata = getMetadata(request, response, options) - // 304 responses do not include headers that are specific to the response data - // since they do not include a body, so we copy values for headers that were - // in the old cache entry to the new one, if the new metadata does not already - // include that header - for (const name of KEEP_RESPONSE_HEADERS) { - if ( - !hasOwnProperty(metadata.resHeaders, name) && - hasOwnProperty(this.entry.metadata.resHeaders, name) - ) { - metadata.resHeaders[name] = this.entry.metadata.resHeaders[name] - } - } - - for (const name of options.cacheAdditionalHeaders) { - const inMeta = hasOwnProperty(metadata.resHeaders, name) - const inEntry = hasOwnProperty(this.entry.metadata.resHeaders, name) - const inPolicy = hasOwnProperty(this.policy.response.headers, name) - - // if the header is in the existing entry, but it is not in the metadata - // then we need to write it to the metadata as this will refresh the on-disk cache - if (!inMeta && inEntry) { - metadata.resHeaders[name] = this.entry.metadata.resHeaders[name] - } - // if the header is in the metadata, but not in the policy, then we need to set - // it in the policy so that it's included in the immediate response. future - // responses will load a new cache entry, so we don't need to change that - if (!inPolicy && inMeta) { - this.policy.response.headers[name] = metadata.resHeaders[name] - } - } - - try { - await cacache.index.insert(options.cachePath, this.key, this.entry.integrity, { - size: this.entry.size, - metadata, - }) - } catch (err) { - // if updating the cache index fails, we ignore it and - // respond anyway - } - return this.respond(request.method, options, 'revalidated') - } - - // if we got a modified response, create a new entry based on it - const newEntry = new CacheEntry({ - request, - response, - options, - }) - - // respond with the new entry while writing it to the cache - return newEntry.store('updated') - } -} - -module.exports = CacheEntry diff --git a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/cache/errors.js b/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/cache/errors.js deleted file mode 100644 index 67a66573bebe66..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/cache/errors.js +++ /dev/null @@ -1,11 +0,0 @@ -class NotCachedError extends Error { - constructor (url) { - /* eslint-disable-next-line max-len */ - super(`request to ${url} failed: cache mode is 'only-if-cached' but no cached response is available.`) - this.code = 'ENOTCACHED' - } -} - -module.exports = { - NotCachedError, -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/cache/index.js b/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/cache/index.js deleted file mode 100644 index 0de49d23fb9336..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/cache/index.js +++ /dev/null @@ -1,49 +0,0 @@ -const { NotCachedError } = require('./errors.js') -const CacheEntry = require('./entry.js') -const remote = require('../remote.js') - -// do whatever is necessary to get a Response and return it -const cacheFetch = async (request, options) => { - // try to find a cached entry that satisfies this request - const entry = await CacheEntry.find(request, options) - if (!entry) { - // no cached result, if the cache mode is 'only-if-cached' that's a failure - if (options.cache === 'only-if-cached') { - throw new NotCachedError(request.url) - } - - // otherwise, we make a request, store it and return it - const response = await remote(request, options) - const newEntry = new CacheEntry({ request, response, options }) - return newEntry.store('miss') - } - - // we have a cached response that satisfies this request, however if the cache - // mode is 'no-cache' then we send the revalidation request no matter what - if (options.cache === 'no-cache') { - return entry.revalidate(request, options) - } - - // if the cached entry is not stale, or if the cache mode is 'force-cache' or - // 'only-if-cached' we can respond with the cached entry. set the status - // based on the result of needsRevalidation and respond - const _needsRevalidation = entry.policy.needsRevalidation(request) - if (options.cache === 'force-cache' || - options.cache === 'only-if-cached' || - !_needsRevalidation) { - return entry.respond(request.method, options, _needsRevalidation ? 'stale' : 'hit') - } - - // if we got here, the cache entry is stale so revalidate it - return entry.revalidate(request, options) -} - -cacheFetch.invalidate = async (request, options) => { - if (!options.cachePath) { - return - } - - return CacheEntry.invalidate(request, options) -} - -module.exports = cacheFetch diff --git a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/cache/key.js b/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/cache/key.js deleted file mode 100644 index f7684d562b7fae..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/cache/key.js +++ /dev/null @@ -1,17 +0,0 @@ -const { URL, format } = require('url') - -// options passed to url.format() when generating a key -const formatOptions = { - auth: false, - fragment: false, - search: true, - unicode: false, -} - -// returns a string to be used as the cache key for the Request -const cacheKey = (request) => { - const parsed = new URL(request.url) - return `make-fetch-happen:request-cache:${format(parsed, formatOptions)}` -} - -module.exports = cacheKey diff --git a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/cache/policy.js b/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/cache/policy.js deleted file mode 100644 index ada3c8600dae92..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/cache/policy.js +++ /dev/null @@ -1,161 +0,0 @@ -const CacheSemantics = require('http-cache-semantics') -const Negotiator = require('negotiator') -const ssri = require('ssri') - -// options passed to http-cache-semantics constructor -const policyOptions = { - shared: false, - ignoreCargoCult: true, -} - -// a fake empty response, used when only testing the -// request for storability -const emptyResponse = { status: 200, headers: {} } - -// returns a plain object representation of the Request -const requestObject = (request) => { - const _obj = { - method: request.method, - url: request.url, - headers: {}, - compress: request.compress, - } - - request.headers.forEach((value, key) => { - _obj.headers[key] = value - }) - - return _obj -} - -// returns a plain object representation of the Response -const responseObject = (response) => { - const _obj = { - status: response.status, - headers: {}, - } - - response.headers.forEach((value, key) => { - _obj.headers[key] = value - }) - - return _obj -} - -class CachePolicy { - constructor ({ entry, request, response, options }) { - this.entry = entry - this.request = requestObject(request) - this.response = responseObject(response) - this.options = options - this.policy = new CacheSemantics(this.request, this.response, policyOptions) - - if (this.entry) { - // if we have an entry, copy the timestamp to the _responseTime - // this is necessary because the CacheSemantics constructor forces - // the value to Date.now() which means a policy created from a - // cache entry is likely to always identify itself as stale - this.policy._responseTime = this.entry.metadata.time - } - } - - // static method to quickly determine if a request alone is storable - static storable (request, options) { - // no cachePath means no caching - if (!options.cachePath) { - return false - } - - // user explicitly asked not to cache - if (options.cache === 'no-store') { - return false - } - - // we only cache GET and HEAD requests - if (!['GET', 'HEAD'].includes(request.method)) { - return false - } - - // otherwise, let http-cache-semantics make the decision - // based on the request's headers - const policy = new CacheSemantics(requestObject(request), emptyResponse, policyOptions) - return policy.storable() - } - - // returns true if the policy satisfies the request - satisfies (request) { - const _req = requestObject(request) - if (this.request.headers.host !== _req.headers.host) { - return false - } - - if (this.request.compress !== _req.compress) { - return false - } - - const negotiatorA = new Negotiator(this.request) - const negotiatorB = new Negotiator(_req) - - if (JSON.stringify(negotiatorA.mediaTypes()) !== JSON.stringify(negotiatorB.mediaTypes())) { - return false - } - - if (JSON.stringify(negotiatorA.languages()) !== JSON.stringify(negotiatorB.languages())) { - return false - } - - if (JSON.stringify(negotiatorA.encodings()) !== JSON.stringify(negotiatorB.encodings())) { - return false - } - - if (this.options.integrity) { - return ssri.parse(this.options.integrity).match(this.entry.integrity) - } - - return true - } - - // returns true if the request and response allow caching - storable () { - return this.policy.storable() - } - - // NOTE: this is a hack to avoid parsing the cache-control - // header ourselves, it returns true if the response's - // cache-control contains must-revalidate - get mustRevalidate () { - return !!this.policy._rescc['must-revalidate'] - } - - // returns true if the cached response requires revalidation - // for the given request - needsRevalidation (request) { - const _req = requestObject(request) - // force method to GET because we only cache GETs - // but can serve a HEAD from a cached GET - _req.method = 'GET' - return !this.policy.satisfiesWithoutRevalidation(_req) - } - - responseHeaders () { - return this.policy.responseHeaders() - } - - // returns a new object containing the appropriate headers - // to send a revalidation request - revalidationHeaders (request) { - const _req = requestObject(request) - return this.policy.revalidationHeaders(_req) - } - - // returns true if the request/response was revalidated - // successfully. returns false if a new response was received - revalidated (request, response) { - const _req = requestObject(request) - const _res = responseObject(response) - const policy = this.policy.revalidatedPolicy(_req, _res) - return !policy.modified - } -} - -module.exports = CachePolicy diff --git a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/fetch.js b/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/fetch.js deleted file mode 100644 index 233ba67e165502..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/fetch.js +++ /dev/null @@ -1,118 +0,0 @@ -'use strict' - -const { FetchError, Request, isRedirect } = require('minipass-fetch') -const url = require('url') - -const CachePolicy = require('./cache/policy.js') -const cache = require('./cache/index.js') -const remote = require('./remote.js') - -// given a Request, a Response and user options -// return true if the response is a redirect that -// can be followed. we throw errors that will result -// in the fetch being rejected if the redirect is -// possible but invalid for some reason -const canFollowRedirect = (request, response, options) => { - if (!isRedirect(response.status)) { - return false - } - - if (options.redirect === 'manual') { - return false - } - - if (options.redirect === 'error') { - throw new FetchError(`redirect mode is set to error: ${request.url}`, - 'no-redirect', { code: 'ENOREDIRECT' }) - } - - if (!response.headers.has('location')) { - throw new FetchError(`redirect location header missing for: ${request.url}`, - 'no-location', { code: 'EINVALIDREDIRECT' }) - } - - if (request.counter >= request.follow) { - throw new FetchError(`maximum redirect reached at: ${request.url}`, - 'max-redirect', { code: 'EMAXREDIRECT' }) - } - - return true -} - -// given a Request, a Response, and the user's options return an object -// with a new Request and a new options object that will be used for -// following the redirect -const getRedirect = (request, response, options) => { - const _opts = { ...options } - const location = response.headers.get('location') - const redirectUrl = new url.URL(location, /^https?:/.test(location) ? undefined : request.url) - // Comment below is used under the following license: - /** - * @license - * Copyright (c) 2010-2012 Mikeal Rogers - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language - * governing permissions and limitations under the License. - */ - - // Remove authorization if changing hostnames (but not if just - // changing ports or protocols). This matches the behavior of request: - // https://github.com/request/request/blob/b12a6245/lib/redirect.js#L134-L138 - if (new url.URL(request.url).hostname !== redirectUrl.hostname) { - request.headers.delete('authorization') - request.headers.delete('cookie') - } - - // for POST request with 301/302 response, or any request with 303 response, - // use GET when following redirect - if ( - response.status === 303 || - (request.method === 'POST' && [301, 302].includes(response.status)) - ) { - _opts.method = 'GET' - _opts.body = null - request.headers.delete('content-length') - } - - _opts.headers = {} - request.headers.forEach((value, key) => { - _opts.headers[key] = value - }) - - _opts.counter = ++request.counter - const redirectReq = new Request(url.format(redirectUrl), _opts) - return { - request: redirectReq, - options: _opts, - } -} - -const fetch = async (request, options) => { - const response = CachePolicy.storable(request, options) - ? await cache(request, options) - : await remote(request, options) - - // if the request wasn't a GET or HEAD, and the response - // status is between 200 and 399 inclusive, invalidate the - // request url - if (!['GET', 'HEAD'].includes(request.method) && - response.status >= 200 && - response.status <= 399) { - await cache.invalidate(request, options) - } - - if (!canFollowRedirect(request, response, options)) { - return response - } - - const redirect = getRedirect(request, response, options) - return fetch(redirect.request, redirect.options) -} - -module.exports = fetch diff --git a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/index.js b/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/index.js deleted file mode 100644 index 2f12e8e1b61131..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/index.js +++ /dev/null @@ -1,41 +0,0 @@ -const { FetchError, Headers, Request, Response } = require('minipass-fetch') - -const configureOptions = require('./options.js') -const fetch = require('./fetch.js') - -const makeFetchHappen = (url, opts) => { - const options = configureOptions(opts) - - const request = new Request(url, options) - return fetch(request, options) -} - -makeFetchHappen.defaults = (defaultUrl, defaultOptions = {}, wrappedFetch = makeFetchHappen) => { - if (typeof defaultUrl === 'object') { - defaultOptions = defaultUrl - defaultUrl = null - } - - const defaultedFetch = (url, options = {}) => { - const finalUrl = url || defaultUrl - const finalOptions = { - ...defaultOptions, - ...options, - headers: { - ...defaultOptions.headers, - ...options.headers, - }, - } - return wrappedFetch(finalUrl, finalOptions) - } - - defaultedFetch.defaults = (defaultUrl1, defaultOptions1 = {}) => - makeFetchHappen.defaults(defaultUrl1, defaultOptions1, defaultedFetch) - return defaultedFetch -} - -module.exports = makeFetchHappen -module.exports.FetchError = FetchError -module.exports.Headers = Headers -module.exports.Request = Request -module.exports.Response = Response diff --git a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/options.js b/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/options.js deleted file mode 100644 index f77511279f831d..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/options.js +++ /dev/null @@ -1,54 +0,0 @@ -const dns = require('dns') - -const conditionalHeaders = [ - 'if-modified-since', - 'if-none-match', - 'if-unmodified-since', - 'if-match', - 'if-range', -] - -const configureOptions = (opts) => { - const { strictSSL, ...options } = { ...opts } - options.method = options.method ? options.method.toUpperCase() : 'GET' - options.rejectUnauthorized = strictSSL !== false - - if (!options.retry) { - options.retry = { retries: 0 } - } else if (typeof options.retry === 'string') { - const retries = parseInt(options.retry, 10) - if (isFinite(retries)) { - options.retry = { retries } - } else { - options.retry = { retries: 0 } - } - } else if (typeof options.retry === 'number') { - options.retry = { retries: options.retry } - } else { - options.retry = { retries: 0, ...options.retry } - } - - options.dns = { ttl: 5 * 60 * 1000, lookup: dns.lookup, ...options.dns } - - options.cache = options.cache || 'default' - if (options.cache === 'default') { - const hasConditionalHeader = Object.keys(options.headers || {}).some((name) => { - return conditionalHeaders.includes(name.toLowerCase()) - }) - if (hasConditionalHeader) { - options.cache = 'no-store' - } - } - - options.cacheAdditionalHeaders = options.cacheAdditionalHeaders || [] - - // cacheManager is deprecated, but if it's set and - // cachePath is not we should copy it to the new field - if (options.cacheManager && !options.cachePath) { - options.cachePath = options.cacheManager - } - - return options -} - -module.exports = configureOptions diff --git a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/pipeline.js b/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/pipeline.js deleted file mode 100644 index b1d221b2d0ce31..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/pipeline.js +++ /dev/null @@ -1,41 +0,0 @@ -'use strict' - -const MinipassPipeline = require('minipass-pipeline') - -class CachingMinipassPipeline extends MinipassPipeline { - #events = [] - #data = new Map() - - constructor (opts, ...streams) { - // CRITICAL: do NOT pass the streams to the call to super(), this will start - // the flow of data and potentially cause the events we need to catch to emit - // before we've finished our own setup. instead we call super() with no args, - // finish our setup, and then push the streams into ourselves to start the - // data flow - super() - this.#events = opts.events - - /* istanbul ignore next - coverage disabled because this is pointless to test here */ - if (streams.length) { - this.push(...streams) - } - } - - on (event, handler) { - if (this.#events.includes(event) && this.#data.has(event)) { - return handler(...this.#data.get(event)) - } - - return super.on(event, handler) - } - - emit (event, ...data) { - if (this.#events.includes(event)) { - this.#data.set(event, data) - } - - return super.emit(event, ...data) - } -} - -module.exports = CachingMinipassPipeline diff --git a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/remote.js b/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/remote.js deleted file mode 100644 index 8554564074de6e..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/lib/remote.js +++ /dev/null @@ -1,131 +0,0 @@ -const { Minipass } = require('minipass') -const fetch = require('minipass-fetch') -const promiseRetry = require('promise-retry') -const ssri = require('ssri') -const { log } = require('proc-log') - -const CachingMinipassPipeline = require('./pipeline.js') -const { getAgent } = require('@npmcli/agent') -const pkg = require('../package.json') - -const USER_AGENT = `${pkg.name}/${pkg.version} (+https://npm.im/${pkg.name})` - -const RETRY_ERRORS = [ - 'ECONNRESET', // remote socket closed on us - 'ECONNREFUSED', // remote host refused to open connection - 'EADDRINUSE', // failed to bind to a local port (proxy?) - 'ETIMEDOUT', // someone in the transaction is WAY TOO SLOW - // from @npmcli/agent - 'ECONNECTIONTIMEOUT', - 'EIDLETIMEOUT', - 'ERESPONSETIMEOUT', - 'ETRANSFERTIMEOUT', - // Known codes we do NOT retry on: - // ENOTFOUND (getaddrinfo failure. Either bad hostname, or offline) - // EINVALIDPROXY // invalid protocol from @npmcli/agent - // EINVALIDRESPONSE // invalid status code from @npmcli/agent -] - -const RETRY_TYPES = [ - 'request-timeout', -] - -// make a request directly to the remote source, -// retrying certain classes of errors as well as -// following redirects (through the cache if necessary) -// and verifying response integrity -const remoteFetch = (request, options) => { - const agent = getAgent(request.url, options) - if (!request.headers.has('connection')) { - request.headers.set('connection', agent ? 'keep-alive' : 'close') - } - - if (!request.headers.has('user-agent')) { - request.headers.set('user-agent', USER_AGENT) - } - - // keep our own options since we're overriding the agent - // and the redirect mode - const _opts = { - ...options, - agent, - redirect: 'manual', - } - - return promiseRetry(async (retryHandler, attemptNum) => { - const req = new fetch.Request(request, _opts) - try { - let res = await fetch(req, _opts) - if (_opts.integrity && res.status === 200) { - // we got a 200 response and the user has specified an expected - // integrity value, so wrap the response in an ssri stream to verify it - const integrityStream = ssri.integrityStream({ - algorithms: _opts.algorithms, - integrity: _opts.integrity, - size: _opts.size, - }) - const pipeline = new CachingMinipassPipeline({ - events: ['integrity', 'size'], - }, res.body, integrityStream) - // we also propagate the integrity and size events out to the pipeline so we can use - // this new response body as an integrityEmitter for cacache - integrityStream.on('integrity', i => pipeline.emit('integrity', i)) - integrityStream.on('size', s => pipeline.emit('size', s)) - res = new fetch.Response(pipeline, res) - // set an explicit flag so we know if our response body will emit integrity and size - res.body.hasIntegrityEmitter = true - } - - res.headers.set('x-fetch-attempts', attemptNum) - - // do not retry POST requests, or requests with a streaming body - // do retry requests with a 408, 420, 429 or 500+ status in the response - const isStream = Minipass.isStream(req.body) - const isRetriable = req.method !== 'POST' && - !isStream && - ([408, 420, 429].includes(res.status) || res.status >= 500) - - if (isRetriable) { - if (typeof options.onRetry === 'function') { - options.onRetry(res) - } - - /* eslint-disable-next-line max-len */ - log.http('fetch', `${req.method} ${req.url} attempt ${attemptNum} failed with ${res.status}`) - return retryHandler(res) - } - - return res - } catch (err) { - const code = (err.code === 'EPROMISERETRY') - ? err.retried.code - : err.code - - // err.retried will be the thing that was thrown from above - // if it's a response, we just got a bad status code and we - // can re-throw to allow the retry - const isRetryError = err.retried instanceof fetch.Response || - (RETRY_ERRORS.includes(code) && RETRY_TYPES.includes(err.type)) - - if (req.method === 'POST' || isRetryError) { - throw err - } - - if (typeof options.onRetry === 'function') { - options.onRetry(err) - } - - log.http('fetch', `${req.method} ${req.url} attempt ${attemptNum} failed with ${err.code}`) - return retryHandler(err) - } - }, options.retry).catch((err) => { - // don't reject for http errors, just return them - if (err.status >= 400 && err.type !== 'system') { - return err - } - - throw err - }) -} - -module.exports = remoteFetch diff --git a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/package.json b/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/package.json deleted file mode 100644 index 7adb4d1e7f9719..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/make-fetch-happen/package.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "name": "make-fetch-happen", - "version": "13.0.1", - "description": "Opinionated, caching, retrying fetch client", - "main": "lib/index.js", - "files": [ - "bin/", - "lib/" - ], - "scripts": { - "test": "tap", - "posttest": "npm run lint", - "eslint": "eslint", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "lintfix": "npm run lint -- --fix", - "postlint": "template-oss-check", - "snap": "tap", - "template-oss-apply": "template-oss-apply --force" - }, - "repository": { - "type": "git", - "url": "https://github.com/npm/make-fetch-happen.git" - }, - "keywords": [ - "http", - "request", - "fetch", - "mean girls", - "caching", - "cache", - "subresource integrity" - ], - "author": "GitHub Inc.", - "license": "ISC", - "dependencies": { - "@npmcli/agent": "^2.0.0", - "cacache": "^18.0.0", - "http-cache-semantics": "^4.1.1", - "is-lambda": "^1.0.1", - "minipass": "^7.0.2", - "minipass-fetch": "^3.0.0", - "minipass-flush": "^1.0.5", - "minipass-pipeline": "^1.2.4", - "negotiator": "^0.6.3", - "proc-log": "^4.2.0", - "promise-retry": "^2.0.1", - "ssri": "^10.0.0" - }, - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.21.4", - "nock": "^13.2.4", - "safe-buffer": "^5.2.1", - "standard-version": "^9.3.2", - "tap": "^16.0.0" - }, - "engines": { - "node": "^16.14.0 || >=18.0.0" - }, - "tap": { - "color": 1, - "files": "test/*.js", - "check-coverage": true, - "timeout": 60, - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.21.4", - "publish": "true" - } -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/LICENSE b/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/LICENSE deleted file mode 100644 index 3c3410cdc12ee3..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/LICENSE +++ /dev/null @@ -1,28 +0,0 @@ -The MIT License (MIT) - -Copyright (c) Isaac Z. Schlueter and Contributors -Copyright (c) 2016 David Frank - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - ---- - -Note: This is a derivative work based on "node-fetch" by David Frank, -modified and distributed under the terms of the MIT license above. -https://github.com/bitinn/node-fetch diff --git a/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/lib/abort-error.js b/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/lib/abort-error.js deleted file mode 100644 index b18f643269e375..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/lib/abort-error.js +++ /dev/null @@ -1,17 +0,0 @@ -'use strict' -class AbortError extends Error { - constructor (message) { - super(message) - this.code = 'FETCH_ABORTED' - this.type = 'aborted' - Error.captureStackTrace(this, this.constructor) - } - - get name () { - return 'AbortError' - } - - // don't allow name to be overridden, but don't throw either - set name (s) {} -} -module.exports = AbortError diff --git a/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/lib/blob.js b/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/lib/blob.js deleted file mode 100644 index 121b1730102e72..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/lib/blob.js +++ /dev/null @@ -1,97 +0,0 @@ -'use strict' -const { Minipass } = require('minipass') -const TYPE = Symbol('type') -const BUFFER = Symbol('buffer') - -class Blob { - constructor (blobParts, options) { - this[TYPE] = '' - - const buffers = [] - let size = 0 - - if (blobParts) { - const a = blobParts - const length = Number(a.length) - for (let i = 0; i < length; i++) { - const element = a[i] - const buffer = element instanceof Buffer ? element - : ArrayBuffer.isView(element) - ? Buffer.from(element.buffer, element.byteOffset, element.byteLength) - : element instanceof ArrayBuffer ? Buffer.from(element) - : element instanceof Blob ? element[BUFFER] - : typeof element === 'string' ? Buffer.from(element) - : Buffer.from(String(element)) - size += buffer.length - buffers.push(buffer) - } - } - - this[BUFFER] = Buffer.concat(buffers, size) - - const type = options && options.type !== undefined - && String(options.type).toLowerCase() - if (type && !/[^\u0020-\u007E]/.test(type)) { - this[TYPE] = type - } - } - - get size () { - return this[BUFFER].length - } - - get type () { - return this[TYPE] - } - - text () { - return Promise.resolve(this[BUFFER].toString()) - } - - arrayBuffer () { - const buf = this[BUFFER] - const off = buf.byteOffset - const len = buf.byteLength - const ab = buf.buffer.slice(off, off + len) - return Promise.resolve(ab) - } - - stream () { - return new Minipass().end(this[BUFFER]) - } - - slice (start, end, type) { - const size = this.size - const relativeStart = start === undefined ? 0 - : start < 0 ? Math.max(size + start, 0) - : Math.min(start, size) - const relativeEnd = end === undefined ? size - : end < 0 ? Math.max(size + end, 0) - : Math.min(end, size) - const span = Math.max(relativeEnd - relativeStart, 0) - - const buffer = this[BUFFER] - const slicedBuffer = buffer.slice( - relativeStart, - relativeStart + span - ) - const blob = new Blob([], { type }) - blob[BUFFER] = slicedBuffer - return blob - } - - get [Symbol.toStringTag] () { - return 'Blob' - } - - static get BUFFER () { - return BUFFER - } -} - -Object.defineProperties(Blob.prototype, { - size: { enumerable: true }, - type: { enumerable: true }, -}) - -module.exports = Blob diff --git a/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/lib/body.js b/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/lib/body.js deleted file mode 100644 index 62286bd1de0d91..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/lib/body.js +++ /dev/null @@ -1,350 +0,0 @@ -'use strict' -const { Minipass } = require('minipass') -const MinipassSized = require('minipass-sized') - -const Blob = require('./blob.js') -const { BUFFER } = Blob -const FetchError = require('./fetch-error.js') - -// optional dependency on 'encoding' -let convert -try { - convert = require('encoding').convert -} catch (e) { - // defer error until textConverted is called -} - -const INTERNALS = Symbol('Body internals') -const CONSUME_BODY = Symbol('consumeBody') - -class Body { - constructor (bodyArg, options = {}) { - const { size = 0, timeout = 0 } = options - const body = bodyArg === undefined || bodyArg === null ? null - : isURLSearchParams(bodyArg) ? Buffer.from(bodyArg.toString()) - : isBlob(bodyArg) ? bodyArg - : Buffer.isBuffer(bodyArg) ? bodyArg - : Object.prototype.toString.call(bodyArg) === '[object ArrayBuffer]' - ? Buffer.from(bodyArg) - : ArrayBuffer.isView(bodyArg) - ? Buffer.from(bodyArg.buffer, bodyArg.byteOffset, bodyArg.byteLength) - : Minipass.isStream(bodyArg) ? bodyArg - : Buffer.from(String(bodyArg)) - - this[INTERNALS] = { - body, - disturbed: false, - error: null, - } - - this.size = size - this.timeout = timeout - - if (Minipass.isStream(body)) { - body.on('error', er => { - const error = er.name === 'AbortError' ? er - : new FetchError(`Invalid response while trying to fetch ${ - this.url}: ${er.message}`, 'system', er) - this[INTERNALS].error = error - }) - } - } - - get body () { - return this[INTERNALS].body - } - - get bodyUsed () { - return this[INTERNALS].disturbed - } - - arrayBuffer () { - return this[CONSUME_BODY]().then(buf => - buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength)) - } - - blob () { - const ct = this.headers && this.headers.get('content-type') || '' - return this[CONSUME_BODY]().then(buf => Object.assign( - new Blob([], { type: ct.toLowerCase() }), - { [BUFFER]: buf } - )) - } - - async json () { - const buf = await this[CONSUME_BODY]() - try { - return JSON.parse(buf.toString()) - } catch (er) { - throw new FetchError( - `invalid json response body at ${this.url} reason: ${er.message}`, - 'invalid-json' - ) - } - } - - text () { - return this[CONSUME_BODY]().then(buf => buf.toString()) - } - - buffer () { - return this[CONSUME_BODY]() - } - - textConverted () { - return this[CONSUME_BODY]().then(buf => convertBody(buf, this.headers)) - } - - [CONSUME_BODY] () { - if (this[INTERNALS].disturbed) { - return Promise.reject(new TypeError(`body used already for: ${ - this.url}`)) - } - - this[INTERNALS].disturbed = true - - if (this[INTERNALS].error) { - return Promise.reject(this[INTERNALS].error) - } - - // body is null - if (this.body === null) { - return Promise.resolve(Buffer.alloc(0)) - } - - if (Buffer.isBuffer(this.body)) { - return Promise.resolve(this.body) - } - - const upstream = isBlob(this.body) ? this.body.stream() : this.body - - /* istanbul ignore if: should never happen */ - if (!Minipass.isStream(upstream)) { - return Promise.resolve(Buffer.alloc(0)) - } - - const stream = this.size && upstream instanceof MinipassSized ? upstream - : !this.size && upstream instanceof Minipass && - !(upstream instanceof MinipassSized) ? upstream - : this.size ? new MinipassSized({ size: this.size }) - : new Minipass() - - // allow timeout on slow response body, but only if the stream is still writable. this - // makes the timeout center on the socket stream from lib/index.js rather than the - // intermediary minipass stream we create to receive the data - const resTimeout = this.timeout && stream.writable ? setTimeout(() => { - stream.emit('error', new FetchError( - `Response timeout while trying to fetch ${ - this.url} (over ${this.timeout}ms)`, 'body-timeout')) - }, this.timeout) : null - - // do not keep the process open just for this timeout, even - // though we expect it'll get cleared eventually. - if (resTimeout && resTimeout.unref) { - resTimeout.unref() - } - - // do the pipe in the promise, because the pipe() can send too much - // data through right away and upset the MP Sized object - return new Promise((resolve) => { - // if the stream is some other kind of stream, then pipe through a MP - // so we can collect it more easily. - if (stream !== upstream) { - upstream.on('error', er => stream.emit('error', er)) - upstream.pipe(stream) - } - resolve() - }).then(() => stream.concat()).then(buf => { - clearTimeout(resTimeout) - return buf - }).catch(er => { - clearTimeout(resTimeout) - // request was aborted, reject with this Error - if (er.name === 'AbortError' || er.name === 'FetchError') { - throw er - } else if (er.name === 'RangeError') { - throw new FetchError(`Could not create Buffer from response body for ${ - this.url}: ${er.message}`, 'system', er) - } else { - // other errors, such as incorrect content-encoding or content-length - throw new FetchError(`Invalid response body while trying to fetch ${ - this.url}: ${er.message}`, 'system', er) - } - }) - } - - static clone (instance) { - if (instance.bodyUsed) { - throw new Error('cannot clone body after it is used') - } - - const body = instance.body - - // check that body is a stream and not form-data object - // NB: can't clone the form-data object without having it as a dependency - if (Minipass.isStream(body) && typeof body.getBoundary !== 'function') { - // create a dedicated tee stream so that we don't lose data - // potentially sitting in the body stream's buffer by writing it - // immediately to p1 and not having it for p2. - const tee = new Minipass() - const p1 = new Minipass() - const p2 = new Minipass() - tee.on('error', er => { - p1.emit('error', er) - p2.emit('error', er) - }) - body.on('error', er => tee.emit('error', er)) - tee.pipe(p1) - tee.pipe(p2) - body.pipe(tee) - // set instance body to one fork, return the other - instance[INTERNALS].body = p1 - return p2 - } else { - return instance.body - } - } - - static extractContentType (body) { - return body === null || body === undefined ? null - : typeof body === 'string' ? 'text/plain;charset=UTF-8' - : isURLSearchParams(body) - ? 'application/x-www-form-urlencoded;charset=UTF-8' - : isBlob(body) ? body.type || null - : Buffer.isBuffer(body) ? null - : Object.prototype.toString.call(body) === '[object ArrayBuffer]' ? null - : ArrayBuffer.isView(body) ? null - : typeof body.getBoundary === 'function' - ? `multipart/form-data;boundary=${body.getBoundary()}` - : Minipass.isStream(body) ? null - : 'text/plain;charset=UTF-8' - } - - static getTotalBytes (instance) { - const { body } = instance - return (body === null || body === undefined) ? 0 - : isBlob(body) ? body.size - : Buffer.isBuffer(body) ? body.length - : body && typeof body.getLengthSync === 'function' && ( - // detect form data input from form-data module - body._lengthRetrievers && - /* istanbul ignore next */ body._lengthRetrievers.length === 0 || // 1.x - body.hasKnownLength && body.hasKnownLength()) // 2.x - ? body.getLengthSync() - : null - } - - static writeToStream (dest, instance) { - const { body } = instance - - if (body === null || body === undefined) { - dest.end() - } else if (Buffer.isBuffer(body) || typeof body === 'string') { - dest.end(body) - } else { - // body is stream or blob - const stream = isBlob(body) ? body.stream() : body - stream.on('error', er => dest.emit('error', er)).pipe(dest) - } - - return dest - } -} - -Object.defineProperties(Body.prototype, { - body: { enumerable: true }, - bodyUsed: { enumerable: true }, - arrayBuffer: { enumerable: true }, - blob: { enumerable: true }, - json: { enumerable: true }, - text: { enumerable: true }, -}) - -const isURLSearchParams = obj => - // Duck-typing as a necessary condition. - (typeof obj !== 'object' || - typeof obj.append !== 'function' || - typeof obj.delete !== 'function' || - typeof obj.get !== 'function' || - typeof obj.getAll !== 'function' || - typeof obj.has !== 'function' || - typeof obj.set !== 'function') ? false - // Brand-checking and more duck-typing as optional condition. - : obj.constructor.name === 'URLSearchParams' || - Object.prototype.toString.call(obj) === '[object URLSearchParams]' || - typeof obj.sort === 'function' - -const isBlob = obj => - typeof obj === 'object' && - typeof obj.arrayBuffer === 'function' && - typeof obj.type === 'string' && - typeof obj.stream === 'function' && - typeof obj.constructor === 'function' && - typeof obj.constructor.name === 'string' && - /^(Blob|File)$/.test(obj.constructor.name) && - /^(Blob|File)$/.test(obj[Symbol.toStringTag]) - -const convertBody = (buffer, headers) => { - /* istanbul ignore if */ - if (typeof convert !== 'function') { - throw new Error('The package `encoding` must be installed to use the textConverted() function') - } - - const ct = headers && headers.get('content-type') - let charset = 'utf-8' - let res - - // header - if (ct) { - res = /charset=([^;]*)/i.exec(ct) - } - - // no charset in content type, peek at response body for at most 1024 bytes - const str = buffer.slice(0, 1024).toString() - - // html5 - if (!res && str) { - res = / this.expect - ? 'max-size' : type - this.message = message - Error.captureStackTrace(this, this.constructor) - } - - get name () { - return 'FetchError' - } - - // don't allow name to be overwritten - set name (n) {} - - get [Symbol.toStringTag] () { - return 'FetchError' - } -} -module.exports = FetchError diff --git a/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/lib/headers.js b/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/lib/headers.js deleted file mode 100644 index dd6e854d5ba399..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/lib/headers.js +++ /dev/null @@ -1,267 +0,0 @@ -'use strict' -const invalidTokenRegex = /[^^_`a-zA-Z\-0-9!#$%&'*+.|~]/ -const invalidHeaderCharRegex = /[^\t\x20-\x7e\x80-\xff]/ - -const validateName = name => { - name = `${name}` - if (invalidTokenRegex.test(name) || name === '') { - throw new TypeError(`${name} is not a legal HTTP header name`) - } -} - -const validateValue = value => { - value = `${value}` - if (invalidHeaderCharRegex.test(value)) { - throw new TypeError(`${value} is not a legal HTTP header value`) - } -} - -const find = (map, name) => { - name = name.toLowerCase() - for (const key in map) { - if (key.toLowerCase() === name) { - return key - } - } - return undefined -} - -const MAP = Symbol('map') -class Headers { - constructor (init = undefined) { - this[MAP] = Object.create(null) - if (init instanceof Headers) { - const rawHeaders = init.raw() - const headerNames = Object.keys(rawHeaders) - for (const headerName of headerNames) { - for (const value of rawHeaders[headerName]) { - this.append(headerName, value) - } - } - return - } - - // no-op - if (init === undefined || init === null) { - return - } - - if (typeof init === 'object') { - const method = init[Symbol.iterator] - if (method !== null && method !== undefined) { - if (typeof method !== 'function') { - throw new TypeError('Header pairs must be iterable') - } - - // sequence> - // Note: per spec we have to first exhaust the lists then process them - const pairs = [] - for (const pair of init) { - if (typeof pair !== 'object' || - typeof pair[Symbol.iterator] !== 'function') { - throw new TypeError('Each header pair must be iterable') - } - const arrPair = Array.from(pair) - if (arrPair.length !== 2) { - throw new TypeError('Each header pair must be a name/value tuple') - } - pairs.push(arrPair) - } - - for (const pair of pairs) { - this.append(pair[0], pair[1]) - } - } else { - // record - for (const key of Object.keys(init)) { - this.append(key, init[key]) - } - } - } else { - throw new TypeError('Provided initializer must be an object') - } - } - - get (name) { - name = `${name}` - validateName(name) - const key = find(this[MAP], name) - if (key === undefined) { - return null - } - - return this[MAP][key].join(', ') - } - - forEach (callback, thisArg = undefined) { - let pairs = getHeaders(this) - for (let i = 0; i < pairs.length; i++) { - const [name, value] = pairs[i] - callback.call(thisArg, value, name, this) - // refresh in case the callback added more headers - pairs = getHeaders(this) - } - } - - set (name, value) { - name = `${name}` - value = `${value}` - validateName(name) - validateValue(value) - const key = find(this[MAP], name) - this[MAP][key !== undefined ? key : name] = [value] - } - - append (name, value) { - name = `${name}` - value = `${value}` - validateName(name) - validateValue(value) - const key = find(this[MAP], name) - if (key !== undefined) { - this[MAP][key].push(value) - } else { - this[MAP][name] = [value] - } - } - - has (name) { - name = `${name}` - validateName(name) - return find(this[MAP], name) !== undefined - } - - delete (name) { - name = `${name}` - validateName(name) - const key = find(this[MAP], name) - if (key !== undefined) { - delete this[MAP][key] - } - } - - raw () { - return this[MAP] - } - - keys () { - return new HeadersIterator(this, 'key') - } - - values () { - return new HeadersIterator(this, 'value') - } - - [Symbol.iterator] () { - return new HeadersIterator(this, 'key+value') - } - - entries () { - return new HeadersIterator(this, 'key+value') - } - - get [Symbol.toStringTag] () { - return 'Headers' - } - - static exportNodeCompatibleHeaders (headers) { - const obj = Object.assign(Object.create(null), headers[MAP]) - - // http.request() only supports string as Host header. This hack makes - // specifying custom Host header possible. - const hostHeaderKey = find(headers[MAP], 'Host') - if (hostHeaderKey !== undefined) { - obj[hostHeaderKey] = obj[hostHeaderKey][0] - } - - return obj - } - - static createHeadersLenient (obj) { - const headers = new Headers() - for (const name of Object.keys(obj)) { - if (invalidTokenRegex.test(name)) { - continue - } - - if (Array.isArray(obj[name])) { - for (const val of obj[name]) { - if (invalidHeaderCharRegex.test(val)) { - continue - } - - if (headers[MAP][name] === undefined) { - headers[MAP][name] = [val] - } else { - headers[MAP][name].push(val) - } - } - } else if (!invalidHeaderCharRegex.test(obj[name])) { - headers[MAP][name] = [obj[name]] - } - } - return headers - } -} - -Object.defineProperties(Headers.prototype, { - get: { enumerable: true }, - forEach: { enumerable: true }, - set: { enumerable: true }, - append: { enumerable: true }, - has: { enumerable: true }, - delete: { enumerable: true }, - keys: { enumerable: true }, - values: { enumerable: true }, - entries: { enumerable: true }, -}) - -const getHeaders = (headers, kind = 'key+value') => - Object.keys(headers[MAP]).sort().map( - kind === 'key' ? k => k.toLowerCase() - : kind === 'value' ? k => headers[MAP][k].join(', ') - : k => [k.toLowerCase(), headers[MAP][k].join(', ')] - ) - -const INTERNAL = Symbol('internal') - -class HeadersIterator { - constructor (target, kind) { - this[INTERNAL] = { - target, - kind, - index: 0, - } - } - - get [Symbol.toStringTag] () { - return 'HeadersIterator' - } - - next () { - /* istanbul ignore if: should be impossible */ - if (!this || Object.getPrototypeOf(this) !== HeadersIterator.prototype) { - throw new TypeError('Value of `this` is not a HeadersIterator') - } - - const { target, kind, index } = this[INTERNAL] - const values = getHeaders(target, kind) - const len = values.length - if (index >= len) { - return { - value: undefined, - done: true, - } - } - - this[INTERNAL].index++ - - return { value: values[index], done: false } - } -} - -// manually extend because 'extends' requires a ctor -Object.setPrototypeOf(HeadersIterator.prototype, - Object.getPrototypeOf(Object.getPrototypeOf([][Symbol.iterator]()))) - -module.exports = Headers diff --git a/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/lib/index.js b/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/lib/index.js deleted file mode 100644 index da402161670e65..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/lib/index.js +++ /dev/null @@ -1,377 +0,0 @@ -'use strict' -const { URL } = require('url') -const http = require('http') -const https = require('https') -const zlib = require('minizlib') -const { Minipass } = require('minipass') - -const Body = require('./body.js') -const { writeToStream, getTotalBytes } = Body -const Response = require('./response.js') -const Headers = require('./headers.js') -const { createHeadersLenient } = Headers -const Request = require('./request.js') -const { getNodeRequestOptions } = Request -const FetchError = require('./fetch-error.js') -const AbortError = require('./abort-error.js') - -// XXX this should really be split up and unit-ized for easier testing -// and better DRY implementation of data/http request aborting -const fetch = async (url, opts) => { - if (/^data:/.test(url)) { - const request = new Request(url, opts) - // delay 1 promise tick so that the consumer can abort right away - return Promise.resolve().then(() => new Promise((resolve, reject) => { - let type, data - try { - const { pathname, search } = new URL(url) - const split = pathname.split(',') - if (split.length < 2) { - throw new Error('invalid data: URI') - } - const mime = split.shift() - const base64 = /;base64$/.test(mime) - type = base64 ? mime.slice(0, -1 * ';base64'.length) : mime - const rawData = decodeURIComponent(split.join(',') + search) - data = base64 ? Buffer.from(rawData, 'base64') : Buffer.from(rawData) - } catch (er) { - return reject(new FetchError(`[${request.method}] ${ - request.url} invalid URL, ${er.message}`, 'system', er)) - } - - const { signal } = request - if (signal && signal.aborted) { - return reject(new AbortError('The user aborted a request.')) - } - - const headers = { 'Content-Length': data.length } - if (type) { - headers['Content-Type'] = type - } - return resolve(new Response(data, { headers })) - })) - } - - return new Promise((resolve, reject) => { - // build request object - const request = new Request(url, opts) - let options - try { - options = getNodeRequestOptions(request) - } catch (er) { - return reject(er) - } - - const send = (options.protocol === 'https:' ? https : http).request - const { signal } = request - let response = null - const abort = () => { - const error = new AbortError('The user aborted a request.') - reject(error) - if (Minipass.isStream(request.body) && - typeof request.body.destroy === 'function') { - request.body.destroy(error) - } - if (response && response.body) { - response.body.emit('error', error) - } - } - - if (signal && signal.aborted) { - return abort() - } - - const abortAndFinalize = () => { - abort() - finalize() - } - - const finalize = () => { - req.abort() - if (signal) { - signal.removeEventListener('abort', abortAndFinalize) - } - clearTimeout(reqTimeout) - } - - // send request - const req = send(options) - - if (signal) { - signal.addEventListener('abort', abortAndFinalize) - } - - let reqTimeout = null - if (request.timeout) { - req.once('socket', () => { - reqTimeout = setTimeout(() => { - reject(new FetchError(`network timeout at: ${ - request.url}`, 'request-timeout')) - finalize() - }, request.timeout) - }) - } - - req.on('error', er => { - // if a 'response' event is emitted before the 'error' event, then by the - // time this handler is run it's too late to reject the Promise for the - // response. instead, we forward the error event to the response stream - // so that the error will surface to the user when they try to consume - // the body. this is done as a side effect of aborting the request except - // for in windows, where we must forward the event manually, otherwise - // there is no longer a ref'd socket attached to the request and the - // stream never ends so the event loop runs out of work and the process - // exits without warning. - // coverage skipped here due to the difficulty in testing - // istanbul ignore next - if (req.res) { - req.res.emit('error', er) - } - reject(new FetchError(`request to ${request.url} failed, reason: ${ - er.message}`, 'system', er)) - finalize() - }) - - req.on('response', res => { - clearTimeout(reqTimeout) - - const headers = createHeadersLenient(res.headers) - - // HTTP fetch step 5 - if (fetch.isRedirect(res.statusCode)) { - // HTTP fetch step 5.2 - const location = headers.get('Location') - - // HTTP fetch step 5.3 - let locationURL = null - try { - locationURL = location === null ? null : new URL(location, request.url).toString() - } catch { - // error here can only be invalid URL in Location: header - // do not throw when options.redirect == manual - // let the user extract the errorneous redirect URL - if (request.redirect !== 'manual') { - /* eslint-disable-next-line max-len */ - reject(new FetchError(`uri requested responds with an invalid redirect URL: ${location}`, 'invalid-redirect')) - finalize() - return - } - } - - // HTTP fetch step 5.5 - if (request.redirect === 'error') { - reject(new FetchError('uri requested responds with a redirect, ' + - `redirect mode is set to error: ${request.url}`, 'no-redirect')) - finalize() - return - } else if (request.redirect === 'manual') { - // node-fetch-specific step: make manual redirect a bit easier to - // use by setting the Location header value to the resolved URL. - if (locationURL !== null) { - // handle corrupted header - try { - headers.set('Location', locationURL) - } catch (err) { - /* istanbul ignore next: nodejs server prevent invalid - response headers, we can't test this through normal - request */ - reject(err) - } - } - } else if (request.redirect === 'follow' && locationURL !== null) { - // HTTP-redirect fetch step 5 - if (request.counter >= request.follow) { - reject(new FetchError(`maximum redirect reached at: ${ - request.url}`, 'max-redirect')) - finalize() - return - } - - // HTTP-redirect fetch step 9 - if (res.statusCode !== 303 && - request.body && - getTotalBytes(request) === null) { - reject(new FetchError( - 'Cannot follow redirect with body being a readable stream', - 'unsupported-redirect' - )) - finalize() - return - } - - // Update host due to redirection - request.headers.set('host', (new URL(locationURL)).host) - - // HTTP-redirect fetch step 6 (counter increment) - // Create a new Request object. - const requestOpts = { - headers: new Headers(request.headers), - follow: request.follow, - counter: request.counter + 1, - agent: request.agent, - compress: request.compress, - method: request.method, - body: request.body, - signal: request.signal, - timeout: request.timeout, - } - - // if the redirect is to a new hostname, strip the authorization and cookie headers - const parsedOriginal = new URL(request.url) - const parsedRedirect = new URL(locationURL) - if (parsedOriginal.hostname !== parsedRedirect.hostname) { - requestOpts.headers.delete('authorization') - requestOpts.headers.delete('cookie') - } - - // HTTP-redirect fetch step 11 - if (res.statusCode === 303 || ( - (res.statusCode === 301 || res.statusCode === 302) && - request.method === 'POST' - )) { - requestOpts.method = 'GET' - requestOpts.body = undefined - requestOpts.headers.delete('content-length') - } - - // HTTP-redirect fetch step 15 - resolve(fetch(new Request(locationURL, requestOpts))) - finalize() - return - } - } // end if(isRedirect) - - // prepare response - res.once('end', () => - signal && signal.removeEventListener('abort', abortAndFinalize)) - - const body = new Minipass() - // if an error occurs, either on the response stream itself, on one of the - // decoder streams, or a response length timeout from the Body class, we - // forward the error through to our internal body stream. If we see an - // error event on that, we call finalize to abort the request and ensure - // we don't leave a socket believing a request is in flight. - // this is difficult to test, so lacks specific coverage. - body.on('error', finalize) - // exceedingly rare that the stream would have an error, - // but just in case we proxy it to the stream in use. - res.on('error', /* istanbul ignore next */ er => body.emit('error', er)) - res.on('data', (chunk) => body.write(chunk)) - res.on('end', () => body.end()) - - const responseOptions = { - url: request.url, - status: res.statusCode, - statusText: res.statusMessage, - headers: headers, - size: request.size, - timeout: request.timeout, - counter: request.counter, - trailer: new Promise(resolveTrailer => - res.on('end', () => resolveTrailer(createHeadersLenient(res.trailers)))), - } - - // HTTP-network fetch step 12.1.1.3 - const codings = headers.get('Content-Encoding') - - // HTTP-network fetch step 12.1.1.4: handle content codings - - // in following scenarios we ignore compression support - // 1. compression support is disabled - // 2. HEAD request - // 3. no Content-Encoding header - // 4. no content response (204) - // 5. content not modified response (304) - if (!request.compress || - request.method === 'HEAD' || - codings === null || - res.statusCode === 204 || - res.statusCode === 304) { - response = new Response(body, responseOptions) - resolve(response) - return - } - - // Be less strict when decoding compressed responses, since sometimes - // servers send slightly invalid responses that are still accepted - // by common browsers. - // Always using Z_SYNC_FLUSH is what cURL does. - const zlibOptions = { - flush: zlib.constants.Z_SYNC_FLUSH, - finishFlush: zlib.constants.Z_SYNC_FLUSH, - } - - // for gzip - if (codings === 'gzip' || codings === 'x-gzip') { - const unzip = new zlib.Gunzip(zlibOptions) - response = new Response( - // exceedingly rare that the stream would have an error, - // but just in case we proxy it to the stream in use. - body.on('error', /* istanbul ignore next */ er => unzip.emit('error', er)).pipe(unzip), - responseOptions - ) - resolve(response) - return - } - - // for deflate - if (codings === 'deflate' || codings === 'x-deflate') { - // handle the infamous raw deflate response from old servers - // a hack for old IIS and Apache servers - const raw = res.pipe(new Minipass()) - raw.once('data', chunk => { - // see http://stackoverflow.com/questions/37519828 - const decoder = (chunk[0] & 0x0F) === 0x08 - ? new zlib.Inflate() - : new zlib.InflateRaw() - // exceedingly rare that the stream would have an error, - // but just in case we proxy it to the stream in use. - body.on('error', /* istanbul ignore next */ er => decoder.emit('error', er)).pipe(decoder) - response = new Response(decoder, responseOptions) - resolve(response) - }) - return - } - - // for br - if (codings === 'br') { - // ignoring coverage so tests don't have to fake support (or lack of) for brotli - // istanbul ignore next - try { - var decoder = new zlib.BrotliDecompress() - } catch (err) { - reject(err) - finalize() - return - } - // exceedingly rare that the stream would have an error, - // but just in case we proxy it to the stream in use. - body.on('error', /* istanbul ignore next */ er => decoder.emit('error', er)).pipe(decoder) - response = new Response(decoder, responseOptions) - resolve(response) - return - } - - // otherwise, use response as-is - response = new Response(body, responseOptions) - resolve(response) - }) - - writeToStream(req, request) - }) -} - -module.exports = fetch - -fetch.isRedirect = code => - code === 301 || - code === 302 || - code === 303 || - code === 307 || - code === 308 - -fetch.Headers = Headers -fetch.Request = Request -fetch.Response = Response -fetch.FetchError = FetchError -fetch.AbortError = AbortError diff --git a/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/lib/request.js b/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/lib/request.js deleted file mode 100644 index 054439e6699107..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/lib/request.js +++ /dev/null @@ -1,282 +0,0 @@ -'use strict' -const { URL } = require('url') -const { Minipass } = require('minipass') -const Headers = require('./headers.js') -const { exportNodeCompatibleHeaders } = Headers -const Body = require('./body.js') -const { clone, extractContentType, getTotalBytes } = Body - -const version = require('../package.json').version -const defaultUserAgent = - `minipass-fetch/${version} (+https://github.com/isaacs/minipass-fetch)` - -const INTERNALS = Symbol('Request internals') - -const isRequest = input => - typeof input === 'object' && typeof input[INTERNALS] === 'object' - -const isAbortSignal = signal => { - const proto = ( - signal - && typeof signal === 'object' - && Object.getPrototypeOf(signal) - ) - return !!(proto && proto.constructor.name === 'AbortSignal') -} - -class Request extends Body { - constructor (input, init = {}) { - const parsedURL = isRequest(input) ? new URL(input.url) - : input && input.href ? new URL(input.href) - : new URL(`${input}`) - - if (isRequest(input)) { - init = { ...input[INTERNALS], ...init } - } else if (!input || typeof input === 'string') { - input = {} - } - - const method = (init.method || input.method || 'GET').toUpperCase() - const isGETHEAD = method === 'GET' || method === 'HEAD' - - if ((init.body !== null && init.body !== undefined || - isRequest(input) && input.body !== null) && isGETHEAD) { - throw new TypeError('Request with GET/HEAD method cannot have body') - } - - const inputBody = init.body !== null && init.body !== undefined ? init.body - : isRequest(input) && input.body !== null ? clone(input) - : null - - super(inputBody, { - timeout: init.timeout || input.timeout || 0, - size: init.size || input.size || 0, - }) - - const headers = new Headers(init.headers || input.headers || {}) - - if (inputBody !== null && inputBody !== undefined && - !headers.has('Content-Type')) { - const contentType = extractContentType(inputBody) - if (contentType) { - headers.append('Content-Type', contentType) - } - } - - const signal = 'signal' in init ? init.signal - : null - - if (signal !== null && signal !== undefined && !isAbortSignal(signal)) { - throw new TypeError('Expected signal must be an instanceof AbortSignal') - } - - // TLS specific options that are handled by node - const { - ca, - cert, - ciphers, - clientCertEngine, - crl, - dhparam, - ecdhCurve, - family, - honorCipherOrder, - key, - passphrase, - pfx, - rejectUnauthorized = process.env.NODE_TLS_REJECT_UNAUTHORIZED !== '0', - secureOptions, - secureProtocol, - servername, - sessionIdContext, - } = init - - this[INTERNALS] = { - method, - redirect: init.redirect || input.redirect || 'follow', - headers, - parsedURL, - signal, - ca, - cert, - ciphers, - clientCertEngine, - crl, - dhparam, - ecdhCurve, - family, - honorCipherOrder, - key, - passphrase, - pfx, - rejectUnauthorized, - secureOptions, - secureProtocol, - servername, - sessionIdContext, - } - - // node-fetch-only options - this.follow = init.follow !== undefined ? init.follow - : input.follow !== undefined ? input.follow - : 20 - this.compress = init.compress !== undefined ? init.compress - : input.compress !== undefined ? input.compress - : true - this.counter = init.counter || input.counter || 0 - this.agent = init.agent || input.agent - } - - get method () { - return this[INTERNALS].method - } - - get url () { - return this[INTERNALS].parsedURL.toString() - } - - get headers () { - return this[INTERNALS].headers - } - - get redirect () { - return this[INTERNALS].redirect - } - - get signal () { - return this[INTERNALS].signal - } - - clone () { - return new Request(this) - } - - get [Symbol.toStringTag] () { - return 'Request' - } - - static getNodeRequestOptions (request) { - const parsedURL = request[INTERNALS].parsedURL - const headers = new Headers(request[INTERNALS].headers) - - // fetch step 1.3 - if (!headers.has('Accept')) { - headers.set('Accept', '*/*') - } - - // Basic fetch - if (!/^https?:$/.test(parsedURL.protocol)) { - throw new TypeError('Only HTTP(S) protocols are supported') - } - - if (request.signal && - Minipass.isStream(request.body) && - typeof request.body.destroy !== 'function') { - throw new Error( - 'Cancellation of streamed requests with AbortSignal is not supported') - } - - // HTTP-network-or-cache fetch steps 2.4-2.7 - const contentLengthValue = - (request.body === null || request.body === undefined) && - /^(POST|PUT)$/i.test(request.method) ? '0' - : request.body !== null && request.body !== undefined - ? getTotalBytes(request) - : null - - if (contentLengthValue) { - headers.set('Content-Length', contentLengthValue + '') - } - - // HTTP-network-or-cache fetch step 2.11 - if (!headers.has('User-Agent')) { - headers.set('User-Agent', defaultUserAgent) - } - - // HTTP-network-or-cache fetch step 2.15 - if (request.compress && !headers.has('Accept-Encoding')) { - headers.set('Accept-Encoding', 'gzip,deflate') - } - - const agent = typeof request.agent === 'function' - ? request.agent(parsedURL) - : request.agent - - if (!headers.has('Connection') && !agent) { - headers.set('Connection', 'close') - } - - // TLS specific options that are handled by node - const { - ca, - cert, - ciphers, - clientCertEngine, - crl, - dhparam, - ecdhCurve, - family, - honorCipherOrder, - key, - passphrase, - pfx, - rejectUnauthorized, - secureOptions, - secureProtocol, - servername, - sessionIdContext, - } = request[INTERNALS] - - // HTTP-network fetch step 4.2 - // chunked encoding is handled by Node.js - - // we cannot spread parsedURL directly, so we have to read each property one-by-one - // and map them to the equivalent https?.request() method options - const urlProps = { - auth: parsedURL.username || parsedURL.password - ? `${parsedURL.username}:${parsedURL.password}` - : '', - host: parsedURL.host, - hostname: parsedURL.hostname, - path: `${parsedURL.pathname}${parsedURL.search}`, - port: parsedURL.port, - protocol: parsedURL.protocol, - } - - return { - ...urlProps, - method: request.method, - headers: exportNodeCompatibleHeaders(headers), - agent, - ca, - cert, - ciphers, - clientCertEngine, - crl, - dhparam, - ecdhCurve, - family, - honorCipherOrder, - key, - passphrase, - pfx, - rejectUnauthorized, - secureOptions, - secureProtocol, - servername, - sessionIdContext, - timeout: request.timeout, - } - } -} - -module.exports = Request - -Object.defineProperties(Request.prototype, { - method: { enumerable: true }, - url: { enumerable: true }, - headers: { enumerable: true }, - redirect: { enumerable: true }, - clone: { enumerable: true }, - signal: { enumerable: true }, -}) diff --git a/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/lib/response.js b/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/lib/response.js deleted file mode 100644 index 54cb52db3594a7..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/lib/response.js +++ /dev/null @@ -1,90 +0,0 @@ -'use strict' -const http = require('http') -const { STATUS_CODES } = http - -const Headers = require('./headers.js') -const Body = require('./body.js') -const { clone, extractContentType } = Body - -const INTERNALS = Symbol('Response internals') - -class Response extends Body { - constructor (body = null, opts = {}) { - super(body, opts) - - const status = opts.status || 200 - const headers = new Headers(opts.headers) - - if (body !== null && body !== undefined && !headers.has('Content-Type')) { - const contentType = extractContentType(body) - if (contentType) { - headers.append('Content-Type', contentType) - } - } - - this[INTERNALS] = { - url: opts.url, - status, - statusText: opts.statusText || STATUS_CODES[status], - headers, - counter: opts.counter, - trailer: Promise.resolve(opts.trailer || new Headers()), - } - } - - get trailer () { - return this[INTERNALS].trailer - } - - get url () { - return this[INTERNALS].url || '' - } - - get status () { - return this[INTERNALS].status - } - - get ok () { - return this[INTERNALS].status >= 200 && this[INTERNALS].status < 300 - } - - get redirected () { - return this[INTERNALS].counter > 0 - } - - get statusText () { - return this[INTERNALS].statusText - } - - get headers () { - return this[INTERNALS].headers - } - - clone () { - return new Response(clone(this), { - url: this.url, - status: this.status, - statusText: this.statusText, - headers: this.headers, - ok: this.ok, - redirected: this.redirected, - trailer: this.trailer, - }) - } - - get [Symbol.toStringTag] () { - return 'Response' - } -} - -module.exports = Response - -Object.defineProperties(Response.prototype, { - url: { enumerable: true }, - status: { enumerable: true }, - ok: { enumerable: true }, - redirected: { enumerable: true }, - statusText: { enumerable: true }, - headers: { enumerable: true }, - clone: { enumerable: true }, -}) diff --git a/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/package.json b/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/package.json deleted file mode 100644 index d491a7fba126d0..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/minipass-fetch/package.json +++ /dev/null @@ -1,69 +0,0 @@ -{ - "name": "minipass-fetch", - "version": "3.0.5", - "description": "An implementation of window.fetch in Node.js using Minipass streams", - "license": "MIT", - "main": "lib/index.js", - "scripts": { - "test:tls-fixtures": "./test/fixtures/tls/setup.sh", - "test": "tap", - "snap": "tap", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "postlint": "template-oss-check", - "lintfix": "npm run lint -- --fix", - "posttest": "npm run lint", - "template-oss-apply": "template-oss-apply --force" - }, - "tap": { - "coverage-map": "map.js", - "check-coverage": true, - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - }, - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.22.0", - "@ungap/url-search-params": "^0.2.2", - "abort-controller": "^3.0.0", - "abortcontroller-polyfill": "~1.7.3", - "encoding": "^0.1.13", - "form-data": "^4.0.0", - "nock": "^13.2.4", - "parted": "^0.1.1", - "string-to-arraybuffer": "^1.0.2", - "tap": "^16.0.0" - }, - "dependencies": { - "minipass": "^7.0.3", - "minipass-sized": "^1.0.3", - "minizlib": "^2.1.2" - }, - "optionalDependencies": { - "encoding": "^0.1.13" - }, - "repository": { - "type": "git", - "url": "git+https://github.com/npm/minipass-fetch.git" - }, - "keywords": [ - "fetch", - "minipass", - "node-fetch", - "window.fetch" - ], - "files": [ - "bin/", - "lib/" - ], - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - }, - "author": "GitHub Inc.", - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.22.0", - "publish": "true" - } -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/minizlib/LICENSE b/deps/npm/node_modules/node-gyp/node_modules/minizlib/LICENSE new file mode 100644 index 00000000000000..49f7efe431c9ea --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/minizlib/LICENSE @@ -0,0 +1,26 @@ +Minizlib was created by Isaac Z. Schlueter. +It is a derivative work of the Node.js project. + +""" +Copyright (c) 2017-2023 Isaac Z. Schlueter and Contributors +Copyright (c) 2017-2023 Node.js contributors. All rights reserved. +Copyright (c) 2017-2023 Joyent, Inc. and other Node contributors. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +""" diff --git a/deps/npm/node_modules/node-gyp/node_modules/minizlib/dist/commonjs/constants.js b/deps/npm/node_modules/node-gyp/node_modules/minizlib/dist/commonjs/constants.js new file mode 100644 index 00000000000000..dfc2c1957bfc99 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/minizlib/dist/commonjs/constants.js @@ -0,0 +1,123 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.constants = void 0; +// Update with any zlib constants that are added or changed in the future. +// Node v6 didn't export this, so we just hard code the version and rely +// on all the other hard-coded values from zlib v4736. When node v6 +// support drops, we can just export the realZlibConstants object. +const zlib_1 = __importDefault(require("zlib")); +/* c8 ignore start */ +const realZlibConstants = zlib_1.default.constants || { ZLIB_VERNUM: 4736 }; +/* c8 ignore stop */ +exports.constants = Object.freeze(Object.assign(Object.create(null), { + Z_NO_FLUSH: 0, + Z_PARTIAL_FLUSH: 1, + Z_SYNC_FLUSH: 2, + Z_FULL_FLUSH: 3, + Z_FINISH: 4, + Z_BLOCK: 5, + Z_OK: 0, + Z_STREAM_END: 1, + Z_NEED_DICT: 2, + Z_ERRNO: -1, + Z_STREAM_ERROR: -2, + Z_DATA_ERROR: -3, + Z_MEM_ERROR: -4, + Z_BUF_ERROR: -5, + Z_VERSION_ERROR: -6, + Z_NO_COMPRESSION: 0, + Z_BEST_SPEED: 1, + Z_BEST_COMPRESSION: 9, + Z_DEFAULT_COMPRESSION: -1, + Z_FILTERED: 1, + Z_HUFFMAN_ONLY: 2, + Z_RLE: 3, + Z_FIXED: 4, + Z_DEFAULT_STRATEGY: 0, + DEFLATE: 1, + INFLATE: 2, + GZIP: 3, + GUNZIP: 4, + DEFLATERAW: 5, + INFLATERAW: 6, + UNZIP: 7, + BROTLI_DECODE: 8, + BROTLI_ENCODE: 9, + Z_MIN_WINDOWBITS: 8, + Z_MAX_WINDOWBITS: 15, + Z_DEFAULT_WINDOWBITS: 15, + Z_MIN_CHUNK: 64, + Z_MAX_CHUNK: Infinity, + Z_DEFAULT_CHUNK: 16384, + Z_MIN_MEMLEVEL: 1, + Z_MAX_MEMLEVEL: 9, + Z_DEFAULT_MEMLEVEL: 8, + Z_MIN_LEVEL: -1, + Z_MAX_LEVEL: 9, + Z_DEFAULT_LEVEL: -1, + BROTLI_OPERATION_PROCESS: 0, + BROTLI_OPERATION_FLUSH: 1, + BROTLI_OPERATION_FINISH: 2, + BROTLI_OPERATION_EMIT_METADATA: 3, + BROTLI_MODE_GENERIC: 0, + BROTLI_MODE_TEXT: 1, + BROTLI_MODE_FONT: 2, + BROTLI_DEFAULT_MODE: 0, + BROTLI_MIN_QUALITY: 0, + BROTLI_MAX_QUALITY: 11, + BROTLI_DEFAULT_QUALITY: 11, + BROTLI_MIN_WINDOW_BITS: 10, + BROTLI_MAX_WINDOW_BITS: 24, + BROTLI_LARGE_MAX_WINDOW_BITS: 30, + BROTLI_DEFAULT_WINDOW: 22, + BROTLI_MIN_INPUT_BLOCK_BITS: 16, + BROTLI_MAX_INPUT_BLOCK_BITS: 24, + BROTLI_PARAM_MODE: 0, + BROTLI_PARAM_QUALITY: 1, + BROTLI_PARAM_LGWIN: 2, + BROTLI_PARAM_LGBLOCK: 3, + BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING: 4, + BROTLI_PARAM_SIZE_HINT: 5, + BROTLI_PARAM_LARGE_WINDOW: 6, + BROTLI_PARAM_NPOSTFIX: 7, + BROTLI_PARAM_NDIRECT: 8, + BROTLI_DECODER_RESULT_ERROR: 0, + BROTLI_DECODER_RESULT_SUCCESS: 1, + BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT: 2, + BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT: 3, + BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION: 0, + BROTLI_DECODER_PARAM_LARGE_WINDOW: 1, + BROTLI_DECODER_NO_ERROR: 0, + BROTLI_DECODER_SUCCESS: 1, + BROTLI_DECODER_NEEDS_MORE_INPUT: 2, + BROTLI_DECODER_NEEDS_MORE_OUTPUT: 3, + BROTLI_DECODER_ERROR_FORMAT_EXUBERANT_NIBBLE: -1, + BROTLI_DECODER_ERROR_FORMAT_RESERVED: -2, + BROTLI_DECODER_ERROR_FORMAT_EXUBERANT_META_NIBBLE: -3, + BROTLI_DECODER_ERROR_FORMAT_SIMPLE_HUFFMAN_ALPHABET: -4, + BROTLI_DECODER_ERROR_FORMAT_SIMPLE_HUFFMAN_SAME: -5, + BROTLI_DECODER_ERROR_FORMAT_CL_SPACE: -6, + BROTLI_DECODER_ERROR_FORMAT_HUFFMAN_SPACE: -7, + BROTLI_DECODER_ERROR_FORMAT_CONTEXT_MAP_REPEAT: -8, + BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_1: -9, + BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_2: -10, + BROTLI_DECODER_ERROR_FORMAT_TRANSFORM: -11, + BROTLI_DECODER_ERROR_FORMAT_DICTIONARY: -12, + BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS: -13, + BROTLI_DECODER_ERROR_FORMAT_PADDING_1: -14, + BROTLI_DECODER_ERROR_FORMAT_PADDING_2: -15, + BROTLI_DECODER_ERROR_FORMAT_DISTANCE: -16, + BROTLI_DECODER_ERROR_DICTIONARY_NOT_SET: -19, + BROTLI_DECODER_ERROR_INVALID_ARGUMENTS: -20, + BROTLI_DECODER_ERROR_ALLOC_CONTEXT_MODES: -21, + BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS: -22, + BROTLI_DECODER_ERROR_ALLOC_CONTEXT_MAP: -25, + BROTLI_DECODER_ERROR_ALLOC_RING_BUFFER_1: -26, + BROTLI_DECODER_ERROR_ALLOC_RING_BUFFER_2: -27, + BROTLI_DECODER_ERROR_ALLOC_BLOCK_TYPE_TREES: -30, + BROTLI_DECODER_ERROR_UNREACHABLE: -31, +}, realZlibConstants)); +//# sourceMappingURL=constants.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/minizlib/dist/commonjs/index.js b/deps/npm/node_modules/node-gyp/node_modules/minizlib/dist/commonjs/index.js new file mode 100644 index 00000000000000..ad65eef0495076 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/minizlib/dist/commonjs/index.js @@ -0,0 +1,352 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.BrotliDecompress = exports.BrotliCompress = exports.Brotli = exports.Unzip = exports.InflateRaw = exports.DeflateRaw = exports.Gunzip = exports.Gzip = exports.Inflate = exports.Deflate = exports.Zlib = exports.ZlibError = exports.constants = void 0; +const assert_1 = __importDefault(require("assert")); +const buffer_1 = require("buffer"); +const minipass_1 = require("minipass"); +const zlib_1 = __importDefault(require("zlib")); +const constants_js_1 = require("./constants.js"); +var constants_js_2 = require("./constants.js"); +Object.defineProperty(exports, "constants", { enumerable: true, get: function () { return constants_js_2.constants; } }); +const OriginalBufferConcat = buffer_1.Buffer.concat; +const _superWrite = Symbol('_superWrite'); +class ZlibError extends Error { + code; + errno; + constructor(err) { + super('zlib: ' + err.message); + this.code = err.code; + this.errno = err.errno; + /* c8 ignore next */ + if (!this.code) + this.code = 'ZLIB_ERROR'; + this.message = 'zlib: ' + err.message; + Error.captureStackTrace(this, this.constructor); + } + get name() { + return 'ZlibError'; + } +} +exports.ZlibError = ZlibError; +// the Zlib class they all inherit from +// This thing manages the queue of requests, and returns +// true or false if there is anything in the queue when +// you call the .write() method. +const _flushFlag = Symbol('flushFlag'); +class ZlibBase extends minipass_1.Minipass { + #sawError = false; + #ended = false; + #flushFlag; + #finishFlushFlag; + #fullFlushFlag; + #handle; + #onError; + get sawError() { + return this.#sawError; + } + get handle() { + return this.#handle; + } + /* c8 ignore start */ + get flushFlag() { + return this.#flushFlag; + } + /* c8 ignore stop */ + constructor(opts, mode) { + if (!opts || typeof opts !== 'object') + throw new TypeError('invalid options for ZlibBase constructor'); + //@ts-ignore + super(opts); + /* c8 ignore start */ + this.#flushFlag = opts.flush ?? 0; + this.#finishFlushFlag = opts.finishFlush ?? 0; + this.#fullFlushFlag = opts.fullFlushFlag ?? 0; + /* c8 ignore stop */ + // this will throw if any options are invalid for the class selected + try { + // @types/node doesn't know that it exports the classes, but they're there + //@ts-ignore + this.#handle = new zlib_1.default[mode](opts); + } + catch (er) { + // make sure that all errors get decorated properly + throw new ZlibError(er); + } + this.#onError = err => { + // no sense raising multiple errors, since we abort on the first one. + if (this.#sawError) + return; + this.#sawError = true; + // there is no way to cleanly recover. + // continuing only obscures problems. + this.close(); + this.emit('error', err); + }; + this.#handle?.on('error', er => this.#onError(new ZlibError(er))); + this.once('end', () => this.close); + } + close() { + if (this.#handle) { + this.#handle.close(); + this.#handle = undefined; + this.emit('close'); + } + } + reset() { + if (!this.#sawError) { + (0, assert_1.default)(this.#handle, 'zlib binding closed'); + //@ts-ignore + return this.#handle.reset?.(); + } + } + flush(flushFlag) { + if (this.ended) + return; + if (typeof flushFlag !== 'number') + flushFlag = this.#fullFlushFlag; + this.write(Object.assign(buffer_1.Buffer.alloc(0), { [_flushFlag]: flushFlag })); + } + end(chunk, encoding, cb) { + /* c8 ignore start */ + if (typeof chunk === 'function') { + cb = chunk; + encoding = undefined; + chunk = undefined; + } + if (typeof encoding === 'function') { + cb = encoding; + encoding = undefined; + } + /* c8 ignore stop */ + if (chunk) { + if (encoding) + this.write(chunk, encoding); + else + this.write(chunk); + } + this.flush(this.#finishFlushFlag); + this.#ended = true; + return super.end(cb); + } + get ended() { + return this.#ended; + } + // overridden in the gzip classes to do portable writes + [_superWrite](data) { + return super.write(data); + } + write(chunk, encoding, cb) { + // process the chunk using the sync process + // then super.write() all the outputted chunks + if (typeof encoding === 'function') + (cb = encoding), (encoding = 'utf8'); + if (typeof chunk === 'string') + chunk = buffer_1.Buffer.from(chunk, encoding); + if (this.#sawError) + return; + (0, assert_1.default)(this.#handle, 'zlib binding closed'); + // _processChunk tries to .close() the native handle after it's done, so we + // intercept that by temporarily making it a no-op. + // diving into the node:zlib internals a bit here + const nativeHandle = this.#handle + ._handle; + const originalNativeClose = nativeHandle.close; + nativeHandle.close = () => { }; + const originalClose = this.#handle.close; + this.#handle.close = () => { }; + // It also calls `Buffer.concat()` at the end, which may be convenient + // for some, but which we are not interested in as it slows us down. + buffer_1.Buffer.concat = args => args; + let result = undefined; + try { + const flushFlag = typeof chunk[_flushFlag] === 'number' + ? chunk[_flushFlag] + : this.#flushFlag; + result = this.#handle._processChunk(chunk, flushFlag); + // if we don't throw, reset it back how it was + buffer_1.Buffer.concat = OriginalBufferConcat; + } + catch (err) { + // or if we do, put Buffer.concat() back before we emit error + // Error events call into user code, which may call Buffer.concat() + buffer_1.Buffer.concat = OriginalBufferConcat; + this.#onError(new ZlibError(err)); + } + finally { + if (this.#handle) { + // Core zlib resets `_handle` to null after attempting to close the + // native handle. Our no-op handler prevented actual closure, but we + // need to restore the `._handle` property. + ; + this.#handle._handle = + nativeHandle; + nativeHandle.close = originalNativeClose; + this.#handle.close = originalClose; + // `_processChunk()` adds an 'error' listener. If we don't remove it + // after each call, these handlers start piling up. + this.#handle.removeAllListeners('error'); + // make sure OUR error listener is still attached tho + } + } + if (this.#handle) + this.#handle.on('error', er => this.#onError(new ZlibError(er))); + let writeReturn; + if (result) { + if (Array.isArray(result) && result.length > 0) { + const r = result[0]; + // The first buffer is always `handle._outBuffer`, which would be + // re-used for later invocations; so, we always have to copy that one. + writeReturn = this[_superWrite](buffer_1.Buffer.from(r)); + for (let i = 1; i < result.length; i++) { + writeReturn = this[_superWrite](result[i]); + } + } + else { + // either a single Buffer or an empty array + writeReturn = this[_superWrite](buffer_1.Buffer.from(result)); + } + } + if (cb) + cb(); + return writeReturn; + } +} +class Zlib extends ZlibBase { + #level; + #strategy; + constructor(opts, mode) { + opts = opts || {}; + opts.flush = opts.flush || constants_js_1.constants.Z_NO_FLUSH; + opts.finishFlush = opts.finishFlush || constants_js_1.constants.Z_FINISH; + opts.fullFlushFlag = constants_js_1.constants.Z_FULL_FLUSH; + super(opts, mode); + this.#level = opts.level; + this.#strategy = opts.strategy; + } + params(level, strategy) { + if (this.sawError) + return; + if (!this.handle) + throw new Error('cannot switch params when binding is closed'); + // no way to test this without also not supporting params at all + /* c8 ignore start */ + if (!this.handle.params) + throw new Error('not supported in this implementation'); + /* c8 ignore stop */ + if (this.#level !== level || this.#strategy !== strategy) { + this.flush(constants_js_1.constants.Z_SYNC_FLUSH); + (0, assert_1.default)(this.handle, 'zlib binding closed'); + // .params() calls .flush(), but the latter is always async in the + // core zlib. We override .flush() temporarily to intercept that and + // flush synchronously. + const origFlush = this.handle.flush; + this.handle.flush = (flushFlag, cb) => { + /* c8 ignore start */ + if (typeof flushFlag === 'function') { + cb = flushFlag; + flushFlag = this.flushFlag; + } + /* c8 ignore stop */ + this.flush(flushFlag); + cb?.(); + }; + try { + ; + this.handle.params(level, strategy); + } + finally { + this.handle.flush = origFlush; + } + /* c8 ignore start */ + if (this.handle) { + this.#level = level; + this.#strategy = strategy; + } + /* c8 ignore stop */ + } + } +} +exports.Zlib = Zlib; +// minimal 2-byte header +class Deflate extends Zlib { + constructor(opts) { + super(opts, 'Deflate'); + } +} +exports.Deflate = Deflate; +class Inflate extends Zlib { + constructor(opts) { + super(opts, 'Inflate'); + } +} +exports.Inflate = Inflate; +class Gzip extends Zlib { + #portable; + constructor(opts) { + super(opts, 'Gzip'); + this.#portable = opts && !!opts.portable; + } + [_superWrite](data) { + if (!this.#portable) + return super[_superWrite](data); + // we'll always get the header emitted in one first chunk + // overwrite the OS indicator byte with 0xFF + this.#portable = false; + data[9] = 255; + return super[_superWrite](data); + } +} +exports.Gzip = Gzip; +class Gunzip extends Zlib { + constructor(opts) { + super(opts, 'Gunzip'); + } +} +exports.Gunzip = Gunzip; +// raw - no header +class DeflateRaw extends Zlib { + constructor(opts) { + super(opts, 'DeflateRaw'); + } +} +exports.DeflateRaw = DeflateRaw; +class InflateRaw extends Zlib { + constructor(opts) { + super(opts, 'InflateRaw'); + } +} +exports.InflateRaw = InflateRaw; +// auto-detect header. +class Unzip extends Zlib { + constructor(opts) { + super(opts, 'Unzip'); + } +} +exports.Unzip = Unzip; +class Brotli extends ZlibBase { + constructor(opts, mode) { + opts = opts || {}; + opts.flush = opts.flush || constants_js_1.constants.BROTLI_OPERATION_PROCESS; + opts.finishFlush = + opts.finishFlush || constants_js_1.constants.BROTLI_OPERATION_FINISH; + opts.fullFlushFlag = constants_js_1.constants.BROTLI_OPERATION_FLUSH; + super(opts, mode); + } +} +exports.Brotli = Brotli; +class BrotliCompress extends Brotli { + constructor(opts) { + super(opts, 'BrotliCompress'); + } +} +exports.BrotliCompress = BrotliCompress; +class BrotliDecompress extends Brotli { + constructor(opts) { + super(opts, 'BrotliDecompress'); + } +} +exports.BrotliDecompress = BrotliDecompress; +//# sourceMappingURL=index.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/minizlib/dist/commonjs/package.json b/deps/npm/node_modules/node-gyp/node_modules/minizlib/dist/commonjs/package.json new file mode 100644 index 00000000000000..5bbefffbabee39 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/minizlib/dist/commonjs/package.json @@ -0,0 +1,3 @@ +{ + "type": "commonjs" +} diff --git a/deps/npm/node_modules/node-gyp/node_modules/minizlib/dist/esm/constants.js b/deps/npm/node_modules/node-gyp/node_modules/minizlib/dist/esm/constants.js new file mode 100644 index 00000000000000..7faf40be5068d0 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/minizlib/dist/esm/constants.js @@ -0,0 +1,117 @@ +// Update with any zlib constants that are added or changed in the future. +// Node v6 didn't export this, so we just hard code the version and rely +// on all the other hard-coded values from zlib v4736. When node v6 +// support drops, we can just export the realZlibConstants object. +import realZlib from 'zlib'; +/* c8 ignore start */ +const realZlibConstants = realZlib.constants || { ZLIB_VERNUM: 4736 }; +/* c8 ignore stop */ +export const constants = Object.freeze(Object.assign(Object.create(null), { + Z_NO_FLUSH: 0, + Z_PARTIAL_FLUSH: 1, + Z_SYNC_FLUSH: 2, + Z_FULL_FLUSH: 3, + Z_FINISH: 4, + Z_BLOCK: 5, + Z_OK: 0, + Z_STREAM_END: 1, + Z_NEED_DICT: 2, + Z_ERRNO: -1, + Z_STREAM_ERROR: -2, + Z_DATA_ERROR: -3, + Z_MEM_ERROR: -4, + Z_BUF_ERROR: -5, + Z_VERSION_ERROR: -6, + Z_NO_COMPRESSION: 0, + Z_BEST_SPEED: 1, + Z_BEST_COMPRESSION: 9, + Z_DEFAULT_COMPRESSION: -1, + Z_FILTERED: 1, + Z_HUFFMAN_ONLY: 2, + Z_RLE: 3, + Z_FIXED: 4, + Z_DEFAULT_STRATEGY: 0, + DEFLATE: 1, + INFLATE: 2, + GZIP: 3, + GUNZIP: 4, + DEFLATERAW: 5, + INFLATERAW: 6, + UNZIP: 7, + BROTLI_DECODE: 8, + BROTLI_ENCODE: 9, + Z_MIN_WINDOWBITS: 8, + Z_MAX_WINDOWBITS: 15, + Z_DEFAULT_WINDOWBITS: 15, + Z_MIN_CHUNK: 64, + Z_MAX_CHUNK: Infinity, + Z_DEFAULT_CHUNK: 16384, + Z_MIN_MEMLEVEL: 1, + Z_MAX_MEMLEVEL: 9, + Z_DEFAULT_MEMLEVEL: 8, + Z_MIN_LEVEL: -1, + Z_MAX_LEVEL: 9, + Z_DEFAULT_LEVEL: -1, + BROTLI_OPERATION_PROCESS: 0, + BROTLI_OPERATION_FLUSH: 1, + BROTLI_OPERATION_FINISH: 2, + BROTLI_OPERATION_EMIT_METADATA: 3, + BROTLI_MODE_GENERIC: 0, + BROTLI_MODE_TEXT: 1, + BROTLI_MODE_FONT: 2, + BROTLI_DEFAULT_MODE: 0, + BROTLI_MIN_QUALITY: 0, + BROTLI_MAX_QUALITY: 11, + BROTLI_DEFAULT_QUALITY: 11, + BROTLI_MIN_WINDOW_BITS: 10, + BROTLI_MAX_WINDOW_BITS: 24, + BROTLI_LARGE_MAX_WINDOW_BITS: 30, + BROTLI_DEFAULT_WINDOW: 22, + BROTLI_MIN_INPUT_BLOCK_BITS: 16, + BROTLI_MAX_INPUT_BLOCK_BITS: 24, + BROTLI_PARAM_MODE: 0, + BROTLI_PARAM_QUALITY: 1, + BROTLI_PARAM_LGWIN: 2, + BROTLI_PARAM_LGBLOCK: 3, + BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING: 4, + BROTLI_PARAM_SIZE_HINT: 5, + BROTLI_PARAM_LARGE_WINDOW: 6, + BROTLI_PARAM_NPOSTFIX: 7, + BROTLI_PARAM_NDIRECT: 8, + BROTLI_DECODER_RESULT_ERROR: 0, + BROTLI_DECODER_RESULT_SUCCESS: 1, + BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT: 2, + BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT: 3, + BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION: 0, + BROTLI_DECODER_PARAM_LARGE_WINDOW: 1, + BROTLI_DECODER_NO_ERROR: 0, + BROTLI_DECODER_SUCCESS: 1, + BROTLI_DECODER_NEEDS_MORE_INPUT: 2, + BROTLI_DECODER_NEEDS_MORE_OUTPUT: 3, + BROTLI_DECODER_ERROR_FORMAT_EXUBERANT_NIBBLE: -1, + BROTLI_DECODER_ERROR_FORMAT_RESERVED: -2, + BROTLI_DECODER_ERROR_FORMAT_EXUBERANT_META_NIBBLE: -3, + BROTLI_DECODER_ERROR_FORMAT_SIMPLE_HUFFMAN_ALPHABET: -4, + BROTLI_DECODER_ERROR_FORMAT_SIMPLE_HUFFMAN_SAME: -5, + BROTLI_DECODER_ERROR_FORMAT_CL_SPACE: -6, + BROTLI_DECODER_ERROR_FORMAT_HUFFMAN_SPACE: -7, + BROTLI_DECODER_ERROR_FORMAT_CONTEXT_MAP_REPEAT: -8, + BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_1: -9, + BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_2: -10, + BROTLI_DECODER_ERROR_FORMAT_TRANSFORM: -11, + BROTLI_DECODER_ERROR_FORMAT_DICTIONARY: -12, + BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS: -13, + BROTLI_DECODER_ERROR_FORMAT_PADDING_1: -14, + BROTLI_DECODER_ERROR_FORMAT_PADDING_2: -15, + BROTLI_DECODER_ERROR_FORMAT_DISTANCE: -16, + BROTLI_DECODER_ERROR_DICTIONARY_NOT_SET: -19, + BROTLI_DECODER_ERROR_INVALID_ARGUMENTS: -20, + BROTLI_DECODER_ERROR_ALLOC_CONTEXT_MODES: -21, + BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS: -22, + BROTLI_DECODER_ERROR_ALLOC_CONTEXT_MAP: -25, + BROTLI_DECODER_ERROR_ALLOC_RING_BUFFER_1: -26, + BROTLI_DECODER_ERROR_ALLOC_RING_BUFFER_2: -27, + BROTLI_DECODER_ERROR_ALLOC_BLOCK_TYPE_TREES: -30, + BROTLI_DECODER_ERROR_UNREACHABLE: -31, +}, realZlibConstants)); +//# sourceMappingURL=constants.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/minizlib/dist/esm/index.js b/deps/npm/node_modules/node-gyp/node_modules/minizlib/dist/esm/index.js new file mode 100644 index 00000000000000..a6269b505f47cc --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/minizlib/dist/esm/index.js @@ -0,0 +1,333 @@ +import assert from 'assert'; +import { Buffer } from 'buffer'; +import { Minipass } from 'minipass'; +import realZlib from 'zlib'; +import { constants } from './constants.js'; +export { constants } from './constants.js'; +const OriginalBufferConcat = Buffer.concat; +const _superWrite = Symbol('_superWrite'); +export class ZlibError extends Error { + code; + errno; + constructor(err) { + super('zlib: ' + err.message); + this.code = err.code; + this.errno = err.errno; + /* c8 ignore next */ + if (!this.code) + this.code = 'ZLIB_ERROR'; + this.message = 'zlib: ' + err.message; + Error.captureStackTrace(this, this.constructor); + } + get name() { + return 'ZlibError'; + } +} +// the Zlib class they all inherit from +// This thing manages the queue of requests, and returns +// true or false if there is anything in the queue when +// you call the .write() method. +const _flushFlag = Symbol('flushFlag'); +class ZlibBase extends Minipass { + #sawError = false; + #ended = false; + #flushFlag; + #finishFlushFlag; + #fullFlushFlag; + #handle; + #onError; + get sawError() { + return this.#sawError; + } + get handle() { + return this.#handle; + } + /* c8 ignore start */ + get flushFlag() { + return this.#flushFlag; + } + /* c8 ignore stop */ + constructor(opts, mode) { + if (!opts || typeof opts !== 'object') + throw new TypeError('invalid options for ZlibBase constructor'); + //@ts-ignore + super(opts); + /* c8 ignore start */ + this.#flushFlag = opts.flush ?? 0; + this.#finishFlushFlag = opts.finishFlush ?? 0; + this.#fullFlushFlag = opts.fullFlushFlag ?? 0; + /* c8 ignore stop */ + // this will throw if any options are invalid for the class selected + try { + // @types/node doesn't know that it exports the classes, but they're there + //@ts-ignore + this.#handle = new realZlib[mode](opts); + } + catch (er) { + // make sure that all errors get decorated properly + throw new ZlibError(er); + } + this.#onError = err => { + // no sense raising multiple errors, since we abort on the first one. + if (this.#sawError) + return; + this.#sawError = true; + // there is no way to cleanly recover. + // continuing only obscures problems. + this.close(); + this.emit('error', err); + }; + this.#handle?.on('error', er => this.#onError(new ZlibError(er))); + this.once('end', () => this.close); + } + close() { + if (this.#handle) { + this.#handle.close(); + this.#handle = undefined; + this.emit('close'); + } + } + reset() { + if (!this.#sawError) { + assert(this.#handle, 'zlib binding closed'); + //@ts-ignore + return this.#handle.reset?.(); + } + } + flush(flushFlag) { + if (this.ended) + return; + if (typeof flushFlag !== 'number') + flushFlag = this.#fullFlushFlag; + this.write(Object.assign(Buffer.alloc(0), { [_flushFlag]: flushFlag })); + } + end(chunk, encoding, cb) { + /* c8 ignore start */ + if (typeof chunk === 'function') { + cb = chunk; + encoding = undefined; + chunk = undefined; + } + if (typeof encoding === 'function') { + cb = encoding; + encoding = undefined; + } + /* c8 ignore stop */ + if (chunk) { + if (encoding) + this.write(chunk, encoding); + else + this.write(chunk); + } + this.flush(this.#finishFlushFlag); + this.#ended = true; + return super.end(cb); + } + get ended() { + return this.#ended; + } + // overridden in the gzip classes to do portable writes + [_superWrite](data) { + return super.write(data); + } + write(chunk, encoding, cb) { + // process the chunk using the sync process + // then super.write() all the outputted chunks + if (typeof encoding === 'function') + (cb = encoding), (encoding = 'utf8'); + if (typeof chunk === 'string') + chunk = Buffer.from(chunk, encoding); + if (this.#sawError) + return; + assert(this.#handle, 'zlib binding closed'); + // _processChunk tries to .close() the native handle after it's done, so we + // intercept that by temporarily making it a no-op. + // diving into the node:zlib internals a bit here + const nativeHandle = this.#handle + ._handle; + const originalNativeClose = nativeHandle.close; + nativeHandle.close = () => { }; + const originalClose = this.#handle.close; + this.#handle.close = () => { }; + // It also calls `Buffer.concat()` at the end, which may be convenient + // for some, but which we are not interested in as it slows us down. + Buffer.concat = args => args; + let result = undefined; + try { + const flushFlag = typeof chunk[_flushFlag] === 'number' + ? chunk[_flushFlag] + : this.#flushFlag; + result = this.#handle._processChunk(chunk, flushFlag); + // if we don't throw, reset it back how it was + Buffer.concat = OriginalBufferConcat; + } + catch (err) { + // or if we do, put Buffer.concat() back before we emit error + // Error events call into user code, which may call Buffer.concat() + Buffer.concat = OriginalBufferConcat; + this.#onError(new ZlibError(err)); + } + finally { + if (this.#handle) { + // Core zlib resets `_handle` to null after attempting to close the + // native handle. Our no-op handler prevented actual closure, but we + // need to restore the `._handle` property. + ; + this.#handle._handle = + nativeHandle; + nativeHandle.close = originalNativeClose; + this.#handle.close = originalClose; + // `_processChunk()` adds an 'error' listener. If we don't remove it + // after each call, these handlers start piling up. + this.#handle.removeAllListeners('error'); + // make sure OUR error listener is still attached tho + } + } + if (this.#handle) + this.#handle.on('error', er => this.#onError(new ZlibError(er))); + let writeReturn; + if (result) { + if (Array.isArray(result) && result.length > 0) { + const r = result[0]; + // The first buffer is always `handle._outBuffer`, which would be + // re-used for later invocations; so, we always have to copy that one. + writeReturn = this[_superWrite](Buffer.from(r)); + for (let i = 1; i < result.length; i++) { + writeReturn = this[_superWrite](result[i]); + } + } + else { + // either a single Buffer or an empty array + writeReturn = this[_superWrite](Buffer.from(result)); + } + } + if (cb) + cb(); + return writeReturn; + } +} +export class Zlib extends ZlibBase { + #level; + #strategy; + constructor(opts, mode) { + opts = opts || {}; + opts.flush = opts.flush || constants.Z_NO_FLUSH; + opts.finishFlush = opts.finishFlush || constants.Z_FINISH; + opts.fullFlushFlag = constants.Z_FULL_FLUSH; + super(opts, mode); + this.#level = opts.level; + this.#strategy = opts.strategy; + } + params(level, strategy) { + if (this.sawError) + return; + if (!this.handle) + throw new Error('cannot switch params when binding is closed'); + // no way to test this without also not supporting params at all + /* c8 ignore start */ + if (!this.handle.params) + throw new Error('not supported in this implementation'); + /* c8 ignore stop */ + if (this.#level !== level || this.#strategy !== strategy) { + this.flush(constants.Z_SYNC_FLUSH); + assert(this.handle, 'zlib binding closed'); + // .params() calls .flush(), but the latter is always async in the + // core zlib. We override .flush() temporarily to intercept that and + // flush synchronously. + const origFlush = this.handle.flush; + this.handle.flush = (flushFlag, cb) => { + /* c8 ignore start */ + if (typeof flushFlag === 'function') { + cb = flushFlag; + flushFlag = this.flushFlag; + } + /* c8 ignore stop */ + this.flush(flushFlag); + cb?.(); + }; + try { + ; + this.handle.params(level, strategy); + } + finally { + this.handle.flush = origFlush; + } + /* c8 ignore start */ + if (this.handle) { + this.#level = level; + this.#strategy = strategy; + } + /* c8 ignore stop */ + } + } +} +// minimal 2-byte header +export class Deflate extends Zlib { + constructor(opts) { + super(opts, 'Deflate'); + } +} +export class Inflate extends Zlib { + constructor(opts) { + super(opts, 'Inflate'); + } +} +export class Gzip extends Zlib { + #portable; + constructor(opts) { + super(opts, 'Gzip'); + this.#portable = opts && !!opts.portable; + } + [_superWrite](data) { + if (!this.#portable) + return super[_superWrite](data); + // we'll always get the header emitted in one first chunk + // overwrite the OS indicator byte with 0xFF + this.#portable = false; + data[9] = 255; + return super[_superWrite](data); + } +} +export class Gunzip extends Zlib { + constructor(opts) { + super(opts, 'Gunzip'); + } +} +// raw - no header +export class DeflateRaw extends Zlib { + constructor(opts) { + super(opts, 'DeflateRaw'); + } +} +export class InflateRaw extends Zlib { + constructor(opts) { + super(opts, 'InflateRaw'); + } +} +// auto-detect header. +export class Unzip extends Zlib { + constructor(opts) { + super(opts, 'Unzip'); + } +} +export class Brotli extends ZlibBase { + constructor(opts, mode) { + opts = opts || {}; + opts.flush = opts.flush || constants.BROTLI_OPERATION_PROCESS; + opts.finishFlush = + opts.finishFlush || constants.BROTLI_OPERATION_FINISH; + opts.fullFlushFlag = constants.BROTLI_OPERATION_FLUSH; + super(opts, mode); + } +} +export class BrotliCompress extends Brotli { + constructor(opts) { + super(opts, 'BrotliCompress'); + } +} +export class BrotliDecompress extends Brotli { + constructor(opts) { + super(opts, 'BrotliDecompress'); + } +} +//# sourceMappingURL=index.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/minizlib/dist/esm/package.json b/deps/npm/node_modules/node-gyp/node_modules/minizlib/dist/esm/package.json new file mode 100644 index 00000000000000..3dbc1ca591c055 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/minizlib/dist/esm/package.json @@ -0,0 +1,3 @@ +{ + "type": "module" +} diff --git a/deps/npm/node_modules/node-gyp/node_modules/minizlib/package.json b/deps/npm/node_modules/node-gyp/node_modules/minizlib/package.json new file mode 100644 index 00000000000000..e94623ff43d353 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/minizlib/package.json @@ -0,0 +1,81 @@ +{ + "name": "minizlib", + "version": "3.0.1", + "description": "A small fast zlib stream built on [minipass](http://npm.im/minipass) and Node.js's zlib binding.", + "main": "./dist/commonjs/index.js", + "dependencies": { + "minipass": "^7.0.4", + "rimraf": "^5.0.5" + }, + "scripts": { + "prepare": "tshy", + "pretest": "npm run prepare", + "test": "tap", + "preversion": "npm test", + "postversion": "npm publish", + "prepublishOnly": "git push origin --follow-tags", + "format": "prettier --write . --loglevel warn", + "typedoc": "typedoc --tsconfig .tshy/esm.json ./src/*.ts" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/isaacs/minizlib.git" + }, + "keywords": [ + "zlib", + "gzip", + "gunzip", + "deflate", + "inflate", + "compression", + "zip", + "unzip" + ], + "author": "Isaac Z. Schlueter (http://blog.izs.me/)", + "license": "MIT", + "devDependencies": { + "@types/node": "^20.11.29", + "mkdirp": "^3.0.1", + "tap": "^18.7.1", + "tshy": "^1.12.0", + "typedoc": "^0.25.12" + }, + "files": [ + "dist" + ], + "engines": { + "node": ">= 18" + }, + "tshy": { + "exports": { + "./package.json": "./package.json", + ".": "./src/index.ts" + } + }, + "exports": { + "./package.json": "./package.json", + ".": { + "import": { + "types": "./dist/esm/index.d.ts", + "default": "./dist/esm/index.js" + }, + "require": { + "types": "./dist/commonjs/index.d.ts", + "default": "./dist/commonjs/index.js" + } + } + }, + "types": "./dist/commonjs/index.d.ts", + "type": "module", + "prettier": { + "semi": false, + "printWidth": 75, + "tabWidth": 2, + "useTabs": false, + "singleQuote": true, + "jsxSingleQuote": false, + "bracketSameLine": true, + "arrowParens": "avoid", + "endOfLine": "lf" + } +} diff --git a/deps/npm/node_modules/is-lambda/LICENSE b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/LICENSE similarity index 79% rename from deps/npm/node_modules/is-lambda/LICENSE rename to deps/npm/node_modules/node-gyp/node_modules/mkdirp/LICENSE index 4a59c94175c2a3..0a034db7a73b5d 100644 --- a/deps/npm/node_modules/is-lambda/LICENSE +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/LICENSE @@ -1,6 +1,6 @@ -The MIT License (MIT) +Copyright (c) 2011-2023 James Halliday (mail@substack.net) and Isaac Z. Schlueter (i@izs.me) -Copyright (c) 2016-2017 Thomas Watson Steen +This project is free software released under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -9,13 +9,13 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/package.json b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/package.json new file mode 100644 index 00000000000000..9d04a66e16cd93 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/package.json @@ -0,0 +1,91 @@ +{ + "name": "mkdirp", + "description": "Recursively mkdir, like `mkdir -p`", + "version": "3.0.1", + "keywords": [ + "mkdir", + "directory", + "make dir", + "make", + "dir", + "recursive", + "native" + ], + "bin": "./dist/cjs/src/bin.js", + "main": "./dist/cjs/src/index.js", + "module": "./dist/mjs/index.js", + "types": "./dist/mjs/index.d.ts", + "exports": { + ".": { + "import": { + "types": "./dist/mjs/index.d.ts", + "default": "./dist/mjs/index.js" + }, + "require": { + "types": "./dist/cjs/src/index.d.ts", + "default": "./dist/cjs/src/index.js" + } + } + }, + "files": [ + "dist" + ], + "scripts": { + "preversion": "npm test", + "postversion": "npm publish", + "prepublishOnly": "git push origin --follow-tags", + "preprepare": "rm -rf dist", + "prepare": "tsc -p tsconfig.json && tsc -p tsconfig-esm.json", + "postprepare": "bash fixup.sh", + "pretest": "npm run prepare", + "presnap": "npm run prepare", + "test": "c8 tap", + "snap": "c8 tap", + "format": "prettier --write . --loglevel warn", + "benchmark": "node benchmark/index.js", + "typedoc": "typedoc --tsconfig tsconfig-esm.json ./src/*.ts" + }, + "prettier": { + "semi": false, + "printWidth": 80, + "tabWidth": 2, + "useTabs": false, + "singleQuote": true, + "jsxSingleQuote": false, + "bracketSameLine": true, + "arrowParens": "avoid", + "endOfLine": "lf" + }, + "devDependencies": { + "@types/brace-expansion": "^1.1.0", + "@types/node": "^18.11.9", + "@types/tap": "^15.0.7", + "c8": "^7.12.0", + "eslint-config-prettier": "^8.6.0", + "prettier": "^2.8.2", + "tap": "^16.3.3", + "ts-node": "^10.9.1", + "typedoc": "^0.23.21", + "typescript": "^4.9.3" + }, + "tap": { + "coverage": false, + "node-arg": [ + "--no-warnings", + "--loader", + "ts-node/esm" + ], + "ts": false + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + }, + "repository": { + "type": "git", + "url": "https://github.com/isaacs/node-mkdirp.git" + }, + "license": "MIT", + "engines": { + "node": ">=10" + } +} diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/bin.d.ts b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/bin.d.ts new file mode 100644 index 00000000000000..34e005228653c8 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/bin.d.ts @@ -0,0 +1,3 @@ +#!/usr/bin/env node +export {}; +//# sourceMappingURL=bin.d.ts.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/bin.d.ts.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/bin.d.ts.map new file mode 100644 index 00000000000000..c10c656ec75109 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/bin.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"bin.d.ts","sourceRoot":"","sources":["../../../src/bin.ts"],"names":[],"mappings":""} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/bin.js b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/bin.js new file mode 100755 index 00000000000000..757aae1fd96cb2 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/bin.js @@ -0,0 +1,80 @@ +#!/usr/bin/env node +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +const package_json_1 = require("../package.json"); +const usage = () => ` +usage: mkdirp [DIR1,DIR2..] {OPTIONS} + + Create each supplied directory including any necessary parent directories + that don't yet exist. + + If the directory already exists, do nothing. + +OPTIONS are: + + -m If a directory needs to be created, set the mode as an octal + --mode= permission string. + + -v --version Print the mkdirp version number + + -h --help Print this helpful banner + + -p --print Print the first directories created for each path provided + + --manual Use manual implementation, even if native is available +`; +const dirs = []; +const opts = {}; +let doPrint = false; +let dashdash = false; +let manual = false; +for (const arg of process.argv.slice(2)) { + if (dashdash) + dirs.push(arg); + else if (arg === '--') + dashdash = true; + else if (arg === '--manual') + manual = true; + else if (/^-h/.test(arg) || /^--help/.test(arg)) { + console.log(usage()); + process.exit(0); + } + else if (arg === '-v' || arg === '--version') { + console.log(package_json_1.version); + process.exit(0); + } + else if (arg === '-p' || arg === '--print') { + doPrint = true; + } + else if (/^-m/.test(arg) || /^--mode=/.test(arg)) { + // these don't get covered in CI, but work locally + // weird because the tests below show as passing in the output. + /* c8 ignore start */ + const mode = parseInt(arg.replace(/^(-m|--mode=)/, ''), 8); + if (isNaN(mode)) { + console.error(`invalid mode argument: ${arg}\nMust be an octal number.`); + process.exit(1); + } + /* c8 ignore stop */ + opts.mode = mode; + } + else + dirs.push(arg); +} +const index_js_1 = require("./index.js"); +const impl = manual ? index_js_1.mkdirp.manual : index_js_1.mkdirp; +if (dirs.length === 0) { + console.error(usage()); +} +// these don't get covered in CI, but work locally +/* c8 ignore start */ +Promise.all(dirs.map(dir => impl(dir, opts))) + .then(made => (doPrint ? made.forEach(m => m && console.log(m)) : null)) + .catch(er => { + console.error(er.message); + if (er.code) + console.error(' code: ' + er.code); + process.exit(1); +}); +/* c8 ignore stop */ +//# sourceMappingURL=bin.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/bin.js.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/bin.js.map new file mode 100644 index 00000000000000..d99295301b5fa7 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/bin.js.map @@ -0,0 +1 @@ +{"version":3,"file":"bin.js","sourceRoot":"","sources":["../../../src/bin.ts"],"names":[],"mappings":";;;AAEA,kDAAyC;AAGzC,MAAM,KAAK,GAAG,GAAG,EAAE,CAAC;;;;;;;;;;;;;;;;;;;;CAoBnB,CAAA;AAED,MAAM,IAAI,GAAa,EAAE,CAAA;AACzB,MAAM,IAAI,GAAkB,EAAE,CAAA;AAC9B,IAAI,OAAO,GAAY,KAAK,CAAA;AAC5B,IAAI,QAAQ,GAAG,KAAK,CAAA;AACpB,IAAI,MAAM,GAAG,KAAK,CAAA;AAClB,KAAK,MAAM,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;IACvC,IAAI,QAAQ;QAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;SACvB,IAAI,GAAG,KAAK,IAAI;QAAE,QAAQ,GAAG,IAAI,CAAA;SACjC,IAAI,GAAG,KAAK,UAAU;QAAE,MAAM,GAAG,IAAI,CAAA;SACrC,IAAI,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE;QAC/C,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,CAAA;QACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;KAChB;SAAM,IAAI,GAAG,KAAK,IAAI,IAAI,GAAG,KAAK,WAAW,EAAE;QAC9C,OAAO,CAAC,GAAG,CAAC,sBAAO,CAAC,CAAA;QACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;KAChB;SAAM,IAAI,GAAG,KAAK,IAAI,IAAI,GAAG,KAAK,SAAS,EAAE;QAC5C,OAAO,GAAG,IAAI,CAAA;KACf;SAAM,IAAI,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE;QAClD,kDAAkD;QAClD,+DAA+D;QAC/D,qBAAqB;QACrB,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAA;QAC1D,IAAI,KAAK,CAAC,IAAI,CAAC,EAAE;YACf,OAAO,CAAC,KAAK,CAAC,0BAA0B,GAAG,4BAA4B,CAAC,CAAA;YACxE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;SAChB;QACD,oBAAoB;QACpB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAA;KACjB;;QAAM,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;CACtB;AAED,yCAAmC;AACnC,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,iBAAM,CAAC,MAAM,CAAC,CAAC,CAAC,iBAAM,CAAA;AAC5C,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE;IACrB,OAAO,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC,CAAA;CACvB;AAED,kDAAkD;AAClD,qBAAqB;AACrB,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC;KAC1C,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;KACvE,KAAK,CAAC,EAAE,CAAC,EAAE;IACV,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,OAAO,CAAC,CAAA;IACzB,IAAI,EAAE,CAAC,IAAI;QAAE,OAAO,CAAC,KAAK,CAAC,UAAU,GAAG,EAAE,CAAC,IAAI,CAAC,CAAA;IAChD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;AACjB,CAAC,CAAC,CAAA;AACJ,oBAAoB","sourcesContent":["#!/usr/bin/env node\n\nimport { version } from '../package.json'\nimport { MkdirpOptions } from './opts-arg.js'\n\nconst usage = () => `\nusage: mkdirp [DIR1,DIR2..] {OPTIONS}\n\n Create each supplied directory including any necessary parent directories\n that don't yet exist.\n\n If the directory already exists, do nothing.\n\nOPTIONS are:\n\n -m If a directory needs to be created, set the mode as an octal\n --mode= permission string.\n\n -v --version Print the mkdirp version number\n\n -h --help Print this helpful banner\n\n -p --print Print the first directories created for each path provided\n\n --manual Use manual implementation, even if native is available\n`\n\nconst dirs: string[] = []\nconst opts: MkdirpOptions = {}\nlet doPrint: boolean = false\nlet dashdash = false\nlet manual = false\nfor (const arg of process.argv.slice(2)) {\n if (dashdash) dirs.push(arg)\n else if (arg === '--') dashdash = true\n else if (arg === '--manual') manual = true\n else if (/^-h/.test(arg) || /^--help/.test(arg)) {\n console.log(usage())\n process.exit(0)\n } else if (arg === '-v' || arg === '--version') {\n console.log(version)\n process.exit(0)\n } else if (arg === '-p' || arg === '--print') {\n doPrint = true\n } else if (/^-m/.test(arg) || /^--mode=/.test(arg)) {\n // these don't get covered in CI, but work locally\n // weird because the tests below show as passing in the output.\n /* c8 ignore start */\n const mode = parseInt(arg.replace(/^(-m|--mode=)/, ''), 8)\n if (isNaN(mode)) {\n console.error(`invalid mode argument: ${arg}\\nMust be an octal number.`)\n process.exit(1)\n }\n /* c8 ignore stop */\n opts.mode = mode\n } else dirs.push(arg)\n}\n\nimport { mkdirp } from './index.js'\nconst impl = manual ? mkdirp.manual : mkdirp\nif (dirs.length === 0) {\n console.error(usage())\n}\n\n// these don't get covered in CI, but work locally\n/* c8 ignore start */\nPromise.all(dirs.map(dir => impl(dir, opts)))\n .then(made => (doPrint ? made.forEach(m => m && console.log(m)) : null))\n .catch(er => {\n console.error(er.message)\n if (er.code) console.error(' code: ' + er.code)\n process.exit(1)\n })\n/* c8 ignore stop */\n"]} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/find-made.d.ts b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/find-made.d.ts new file mode 100644 index 00000000000000..e47794b3bb72a3 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/find-made.d.ts @@ -0,0 +1,4 @@ +import { MkdirpOptionsResolved } from './opts-arg.js'; +export declare const findMade: (opts: MkdirpOptionsResolved, parent: string, path?: string) => Promise; +export declare const findMadeSync: (opts: MkdirpOptionsResolved, parent: string, path?: string) => undefined | string; +//# sourceMappingURL=find-made.d.ts.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/find-made.d.ts.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/find-made.d.ts.map new file mode 100644 index 00000000000000..00d5d1a4dbefdf --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/find-made.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"find-made.d.ts","sourceRoot":"","sources":["../../../src/find-made.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,qBAAqB,EAAE,MAAM,eAAe,CAAA;AAErD,eAAO,MAAM,QAAQ,SACb,qBAAqB,UACnB,MAAM,SACP,MAAM,KACZ,QAAQ,SAAS,GAAG,MAAM,CAe5B,CAAA;AAED,eAAO,MAAM,YAAY,SACjB,qBAAqB,UACnB,MAAM,SACP,MAAM,KACZ,SAAS,GAAG,MAad,CAAA"} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/find-made.js b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/find-made.js new file mode 100644 index 00000000000000..e831ef27cadc1d --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/find-made.js @@ -0,0 +1,35 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.findMadeSync = exports.findMade = void 0; +const path_1 = require("path"); +const findMade = async (opts, parent, path) => { + // we never want the 'made' return value to be a root directory + if (path === parent) { + return; + } + return opts.statAsync(parent).then(st => (st.isDirectory() ? path : undefined), // will fail later + // will fail later + er => { + const fer = er; + return fer && fer.code === 'ENOENT' + ? (0, exports.findMade)(opts, (0, path_1.dirname)(parent), parent) + : undefined; + }); +}; +exports.findMade = findMade; +const findMadeSync = (opts, parent, path) => { + if (path === parent) { + return undefined; + } + try { + return opts.statSync(parent).isDirectory() ? path : undefined; + } + catch (er) { + const fer = er; + return fer && fer.code === 'ENOENT' + ? (0, exports.findMadeSync)(opts, (0, path_1.dirname)(parent), parent) + : undefined; + } +}; +exports.findMadeSync = findMadeSync; +//# sourceMappingURL=find-made.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/find-made.js.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/find-made.js.map new file mode 100644 index 00000000000000..30a0d66398878d --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/find-made.js.map @@ -0,0 +1 @@ +{"version":3,"file":"find-made.js","sourceRoot":"","sources":["../../../src/find-made.ts"],"names":[],"mappings":";;;AAAA,+BAA8B;AAGvB,MAAM,QAAQ,GAAG,KAAK,EAC3B,IAA2B,EAC3B,MAAc,EACd,IAAa,EACgB,EAAE;IAC/B,+DAA+D;IAC/D,IAAI,IAAI,KAAK,MAAM,EAAE;QACnB,OAAM;KACP;IAED,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,IAAI,CAChC,EAAE,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC,EAAE,kBAAkB;IAC/D,AAD6C,kBAAkB;IAC/D,EAAE,CAAC,EAAE;QACH,MAAM,GAAG,GAAG,EAA2B,CAAA;QACvC,OAAO,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ;YACjC,CAAC,CAAC,IAAA,gBAAQ,EAAC,IAAI,EAAE,IAAA,cAAO,EAAC,MAAM,CAAC,EAAE,MAAM,CAAC;YACzC,CAAC,CAAC,SAAS,CAAA;IACf,CAAC,CACF,CAAA;AACH,CAAC,CAAA;AAnBY,QAAA,QAAQ,YAmBpB;AAEM,MAAM,YAAY,GAAG,CAC1B,IAA2B,EAC3B,MAAc,EACd,IAAa,EACO,EAAE;IACtB,IAAI,IAAI,KAAK,MAAM,EAAE;QACnB,OAAO,SAAS,CAAA;KACjB;IAED,IAAI;QACF,OAAO,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAA;KAC9D;IAAC,OAAO,EAAE,EAAE;QACX,MAAM,GAAG,GAAG,EAA2B,CAAA;QACvC,OAAO,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ;YACjC,CAAC,CAAC,IAAA,oBAAY,EAAC,IAAI,EAAE,IAAA,cAAO,EAAC,MAAM,CAAC,EAAE,MAAM,CAAC;YAC7C,CAAC,CAAC,SAAS,CAAA;KACd;AACH,CAAC,CAAA;AAjBY,QAAA,YAAY,gBAiBxB","sourcesContent":["import { dirname } from 'path'\nimport { MkdirpOptionsResolved } from './opts-arg.js'\n\nexport const findMade = async (\n opts: MkdirpOptionsResolved,\n parent: string,\n path?: string\n): Promise => {\n // we never want the 'made' return value to be a root directory\n if (path === parent) {\n return\n }\n\n return opts.statAsync(parent).then(\n st => (st.isDirectory() ? path : undefined), // will fail later\n er => {\n const fer = er as NodeJS.ErrnoException\n return fer && fer.code === 'ENOENT'\n ? findMade(opts, dirname(parent), parent)\n : undefined\n }\n )\n}\n\nexport const findMadeSync = (\n opts: MkdirpOptionsResolved,\n parent: string,\n path?: string\n): undefined | string => {\n if (path === parent) {\n return undefined\n }\n\n try {\n return opts.statSync(parent).isDirectory() ? path : undefined\n } catch (er) {\n const fer = er as NodeJS.ErrnoException\n return fer && fer.code === 'ENOENT'\n ? findMadeSync(opts, dirname(parent), parent)\n : undefined\n }\n}\n"]} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/index.d.ts b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/index.d.ts new file mode 100644 index 00000000000000..fc9e43b3a45de1 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/index.d.ts @@ -0,0 +1,39 @@ +import { MkdirpOptions } from './opts-arg.js'; +export { mkdirpManual, mkdirpManualSync } from './mkdirp-manual.js'; +export { mkdirpNative, mkdirpNativeSync } from './mkdirp-native.js'; +export { useNative, useNativeSync } from './use-native.js'; +export declare const mkdirpSync: (path: string, opts?: MkdirpOptions) => string | void; +export declare const sync: (path: string, opts?: MkdirpOptions) => string | void; +export declare const manual: ((path: string, options?: MkdirpOptions | undefined, made?: string | void | undefined) => Promise) & { + sync: (path: string, options?: MkdirpOptions | undefined, made?: string | void | undefined) => string | void | undefined; +}; +export declare const manualSync: (path: string, options?: MkdirpOptions | undefined, made?: string | void | undefined) => string | void | undefined; +export declare const native: ((path: string, options?: MkdirpOptions | undefined) => Promise) & { + sync: (path: string, options?: MkdirpOptions | undefined) => string | void | undefined; +}; +export declare const nativeSync: (path: string, options?: MkdirpOptions | undefined) => string | void | undefined; +export declare const mkdirp: ((path: string, opts?: MkdirpOptions) => Promise) & { + mkdirpSync: (path: string, opts?: MkdirpOptions) => string | void; + mkdirpNative: ((path: string, options?: MkdirpOptions | undefined) => Promise) & { + sync: (path: string, options?: MkdirpOptions | undefined) => string | void | undefined; + }; + mkdirpNativeSync: (path: string, options?: MkdirpOptions | undefined) => string | void | undefined; + mkdirpManual: ((path: string, options?: MkdirpOptions | undefined, made?: string | void | undefined) => Promise) & { + sync: (path: string, options?: MkdirpOptions | undefined, made?: string | void | undefined) => string | void | undefined; + }; + mkdirpManualSync: (path: string, options?: MkdirpOptions | undefined, made?: string | void | undefined) => string | void | undefined; + sync: (path: string, opts?: MkdirpOptions) => string | void; + native: ((path: string, options?: MkdirpOptions | undefined) => Promise) & { + sync: (path: string, options?: MkdirpOptions | undefined) => string | void | undefined; + }; + nativeSync: (path: string, options?: MkdirpOptions | undefined) => string | void | undefined; + manual: ((path: string, options?: MkdirpOptions | undefined, made?: string | void | undefined) => Promise) & { + sync: (path: string, options?: MkdirpOptions | undefined, made?: string | void | undefined) => string | void | undefined; + }; + manualSync: (path: string, options?: MkdirpOptions | undefined, made?: string | void | undefined) => string | void | undefined; + useNative: ((opts?: MkdirpOptions | undefined) => boolean) & { + sync: (opts?: MkdirpOptions | undefined) => boolean; + }; + useNativeSync: (opts?: MkdirpOptions | undefined) => boolean; +}; +//# sourceMappingURL=index.d.ts.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/index.d.ts.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/index.d.ts.map new file mode 100644 index 00000000000000..0e915bbc9a0c7a --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/index.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,aAAa,EAAW,MAAM,eAAe,CAAA;AAItD,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAA;AACnE,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAA;AACnE,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAA;AAG1D,eAAO,MAAM,UAAU,SAAU,MAAM,SAAS,aAAa,kBAM5D,CAAA;AAED,eAAO,MAAM,IAAI,SARgB,MAAM,SAAS,aAAa,kBAQ/B,CAAA;AAC9B,eAAO,MAAM,MAAM;;CAAe,CAAA;AAClC,eAAO,MAAM,UAAU,oHAAmB,CAAA;AAC1C,eAAO,MAAM,MAAM;;CAAe,CAAA;AAClC,eAAO,MAAM,UAAU,kFAAmB,CAAA;AAC1C,eAAO,MAAM,MAAM,UACJ,MAAM,SAAS,aAAa;uBAdV,MAAM,SAAS,aAAa;;;;;;;;;iBAA5B,MAAM,SAAS,aAAa;;;;;;;;;;;;;CAoC5D,CAAA"} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/index.js b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/index.js new file mode 100644 index 00000000000000..ab9dc62cddda36 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/index.js @@ -0,0 +1,53 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.mkdirp = exports.nativeSync = exports.native = exports.manualSync = exports.manual = exports.sync = exports.mkdirpSync = exports.useNativeSync = exports.useNative = exports.mkdirpNativeSync = exports.mkdirpNative = exports.mkdirpManualSync = exports.mkdirpManual = void 0; +const mkdirp_manual_js_1 = require("./mkdirp-manual.js"); +const mkdirp_native_js_1 = require("./mkdirp-native.js"); +const opts_arg_js_1 = require("./opts-arg.js"); +const path_arg_js_1 = require("./path-arg.js"); +const use_native_js_1 = require("./use-native.js"); +/* c8 ignore start */ +var mkdirp_manual_js_2 = require("./mkdirp-manual.js"); +Object.defineProperty(exports, "mkdirpManual", { enumerable: true, get: function () { return mkdirp_manual_js_2.mkdirpManual; } }); +Object.defineProperty(exports, "mkdirpManualSync", { enumerable: true, get: function () { return mkdirp_manual_js_2.mkdirpManualSync; } }); +var mkdirp_native_js_2 = require("./mkdirp-native.js"); +Object.defineProperty(exports, "mkdirpNative", { enumerable: true, get: function () { return mkdirp_native_js_2.mkdirpNative; } }); +Object.defineProperty(exports, "mkdirpNativeSync", { enumerable: true, get: function () { return mkdirp_native_js_2.mkdirpNativeSync; } }); +var use_native_js_2 = require("./use-native.js"); +Object.defineProperty(exports, "useNative", { enumerable: true, get: function () { return use_native_js_2.useNative; } }); +Object.defineProperty(exports, "useNativeSync", { enumerable: true, get: function () { return use_native_js_2.useNativeSync; } }); +/* c8 ignore stop */ +const mkdirpSync = (path, opts) => { + path = (0, path_arg_js_1.pathArg)(path); + const resolved = (0, opts_arg_js_1.optsArg)(opts); + return (0, use_native_js_1.useNativeSync)(resolved) + ? (0, mkdirp_native_js_1.mkdirpNativeSync)(path, resolved) + : (0, mkdirp_manual_js_1.mkdirpManualSync)(path, resolved); +}; +exports.mkdirpSync = mkdirpSync; +exports.sync = exports.mkdirpSync; +exports.manual = mkdirp_manual_js_1.mkdirpManual; +exports.manualSync = mkdirp_manual_js_1.mkdirpManualSync; +exports.native = mkdirp_native_js_1.mkdirpNative; +exports.nativeSync = mkdirp_native_js_1.mkdirpNativeSync; +exports.mkdirp = Object.assign(async (path, opts) => { + path = (0, path_arg_js_1.pathArg)(path); + const resolved = (0, opts_arg_js_1.optsArg)(opts); + return (0, use_native_js_1.useNative)(resolved) + ? (0, mkdirp_native_js_1.mkdirpNative)(path, resolved) + : (0, mkdirp_manual_js_1.mkdirpManual)(path, resolved); +}, { + mkdirpSync: exports.mkdirpSync, + mkdirpNative: mkdirp_native_js_1.mkdirpNative, + mkdirpNativeSync: mkdirp_native_js_1.mkdirpNativeSync, + mkdirpManual: mkdirp_manual_js_1.mkdirpManual, + mkdirpManualSync: mkdirp_manual_js_1.mkdirpManualSync, + sync: exports.mkdirpSync, + native: mkdirp_native_js_1.mkdirpNative, + nativeSync: mkdirp_native_js_1.mkdirpNativeSync, + manual: mkdirp_manual_js_1.mkdirpManual, + manualSync: mkdirp_manual_js_1.mkdirpManualSync, + useNative: use_native_js_1.useNative, + useNativeSync: use_native_js_1.useNativeSync, +}); +//# sourceMappingURL=index.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/index.js.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/index.js.map new file mode 100644 index 00000000000000..fdb572677a98ef --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/index.js.map @@ -0,0 +1 @@ +{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/index.ts"],"names":[],"mappings":";;;AAAA,yDAAmE;AACnE,yDAAmE;AACnE,+CAAsD;AACtD,+CAAuC;AACvC,mDAA0D;AAC1D,qBAAqB;AACrB,uDAAmE;AAA1D,gHAAA,YAAY,OAAA;AAAE,oHAAA,gBAAgB,OAAA;AACvC,uDAAmE;AAA1D,gHAAA,YAAY,OAAA;AAAE,oHAAA,gBAAgB,OAAA;AACvC,iDAA0D;AAAjD,0GAAA,SAAS,OAAA;AAAE,8GAAA,aAAa,OAAA;AACjC,oBAAoB;AAEb,MAAM,UAAU,GAAG,CAAC,IAAY,EAAE,IAAoB,EAAE,EAAE;IAC/D,IAAI,GAAG,IAAA,qBAAO,EAAC,IAAI,CAAC,CAAA;IACpB,MAAM,QAAQ,GAAG,IAAA,qBAAO,EAAC,IAAI,CAAC,CAAA;IAC9B,OAAO,IAAA,6BAAa,EAAC,QAAQ,CAAC;QAC5B,CAAC,CAAC,IAAA,mCAAgB,EAAC,IAAI,EAAE,QAAQ,CAAC;QAClC,CAAC,CAAC,IAAA,mCAAgB,EAAC,IAAI,EAAE,QAAQ,CAAC,CAAA;AACtC,CAAC,CAAA;AANY,QAAA,UAAU,cAMtB;AAEY,QAAA,IAAI,GAAG,kBAAU,CAAA;AACjB,QAAA,MAAM,GAAG,+BAAY,CAAA;AACrB,QAAA,UAAU,GAAG,mCAAgB,CAAA;AAC7B,QAAA,MAAM,GAAG,+BAAY,CAAA;AACrB,QAAA,UAAU,GAAG,mCAAgB,CAAA;AAC7B,QAAA,MAAM,GAAG,MAAM,CAAC,MAAM,CACjC,KAAK,EAAE,IAAY,EAAE,IAAoB,EAAE,EAAE;IAC3C,IAAI,GAAG,IAAA,qBAAO,EAAC,IAAI,CAAC,CAAA;IACpB,MAAM,QAAQ,GAAG,IAAA,qBAAO,EAAC,IAAI,CAAC,CAAA;IAC9B,OAAO,IAAA,yBAAS,EAAC,QAAQ,CAAC;QACxB,CAAC,CAAC,IAAA,+BAAY,EAAC,IAAI,EAAE,QAAQ,CAAC;QAC9B,CAAC,CAAC,IAAA,+BAAY,EAAC,IAAI,EAAE,QAAQ,CAAC,CAAA;AAClC,CAAC,EACD;IACE,UAAU,EAAV,kBAAU;IACV,YAAY,EAAZ,+BAAY;IACZ,gBAAgB,EAAhB,mCAAgB;IAChB,YAAY,EAAZ,+BAAY;IACZ,gBAAgB,EAAhB,mCAAgB;IAEhB,IAAI,EAAE,kBAAU;IAChB,MAAM,EAAE,+BAAY;IACpB,UAAU,EAAE,mCAAgB;IAC5B,MAAM,EAAE,+BAAY;IACpB,UAAU,EAAE,mCAAgB;IAC5B,SAAS,EAAT,yBAAS;IACT,aAAa,EAAb,6BAAa;CACd,CACF,CAAA","sourcesContent":["import { mkdirpManual, mkdirpManualSync } from './mkdirp-manual.js'\nimport { mkdirpNative, mkdirpNativeSync } from './mkdirp-native.js'\nimport { MkdirpOptions, optsArg } from './opts-arg.js'\nimport { pathArg } from './path-arg.js'\nimport { useNative, useNativeSync } from './use-native.js'\n/* c8 ignore start */\nexport { mkdirpManual, mkdirpManualSync } from './mkdirp-manual.js'\nexport { mkdirpNative, mkdirpNativeSync } from './mkdirp-native.js'\nexport { useNative, useNativeSync } from './use-native.js'\n/* c8 ignore stop */\n\nexport const mkdirpSync = (path: string, opts?: MkdirpOptions) => {\n path = pathArg(path)\n const resolved = optsArg(opts)\n return useNativeSync(resolved)\n ? mkdirpNativeSync(path, resolved)\n : mkdirpManualSync(path, resolved)\n}\n\nexport const sync = mkdirpSync\nexport const manual = mkdirpManual\nexport const manualSync = mkdirpManualSync\nexport const native = mkdirpNative\nexport const nativeSync = mkdirpNativeSync\nexport const mkdirp = Object.assign(\n async (path: string, opts?: MkdirpOptions) => {\n path = pathArg(path)\n const resolved = optsArg(opts)\n return useNative(resolved)\n ? mkdirpNative(path, resolved)\n : mkdirpManual(path, resolved)\n },\n {\n mkdirpSync,\n mkdirpNative,\n mkdirpNativeSync,\n mkdirpManual,\n mkdirpManualSync,\n\n sync: mkdirpSync,\n native: mkdirpNative,\n nativeSync: mkdirpNativeSync,\n manual: mkdirpManual,\n manualSync: mkdirpManualSync,\n useNative,\n useNativeSync,\n }\n)\n"]} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-manual.d.ts b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-manual.d.ts new file mode 100644 index 00000000000000..e49cdf9f1bd122 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-manual.d.ts @@ -0,0 +1,6 @@ +import { MkdirpOptions } from './opts-arg.js'; +export declare const mkdirpManualSync: (path: string, options?: MkdirpOptions, made?: string | undefined | void) => string | undefined | void; +export declare const mkdirpManual: ((path: string, options?: MkdirpOptions, made?: string | undefined | void) => Promise) & { + sync: (path: string, options?: MkdirpOptions, made?: string | undefined | void) => string | undefined | void; +}; +//# sourceMappingURL=mkdirp-manual.d.ts.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-manual.d.ts.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-manual.d.ts.map new file mode 100644 index 00000000000000..9301bab1ffb35b --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-manual.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"mkdirp-manual.d.ts","sourceRoot":"","sources":["../../../src/mkdirp-manual.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,aAAa,EAAW,MAAM,eAAe,CAAA;AAEtD,eAAO,MAAM,gBAAgB,SACrB,MAAM,YACF,aAAa,SAChB,MAAM,GAAG,SAAS,GAAG,IAAI,KAC/B,MAAM,GAAG,SAAS,GAAG,IAmCvB,CAAA;AAED,eAAO,MAAM,YAAY,UAEf,MAAM,YACF,aAAa,SAChB,MAAM,GAAG,SAAS,GAAG,IAAI,KAC/B,QAAQ,MAAM,GAAG,SAAS,GAAG,IAAI,CAAC;iBA7C/B,MAAM,YACF,aAAa,SAChB,MAAM,GAAG,SAAS,GAAG,IAAI,KAC/B,MAAM,GAAG,SAAS,GAAG,IAAI;CAqF3B,CAAA"} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-manual.js b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-manual.js new file mode 100644 index 00000000000000..d9bd1d8bb5a49b --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-manual.js @@ -0,0 +1,79 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.mkdirpManual = exports.mkdirpManualSync = void 0; +const path_1 = require("path"); +const opts_arg_js_1 = require("./opts-arg.js"); +const mkdirpManualSync = (path, options, made) => { + const parent = (0, path_1.dirname)(path); + const opts = { ...(0, opts_arg_js_1.optsArg)(options), recursive: false }; + if (parent === path) { + try { + return opts.mkdirSync(path, opts); + } + catch (er) { + // swallowed by recursive implementation on posix systems + // any other error is a failure + const fer = er; + if (fer && fer.code !== 'EISDIR') { + throw er; + } + return; + } + } + try { + opts.mkdirSync(path, opts); + return made || path; + } + catch (er) { + const fer = er; + if (fer && fer.code === 'ENOENT') { + return (0, exports.mkdirpManualSync)(path, opts, (0, exports.mkdirpManualSync)(parent, opts, made)); + } + if (fer && fer.code !== 'EEXIST' && fer && fer.code !== 'EROFS') { + throw er; + } + try { + if (!opts.statSync(path).isDirectory()) + throw er; + } + catch (_) { + throw er; + } + } +}; +exports.mkdirpManualSync = mkdirpManualSync; +exports.mkdirpManual = Object.assign(async (path, options, made) => { + const opts = (0, opts_arg_js_1.optsArg)(options); + opts.recursive = false; + const parent = (0, path_1.dirname)(path); + if (parent === path) { + return opts.mkdirAsync(path, opts).catch(er => { + // swallowed by recursive implementation on posix systems + // any other error is a failure + const fer = er; + if (fer && fer.code !== 'EISDIR') { + throw er; + } + }); + } + return opts.mkdirAsync(path, opts).then(() => made || path, async (er) => { + const fer = er; + if (fer && fer.code === 'ENOENT') { + return (0, exports.mkdirpManual)(parent, opts).then((made) => (0, exports.mkdirpManual)(path, opts, made)); + } + if (fer && fer.code !== 'EEXIST' && fer.code !== 'EROFS') { + throw er; + } + return opts.statAsync(path).then(st => { + if (st.isDirectory()) { + return made; + } + else { + throw er; + } + }, () => { + throw er; + }); + }); +}, { sync: exports.mkdirpManualSync }); +//# sourceMappingURL=mkdirp-manual.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-manual.js.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-manual.js.map new file mode 100644 index 00000000000000..ff7ba24dca32ad --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-manual.js.map @@ -0,0 +1 @@ +{"version":3,"file":"mkdirp-manual.js","sourceRoot":"","sources":["../../../src/mkdirp-manual.ts"],"names":[],"mappings":";;;AAAA,+BAA8B;AAC9B,+CAAsD;AAE/C,MAAM,gBAAgB,GAAG,CAC9B,IAAY,EACZ,OAAuB,EACvB,IAAgC,EACL,EAAE;IAC7B,MAAM,MAAM,GAAG,IAAA,cAAO,EAAC,IAAI,CAAC,CAAA;IAC5B,MAAM,IAAI,GAAG,EAAE,GAAG,IAAA,qBAAO,EAAC,OAAO,CAAC,EAAE,SAAS,EAAE,KAAK,EAAE,CAAA;IAEtD,IAAI,MAAM,KAAK,IAAI,EAAE;QACnB,IAAI;YACF,OAAO,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;SAClC;QAAC,OAAO,EAAE,EAAE;YACX,yDAAyD;YACzD,+BAA+B;YAC/B,MAAM,GAAG,GAAG,EAA2B,CAAA;YACvC,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE;gBAChC,MAAM,EAAE,CAAA;aACT;YACD,OAAM;SACP;KACF;IAED,IAAI;QACF,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;QAC1B,OAAO,IAAI,IAAI,IAAI,CAAA;KACpB;IAAC,OAAO,EAAE,EAAE;QACX,MAAM,GAAG,GAAG,EAA2B,CAAA;QACvC,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE;YAChC,OAAO,IAAA,wBAAgB,EAAC,IAAI,EAAE,IAAI,EAAE,IAAA,wBAAgB,EAAC,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAA;SAC1E;QACD,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,OAAO,EAAE;YAC/D,MAAM,EAAE,CAAA;SACT;QACD,IAAI;YACF,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE;gBAAE,MAAM,EAAE,CAAA;SACjD;QAAC,OAAO,CAAC,EAAE;YACV,MAAM,EAAE,CAAA;SACT;KACF;AACH,CAAC,CAAA;AAvCY,QAAA,gBAAgB,oBAuC5B;AAEY,QAAA,YAAY,GAAG,MAAM,CAAC,MAAM,CACvC,KAAK,EACH,IAAY,EACZ,OAAuB,EACvB,IAAgC,EACI,EAAE;IACtC,MAAM,IAAI,GAAG,IAAA,qBAAO,EAAC,OAAO,CAAC,CAAA;IAC7B,IAAI,CAAC,SAAS,GAAG,KAAK,CAAA;IACtB,MAAM,MAAM,GAAG,IAAA,cAAO,EAAC,IAAI,CAAC,CAAA;IAC5B,IAAI,MAAM,KAAK,IAAI,EAAE;QACnB,OAAO,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,EAAE;YAC5C,yDAAyD;YACzD,+BAA+B;YAC/B,MAAM,GAAG,GAAG,EAA2B,CAAA;YACvC,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE;gBAChC,MAAM,EAAE,CAAA;aACT;QACH,CAAC,CAAC,CAAA;KACH;IAED,OAAO,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,IAAI,CACrC,GAAG,EAAE,CAAC,IAAI,IAAI,IAAI,EAClB,KAAK,EAAC,EAAE,EAAC,EAAE;QACT,MAAM,GAAG,GAAG,EAA2B,CAAA;QACvC,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE;YAChC,OAAO,IAAA,oBAAY,EAAC,MAAM,EAAE,IAAI,CAAC,CAAC,IAAI,CACpC,CAAC,IAAgC,EAAE,EAAE,CAAC,IAAA,oBAAY,EAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CACrE,CAAA;SACF;QACD,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,IAAI,GAAG,CAAC,IAAI,KAAK,OAAO,EAAE;YACxD,MAAM,EAAE,CAAA;SACT;QACD,OAAO,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,IAAI,CAC9B,EAAE,CAAC,EAAE;YACH,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE;gBACpB,OAAO,IAAI,CAAA;aACZ;iBAAM;gBACL,MAAM,EAAE,CAAA;aACT;QACH,CAAC,EACD,GAAG,EAAE;YACH,MAAM,EAAE,CAAA;QACV,CAAC,CACF,CAAA;IACH,CAAC,CACF,CAAA;AACH,CAAC,EACD,EAAE,IAAI,EAAE,wBAAgB,EAAE,CAC3B,CAAA","sourcesContent":["import { dirname } from 'path'\nimport { MkdirpOptions, optsArg } from './opts-arg.js'\n\nexport const mkdirpManualSync = (\n path: string,\n options?: MkdirpOptions,\n made?: string | undefined | void\n): string | undefined | void => {\n const parent = dirname(path)\n const opts = { ...optsArg(options), recursive: false }\n\n if (parent === path) {\n try {\n return opts.mkdirSync(path, opts)\n } catch (er) {\n // swallowed by recursive implementation on posix systems\n // any other error is a failure\n const fer = er as NodeJS.ErrnoException\n if (fer && fer.code !== 'EISDIR') {\n throw er\n }\n return\n }\n }\n\n try {\n opts.mkdirSync(path, opts)\n return made || path\n } catch (er) {\n const fer = er as NodeJS.ErrnoException\n if (fer && fer.code === 'ENOENT') {\n return mkdirpManualSync(path, opts, mkdirpManualSync(parent, opts, made))\n }\n if (fer && fer.code !== 'EEXIST' && fer && fer.code !== 'EROFS') {\n throw er\n }\n try {\n if (!opts.statSync(path).isDirectory()) throw er\n } catch (_) {\n throw er\n }\n }\n}\n\nexport const mkdirpManual = Object.assign(\n async (\n path: string,\n options?: MkdirpOptions,\n made?: string | undefined | void\n ): Promise => {\n const opts = optsArg(options)\n opts.recursive = false\n const parent = dirname(path)\n if (parent === path) {\n return opts.mkdirAsync(path, opts).catch(er => {\n // swallowed by recursive implementation on posix systems\n // any other error is a failure\n const fer = er as NodeJS.ErrnoException\n if (fer && fer.code !== 'EISDIR') {\n throw er\n }\n })\n }\n\n return opts.mkdirAsync(path, opts).then(\n () => made || path,\n async er => {\n const fer = er as NodeJS.ErrnoException\n if (fer && fer.code === 'ENOENT') {\n return mkdirpManual(parent, opts).then(\n (made?: string | undefined | void) => mkdirpManual(path, opts, made)\n )\n }\n if (fer && fer.code !== 'EEXIST' && fer.code !== 'EROFS') {\n throw er\n }\n return opts.statAsync(path).then(\n st => {\n if (st.isDirectory()) {\n return made\n } else {\n throw er\n }\n },\n () => {\n throw er\n }\n )\n }\n )\n },\n { sync: mkdirpManualSync }\n)\n"]} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-native.d.ts b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-native.d.ts new file mode 100644 index 00000000000000..28b64814b2545a --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-native.d.ts @@ -0,0 +1,6 @@ +import { MkdirpOptions } from './opts-arg.js'; +export declare const mkdirpNativeSync: (path: string, options?: MkdirpOptions) => string | void | undefined; +export declare const mkdirpNative: ((path: string, options?: MkdirpOptions) => Promise) & { + sync: (path: string, options?: MkdirpOptions) => string | void | undefined; +}; +//# sourceMappingURL=mkdirp-native.d.ts.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-native.d.ts.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-native.d.ts.map new file mode 100644 index 00000000000000..379c0f6591c686 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-native.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"mkdirp-native.d.ts","sourceRoot":"","sources":["../../../src/mkdirp-native.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,aAAa,EAAW,MAAM,eAAe,CAAA;AAEtD,eAAO,MAAM,gBAAgB,SACrB,MAAM,YACF,aAAa,KACtB,MAAM,GAAG,IAAI,GAAG,SAoBlB,CAAA;AAED,eAAO,MAAM,YAAY,UAEf,MAAM,YACF,aAAa,KACtB,QAAQ,MAAM,GAAG,IAAI,GAAG,SAAS,CAAC;iBA5B/B,MAAM,YACF,aAAa,KACtB,MAAM,GAAG,IAAI,GAAG,SAAS;CAgD3B,CAAA"} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-native.js b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-native.js new file mode 100644 index 00000000000000..9f00567d7cc200 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-native.js @@ -0,0 +1,50 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.mkdirpNative = exports.mkdirpNativeSync = void 0; +const path_1 = require("path"); +const find_made_js_1 = require("./find-made.js"); +const mkdirp_manual_js_1 = require("./mkdirp-manual.js"); +const opts_arg_js_1 = require("./opts-arg.js"); +const mkdirpNativeSync = (path, options) => { + const opts = (0, opts_arg_js_1.optsArg)(options); + opts.recursive = true; + const parent = (0, path_1.dirname)(path); + if (parent === path) { + return opts.mkdirSync(path, opts); + } + const made = (0, find_made_js_1.findMadeSync)(opts, path); + try { + opts.mkdirSync(path, opts); + return made; + } + catch (er) { + const fer = er; + if (fer && fer.code === 'ENOENT') { + return (0, mkdirp_manual_js_1.mkdirpManualSync)(path, opts); + } + else { + throw er; + } + } +}; +exports.mkdirpNativeSync = mkdirpNativeSync; +exports.mkdirpNative = Object.assign(async (path, options) => { + const opts = { ...(0, opts_arg_js_1.optsArg)(options), recursive: true }; + const parent = (0, path_1.dirname)(path); + if (parent === path) { + return await opts.mkdirAsync(path, opts); + } + return (0, find_made_js_1.findMade)(opts, path).then((made) => opts + .mkdirAsync(path, opts) + .then(m => made || m) + .catch(er => { + const fer = er; + if (fer && fer.code === 'ENOENT') { + return (0, mkdirp_manual_js_1.mkdirpManual)(path, opts); + } + else { + throw er; + } + })); +}, { sync: exports.mkdirpNativeSync }); +//# sourceMappingURL=mkdirp-native.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-native.js.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-native.js.map new file mode 100644 index 00000000000000..1f889ee98876cc --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/mkdirp-native.js.map @@ -0,0 +1 @@ +{"version":3,"file":"mkdirp-native.js","sourceRoot":"","sources":["../../../src/mkdirp-native.ts"],"names":[],"mappings":";;;AAAA,+BAA8B;AAC9B,iDAAuD;AACvD,yDAAmE;AACnE,+CAAsD;AAE/C,MAAM,gBAAgB,GAAG,CAC9B,IAAY,EACZ,OAAuB,EACI,EAAE;IAC7B,MAAM,IAAI,GAAG,IAAA,qBAAO,EAAC,OAAO,CAAC,CAAA;IAC7B,IAAI,CAAC,SAAS,GAAG,IAAI,CAAA;IACrB,MAAM,MAAM,GAAG,IAAA,cAAO,EAAC,IAAI,CAAC,CAAA;IAC5B,IAAI,MAAM,KAAK,IAAI,EAAE;QACnB,OAAO,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;KAClC;IAED,MAAM,IAAI,GAAG,IAAA,2BAAY,EAAC,IAAI,EAAE,IAAI,CAAC,CAAA;IACrC,IAAI;QACF,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;QAC1B,OAAO,IAAI,CAAA;KACZ;IAAC,OAAO,EAAE,EAAE;QACX,MAAM,GAAG,GAAG,EAA2B,CAAA;QACvC,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE;YAChC,OAAO,IAAA,mCAAgB,EAAC,IAAI,EAAE,IAAI,CAAC,CAAA;SACpC;aAAM;YACL,MAAM,EAAE,CAAA;SACT;KACF;AACH,CAAC,CAAA;AAvBY,QAAA,gBAAgB,oBAuB5B;AAEY,QAAA,YAAY,GAAG,MAAM,CAAC,MAAM,CACvC,KAAK,EACH,IAAY,EACZ,OAAuB,EACa,EAAE;IACtC,MAAM,IAAI,GAAG,EAAE,GAAG,IAAA,qBAAO,EAAC,OAAO,CAAC,EAAE,SAAS,EAAE,IAAI,EAAE,CAAA;IACrD,MAAM,MAAM,GAAG,IAAA,cAAO,EAAC,IAAI,CAAC,CAAA;IAC5B,IAAI,MAAM,KAAK,IAAI,EAAE;QACnB,OAAO,MAAM,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;KACzC;IAED,OAAO,IAAA,uBAAQ,EAAC,IAAI,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,IAAyB,EAAE,EAAE,CAC7D,IAAI;SACD,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC;SACtB,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,IAAI,CAAC,CAAC;SACpB,KAAK,CAAC,EAAE,CAAC,EAAE;QACV,MAAM,GAAG,GAAG,EAA2B,CAAA;QACvC,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE;YAChC,OAAO,IAAA,+BAAY,EAAC,IAAI,EAAE,IAAI,CAAC,CAAA;SAChC;aAAM;YACL,MAAM,EAAE,CAAA;SACT;IACH,CAAC,CAAC,CACL,CAAA;AACH,CAAC,EACD,EAAE,IAAI,EAAE,wBAAgB,EAAE,CAC3B,CAAA","sourcesContent":["import { dirname } from 'path'\nimport { findMade, findMadeSync } from './find-made.js'\nimport { mkdirpManual, mkdirpManualSync } from './mkdirp-manual.js'\nimport { MkdirpOptions, optsArg } from './opts-arg.js'\n\nexport const mkdirpNativeSync = (\n path: string,\n options?: MkdirpOptions\n): string | void | undefined => {\n const opts = optsArg(options)\n opts.recursive = true\n const parent = dirname(path)\n if (parent === path) {\n return opts.mkdirSync(path, opts)\n }\n\n const made = findMadeSync(opts, path)\n try {\n opts.mkdirSync(path, opts)\n return made\n } catch (er) {\n const fer = er as NodeJS.ErrnoException\n if (fer && fer.code === 'ENOENT') {\n return mkdirpManualSync(path, opts)\n } else {\n throw er\n }\n }\n}\n\nexport const mkdirpNative = Object.assign(\n async (\n path: string,\n options?: MkdirpOptions\n ): Promise => {\n const opts = { ...optsArg(options), recursive: true }\n const parent = dirname(path)\n if (parent === path) {\n return await opts.mkdirAsync(path, opts)\n }\n\n return findMade(opts, path).then((made?: string | undefined) =>\n opts\n .mkdirAsync(path, opts)\n .then(m => made || m)\n .catch(er => {\n const fer = er as NodeJS.ErrnoException\n if (fer && fer.code === 'ENOENT') {\n return mkdirpManual(path, opts)\n } else {\n throw er\n }\n })\n )\n },\n { sync: mkdirpNativeSync }\n)\n"]} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/opts-arg.d.ts b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/opts-arg.d.ts new file mode 100644 index 00000000000000..73d076b3b6923c --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/opts-arg.d.ts @@ -0,0 +1,42 @@ +/// +/// +import { MakeDirectoryOptions, Stats } from 'fs'; +export interface FsProvider { + stat?: (path: string, callback: (err: NodeJS.ErrnoException | null, stats: Stats) => any) => any; + mkdir?: (path: string, opts: MakeDirectoryOptions & { + recursive?: boolean; + }, callback: (err: NodeJS.ErrnoException | null, made?: string) => any) => any; + statSync?: (path: string) => Stats; + mkdirSync?: (path: string, opts: MakeDirectoryOptions & { + recursive?: boolean; + }) => string | undefined; +} +interface Options extends FsProvider { + mode?: number | string; + fs?: FsProvider; + mkdirAsync?: (path: string, opts: MakeDirectoryOptions & { + recursive?: boolean; + }) => Promise; + statAsync?: (path: string) => Promise; +} +export type MkdirpOptions = Options | number | string; +export interface MkdirpOptionsResolved { + mode: number; + fs: FsProvider; + mkdirAsync: (path: string, opts: MakeDirectoryOptions & { + recursive?: boolean; + }) => Promise; + statAsync: (path: string) => Promise; + stat: (path: string, callback: (err: NodeJS.ErrnoException | null, stats: Stats) => any) => any; + mkdir: (path: string, opts: MakeDirectoryOptions & { + recursive?: boolean; + }, callback: (err: NodeJS.ErrnoException | null, made?: string) => any) => any; + statSync: (path: string) => Stats; + mkdirSync: (path: string, opts: MakeDirectoryOptions & { + recursive?: boolean; + }) => string | undefined; + recursive?: boolean; +} +export declare const optsArg: (opts?: MkdirpOptions) => MkdirpOptionsResolved; +export {}; +//# sourceMappingURL=opts-arg.d.ts.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/opts-arg.d.ts.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/opts-arg.d.ts.map new file mode 100644 index 00000000000000..e575161714f651 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/opts-arg.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"opts-arg.d.ts","sourceRoot":"","sources":["../../../src/opts-arg.ts"],"names":[],"mappings":";;AAAA,OAAO,EACL,oBAAoB,EAIpB,KAAK,EAEN,MAAM,IAAI,CAAA;AAEX,MAAM,WAAW,UAAU;IACzB,IAAI,CAAC,EAAE,CACL,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,CAAC,GAAG,EAAE,MAAM,CAAC,cAAc,GAAG,IAAI,EAAE,KAAK,EAAE,KAAK,KAAK,GAAG,KAC/D,GAAG,CAAA;IACR,KAAK,CAAC,EAAE,CACN,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,oBAAoB,GAAG;QAAE,SAAS,CAAC,EAAE,OAAO,CAAA;KAAE,EACpD,QAAQ,EAAE,CAAC,GAAG,EAAE,MAAM,CAAC,cAAc,GAAG,IAAI,EAAE,IAAI,CAAC,EAAE,MAAM,KAAK,GAAG,KAChE,GAAG,CAAA;IACR,QAAQ,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,KAAK,CAAA;IAClC,SAAS,CAAC,EAAE,CACV,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,oBAAoB,GAAG;QAAE,SAAS,CAAC,EAAE,OAAO,CAAA;KAAE,KACjD,MAAM,GAAG,SAAS,CAAA;CACxB;AAED,UAAU,OAAQ,SAAQ,UAAU;IAClC,IAAI,CAAC,EAAE,MAAM,GAAG,MAAM,CAAA;IACtB,EAAE,CAAC,EAAE,UAAU,CAAA;IACf,UAAU,CAAC,EAAE,CACX,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,oBAAoB,GAAG;QAAE,SAAS,CAAC,EAAE,OAAO,CAAA;KAAE,KACjD,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CAAA;IAChC,SAAS,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,KAAK,CAAC,CAAA;CAC7C;AAED,MAAM,MAAM,aAAa,GAAG,OAAO,GAAG,MAAM,GAAG,MAAM,CAAA;AAErD,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,MAAM,CAAA;IACZ,EAAE,EAAE,UAAU,CAAA;IACd,UAAU,EAAE,CACV,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,oBAAoB,GAAG;QAAE,SAAS,CAAC,EAAE,OAAO,CAAA;KAAE,KACjD,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CAAA;IAChC,SAAS,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,KAAK,CAAC,CAAA;IAC3C,IAAI,EAAE,CACJ,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,CAAC,GAAG,EAAE,MAAM,CAAC,cAAc,GAAG,IAAI,EAAE,KAAK,EAAE,KAAK,KAAK,GAAG,KAC/D,GAAG,CAAA;IACR,KAAK,EAAE,CACL,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,oBAAoB,GAAG;QAAE,SAAS,CAAC,EAAE,OAAO,CAAA;KAAE,EACpD,QAAQ,EAAE,CAAC,GAAG,EAAE,MAAM,CAAC,cAAc,GAAG,IAAI,EAAE,IAAI,CAAC,EAAE,MAAM,KAAK,GAAG,KAChE,GAAG,CAAA;IACR,QAAQ,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,KAAK,CAAA;IACjC,SAAS,EAAE,CACT,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,oBAAoB,GAAG;QAAE,SAAS,CAAC,EAAE,OAAO,CAAA;KAAE,KACjD,MAAM,GAAG,SAAS,CAAA;IACvB,SAAS,CAAC,EAAE,OAAO,CAAA;CACpB;AAED,eAAO,MAAM,OAAO,UAAW,aAAa,KAAG,qBA2C9C,CAAA"} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/opts-arg.js b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/opts-arg.js new file mode 100644 index 00000000000000..e8f486c0905957 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/opts-arg.js @@ -0,0 +1,38 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.optsArg = void 0; +const fs_1 = require("fs"); +const optsArg = (opts) => { + if (!opts) { + opts = { mode: 0o777 }; + } + else if (typeof opts === 'object') { + opts = { mode: 0o777, ...opts }; + } + else if (typeof opts === 'number') { + opts = { mode: opts }; + } + else if (typeof opts === 'string') { + opts = { mode: parseInt(opts, 8) }; + } + else { + throw new TypeError('invalid options argument'); + } + const resolved = opts; + const optsFs = opts.fs || {}; + opts.mkdir = opts.mkdir || optsFs.mkdir || fs_1.mkdir; + opts.mkdirAsync = opts.mkdirAsync + ? opts.mkdirAsync + : async (path, options) => { + return new Promise((res, rej) => resolved.mkdir(path, options, (er, made) => er ? rej(er) : res(made))); + }; + opts.stat = opts.stat || optsFs.stat || fs_1.stat; + opts.statAsync = opts.statAsync + ? opts.statAsync + : async (path) => new Promise((res, rej) => resolved.stat(path, (err, stats) => (err ? rej(err) : res(stats)))); + opts.statSync = opts.statSync || optsFs.statSync || fs_1.statSync; + opts.mkdirSync = opts.mkdirSync || optsFs.mkdirSync || fs_1.mkdirSync; + return resolved; +}; +exports.optsArg = optsArg; +//# sourceMappingURL=opts-arg.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/opts-arg.js.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/opts-arg.js.map new file mode 100644 index 00000000000000..fd5590f40f54cd --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/opts-arg.js.map @@ -0,0 +1 @@ +{"version":3,"file":"opts-arg.js","sourceRoot":"","sources":["../../../src/opts-arg.ts"],"names":[],"mappings":";;;AAAA,2BAOW;AAwDJ,MAAM,OAAO,GAAG,CAAC,IAAoB,EAAyB,EAAE;IACrE,IAAI,CAAC,IAAI,EAAE;QACT,IAAI,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,CAAA;KACvB;SAAM,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE;QACnC,IAAI,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,IAAI,EAAE,CAAA;KAChC;SAAM,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE;QACnC,IAAI,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;KACtB;SAAM,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE;QACnC,IAAI,GAAG,EAAE,IAAI,EAAE,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,EAAE,CAAA;KACnC;SAAM;QACL,MAAM,IAAI,SAAS,CAAC,0BAA0B,CAAC,CAAA;KAChD;IAED,MAAM,QAAQ,GAAG,IAA6B,CAAA;IAC9C,MAAM,MAAM,GAAG,IAAI,CAAC,EAAE,IAAI,EAAE,CAAA;IAE5B,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,IAAI,UAAK,CAAA;IAEhD,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU;QAC/B,CAAC,CAAC,IAAI,CAAC,UAAU;QACjB,CAAC,CAAC,KAAK,EACH,IAAY,EACZ,OAAuD,EAC1B,EAAE;YAC/B,OAAO,IAAI,OAAO,CAAqB,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAClD,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,OAAO,EAAE,CAAC,EAAE,EAAE,IAAI,EAAE,EAAE,CACzC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CACzB,CACF,CAAA;QACH,CAAC,CAAA;IAEL,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,IAAI,MAAM,CAAC,IAAI,IAAI,SAAI,CAAA;IAC5C,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS;QAC7B,CAAC,CAAC,IAAI,CAAC,SAAS;QAChB,CAAC,CAAC,KAAK,EAAE,IAAY,EAAE,EAAE,CACrB,IAAI,OAAO,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CACvB,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CACnE,CAAA;IAEP,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,MAAM,CAAC,QAAQ,IAAI,aAAQ,CAAA;IAC5D,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,MAAM,CAAC,SAAS,IAAI,cAAS,CAAA;IAEhE,OAAO,QAAQ,CAAA;AACjB,CAAC,CAAA;AA3CY,QAAA,OAAO,WA2CnB","sourcesContent":["import {\n MakeDirectoryOptions,\n mkdir,\n mkdirSync,\n stat,\n Stats,\n statSync,\n} from 'fs'\n\nexport interface FsProvider {\n stat?: (\n path: string,\n callback: (err: NodeJS.ErrnoException | null, stats: Stats) => any\n ) => any\n mkdir?: (\n path: string,\n opts: MakeDirectoryOptions & { recursive?: boolean },\n callback: (err: NodeJS.ErrnoException | null, made?: string) => any\n ) => any\n statSync?: (path: string) => Stats\n mkdirSync?: (\n path: string,\n opts: MakeDirectoryOptions & { recursive?: boolean }\n ) => string | undefined\n}\n\ninterface Options extends FsProvider {\n mode?: number | string\n fs?: FsProvider\n mkdirAsync?: (\n path: string,\n opts: MakeDirectoryOptions & { recursive?: boolean }\n ) => Promise\n statAsync?: (path: string) => Promise\n}\n\nexport type MkdirpOptions = Options | number | string\n\nexport interface MkdirpOptionsResolved {\n mode: number\n fs: FsProvider\n mkdirAsync: (\n path: string,\n opts: MakeDirectoryOptions & { recursive?: boolean }\n ) => Promise\n statAsync: (path: string) => Promise\n stat: (\n path: string,\n callback: (err: NodeJS.ErrnoException | null, stats: Stats) => any\n ) => any\n mkdir: (\n path: string,\n opts: MakeDirectoryOptions & { recursive?: boolean },\n callback: (err: NodeJS.ErrnoException | null, made?: string) => any\n ) => any\n statSync: (path: string) => Stats\n mkdirSync: (\n path: string,\n opts: MakeDirectoryOptions & { recursive?: boolean }\n ) => string | undefined\n recursive?: boolean\n}\n\nexport const optsArg = (opts?: MkdirpOptions): MkdirpOptionsResolved => {\n if (!opts) {\n opts = { mode: 0o777 }\n } else if (typeof opts === 'object') {\n opts = { mode: 0o777, ...opts }\n } else if (typeof opts === 'number') {\n opts = { mode: opts }\n } else if (typeof opts === 'string') {\n opts = { mode: parseInt(opts, 8) }\n } else {\n throw new TypeError('invalid options argument')\n }\n\n const resolved = opts as MkdirpOptionsResolved\n const optsFs = opts.fs || {}\n\n opts.mkdir = opts.mkdir || optsFs.mkdir || mkdir\n\n opts.mkdirAsync = opts.mkdirAsync\n ? opts.mkdirAsync\n : async (\n path: string,\n options: MakeDirectoryOptions & { recursive?: boolean }\n ): Promise => {\n return new Promise((res, rej) =>\n resolved.mkdir(path, options, (er, made) =>\n er ? rej(er) : res(made)\n )\n )\n }\n\n opts.stat = opts.stat || optsFs.stat || stat\n opts.statAsync = opts.statAsync\n ? opts.statAsync\n : async (path: string) =>\n new Promise((res, rej) =>\n resolved.stat(path, (err, stats) => (err ? rej(err) : res(stats)))\n )\n\n opts.statSync = opts.statSync || optsFs.statSync || statSync\n opts.mkdirSync = opts.mkdirSync || optsFs.mkdirSync || mkdirSync\n\n return resolved\n}\n"]} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/path-arg.d.ts b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/path-arg.d.ts new file mode 100644 index 00000000000000..ad0ccfc482a485 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/path-arg.d.ts @@ -0,0 +1,2 @@ +export declare const pathArg: (path: string) => string; +//# sourceMappingURL=path-arg.d.ts.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/path-arg.d.ts.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/path-arg.d.ts.map new file mode 100644 index 00000000000000..3b52b077c6c05c --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/path-arg.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"path-arg.d.ts","sourceRoot":"","sources":["../../../src/path-arg.ts"],"names":[],"mappings":"AAEA,eAAO,MAAM,OAAO,SAAU,MAAM,WAyBnC,CAAA"} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/path-arg.js b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/path-arg.js new file mode 100644 index 00000000000000..a6b457f6e23d58 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/path-arg.js @@ -0,0 +1,28 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.pathArg = void 0; +const platform = process.env.__TESTING_MKDIRP_PLATFORM__ || process.platform; +const path_1 = require("path"); +const pathArg = (path) => { + if (/\0/.test(path)) { + // simulate same failure that node raises + throw Object.assign(new TypeError('path must be a string without null bytes'), { + path, + code: 'ERR_INVALID_ARG_VALUE', + }); + } + path = (0, path_1.resolve)(path); + if (platform === 'win32') { + const badWinChars = /[*|"<>?:]/; + const { root } = (0, path_1.parse)(path); + if (badWinChars.test(path.substring(root.length))) { + throw Object.assign(new Error('Illegal characters in path.'), { + path, + code: 'EINVAL', + }); + } + } + return path; +}; +exports.pathArg = pathArg; +//# sourceMappingURL=path-arg.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/path-arg.js.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/path-arg.js.map new file mode 100644 index 00000000000000..ad3b5d38cad3cd --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/path-arg.js.map @@ -0,0 +1 @@ +{"version":3,"file":"path-arg.js","sourceRoot":"","sources":["../../../src/path-arg.ts"],"names":[],"mappings":";;;AAAA,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,2BAA2B,IAAI,OAAO,CAAC,QAAQ,CAAA;AAC5E,+BAAqC;AAC9B,MAAM,OAAO,GAAG,CAAC,IAAY,EAAE,EAAE;IACtC,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;QACnB,yCAAyC;QACzC,MAAM,MAAM,CAAC,MAAM,CACjB,IAAI,SAAS,CAAC,0CAA0C,CAAC,EACzD;YACE,IAAI;YACJ,IAAI,EAAE,uBAAuB;SAC9B,CACF,CAAA;KACF;IAED,IAAI,GAAG,IAAA,cAAO,EAAC,IAAI,CAAC,CAAA;IACpB,IAAI,QAAQ,KAAK,OAAO,EAAE;QACxB,MAAM,WAAW,GAAG,WAAW,CAAA;QAC/B,MAAM,EAAE,IAAI,EAAE,GAAG,IAAA,YAAK,EAAC,IAAI,CAAC,CAAA;QAC5B,IAAI,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,EAAE;YACjD,MAAM,MAAM,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,6BAA6B,CAAC,EAAE;gBAC5D,IAAI;gBACJ,IAAI,EAAE,QAAQ;aACf,CAAC,CAAA;SACH;KACF;IAED,OAAO,IAAI,CAAA;AACb,CAAC,CAAA;AAzBY,QAAA,OAAO,WAyBnB","sourcesContent":["const platform = process.env.__TESTING_MKDIRP_PLATFORM__ || process.platform\nimport { parse, resolve } from 'path'\nexport const pathArg = (path: string) => {\n if (/\\0/.test(path)) {\n // simulate same failure that node raises\n throw Object.assign(\n new TypeError('path must be a string without null bytes'),\n {\n path,\n code: 'ERR_INVALID_ARG_VALUE',\n }\n )\n }\n\n path = resolve(path)\n if (platform === 'win32') {\n const badWinChars = /[*|\"<>?:]/\n const { root } = parse(path)\n if (badWinChars.test(path.substring(root.length))) {\n throw Object.assign(new Error('Illegal characters in path.'), {\n path,\n code: 'EINVAL',\n })\n }\n }\n\n return path\n}\n"]} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/use-native.d.ts b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/use-native.d.ts new file mode 100644 index 00000000000000..1c6cb619e30405 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/use-native.d.ts @@ -0,0 +1,6 @@ +import { MkdirpOptions } from './opts-arg.js'; +export declare const useNativeSync: (opts?: MkdirpOptions) => boolean; +export declare const useNative: ((opts?: MkdirpOptions) => boolean) & { + sync: (opts?: MkdirpOptions) => boolean; +}; +//# sourceMappingURL=use-native.d.ts.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/use-native.d.ts.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/use-native.d.ts.map new file mode 100644 index 00000000000000..7dc275e322ea3b --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/use-native.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"use-native.d.ts","sourceRoot":"","sources":["../../../src/use-native.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,aAAa,EAAW,MAAM,eAAe,CAAA;AAMtD,eAAO,MAAM,aAAa,UAEd,aAAa,YAA0C,CAAA;AAEnE,eAAO,MAAM,SAAS,WAGR,aAAa;kBALf,aAAa;CASxB,CAAA"} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/use-native.js b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/use-native.js new file mode 100644 index 00000000000000..550b3452688ee5 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/use-native.js @@ -0,0 +1,17 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.useNative = exports.useNativeSync = void 0; +const fs_1 = require("fs"); +const opts_arg_js_1 = require("./opts-arg.js"); +const version = process.env.__TESTING_MKDIRP_NODE_VERSION__ || process.version; +const versArr = version.replace(/^v/, '').split('.'); +const hasNative = +versArr[0] > 10 || (+versArr[0] === 10 && +versArr[1] >= 12); +exports.useNativeSync = !hasNative + ? () => false + : (opts) => (0, opts_arg_js_1.optsArg)(opts).mkdirSync === fs_1.mkdirSync; +exports.useNative = Object.assign(!hasNative + ? () => false + : (opts) => (0, opts_arg_js_1.optsArg)(opts).mkdir === fs_1.mkdir, { + sync: exports.useNativeSync, +}); +//# sourceMappingURL=use-native.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/use-native.js.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/use-native.js.map new file mode 100644 index 00000000000000..9a15efebb9ec28 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/cjs/src/use-native.js.map @@ -0,0 +1 @@ +{"version":3,"file":"use-native.js","sourceRoot":"","sources":["../../../src/use-native.ts"],"names":[],"mappings":";;;AAAA,2BAAqC;AACrC,+CAAsD;AAEtD,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,+BAA+B,IAAI,OAAO,CAAC,OAAO,CAAA;AAC9E,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;AACpD,MAAM,SAAS,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAA;AAElE,QAAA,aAAa,GAAG,CAAC,SAAS;IACrC,CAAC,CAAC,GAAG,EAAE,CAAC,KAAK;IACb,CAAC,CAAC,CAAC,IAAoB,EAAE,EAAE,CAAC,IAAA,qBAAO,EAAC,IAAI,CAAC,CAAC,SAAS,KAAK,cAAS,CAAA;AAEtD,QAAA,SAAS,GAAG,MAAM,CAAC,MAAM,CACpC,CAAC,SAAS;IACR,CAAC,CAAC,GAAG,EAAE,CAAC,KAAK;IACb,CAAC,CAAC,CAAC,IAAoB,EAAE,EAAE,CAAC,IAAA,qBAAO,EAAC,IAAI,CAAC,CAAC,KAAK,KAAK,UAAK,EAC3D;IACE,IAAI,EAAE,qBAAa;CACpB,CACF,CAAA","sourcesContent":["import { mkdir, mkdirSync } from 'fs'\nimport { MkdirpOptions, optsArg } from './opts-arg.js'\n\nconst version = process.env.__TESTING_MKDIRP_NODE_VERSION__ || process.version\nconst versArr = version.replace(/^v/, '').split('.')\nconst hasNative = +versArr[0] > 10 || (+versArr[0] === 10 && +versArr[1] >= 12)\n\nexport const useNativeSync = !hasNative\n ? () => false\n : (opts?: MkdirpOptions) => optsArg(opts).mkdirSync === mkdirSync\n\nexport const useNative = Object.assign(\n !hasNative\n ? () => false\n : (opts?: MkdirpOptions) => optsArg(opts).mkdir === mkdir,\n {\n sync: useNativeSync,\n }\n)\n"]} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/find-made.d.ts b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/find-made.d.ts new file mode 100644 index 00000000000000..e47794b3bb72a3 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/find-made.d.ts @@ -0,0 +1,4 @@ +import { MkdirpOptionsResolved } from './opts-arg.js'; +export declare const findMade: (opts: MkdirpOptionsResolved, parent: string, path?: string) => Promise; +export declare const findMadeSync: (opts: MkdirpOptionsResolved, parent: string, path?: string) => undefined | string; +//# sourceMappingURL=find-made.d.ts.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/find-made.d.ts.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/find-made.d.ts.map new file mode 100644 index 00000000000000..411aad1410eb7a --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/find-made.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"find-made.d.ts","sourceRoot":"","sources":["../../src/find-made.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,qBAAqB,EAAE,MAAM,eAAe,CAAA;AAErD,eAAO,MAAM,QAAQ,SACb,qBAAqB,UACnB,MAAM,SACP,MAAM,KACZ,QAAQ,SAAS,GAAG,MAAM,CAe5B,CAAA;AAED,eAAO,MAAM,YAAY,SACjB,qBAAqB,UACnB,MAAM,SACP,MAAM,KACZ,SAAS,GAAG,MAad,CAAA"} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/find-made.js b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/find-made.js new file mode 100644 index 00000000000000..3e72fd59a2c1fb --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/find-made.js @@ -0,0 +1,30 @@ +import { dirname } from 'path'; +export const findMade = async (opts, parent, path) => { + // we never want the 'made' return value to be a root directory + if (path === parent) { + return; + } + return opts.statAsync(parent).then(st => (st.isDirectory() ? path : undefined), // will fail later + // will fail later + er => { + const fer = er; + return fer && fer.code === 'ENOENT' + ? findMade(opts, dirname(parent), parent) + : undefined; + }); +}; +export const findMadeSync = (opts, parent, path) => { + if (path === parent) { + return undefined; + } + try { + return opts.statSync(parent).isDirectory() ? path : undefined; + } + catch (er) { + const fer = er; + return fer && fer.code === 'ENOENT' + ? findMadeSync(opts, dirname(parent), parent) + : undefined; + } +}; +//# sourceMappingURL=find-made.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/find-made.js.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/find-made.js.map new file mode 100644 index 00000000000000..7b58089c6266c1 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/find-made.js.map @@ -0,0 +1 @@ +{"version":3,"file":"find-made.js","sourceRoot":"","sources":["../../src/find-made.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAA;AAG9B,MAAM,CAAC,MAAM,QAAQ,GAAG,KAAK,EAC3B,IAA2B,EAC3B,MAAc,EACd,IAAa,EACgB,EAAE;IAC/B,+DAA+D;IAC/D,IAAI,IAAI,KAAK,MAAM,EAAE;QACnB,OAAM;KACP;IAED,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,IAAI,CAChC,EAAE,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC,EAAE,kBAAkB;IAC/D,AAD6C,kBAAkB;IAC/D,EAAE,CAAC,EAAE;QACH,MAAM,GAAG,GAAG,EAA2B,CAAA;QACvC,OAAO,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ;YACjC,CAAC,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;YACzC,CAAC,CAAC,SAAS,CAAA;IACf,CAAC,CACF,CAAA;AACH,CAAC,CAAA;AAED,MAAM,CAAC,MAAM,YAAY,GAAG,CAC1B,IAA2B,EAC3B,MAAc,EACd,IAAa,EACO,EAAE;IACtB,IAAI,IAAI,KAAK,MAAM,EAAE;QACnB,OAAO,SAAS,CAAA;KACjB;IAED,IAAI;QACF,OAAO,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAA;KAC9D;IAAC,OAAO,EAAE,EAAE;QACX,MAAM,GAAG,GAAG,EAA2B,CAAA;QACvC,OAAO,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ;YACjC,CAAC,CAAC,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;YAC7C,CAAC,CAAC,SAAS,CAAA;KACd;AACH,CAAC,CAAA","sourcesContent":["import { dirname } from 'path'\nimport { MkdirpOptionsResolved } from './opts-arg.js'\n\nexport const findMade = async (\n opts: MkdirpOptionsResolved,\n parent: string,\n path?: string\n): Promise => {\n // we never want the 'made' return value to be a root directory\n if (path === parent) {\n return\n }\n\n return opts.statAsync(parent).then(\n st => (st.isDirectory() ? path : undefined), // will fail later\n er => {\n const fer = er as NodeJS.ErrnoException\n return fer && fer.code === 'ENOENT'\n ? findMade(opts, dirname(parent), parent)\n : undefined\n }\n )\n}\n\nexport const findMadeSync = (\n opts: MkdirpOptionsResolved,\n parent: string,\n path?: string\n): undefined | string => {\n if (path === parent) {\n return undefined\n }\n\n try {\n return opts.statSync(parent).isDirectory() ? path : undefined\n } catch (er) {\n const fer = er as NodeJS.ErrnoException\n return fer && fer.code === 'ENOENT'\n ? findMadeSync(opts, dirname(parent), parent)\n : undefined\n }\n}\n"]} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/index.d.ts b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/index.d.ts new file mode 100644 index 00000000000000..fc9e43b3a45de1 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/index.d.ts @@ -0,0 +1,39 @@ +import { MkdirpOptions } from './opts-arg.js'; +export { mkdirpManual, mkdirpManualSync } from './mkdirp-manual.js'; +export { mkdirpNative, mkdirpNativeSync } from './mkdirp-native.js'; +export { useNative, useNativeSync } from './use-native.js'; +export declare const mkdirpSync: (path: string, opts?: MkdirpOptions) => string | void; +export declare const sync: (path: string, opts?: MkdirpOptions) => string | void; +export declare const manual: ((path: string, options?: MkdirpOptions | undefined, made?: string | void | undefined) => Promise) & { + sync: (path: string, options?: MkdirpOptions | undefined, made?: string | void | undefined) => string | void | undefined; +}; +export declare const manualSync: (path: string, options?: MkdirpOptions | undefined, made?: string | void | undefined) => string | void | undefined; +export declare const native: ((path: string, options?: MkdirpOptions | undefined) => Promise) & { + sync: (path: string, options?: MkdirpOptions | undefined) => string | void | undefined; +}; +export declare const nativeSync: (path: string, options?: MkdirpOptions | undefined) => string | void | undefined; +export declare const mkdirp: ((path: string, opts?: MkdirpOptions) => Promise) & { + mkdirpSync: (path: string, opts?: MkdirpOptions) => string | void; + mkdirpNative: ((path: string, options?: MkdirpOptions | undefined) => Promise) & { + sync: (path: string, options?: MkdirpOptions | undefined) => string | void | undefined; + }; + mkdirpNativeSync: (path: string, options?: MkdirpOptions | undefined) => string | void | undefined; + mkdirpManual: ((path: string, options?: MkdirpOptions | undefined, made?: string | void | undefined) => Promise) & { + sync: (path: string, options?: MkdirpOptions | undefined, made?: string | void | undefined) => string | void | undefined; + }; + mkdirpManualSync: (path: string, options?: MkdirpOptions | undefined, made?: string | void | undefined) => string | void | undefined; + sync: (path: string, opts?: MkdirpOptions) => string | void; + native: ((path: string, options?: MkdirpOptions | undefined) => Promise) & { + sync: (path: string, options?: MkdirpOptions | undefined) => string | void | undefined; + }; + nativeSync: (path: string, options?: MkdirpOptions | undefined) => string | void | undefined; + manual: ((path: string, options?: MkdirpOptions | undefined, made?: string | void | undefined) => Promise) & { + sync: (path: string, options?: MkdirpOptions | undefined, made?: string | void | undefined) => string | void | undefined; + }; + manualSync: (path: string, options?: MkdirpOptions | undefined, made?: string | void | undefined) => string | void | undefined; + useNative: ((opts?: MkdirpOptions | undefined) => boolean) & { + sync: (opts?: MkdirpOptions | undefined) => boolean; + }; + useNativeSync: (opts?: MkdirpOptions | undefined) => boolean; +}; +//# sourceMappingURL=index.d.ts.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/index.d.ts.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/index.d.ts.map new file mode 100644 index 00000000000000..cfcc78083857b1 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/index.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,aAAa,EAAW,MAAM,eAAe,CAAA;AAItD,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAA;AACnE,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAA;AACnE,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAA;AAG1D,eAAO,MAAM,UAAU,SAAU,MAAM,SAAS,aAAa,kBAM5D,CAAA;AAED,eAAO,MAAM,IAAI,SARgB,MAAM,SAAS,aAAa,kBAQ/B,CAAA;AAC9B,eAAO,MAAM,MAAM;;CAAe,CAAA;AAClC,eAAO,MAAM,UAAU,oHAAmB,CAAA;AAC1C,eAAO,MAAM,MAAM;;CAAe,CAAA;AAClC,eAAO,MAAM,UAAU,kFAAmB,CAAA;AAC1C,eAAO,MAAM,MAAM,UACJ,MAAM,SAAS,aAAa;uBAdV,MAAM,SAAS,aAAa;;;;;;;;;iBAA5B,MAAM,SAAS,aAAa;;;;;;;;;;;;;CAoC5D,CAAA"} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/index.js b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/index.js new file mode 100644 index 00000000000000..0217ecc8cdd83d --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/index.js @@ -0,0 +1,43 @@ +import { mkdirpManual, mkdirpManualSync } from './mkdirp-manual.js'; +import { mkdirpNative, mkdirpNativeSync } from './mkdirp-native.js'; +import { optsArg } from './opts-arg.js'; +import { pathArg } from './path-arg.js'; +import { useNative, useNativeSync } from './use-native.js'; +/* c8 ignore start */ +export { mkdirpManual, mkdirpManualSync } from './mkdirp-manual.js'; +export { mkdirpNative, mkdirpNativeSync } from './mkdirp-native.js'; +export { useNative, useNativeSync } from './use-native.js'; +/* c8 ignore stop */ +export const mkdirpSync = (path, opts) => { + path = pathArg(path); + const resolved = optsArg(opts); + return useNativeSync(resolved) + ? mkdirpNativeSync(path, resolved) + : mkdirpManualSync(path, resolved); +}; +export const sync = mkdirpSync; +export const manual = mkdirpManual; +export const manualSync = mkdirpManualSync; +export const native = mkdirpNative; +export const nativeSync = mkdirpNativeSync; +export const mkdirp = Object.assign(async (path, opts) => { + path = pathArg(path); + const resolved = optsArg(opts); + return useNative(resolved) + ? mkdirpNative(path, resolved) + : mkdirpManual(path, resolved); +}, { + mkdirpSync, + mkdirpNative, + mkdirpNativeSync, + mkdirpManual, + mkdirpManualSync, + sync: mkdirpSync, + native: mkdirpNative, + nativeSync: mkdirpNativeSync, + manual: mkdirpManual, + manualSync: mkdirpManualSync, + useNative, + useNativeSync, +}); +//# sourceMappingURL=index.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/index.js.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/index.js.map new file mode 100644 index 00000000000000..47a8133a070c8f --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/index.js.map @@ -0,0 +1 @@ +{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAA;AACnE,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAA;AACnE,OAAO,EAAiB,OAAO,EAAE,MAAM,eAAe,CAAA;AACtD,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAA;AACvC,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAA;AAC1D,qBAAqB;AACrB,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAA;AACnE,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAA;AACnE,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAA;AAC1D,oBAAoB;AAEpB,MAAM,CAAC,MAAM,UAAU,GAAG,CAAC,IAAY,EAAE,IAAoB,EAAE,EAAE;IAC/D,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IACpB,MAAM,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAC9B,OAAO,aAAa,CAAC,QAAQ,CAAC;QAC5B,CAAC,CAAC,gBAAgB,CAAC,IAAI,EAAE,QAAQ,CAAC;QAClC,CAAC,CAAC,gBAAgB,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAA;AACtC,CAAC,CAAA;AAED,MAAM,CAAC,MAAM,IAAI,GAAG,UAAU,CAAA;AAC9B,MAAM,CAAC,MAAM,MAAM,GAAG,YAAY,CAAA;AAClC,MAAM,CAAC,MAAM,UAAU,GAAG,gBAAgB,CAAA;AAC1C,MAAM,CAAC,MAAM,MAAM,GAAG,YAAY,CAAA;AAClC,MAAM,CAAC,MAAM,UAAU,GAAG,gBAAgB,CAAA;AAC1C,MAAM,CAAC,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CACjC,KAAK,EAAE,IAAY,EAAE,IAAoB,EAAE,EAAE;IAC3C,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IACpB,MAAM,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAC9B,OAAO,SAAS,CAAC,QAAQ,CAAC;QACxB,CAAC,CAAC,YAAY,CAAC,IAAI,EAAE,QAAQ,CAAC;QAC9B,CAAC,CAAC,YAAY,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAA;AAClC,CAAC,EACD;IACE,UAAU;IACV,YAAY;IACZ,gBAAgB;IAChB,YAAY;IACZ,gBAAgB;IAEhB,IAAI,EAAE,UAAU;IAChB,MAAM,EAAE,YAAY;IACpB,UAAU,EAAE,gBAAgB;IAC5B,MAAM,EAAE,YAAY;IACpB,UAAU,EAAE,gBAAgB;IAC5B,SAAS;IACT,aAAa;CACd,CACF,CAAA","sourcesContent":["import { mkdirpManual, mkdirpManualSync } from './mkdirp-manual.js'\nimport { mkdirpNative, mkdirpNativeSync } from './mkdirp-native.js'\nimport { MkdirpOptions, optsArg } from './opts-arg.js'\nimport { pathArg } from './path-arg.js'\nimport { useNative, useNativeSync } from './use-native.js'\n/* c8 ignore start */\nexport { mkdirpManual, mkdirpManualSync } from './mkdirp-manual.js'\nexport { mkdirpNative, mkdirpNativeSync } from './mkdirp-native.js'\nexport { useNative, useNativeSync } from './use-native.js'\n/* c8 ignore stop */\n\nexport const mkdirpSync = (path: string, opts?: MkdirpOptions) => {\n path = pathArg(path)\n const resolved = optsArg(opts)\n return useNativeSync(resolved)\n ? mkdirpNativeSync(path, resolved)\n : mkdirpManualSync(path, resolved)\n}\n\nexport const sync = mkdirpSync\nexport const manual = mkdirpManual\nexport const manualSync = mkdirpManualSync\nexport const native = mkdirpNative\nexport const nativeSync = mkdirpNativeSync\nexport const mkdirp = Object.assign(\n async (path: string, opts?: MkdirpOptions) => {\n path = pathArg(path)\n const resolved = optsArg(opts)\n return useNative(resolved)\n ? mkdirpNative(path, resolved)\n : mkdirpManual(path, resolved)\n },\n {\n mkdirpSync,\n mkdirpNative,\n mkdirpNativeSync,\n mkdirpManual,\n mkdirpManualSync,\n\n sync: mkdirpSync,\n native: mkdirpNative,\n nativeSync: mkdirpNativeSync,\n manual: mkdirpManual,\n manualSync: mkdirpManualSync,\n useNative,\n useNativeSync,\n }\n)\n"]} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-manual.d.ts b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-manual.d.ts new file mode 100644 index 00000000000000..e49cdf9f1bd122 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-manual.d.ts @@ -0,0 +1,6 @@ +import { MkdirpOptions } from './opts-arg.js'; +export declare const mkdirpManualSync: (path: string, options?: MkdirpOptions, made?: string | undefined | void) => string | undefined | void; +export declare const mkdirpManual: ((path: string, options?: MkdirpOptions, made?: string | undefined | void) => Promise) & { + sync: (path: string, options?: MkdirpOptions, made?: string | undefined | void) => string | undefined | void; +}; +//# sourceMappingURL=mkdirp-manual.d.ts.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-manual.d.ts.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-manual.d.ts.map new file mode 100644 index 00000000000000..ae7f243d3ca78b --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-manual.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"mkdirp-manual.d.ts","sourceRoot":"","sources":["../../src/mkdirp-manual.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,aAAa,EAAW,MAAM,eAAe,CAAA;AAEtD,eAAO,MAAM,gBAAgB,SACrB,MAAM,YACF,aAAa,SAChB,MAAM,GAAG,SAAS,GAAG,IAAI,KAC/B,MAAM,GAAG,SAAS,GAAG,IAmCvB,CAAA;AAED,eAAO,MAAM,YAAY,UAEf,MAAM,YACF,aAAa,SAChB,MAAM,GAAG,SAAS,GAAG,IAAI,KAC/B,QAAQ,MAAM,GAAG,SAAS,GAAG,IAAI,CAAC;iBA7C/B,MAAM,YACF,aAAa,SAChB,MAAM,GAAG,SAAS,GAAG,IAAI,KAC/B,MAAM,GAAG,SAAS,GAAG,IAAI;CAqF3B,CAAA"} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-manual.js b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-manual.js new file mode 100644 index 00000000000000..a4d044e02d3bfc --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-manual.js @@ -0,0 +1,75 @@ +import { dirname } from 'path'; +import { optsArg } from './opts-arg.js'; +export const mkdirpManualSync = (path, options, made) => { + const parent = dirname(path); + const opts = { ...optsArg(options), recursive: false }; + if (parent === path) { + try { + return opts.mkdirSync(path, opts); + } + catch (er) { + // swallowed by recursive implementation on posix systems + // any other error is a failure + const fer = er; + if (fer && fer.code !== 'EISDIR') { + throw er; + } + return; + } + } + try { + opts.mkdirSync(path, opts); + return made || path; + } + catch (er) { + const fer = er; + if (fer && fer.code === 'ENOENT') { + return mkdirpManualSync(path, opts, mkdirpManualSync(parent, opts, made)); + } + if (fer && fer.code !== 'EEXIST' && fer && fer.code !== 'EROFS') { + throw er; + } + try { + if (!opts.statSync(path).isDirectory()) + throw er; + } + catch (_) { + throw er; + } + } +}; +export const mkdirpManual = Object.assign(async (path, options, made) => { + const opts = optsArg(options); + opts.recursive = false; + const parent = dirname(path); + if (parent === path) { + return opts.mkdirAsync(path, opts).catch(er => { + // swallowed by recursive implementation on posix systems + // any other error is a failure + const fer = er; + if (fer && fer.code !== 'EISDIR') { + throw er; + } + }); + } + return opts.mkdirAsync(path, opts).then(() => made || path, async (er) => { + const fer = er; + if (fer && fer.code === 'ENOENT') { + return mkdirpManual(parent, opts).then((made) => mkdirpManual(path, opts, made)); + } + if (fer && fer.code !== 'EEXIST' && fer.code !== 'EROFS') { + throw er; + } + return opts.statAsync(path).then(st => { + if (st.isDirectory()) { + return made; + } + else { + throw er; + } + }, () => { + throw er; + }); + }); +}, { sync: mkdirpManualSync }); +//# sourceMappingURL=mkdirp-manual.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-manual.js.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-manual.js.map new file mode 100644 index 00000000000000..29eab250e126c8 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-manual.js.map @@ -0,0 +1 @@ +{"version":3,"file":"mkdirp-manual.js","sourceRoot":"","sources":["../../src/mkdirp-manual.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAA;AAC9B,OAAO,EAAiB,OAAO,EAAE,MAAM,eAAe,CAAA;AAEtD,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAC9B,IAAY,EACZ,OAAuB,EACvB,IAAgC,EACL,EAAE;IAC7B,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAC5B,MAAM,IAAI,GAAG,EAAE,GAAG,OAAO,CAAC,OAAO,CAAC,EAAE,SAAS,EAAE,KAAK,EAAE,CAAA;IAEtD,IAAI,MAAM,KAAK,IAAI,EAAE;QACnB,IAAI;YACF,OAAO,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;SAClC;QAAC,OAAO,EAAE,EAAE;YACX,yDAAyD;YACzD,+BAA+B;YAC/B,MAAM,GAAG,GAAG,EAA2B,CAAA;YACvC,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE;gBAChC,MAAM,EAAE,CAAA;aACT;YACD,OAAM;SACP;KACF;IAED,IAAI;QACF,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;QAC1B,OAAO,IAAI,IAAI,IAAI,CAAA;KACpB;IAAC,OAAO,EAAE,EAAE;QACX,MAAM,GAAG,GAAG,EAA2B,CAAA;QACvC,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE;YAChC,OAAO,gBAAgB,CAAC,IAAI,EAAE,IAAI,EAAE,gBAAgB,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAA;SAC1E;QACD,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,OAAO,EAAE;YAC/D,MAAM,EAAE,CAAA;SACT;QACD,IAAI;YACF,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE;gBAAE,MAAM,EAAE,CAAA;SACjD;QAAC,OAAO,CAAC,EAAE;YACV,MAAM,EAAE,CAAA;SACT;KACF;AACH,CAAC,CAAA;AAED,MAAM,CAAC,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM,CACvC,KAAK,EACH,IAAY,EACZ,OAAuB,EACvB,IAAgC,EACI,EAAE;IACtC,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,CAAA;IAC7B,IAAI,CAAC,SAAS,GAAG,KAAK,CAAA;IACtB,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAC5B,IAAI,MAAM,KAAK,IAAI,EAAE;QACnB,OAAO,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,EAAE;YAC5C,yDAAyD;YACzD,+BAA+B;YAC/B,MAAM,GAAG,GAAG,EAA2B,CAAA;YACvC,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE;gBAChC,MAAM,EAAE,CAAA;aACT;QACH,CAAC,CAAC,CAAA;KACH;IAED,OAAO,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,IAAI,CACrC,GAAG,EAAE,CAAC,IAAI,IAAI,IAAI,EAClB,KAAK,EAAC,EAAE,EAAC,EAAE;QACT,MAAM,GAAG,GAAG,EAA2B,CAAA;QACvC,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE;YAChC,OAAO,YAAY,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,IAAI,CACpC,CAAC,IAAgC,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CACrE,CAAA;SACF;QACD,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,IAAI,GAAG,CAAC,IAAI,KAAK,OAAO,EAAE;YACxD,MAAM,EAAE,CAAA;SACT;QACD,OAAO,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,IAAI,CAC9B,EAAE,CAAC,EAAE;YACH,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE;gBACpB,OAAO,IAAI,CAAA;aACZ;iBAAM;gBACL,MAAM,EAAE,CAAA;aACT;QACH,CAAC,EACD,GAAG,EAAE;YACH,MAAM,EAAE,CAAA;QACV,CAAC,CACF,CAAA;IACH,CAAC,CACF,CAAA;AACH,CAAC,EACD,EAAE,IAAI,EAAE,gBAAgB,EAAE,CAC3B,CAAA","sourcesContent":["import { dirname } from 'path'\nimport { MkdirpOptions, optsArg } from './opts-arg.js'\n\nexport const mkdirpManualSync = (\n path: string,\n options?: MkdirpOptions,\n made?: string | undefined | void\n): string | undefined | void => {\n const parent = dirname(path)\n const opts = { ...optsArg(options), recursive: false }\n\n if (parent === path) {\n try {\n return opts.mkdirSync(path, opts)\n } catch (er) {\n // swallowed by recursive implementation on posix systems\n // any other error is a failure\n const fer = er as NodeJS.ErrnoException\n if (fer && fer.code !== 'EISDIR') {\n throw er\n }\n return\n }\n }\n\n try {\n opts.mkdirSync(path, opts)\n return made || path\n } catch (er) {\n const fer = er as NodeJS.ErrnoException\n if (fer && fer.code === 'ENOENT') {\n return mkdirpManualSync(path, opts, mkdirpManualSync(parent, opts, made))\n }\n if (fer && fer.code !== 'EEXIST' && fer && fer.code !== 'EROFS') {\n throw er\n }\n try {\n if (!opts.statSync(path).isDirectory()) throw er\n } catch (_) {\n throw er\n }\n }\n}\n\nexport const mkdirpManual = Object.assign(\n async (\n path: string,\n options?: MkdirpOptions,\n made?: string | undefined | void\n ): Promise => {\n const opts = optsArg(options)\n opts.recursive = false\n const parent = dirname(path)\n if (parent === path) {\n return opts.mkdirAsync(path, opts).catch(er => {\n // swallowed by recursive implementation on posix systems\n // any other error is a failure\n const fer = er as NodeJS.ErrnoException\n if (fer && fer.code !== 'EISDIR') {\n throw er\n }\n })\n }\n\n return opts.mkdirAsync(path, opts).then(\n () => made || path,\n async er => {\n const fer = er as NodeJS.ErrnoException\n if (fer && fer.code === 'ENOENT') {\n return mkdirpManual(parent, opts).then(\n (made?: string | undefined | void) => mkdirpManual(path, opts, made)\n )\n }\n if (fer && fer.code !== 'EEXIST' && fer.code !== 'EROFS') {\n throw er\n }\n return opts.statAsync(path).then(\n st => {\n if (st.isDirectory()) {\n return made\n } else {\n throw er\n }\n },\n () => {\n throw er\n }\n )\n }\n )\n },\n { sync: mkdirpManualSync }\n)\n"]} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-native.d.ts b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-native.d.ts new file mode 100644 index 00000000000000..28b64814b2545a --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-native.d.ts @@ -0,0 +1,6 @@ +import { MkdirpOptions } from './opts-arg.js'; +export declare const mkdirpNativeSync: (path: string, options?: MkdirpOptions) => string | void | undefined; +export declare const mkdirpNative: ((path: string, options?: MkdirpOptions) => Promise) & { + sync: (path: string, options?: MkdirpOptions) => string | void | undefined; +}; +//# sourceMappingURL=mkdirp-native.d.ts.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-native.d.ts.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-native.d.ts.map new file mode 100644 index 00000000000000..517dfabe7d1213 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-native.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"mkdirp-native.d.ts","sourceRoot":"","sources":["../../src/mkdirp-native.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,aAAa,EAAW,MAAM,eAAe,CAAA;AAEtD,eAAO,MAAM,gBAAgB,SACrB,MAAM,YACF,aAAa,KACtB,MAAM,GAAG,IAAI,GAAG,SAoBlB,CAAA;AAED,eAAO,MAAM,YAAY,UAEf,MAAM,YACF,aAAa,KACtB,QAAQ,MAAM,GAAG,IAAI,GAAG,SAAS,CAAC;iBA5B/B,MAAM,YACF,aAAa,KACtB,MAAM,GAAG,IAAI,GAAG,SAAS;CAgD3B,CAAA"} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-native.js b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-native.js new file mode 100644 index 00000000000000..99d10a5425dade --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-native.js @@ -0,0 +1,46 @@ +import { dirname } from 'path'; +import { findMade, findMadeSync } from './find-made.js'; +import { mkdirpManual, mkdirpManualSync } from './mkdirp-manual.js'; +import { optsArg } from './opts-arg.js'; +export const mkdirpNativeSync = (path, options) => { + const opts = optsArg(options); + opts.recursive = true; + const parent = dirname(path); + if (parent === path) { + return opts.mkdirSync(path, opts); + } + const made = findMadeSync(opts, path); + try { + opts.mkdirSync(path, opts); + return made; + } + catch (er) { + const fer = er; + if (fer && fer.code === 'ENOENT') { + return mkdirpManualSync(path, opts); + } + else { + throw er; + } + } +}; +export const mkdirpNative = Object.assign(async (path, options) => { + const opts = { ...optsArg(options), recursive: true }; + const parent = dirname(path); + if (parent === path) { + return await opts.mkdirAsync(path, opts); + } + return findMade(opts, path).then((made) => opts + .mkdirAsync(path, opts) + .then(m => made || m) + .catch(er => { + const fer = er; + if (fer && fer.code === 'ENOENT') { + return mkdirpManual(path, opts); + } + else { + throw er; + } + })); +}, { sync: mkdirpNativeSync }); +//# sourceMappingURL=mkdirp-native.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-native.js.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-native.js.map new file mode 100644 index 00000000000000..27de32d9436d67 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/mkdirp-native.js.map @@ -0,0 +1 @@ +{"version":3,"file":"mkdirp-native.js","sourceRoot":"","sources":["../../src/mkdirp-native.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAA;AAC9B,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAA;AACvD,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAA;AACnE,OAAO,EAAiB,OAAO,EAAE,MAAM,eAAe,CAAA;AAEtD,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAC9B,IAAY,EACZ,OAAuB,EACI,EAAE;IAC7B,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,CAAA;IAC7B,IAAI,CAAC,SAAS,GAAG,IAAI,CAAA;IACrB,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAC5B,IAAI,MAAM,KAAK,IAAI,EAAE;QACnB,OAAO,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;KAClC;IAED,MAAM,IAAI,GAAG,YAAY,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;IACrC,IAAI;QACF,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;QAC1B,OAAO,IAAI,CAAA;KACZ;IAAC,OAAO,EAAE,EAAE;QACX,MAAM,GAAG,GAAG,EAA2B,CAAA;QACvC,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE;YAChC,OAAO,gBAAgB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;SACpC;aAAM;YACL,MAAM,EAAE,CAAA;SACT;KACF;AACH,CAAC,CAAA;AAED,MAAM,CAAC,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM,CACvC,KAAK,EACH,IAAY,EACZ,OAAuB,EACa,EAAE;IACtC,MAAM,IAAI,GAAG,EAAE,GAAG,OAAO,CAAC,OAAO,CAAC,EAAE,SAAS,EAAE,IAAI,EAAE,CAAA;IACrD,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAC5B,IAAI,MAAM,KAAK,IAAI,EAAE;QACnB,OAAO,MAAM,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;KACzC;IAED,OAAO,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,IAAyB,EAAE,EAAE,CAC7D,IAAI;SACD,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC;SACtB,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,IAAI,CAAC,CAAC;SACpB,KAAK,CAAC,EAAE,CAAC,EAAE;QACV,MAAM,GAAG,GAAG,EAA2B,CAAA;QACvC,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE;YAChC,OAAO,YAAY,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;SAChC;aAAM;YACL,MAAM,EAAE,CAAA;SACT;IACH,CAAC,CAAC,CACL,CAAA;AACH,CAAC,EACD,EAAE,IAAI,EAAE,gBAAgB,EAAE,CAC3B,CAAA","sourcesContent":["import { dirname } from 'path'\nimport { findMade, findMadeSync } from './find-made.js'\nimport { mkdirpManual, mkdirpManualSync } from './mkdirp-manual.js'\nimport { MkdirpOptions, optsArg } from './opts-arg.js'\n\nexport const mkdirpNativeSync = (\n path: string,\n options?: MkdirpOptions\n): string | void | undefined => {\n const opts = optsArg(options)\n opts.recursive = true\n const parent = dirname(path)\n if (parent === path) {\n return opts.mkdirSync(path, opts)\n }\n\n const made = findMadeSync(opts, path)\n try {\n opts.mkdirSync(path, opts)\n return made\n } catch (er) {\n const fer = er as NodeJS.ErrnoException\n if (fer && fer.code === 'ENOENT') {\n return mkdirpManualSync(path, opts)\n } else {\n throw er\n }\n }\n}\n\nexport const mkdirpNative = Object.assign(\n async (\n path: string,\n options?: MkdirpOptions\n ): Promise => {\n const opts = { ...optsArg(options), recursive: true }\n const parent = dirname(path)\n if (parent === path) {\n return await opts.mkdirAsync(path, opts)\n }\n\n return findMade(opts, path).then((made?: string | undefined) =>\n opts\n .mkdirAsync(path, opts)\n .then(m => made || m)\n .catch(er => {\n const fer = er as NodeJS.ErrnoException\n if (fer && fer.code === 'ENOENT') {\n return mkdirpManual(path, opts)\n } else {\n throw er\n }\n })\n )\n },\n { sync: mkdirpNativeSync }\n)\n"]} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/opts-arg.d.ts b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/opts-arg.d.ts new file mode 100644 index 00000000000000..73d076b3b6923c --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/opts-arg.d.ts @@ -0,0 +1,42 @@ +/// +/// +import { MakeDirectoryOptions, Stats } from 'fs'; +export interface FsProvider { + stat?: (path: string, callback: (err: NodeJS.ErrnoException | null, stats: Stats) => any) => any; + mkdir?: (path: string, opts: MakeDirectoryOptions & { + recursive?: boolean; + }, callback: (err: NodeJS.ErrnoException | null, made?: string) => any) => any; + statSync?: (path: string) => Stats; + mkdirSync?: (path: string, opts: MakeDirectoryOptions & { + recursive?: boolean; + }) => string | undefined; +} +interface Options extends FsProvider { + mode?: number | string; + fs?: FsProvider; + mkdirAsync?: (path: string, opts: MakeDirectoryOptions & { + recursive?: boolean; + }) => Promise; + statAsync?: (path: string) => Promise; +} +export type MkdirpOptions = Options | number | string; +export interface MkdirpOptionsResolved { + mode: number; + fs: FsProvider; + mkdirAsync: (path: string, opts: MakeDirectoryOptions & { + recursive?: boolean; + }) => Promise; + statAsync: (path: string) => Promise; + stat: (path: string, callback: (err: NodeJS.ErrnoException | null, stats: Stats) => any) => any; + mkdir: (path: string, opts: MakeDirectoryOptions & { + recursive?: boolean; + }, callback: (err: NodeJS.ErrnoException | null, made?: string) => any) => any; + statSync: (path: string) => Stats; + mkdirSync: (path: string, opts: MakeDirectoryOptions & { + recursive?: boolean; + }) => string | undefined; + recursive?: boolean; +} +export declare const optsArg: (opts?: MkdirpOptions) => MkdirpOptionsResolved; +export {}; +//# sourceMappingURL=opts-arg.d.ts.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/opts-arg.d.ts.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/opts-arg.d.ts.map new file mode 100644 index 00000000000000..717deb5f9cb0c6 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/opts-arg.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"opts-arg.d.ts","sourceRoot":"","sources":["../../src/opts-arg.ts"],"names":[],"mappings":";;AAAA,OAAO,EACL,oBAAoB,EAIpB,KAAK,EAEN,MAAM,IAAI,CAAA;AAEX,MAAM,WAAW,UAAU;IACzB,IAAI,CAAC,EAAE,CACL,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,CAAC,GAAG,EAAE,MAAM,CAAC,cAAc,GAAG,IAAI,EAAE,KAAK,EAAE,KAAK,KAAK,GAAG,KAC/D,GAAG,CAAA;IACR,KAAK,CAAC,EAAE,CACN,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,oBAAoB,GAAG;QAAE,SAAS,CAAC,EAAE,OAAO,CAAA;KAAE,EACpD,QAAQ,EAAE,CAAC,GAAG,EAAE,MAAM,CAAC,cAAc,GAAG,IAAI,EAAE,IAAI,CAAC,EAAE,MAAM,KAAK,GAAG,KAChE,GAAG,CAAA;IACR,QAAQ,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,KAAK,CAAA;IAClC,SAAS,CAAC,EAAE,CACV,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,oBAAoB,GAAG;QAAE,SAAS,CAAC,EAAE,OAAO,CAAA;KAAE,KACjD,MAAM,GAAG,SAAS,CAAA;CACxB;AAED,UAAU,OAAQ,SAAQ,UAAU;IAClC,IAAI,CAAC,EAAE,MAAM,GAAG,MAAM,CAAA;IACtB,EAAE,CAAC,EAAE,UAAU,CAAA;IACf,UAAU,CAAC,EAAE,CACX,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,oBAAoB,GAAG;QAAE,SAAS,CAAC,EAAE,OAAO,CAAA;KAAE,KACjD,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CAAA;IAChC,SAAS,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,KAAK,CAAC,CAAA;CAC7C;AAED,MAAM,MAAM,aAAa,GAAG,OAAO,GAAG,MAAM,GAAG,MAAM,CAAA;AAErD,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,MAAM,CAAA;IACZ,EAAE,EAAE,UAAU,CAAA;IACd,UAAU,EAAE,CACV,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,oBAAoB,GAAG;QAAE,SAAS,CAAC,EAAE,OAAO,CAAA;KAAE,KACjD,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CAAA;IAChC,SAAS,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,KAAK,CAAC,CAAA;IAC3C,IAAI,EAAE,CACJ,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,CAAC,GAAG,EAAE,MAAM,CAAC,cAAc,GAAG,IAAI,EAAE,KAAK,EAAE,KAAK,KAAK,GAAG,KAC/D,GAAG,CAAA;IACR,KAAK,EAAE,CACL,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,oBAAoB,GAAG;QAAE,SAAS,CAAC,EAAE,OAAO,CAAA;KAAE,EACpD,QAAQ,EAAE,CAAC,GAAG,EAAE,MAAM,CAAC,cAAc,GAAG,IAAI,EAAE,IAAI,CAAC,EAAE,MAAM,KAAK,GAAG,KAChE,GAAG,CAAA;IACR,QAAQ,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,KAAK,CAAA;IACjC,SAAS,EAAE,CACT,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,oBAAoB,GAAG;QAAE,SAAS,CAAC,EAAE,OAAO,CAAA;KAAE,KACjD,MAAM,GAAG,SAAS,CAAA;IACvB,SAAS,CAAC,EAAE,OAAO,CAAA;CACpB;AAED,eAAO,MAAM,OAAO,UAAW,aAAa,KAAG,qBA2C9C,CAAA"} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/opts-arg.js b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/opts-arg.js new file mode 100644 index 00000000000000..d47e2927fee4c0 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/opts-arg.js @@ -0,0 +1,34 @@ +import { mkdir, mkdirSync, stat, statSync, } from 'fs'; +export const optsArg = (opts) => { + if (!opts) { + opts = { mode: 0o777 }; + } + else if (typeof opts === 'object') { + opts = { mode: 0o777, ...opts }; + } + else if (typeof opts === 'number') { + opts = { mode: opts }; + } + else if (typeof opts === 'string') { + opts = { mode: parseInt(opts, 8) }; + } + else { + throw new TypeError('invalid options argument'); + } + const resolved = opts; + const optsFs = opts.fs || {}; + opts.mkdir = opts.mkdir || optsFs.mkdir || mkdir; + opts.mkdirAsync = opts.mkdirAsync + ? opts.mkdirAsync + : async (path, options) => { + return new Promise((res, rej) => resolved.mkdir(path, options, (er, made) => er ? rej(er) : res(made))); + }; + opts.stat = opts.stat || optsFs.stat || stat; + opts.statAsync = opts.statAsync + ? opts.statAsync + : async (path) => new Promise((res, rej) => resolved.stat(path, (err, stats) => (err ? rej(err) : res(stats)))); + opts.statSync = opts.statSync || optsFs.statSync || statSync; + opts.mkdirSync = opts.mkdirSync || optsFs.mkdirSync || mkdirSync; + return resolved; +}; +//# sourceMappingURL=opts-arg.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/opts-arg.js.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/opts-arg.js.map new file mode 100644 index 00000000000000..663286dc7212ed --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/opts-arg.js.map @@ -0,0 +1 @@ +{"version":3,"file":"opts-arg.js","sourceRoot":"","sources":["../../src/opts-arg.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,EACL,SAAS,EACT,IAAI,EAEJ,QAAQ,GACT,MAAM,IAAI,CAAA;AAwDX,MAAM,CAAC,MAAM,OAAO,GAAG,CAAC,IAAoB,EAAyB,EAAE;IACrE,IAAI,CAAC,IAAI,EAAE;QACT,IAAI,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,CAAA;KACvB;SAAM,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE;QACnC,IAAI,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,IAAI,EAAE,CAAA;KAChC;SAAM,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE;QACnC,IAAI,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;KACtB;SAAM,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE;QACnC,IAAI,GAAG,EAAE,IAAI,EAAE,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,EAAE,CAAA;KACnC;SAAM;QACL,MAAM,IAAI,SAAS,CAAC,0BAA0B,CAAC,CAAA;KAChD;IAED,MAAM,QAAQ,GAAG,IAA6B,CAAA;IAC9C,MAAM,MAAM,GAAG,IAAI,CAAC,EAAE,IAAI,EAAE,CAAA;IAE5B,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,IAAI,KAAK,CAAA;IAEhD,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU;QAC/B,CAAC,CAAC,IAAI,CAAC,UAAU;QACjB,CAAC,CAAC,KAAK,EACH,IAAY,EACZ,OAAuD,EAC1B,EAAE;YAC/B,OAAO,IAAI,OAAO,CAAqB,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAClD,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,OAAO,EAAE,CAAC,EAAE,EAAE,IAAI,EAAE,EAAE,CACzC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CACzB,CACF,CAAA;QACH,CAAC,CAAA;IAEL,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,IAAI,MAAM,CAAC,IAAI,IAAI,IAAI,CAAA;IAC5C,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS;QAC7B,CAAC,CAAC,IAAI,CAAC,SAAS;QAChB,CAAC,CAAC,KAAK,EAAE,IAAY,EAAE,EAAE,CACrB,IAAI,OAAO,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CACvB,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CACnE,CAAA;IAEP,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,MAAM,CAAC,QAAQ,IAAI,QAAQ,CAAA;IAC5D,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,MAAM,CAAC,SAAS,IAAI,SAAS,CAAA;IAEhE,OAAO,QAAQ,CAAA;AACjB,CAAC,CAAA","sourcesContent":["import {\n MakeDirectoryOptions,\n mkdir,\n mkdirSync,\n stat,\n Stats,\n statSync,\n} from 'fs'\n\nexport interface FsProvider {\n stat?: (\n path: string,\n callback: (err: NodeJS.ErrnoException | null, stats: Stats) => any\n ) => any\n mkdir?: (\n path: string,\n opts: MakeDirectoryOptions & { recursive?: boolean },\n callback: (err: NodeJS.ErrnoException | null, made?: string) => any\n ) => any\n statSync?: (path: string) => Stats\n mkdirSync?: (\n path: string,\n opts: MakeDirectoryOptions & { recursive?: boolean }\n ) => string | undefined\n}\n\ninterface Options extends FsProvider {\n mode?: number | string\n fs?: FsProvider\n mkdirAsync?: (\n path: string,\n opts: MakeDirectoryOptions & { recursive?: boolean }\n ) => Promise\n statAsync?: (path: string) => Promise\n}\n\nexport type MkdirpOptions = Options | number | string\n\nexport interface MkdirpOptionsResolved {\n mode: number\n fs: FsProvider\n mkdirAsync: (\n path: string,\n opts: MakeDirectoryOptions & { recursive?: boolean }\n ) => Promise\n statAsync: (path: string) => Promise\n stat: (\n path: string,\n callback: (err: NodeJS.ErrnoException | null, stats: Stats) => any\n ) => any\n mkdir: (\n path: string,\n opts: MakeDirectoryOptions & { recursive?: boolean },\n callback: (err: NodeJS.ErrnoException | null, made?: string) => any\n ) => any\n statSync: (path: string) => Stats\n mkdirSync: (\n path: string,\n opts: MakeDirectoryOptions & { recursive?: boolean }\n ) => string | undefined\n recursive?: boolean\n}\n\nexport const optsArg = (opts?: MkdirpOptions): MkdirpOptionsResolved => {\n if (!opts) {\n opts = { mode: 0o777 }\n } else if (typeof opts === 'object') {\n opts = { mode: 0o777, ...opts }\n } else if (typeof opts === 'number') {\n opts = { mode: opts }\n } else if (typeof opts === 'string') {\n opts = { mode: parseInt(opts, 8) }\n } else {\n throw new TypeError('invalid options argument')\n }\n\n const resolved = opts as MkdirpOptionsResolved\n const optsFs = opts.fs || {}\n\n opts.mkdir = opts.mkdir || optsFs.mkdir || mkdir\n\n opts.mkdirAsync = opts.mkdirAsync\n ? opts.mkdirAsync\n : async (\n path: string,\n options: MakeDirectoryOptions & { recursive?: boolean }\n ): Promise => {\n return new Promise((res, rej) =>\n resolved.mkdir(path, options, (er, made) =>\n er ? rej(er) : res(made)\n )\n )\n }\n\n opts.stat = opts.stat || optsFs.stat || stat\n opts.statAsync = opts.statAsync\n ? opts.statAsync\n : async (path: string) =>\n new Promise((res, rej) =>\n resolved.stat(path, (err, stats) => (err ? rej(err) : res(stats)))\n )\n\n opts.statSync = opts.statSync || optsFs.statSync || statSync\n opts.mkdirSync = opts.mkdirSync || optsFs.mkdirSync || mkdirSync\n\n return resolved\n}\n"]} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/package.json b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/package.json new file mode 100644 index 00000000000000..3dbc1ca591c055 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/package.json @@ -0,0 +1,3 @@ +{ + "type": "module" +} diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/path-arg.d.ts b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/path-arg.d.ts new file mode 100644 index 00000000000000..ad0ccfc482a485 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/path-arg.d.ts @@ -0,0 +1,2 @@ +export declare const pathArg: (path: string) => string; +//# sourceMappingURL=path-arg.d.ts.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/path-arg.d.ts.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/path-arg.d.ts.map new file mode 100644 index 00000000000000..801799e766fabc --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/path-arg.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"path-arg.d.ts","sourceRoot":"","sources":["../../src/path-arg.ts"],"names":[],"mappings":"AAEA,eAAO,MAAM,OAAO,SAAU,MAAM,WAyBnC,CAAA"} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/path-arg.js b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/path-arg.js new file mode 100644 index 00000000000000..03539cc5a94f98 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/path-arg.js @@ -0,0 +1,24 @@ +const platform = process.env.__TESTING_MKDIRP_PLATFORM__ || process.platform; +import { parse, resolve } from 'path'; +export const pathArg = (path) => { + if (/\0/.test(path)) { + // simulate same failure that node raises + throw Object.assign(new TypeError('path must be a string without null bytes'), { + path, + code: 'ERR_INVALID_ARG_VALUE', + }); + } + path = resolve(path); + if (platform === 'win32') { + const badWinChars = /[*|"<>?:]/; + const { root } = parse(path); + if (badWinChars.test(path.substring(root.length))) { + throw Object.assign(new Error('Illegal characters in path.'), { + path, + code: 'EINVAL', + }); + } + } + return path; +}; +//# sourceMappingURL=path-arg.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/path-arg.js.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/path-arg.js.map new file mode 100644 index 00000000000000..43efe1e3a9976f --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/path-arg.js.map @@ -0,0 +1 @@ +{"version":3,"file":"path-arg.js","sourceRoot":"","sources":["../../src/path-arg.ts"],"names":[],"mappings":"AAAA,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,2BAA2B,IAAI,OAAO,CAAC,QAAQ,CAAA;AAC5E,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,MAAM,CAAA;AACrC,MAAM,CAAC,MAAM,OAAO,GAAG,CAAC,IAAY,EAAE,EAAE;IACtC,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;QACnB,yCAAyC;QACzC,MAAM,MAAM,CAAC,MAAM,CACjB,IAAI,SAAS,CAAC,0CAA0C,CAAC,EACzD;YACE,IAAI;YACJ,IAAI,EAAE,uBAAuB;SAC9B,CACF,CAAA;KACF;IAED,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IACpB,IAAI,QAAQ,KAAK,OAAO,EAAE;QACxB,MAAM,WAAW,GAAG,WAAW,CAAA;QAC/B,MAAM,EAAE,IAAI,EAAE,GAAG,KAAK,CAAC,IAAI,CAAC,CAAA;QAC5B,IAAI,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,EAAE;YACjD,MAAM,MAAM,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,6BAA6B,CAAC,EAAE;gBAC5D,IAAI;gBACJ,IAAI,EAAE,QAAQ;aACf,CAAC,CAAA;SACH;KACF;IAED,OAAO,IAAI,CAAA;AACb,CAAC,CAAA","sourcesContent":["const platform = process.env.__TESTING_MKDIRP_PLATFORM__ || process.platform\nimport { parse, resolve } from 'path'\nexport const pathArg = (path: string) => {\n if (/\\0/.test(path)) {\n // simulate same failure that node raises\n throw Object.assign(\n new TypeError('path must be a string without null bytes'),\n {\n path,\n code: 'ERR_INVALID_ARG_VALUE',\n }\n )\n }\n\n path = resolve(path)\n if (platform === 'win32') {\n const badWinChars = /[*|\"<>?:]/\n const { root } = parse(path)\n if (badWinChars.test(path.substring(root.length))) {\n throw Object.assign(new Error('Illegal characters in path.'), {\n path,\n code: 'EINVAL',\n })\n }\n }\n\n return path\n}\n"]} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/use-native.d.ts b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/use-native.d.ts new file mode 100644 index 00000000000000..1c6cb619e30405 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/use-native.d.ts @@ -0,0 +1,6 @@ +import { MkdirpOptions } from './opts-arg.js'; +export declare const useNativeSync: (opts?: MkdirpOptions) => boolean; +export declare const useNative: ((opts?: MkdirpOptions) => boolean) & { + sync: (opts?: MkdirpOptions) => boolean; +}; +//# sourceMappingURL=use-native.d.ts.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/use-native.d.ts.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/use-native.d.ts.map new file mode 100644 index 00000000000000..e2484228a04472 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/use-native.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"use-native.d.ts","sourceRoot":"","sources":["../../src/use-native.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,aAAa,EAAW,MAAM,eAAe,CAAA;AAMtD,eAAO,MAAM,aAAa,UAEd,aAAa,YAA0C,CAAA;AAEnE,eAAO,MAAM,SAAS,WAGR,aAAa;kBALf,aAAa;CASxB,CAAA"} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/use-native.js b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/use-native.js new file mode 100644 index 00000000000000..ad2093867eb74e --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/use-native.js @@ -0,0 +1,14 @@ +import { mkdir, mkdirSync } from 'fs'; +import { optsArg } from './opts-arg.js'; +const version = process.env.__TESTING_MKDIRP_NODE_VERSION__ || process.version; +const versArr = version.replace(/^v/, '').split('.'); +const hasNative = +versArr[0] > 10 || (+versArr[0] === 10 && +versArr[1] >= 12); +export const useNativeSync = !hasNative + ? () => false + : (opts) => optsArg(opts).mkdirSync === mkdirSync; +export const useNative = Object.assign(!hasNative + ? () => false + : (opts) => optsArg(opts).mkdir === mkdir, { + sync: useNativeSync, +}); +//# sourceMappingURL=use-native.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/use-native.js.map b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/use-native.js.map new file mode 100644 index 00000000000000..08c616d365510f --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/dist/mjs/use-native.js.map @@ -0,0 +1 @@ +{"version":3,"file":"use-native.js","sourceRoot":"","sources":["../../src/use-native.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,IAAI,CAAA;AACrC,OAAO,EAAiB,OAAO,EAAE,MAAM,eAAe,CAAA;AAEtD,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,+BAA+B,IAAI,OAAO,CAAC,OAAO,CAAA;AAC9E,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;AACpD,MAAM,SAAS,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAA;AAE/E,MAAM,CAAC,MAAM,aAAa,GAAG,CAAC,SAAS;IACrC,CAAC,CAAC,GAAG,EAAE,CAAC,KAAK;IACb,CAAC,CAAC,CAAC,IAAoB,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,SAAS,KAAK,SAAS,CAAA;AAEnE,MAAM,CAAC,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CACpC,CAAC,SAAS;IACR,CAAC,CAAC,GAAG,EAAE,CAAC,KAAK;IACb,CAAC,CAAC,CAAC,IAAoB,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,KAAK,KAAK,KAAK,EAC3D;IACE,IAAI,EAAE,aAAa;CACpB,CACF,CAAA","sourcesContent":["import { mkdir, mkdirSync } from 'fs'\nimport { MkdirpOptions, optsArg } from './opts-arg.js'\n\nconst version = process.env.__TESTING_MKDIRP_NODE_VERSION__ || process.version\nconst versArr = version.replace(/^v/, '').split('.')\nconst hasNative = +versArr[0] > 10 || (+versArr[0] === 10 && +versArr[1] >= 12)\n\nexport const useNativeSync = !hasNative\n ? () => false\n : (opts?: MkdirpOptions) => optsArg(opts).mkdirSync === mkdirSync\n\nexport const useNative = Object.assign(\n !hasNative\n ? () => false\n : (opts?: MkdirpOptions) => optsArg(opts).mkdir === mkdir,\n {\n sync: useNativeSync,\n }\n)\n"]} \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/package.json b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/package.json new file mode 100644 index 00000000000000..f31ac3314d6f6a --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/package.json @@ -0,0 +1,91 @@ +{ + "name": "mkdirp", + "description": "Recursively mkdir, like `mkdir -p`", + "version": "3.0.1", + "keywords": [ + "mkdir", + "directory", + "make dir", + "make", + "dir", + "recursive", + "native" + ], + "bin": "./dist/cjs/src/bin.js", + "main": "./dist/cjs/src/index.js", + "module": "./dist/mjs/index.js", + "types": "./dist/mjs/index.d.ts", + "exports": { + ".": { + "import": { + "types": "./dist/mjs/index.d.ts", + "default": "./dist/mjs/index.js" + }, + "require": { + "types": "./dist/cjs/src/index.d.ts", + "default": "./dist/cjs/src/index.js" + } + } + }, + "files": [ + "dist" + ], + "scripts": { + "preversion": "npm test", + "postversion": "npm publish", + "prepublishOnly": "git push origin --follow-tags", + "preprepare": "rm -rf dist", + "prepare": "tsc -p tsconfig.json && tsc -p tsconfig-esm.json", + "postprepare": "bash fixup.sh", + "pretest": "npm run prepare", + "presnap": "npm run prepare", + "test": "c8 tap", + "snap": "c8 tap", + "format": "prettier --write . --loglevel warn", + "benchmark": "node benchmark/index.js", + "typedoc": "typedoc --tsconfig tsconfig-esm.json ./src/*.ts" + }, + "prettier": { + "semi": false, + "printWidth": 80, + "tabWidth": 2, + "useTabs": false, + "singleQuote": true, + "jsxSingleQuote": false, + "bracketSameLine": true, + "arrowParens": "avoid", + "endOfLine": "lf" + }, + "devDependencies": { + "@types/brace-expansion": "^1.1.0", + "@types/node": "^18.11.9", + "@types/tap": "^15.0.7", + "c8": "^7.12.0", + "eslint-config-prettier": "^8.6.0", + "prettier": "^2.8.2", + "tap": "^16.3.3", + "ts-node": "^10.9.1", + "typedoc": "^0.23.21", + "typescript": "^4.9.3" + }, + "tap": { + "coverage": false, + "node-arg": [ + "--no-warnings", + "--loader", + "ts-node/esm" + ], + "ts": false + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + }, + "repository": { + "type": "git", + "url": "https://github.com/isaacs/node-mkdirp.git" + }, + "license": "MIT", + "engines": { + "node": ">=10" + } +} diff --git a/deps/npm/node_modules/node-gyp/node_modules/mkdirp/readme.markdown b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/readme.markdown new file mode 100644 index 00000000000000..df654b808755f5 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/mkdirp/readme.markdown @@ -0,0 +1,281 @@ +# mkdirp + +Like `mkdir -p`, but in Node.js! + +Now with a modern API and no\* bugs! + +\* may contain some bugs + +# example + +## pow.js + +```js +// hybrid module, import or require() both work +import { mkdirp } from 'mkdirp' +// or: +const { mkdirp } = require('mkdirp') + +// return value is a Promise resolving to the first directory created +mkdirp('/tmp/foo/bar/baz').then(made => + console.log(`made directories, starting with ${made}`) +) +``` + +Output (where `/tmp/foo` already exists) + +``` +made directories, starting with /tmp/foo/bar +``` + +Or, if you don't have time to wait around for promises: + +```js +import { mkdirp } from 'mkdirp' + +// return value is the first directory created +const made = mkdirp.sync('/tmp/foo/bar/baz') +console.log(`made directories, starting with ${made}`) +``` + +And now /tmp/foo/bar/baz exists, huzzah! + +# methods + +```js +import { mkdirp } from 'mkdirp' +``` + +## `mkdirp(dir: string, opts?: MkdirpOptions) => Promise` + +Create a new directory and any necessary subdirectories at `dir` +with octal permission string `opts.mode`. If `opts` is a string +or number, it will be treated as the `opts.mode`. + +If `opts.mode` isn't specified, it defaults to `0o777`. + +Promise resolves to first directory `made` that had to be +created, or `undefined` if everything already exists. Promise +rejects if any errors are encountered. Note that, in the case of +promise rejection, some directories _may_ have been created, as +recursive directory creation is not an atomic operation. + +You can optionally pass in an alternate `fs` implementation by +passing in `opts.fs`. Your implementation should have +`opts.fs.mkdir(path, opts, cb)` and `opts.fs.stat(path, cb)`. + +You can also override just one or the other of `mkdir` and `stat` +by passing in `opts.stat` or `opts.mkdir`, or providing an `fs` +option that only overrides one of these. + +## `mkdirp.sync(dir: string, opts: MkdirpOptions) => string|undefined` + +Synchronously create a new directory and any necessary +subdirectories at `dir` with octal permission string `opts.mode`. +If `opts` is a string or number, it will be treated as the +`opts.mode`. + +If `opts.mode` isn't specified, it defaults to `0o777`. + +Returns the first directory that had to be created, or undefined +if everything already exists. + +You can optionally pass in an alternate `fs` implementation by +passing in `opts.fs`. Your implementation should have +`opts.fs.mkdirSync(path, mode)` and `opts.fs.statSync(path)`. + +You can also override just one or the other of `mkdirSync` and +`statSync` by passing in `opts.statSync` or `opts.mkdirSync`, or +providing an `fs` option that only overrides one of these. + +## `mkdirp.manual`, `mkdirp.manualSync` + +Use the manual implementation (not the native one). This is the +default when the native implementation is not available or the +stat/mkdir implementation is overridden. + +## `mkdirp.native`, `mkdirp.nativeSync` + +Use the native implementation (not the manual one). This is the +default when the native implementation is available and +stat/mkdir are not overridden. + +# implementation + +On Node.js v10.12.0 and above, use the native `fs.mkdir(p, +{recursive:true})` option, unless `fs.mkdir`/`fs.mkdirSync` has +been overridden by an option. + +## native implementation + +- If the path is a root directory, then pass it to the underlying + implementation and return the result/error. (In this case, + it'll either succeed or fail, but we aren't actually creating + any dirs.) +- Walk up the path statting each directory, to find the first + path that will be created, `made`. +- Call `fs.mkdir(path, { recursive: true })` (or `fs.mkdirSync`) +- If error, raise it to the caller. +- Return `made`. + +## manual implementation + +- Call underlying `fs.mkdir` implementation, with `recursive: +false` +- If error: + - If path is a root directory, raise to the caller and do not + handle it + - If ENOENT, mkdirp parent dir, store result as `made` + - stat(path) + - If error, raise original `mkdir` error + - If directory, return `made` + - Else, raise original `mkdir` error +- else + - return `undefined` if a root dir, or `made` if set, or `path` + +## windows vs unix caveat + +On Windows file systems, attempts to create a root directory (ie, +a drive letter or root UNC path) will fail. If the root +directory exists, then it will fail with `EPERM`. If the root +directory does not exist, then it will fail with `ENOENT`. + +On posix file systems, attempts to create a root directory (in +recursive mode) will succeed silently, as it is treated like just +another directory that already exists. (In non-recursive mode, +of course, it fails with `EEXIST`.) + +In order to preserve this system-specific behavior (and because +it's not as if we can create the parent of a root directory +anyway), attempts to create a root directory are passed directly +to the `fs` implementation, and any errors encountered are not +handled. + +## native error caveat + +The native implementation (as of at least Node.js v13.4.0) does +not provide appropriate errors in some cases (see +[nodejs/node#31481](https://github.com/nodejs/node/issues/31481) +and +[nodejs/node#28015](https://github.com/nodejs/node/issues/28015)). + +In order to work around this issue, the native implementation +will fall back to the manual implementation if an `ENOENT` error +is encountered. + +# choosing a recursive mkdir implementation + +There are a few to choose from! Use the one that suits your +needs best :D + +## use `fs.mkdir(path, {recursive: true}, cb)` if: + +- You wish to optimize performance even at the expense of other + factors. +- You don't need to know the first dir created. +- You are ok with getting `ENOENT` as the error when some other + problem is the actual cause. +- You can limit your platforms to Node.js v10.12 and above. +- You're ok with using callbacks instead of promises. +- You don't need/want a CLI. +- You don't need to override the `fs` methods in use. + +## use this module (mkdirp 1.x or 2.x) if: + +- You need to know the first directory that was created. +- You wish to use the native implementation if available, but + fall back when it's not. +- You prefer promise-returning APIs to callback-taking APIs. +- You want more useful error messages than the native recursive + mkdir provides (at least as of Node.js v13.4), and are ok with + re-trying on `ENOENT` to achieve this. +- You need (or at least, are ok with) a CLI. +- You need to override the `fs` methods in use. + +## use [`make-dir`](http://npm.im/make-dir) if: + +- You do not need to know the first dir created (and wish to save + a few `stat` calls when using the native implementation for + this reason). +- You wish to use the native implementation if available, but + fall back when it's not. +- You prefer promise-returning APIs to callback-taking APIs. +- You are ok with occasionally getting `ENOENT` errors for + failures that are actually related to something other than a + missing file system entry. +- You don't need/want a CLI. +- You need to override the `fs` methods in use. + +## use mkdirp 0.x if: + +- You need to know the first directory that was created. +- You need (or at least, are ok with) a CLI. +- You need to override the `fs` methods in use. +- You're ok with using callbacks instead of promises. +- You are not running on Windows, where the root-level ENOENT + errors can lead to infinite regress. +- You think vinyl just sounds warmer and richer for some weird + reason. +- You are supporting truly ancient Node.js versions, before even + the advent of a `Promise` language primitive. (Please don't. + You deserve better.) + +# cli + +This package also ships with a `mkdirp` command. + +``` +$ mkdirp -h + +usage: mkdirp [DIR1,DIR2..] {OPTIONS} + + Create each supplied directory including any necessary parent directories + that don't yet exist. + + If the directory already exists, do nothing. + +OPTIONS are: + + -m If a directory needs to be created, set the mode as an octal + --mode= permission string. + + -v --version Print the mkdirp version number + + -h --help Print this helpful banner + + -p --print Print the first directories created for each path provided + + --manual Use manual implementation, even if native is available +``` + +# install + +With [npm](http://npmjs.org) do: + +``` +npm install mkdirp +``` + +to get the library locally, or + +``` +npm install -g mkdirp +``` + +to get the command everywhere, or + +``` +npx mkdirp ... +``` + +to run the command without installing it globally. + +# platform support + +This module works on node v8, but only v10 and above are officially +supported, as Node v8 reached its LTS end of life 2020-01-01, which is in +the past, as of this writing. + +# license + +MIT diff --git a/deps/npm/node_modules/node-gyp/node_modules/nopt/README.md b/deps/npm/node_modules/node-gyp/node_modules/nopt/README.md deleted file mode 100644 index a99531c04655fe..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/nopt/README.md +++ /dev/null @@ -1,213 +0,0 @@ -If you want to write an option parser, and have it be good, there are -two ways to do it. The Right Way, and the Wrong Way. - -The Wrong Way is to sit down and write an option parser. We've all done -that. - -The Right Way is to write some complex configurable program with so many -options that you hit the limit of your frustration just trying to -manage them all, and defer it with duct-tape solutions until you see -exactly to the core of the problem, and finally snap and write an -awesome option parser. - -If you want to write an option parser, don't write an option parser. -Write a package manager, or a source control system, or a service -restarter, or an operating system. You probably won't end up with a -good one of those, but if you don't give up, and you are relentless and -diligent enough in your procrastination, you may just end up with a very -nice option parser. - -## USAGE - -```javascript -// my-program.js -var nopt = require("nopt") - , Stream = require("stream").Stream - , path = require("path") - , knownOpts = { "foo" : [String, null] - , "bar" : [Stream, Number] - , "baz" : path - , "bloo" : [ "big", "medium", "small" ] - , "flag" : Boolean - , "pick" : Boolean - , "many1" : [String, Array] - , "many2" : [path, Array] - } - , shortHands = { "foofoo" : ["--foo", "Mr. Foo"] - , "b7" : ["--bar", "7"] - , "m" : ["--bloo", "medium"] - , "p" : ["--pick"] - , "f" : ["--flag"] - } - // everything is optional. - // knownOpts and shorthands default to {} - // arg list defaults to process.argv - // slice defaults to 2 - , parsed = nopt(knownOpts, shortHands, process.argv, 2) -console.log(parsed) -``` - -This would give you support for any of the following: - -```console -$ node my-program.js --foo "blerp" --no-flag -{ "foo" : "blerp", "flag" : false } - -$ node my-program.js ---bar 7 --foo "Mr. Hand" --flag -{ bar: 7, foo: "Mr. Hand", flag: true } - -$ node my-program.js --foo "blerp" -f -----p -{ foo: "blerp", flag: true, pick: true } - -$ node my-program.js -fp --foofoo -{ foo: "Mr. Foo", flag: true, pick: true } - -$ node my-program.js --foofoo -- -fp # -- stops the flag parsing. -{ foo: "Mr. Foo", argv: { remain: ["-fp"] } } - -$ node my-program.js --blatzk -fp # unknown opts are ok. -{ blatzk: true, flag: true, pick: true } - -$ node my-program.js --blatzk=1000 -fp # but you need to use = if they have a value -{ blatzk: 1000, flag: true, pick: true } - -$ node my-program.js --no-blatzk -fp # unless they start with "no-" -{ blatzk: false, flag: true, pick: true } - -$ node my-program.js --baz b/a/z # known paths are resolved. -{ baz: "/Users/isaacs/b/a/z" } - -# if Array is one of the types, then it can take many -# values, and will always be an array. The other types provided -# specify what types are allowed in the list. - -$ node my-program.js --many1 5 --many1 null --many1 foo -{ many1: ["5", "null", "foo"] } - -$ node my-program.js --many2 foo --many2 bar -{ many2: ["/path/to/foo", "path/to/bar"] } -``` - -Read the tests at the bottom of `lib/nopt.js` for more examples of -what this puppy can do. - -## Types - -The following types are supported, and defined on `nopt.typeDefs` - -* String: A normal string. No parsing is done. -* path: A file system path. Gets resolved against cwd if not absolute. -* url: A url. If it doesn't parse, it isn't accepted. -* Number: Must be numeric. -* Date: Must parse as a date. If it does, and `Date` is one of the options, - then it will return a Date object, not a string. -* Boolean: Must be either `true` or `false`. If an option is a boolean, - then it does not need a value, and its presence will imply `true` as - the value. To negate boolean flags, do `--no-whatever` or `--whatever - false` -* NaN: Means that the option is strictly not allowed. Any value will - fail. -* Stream: An object matching the "Stream" class in node. Valuable - for use when validating programmatically. (npm uses this to let you - supply any WriteStream on the `outfd` and `logfd` config options.) -* Array: If `Array` is specified as one of the types, then the value - will be parsed as a list of options. This means that multiple values - can be specified, and that the value will always be an array. - -If a type is an array of values not on this list, then those are -considered valid values. For instance, in the example above, the -`--bloo` option can only be one of `"big"`, `"medium"`, or `"small"`, -and any other value will be rejected. - -When parsing unknown fields, `"true"`, `"false"`, and `"null"` will be -interpreted as their JavaScript equivalents. - -You can also mix types and values, or multiple types, in a list. For -instance `{ blah: [Number, null] }` would allow a value to be set to -either a Number or null. When types are ordered, this implies a -preference, and the first type that can be used to properly interpret -the value will be used. - -To define a new type, add it to `nopt.typeDefs`. Each item in that -hash is an object with a `type` member and a `validate` method. The -`type` member is an object that matches what goes in the type list. The -`validate` method is a function that gets called with `validate(data, -key, val)`. Validate methods should assign `data[key]` to the valid -value of `val` if it can be handled properly, or return boolean -`false` if it cannot. - -You can also call `nopt.clean(data, types, typeDefs)` to clean up a -config object and remove its invalid properties. - -## Error Handling - -By default, nopt outputs a warning to standard error when invalid values for -known options are found. You can change this behavior by assigning a method -to `nopt.invalidHandler`. This method will be called with -the offending `nopt.invalidHandler(key, val, types)`. - -If no `nopt.invalidHandler` is assigned, then it will console.error -its whining. If it is assigned to boolean `false` then the warning is -suppressed. - -## Abbreviations - -Yes, they are supported. If you define options like this: - -```javascript -{ "foolhardyelephants" : Boolean -, "pileofmonkeys" : Boolean } -``` - -Then this will work: - -```bash -node program.js --foolhar --pil -node program.js --no-f --pileofmon -# etc. -``` - -## Shorthands - -Shorthands are a hash of shorter option names to a snippet of args that -they expand to. - -If multiple one-character shorthands are all combined, and the -combination does not unambiguously match any other option or shorthand, -then they will be broken up into their constituent parts. For example: - -```json -{ "s" : ["--loglevel", "silent"] -, "g" : "--global" -, "f" : "--force" -, "p" : "--parseable" -, "l" : "--long" -} -``` - -```bash -npm ls -sgflp -# just like doing this: -npm ls --loglevel silent --global --force --long --parseable -``` - -## The Rest of the args - -The config object returned by nopt is given a special member called -`argv`, which is an object with the following fields: - -* `remain`: The remaining args after all the parsing has occurred. -* `original`: The args as they originally appeared. -* `cooked`: The args after flags and shorthands are expanded. - -## Slicing - -Node programs are called with more or less the exact argv as it appears -in C land, after the v8 and node-specific options have been plucked off. -As such, `argv[0]` is always `node` and `argv[1]` is always the -JavaScript program being run. - -That's usually not very useful to you. So they're sliced off by -default. If you want them, then you can pass in `0` as the last -argument, or any other number that you'd like to slice off the start of -the list. diff --git a/deps/npm/node_modules/node-gyp/node_modules/nopt/bin/nopt.js b/deps/npm/node_modules/node-gyp/node_modules/nopt/bin/nopt.js deleted file mode 100755 index 6ed2082064b5ea..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/nopt/bin/nopt.js +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env node -const nopt = require('../lib/nopt') -const path = require('path') -console.log('parsed', nopt({ - num: Number, - bool: Boolean, - help: Boolean, - list: Array, - 'num-list': [Number, Array], - 'str-list': [String, Array], - 'bool-list': [Boolean, Array], - str: String, - clear: Boolean, - config: Boolean, - length: Number, - file: path, -}, { - s: ['--str', 'astring'], - b: ['--bool'], - nb: ['--no-bool'], - tft: ['--bool-list', '--no-bool-list', '--bool-list', 'true'], - '?': ['--help'], - h: ['--help'], - H: ['--help'], - n: ['--num', '125'], - c: ['--config'], - l: ['--length'], - f: ['--file'], -}, process.argv, 2)) diff --git a/deps/npm/node_modules/node-gyp/node_modules/nopt/lib/debug.js b/deps/npm/node_modules/node-gyp/node_modules/nopt/lib/debug.js deleted file mode 100644 index 544ab382ca85c0..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/nopt/lib/debug.js +++ /dev/null @@ -1,5 +0,0 @@ -/* istanbul ignore next */ -module.exports = process.env.DEBUG_NOPT || process.env.NOPT_DEBUG - // eslint-disable-next-line no-console - ? (...a) => console.error(...a) - : () => {} diff --git a/deps/npm/node_modules/node-gyp/node_modules/nopt/lib/nopt-lib.js b/deps/npm/node_modules/node-gyp/node_modules/nopt/lib/nopt-lib.js deleted file mode 100644 index d3d1de0255ba9b..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/nopt/lib/nopt-lib.js +++ /dev/null @@ -1,479 +0,0 @@ -const abbrev = require('abbrev') -const debug = require('./debug') -const defaultTypeDefs = require('./type-defs') - -const hasOwn = (o, k) => Object.prototype.hasOwnProperty.call(o, k) - -const getType = (k, { types, dynamicTypes }) => { - let hasType = hasOwn(types, k) - let type = types[k] - if (!hasType && typeof dynamicTypes === 'function') { - const matchedType = dynamicTypes(k) - if (matchedType !== undefined) { - type = matchedType - hasType = true - } - } - return [hasType, type] -} - -const isTypeDef = (type, def) => def && type === def -const hasTypeDef = (type, def) => def && type.indexOf(def) !== -1 -const doesNotHaveTypeDef = (type, def) => def && !hasTypeDef(type, def) - -function nopt (args, { - types, - shorthands, - typeDefs, - invalidHandler, - typeDefault, - dynamicTypes, -} = {}) { - debug(types, shorthands, args, typeDefs) - - const data = {} - const argv = { - remain: [], - cooked: args, - original: args.slice(0), - } - - parse(args, data, argv.remain, { typeDefs, types, dynamicTypes, shorthands }) - - // now data is full - clean(data, { types, dynamicTypes, typeDefs, invalidHandler, typeDefault }) - data.argv = argv - - Object.defineProperty(data.argv, 'toString', { - value: function () { - return this.original.map(JSON.stringify).join(' ') - }, - enumerable: false, - }) - - return data -} - -function clean (data, { - types = {}, - typeDefs = {}, - dynamicTypes, - invalidHandler, - typeDefault, -} = {}) { - const StringType = typeDefs.String?.type - const NumberType = typeDefs.Number?.type - const ArrayType = typeDefs.Array?.type - const BooleanType = typeDefs.Boolean?.type - const DateType = typeDefs.Date?.type - - const hasTypeDefault = typeof typeDefault !== 'undefined' - if (!hasTypeDefault) { - typeDefault = [false, true, null] - if (StringType) { - typeDefault.push(StringType) - } - if (ArrayType) { - typeDefault.push(ArrayType) - } - } - - const remove = {} - - Object.keys(data).forEach((k) => { - if (k === 'argv') { - return - } - let val = data[k] - debug('val=%j', val) - const isArray = Array.isArray(val) - let [hasType, rawType] = getType(k, { types, dynamicTypes }) - let type = rawType - if (!isArray) { - val = [val] - } - if (!type) { - type = typeDefault - } - if (isTypeDef(type, ArrayType)) { - type = typeDefault.concat(ArrayType) - } - if (!Array.isArray(type)) { - type = [type] - } - - debug('val=%j', val) - debug('types=', type) - val = val.map((v) => { - // if it's an unknown value, then parse false/true/null/numbers/dates - if (typeof v === 'string') { - debug('string %j', v) - v = v.trim() - if ((v === 'null' && ~type.indexOf(null)) - || (v === 'true' && - (~type.indexOf(true) || hasTypeDef(type, BooleanType))) - || (v === 'false' && - (~type.indexOf(false) || hasTypeDef(type, BooleanType)))) { - v = JSON.parse(v) - debug('jsonable %j', v) - } else if (hasTypeDef(type, NumberType) && !isNaN(v)) { - debug('convert to number', v) - v = +v - } else if (hasTypeDef(type, DateType) && !isNaN(Date.parse(v))) { - debug('convert to date', v) - v = new Date(v) - } - } - - if (!hasType) { - if (!hasTypeDefault) { - return v - } - // if the default type has been passed in then we want to validate the - // unknown data key instead of bailing out earlier. we also set the raw - // type which is passed to the invalid handler so that it can be - // determined if during validation if it is unknown vs invalid - rawType = typeDefault - } - - // allow `--no-blah` to set 'blah' to null if null is allowed - if (v === false && ~type.indexOf(null) && - !(~type.indexOf(false) || hasTypeDef(type, BooleanType))) { - v = null - } - - const d = {} - d[k] = v - debug('prevalidated val', d, v, rawType) - if (!validate(d, k, v, rawType, { typeDefs })) { - if (invalidHandler) { - invalidHandler(k, v, rawType, data) - } else if (invalidHandler !== false) { - debug('invalid: ' + k + '=' + v, rawType) - } - return remove - } - debug('validated v', d, v, rawType) - return d[k] - }).filter((v) => v !== remove) - - // if we allow Array specifically, then an empty array is how we - // express 'no value here', not null. Allow it. - if (!val.length && doesNotHaveTypeDef(type, ArrayType)) { - debug('VAL HAS NO LENGTH, DELETE IT', val, k, type.indexOf(ArrayType)) - delete data[k] - } else if (isArray) { - debug(isArray, data[k], val) - data[k] = val - } else { - data[k] = val[0] - } - - debug('k=%s val=%j', k, val, data[k]) - }) -} - -function validate (data, k, val, type, { typeDefs } = {}) { - const ArrayType = typeDefs?.Array?.type - // arrays are lists of types. - if (Array.isArray(type)) { - for (let i = 0, l = type.length; i < l; i++) { - if (isTypeDef(type[i], ArrayType)) { - continue - } - if (validate(data, k, val, type[i], { typeDefs })) { - return true - } - } - delete data[k] - return false - } - - // an array of anything? - if (isTypeDef(type, ArrayType)) { - return true - } - - // Original comment: - // NaN is poisonous. Means that something is not allowed. - // New comment: Changing this to an isNaN check breaks a lot of tests. - // Something is being assumed here that is not actually what happens in - // practice. Fixing it is outside the scope of getting linting to pass in - // this repo. Leaving as-is for now. - /* eslint-disable-next-line no-self-compare */ - if (type !== type) { - debug('Poison NaN', k, val, type) - delete data[k] - return false - } - - // explicit list of values - if (val === type) { - debug('Explicitly allowed %j', val) - data[k] = val - return true - } - - // now go through the list of typeDefs, validate against each one. - let ok = false - const types = Object.keys(typeDefs) - for (let i = 0, l = types.length; i < l; i++) { - debug('test type %j %j %j', k, val, types[i]) - const t = typeDefs[types[i]] - if (t && ( - (type && type.name && t.type && t.type.name) ? - (type.name === t.type.name) : - (type === t.type) - )) { - const d = {} - ok = t.validate(d, k, val) !== false - val = d[k] - if (ok) { - data[k] = val - break - } - } - } - debug('OK? %j (%j %j %j)', ok, k, val, types[types.length - 1]) - - if (!ok) { - delete data[k] - } - return ok -} - -function parse (args, data, remain, { - types = {}, - typeDefs = {}, - shorthands = {}, - dynamicTypes, -} = {}) { - const StringType = typeDefs.String?.type - const NumberType = typeDefs.Number?.type - const ArrayType = typeDefs.Array?.type - const BooleanType = typeDefs.Boolean?.type - - debug('parse', args, data, remain) - - const abbrevs = abbrev(Object.keys(types)) - debug('abbrevs=%j', abbrevs) - const shortAbbr = abbrev(Object.keys(shorthands)) - - for (let i = 0; i < args.length; i++) { - let arg = args[i] - debug('arg', arg) - - if (arg.match(/^-{2,}$/)) { - // done with keys. - // the rest are args. - remain.push.apply(remain, args.slice(i + 1)) - args[i] = '--' - break - } - let hadEq = false - if (arg.charAt(0) === '-' && arg.length > 1) { - const at = arg.indexOf('=') - if (at > -1) { - hadEq = true - const v = arg.slice(at + 1) - arg = arg.slice(0, at) - args.splice(i, 1, arg, v) - } - - // see if it's a shorthand - // if so, splice and back up to re-parse it. - const shRes = resolveShort(arg, shortAbbr, abbrevs, { shorthands }) - debug('arg=%j shRes=%j', arg, shRes) - if (shRes) { - args.splice.apply(args, [i, 1].concat(shRes)) - if (arg !== shRes[0]) { - i-- - continue - } - } - arg = arg.replace(/^-+/, '') - let no = null - while (arg.toLowerCase().indexOf('no-') === 0) { - no = !no - arg = arg.slice(3) - } - - if (abbrevs[arg]) { - arg = abbrevs[arg] - } - - let [hasType, argType] = getType(arg, { types, dynamicTypes }) - let isTypeArray = Array.isArray(argType) - if (isTypeArray && argType.length === 1) { - isTypeArray = false - argType = argType[0] - } - - let isArray = isTypeDef(argType, ArrayType) || - isTypeArray && hasTypeDef(argType, ArrayType) - - // allow unknown things to be arrays if specified multiple times. - if (!hasType && hasOwn(data, arg)) { - if (!Array.isArray(data[arg])) { - data[arg] = [data[arg]] - } - isArray = true - } - - let val - let la = args[i + 1] - - const isBool = typeof no === 'boolean' || - isTypeDef(argType, BooleanType) || - isTypeArray && hasTypeDef(argType, BooleanType) || - (typeof argType === 'undefined' && !hadEq) || - (la === 'false' && - (argType === null || - isTypeArray && ~argType.indexOf(null))) - - if (isBool) { - // just set and move along - val = !no - // however, also support --bool true or --bool false - if (la === 'true' || la === 'false') { - val = JSON.parse(la) - la = null - if (no) { - val = !val - } - i++ - } - - // also support "foo":[Boolean, "bar"] and "--foo bar" - if (isTypeArray && la) { - if (~argType.indexOf(la)) { - // an explicit type - val = la - i++ - } else if (la === 'null' && ~argType.indexOf(null)) { - // null allowed - val = null - i++ - } else if (!la.match(/^-{2,}[^-]/) && - !isNaN(la) && - hasTypeDef(argType, NumberType)) { - // number - val = +la - i++ - } else if (!la.match(/^-[^-]/) && hasTypeDef(argType, StringType)) { - // string - val = la - i++ - } - } - - if (isArray) { - (data[arg] = data[arg] || []).push(val) - } else { - data[arg] = val - } - - continue - } - - if (isTypeDef(argType, StringType)) { - if (la === undefined) { - la = '' - } else if (la.match(/^-{1,2}[^-]+/)) { - la = '' - i-- - } - } - - if (la && la.match(/^-{2,}$/)) { - la = undefined - i-- - } - - val = la === undefined ? true : la - if (isArray) { - (data[arg] = data[arg] || []).push(val) - } else { - data[arg] = val - } - - i++ - continue - } - remain.push(arg) - } -} - -const SINGLES = Symbol('singles') -const singleCharacters = (arg, shorthands) => { - let singles = shorthands[SINGLES] - if (!singles) { - singles = Object.keys(shorthands).filter((s) => s.length === 1).reduce((l, r) => { - l[r] = true - return l - }, {}) - shorthands[SINGLES] = singles - debug('shorthand singles', singles) - } - const chrs = arg.split('').filter((c) => singles[c]) - return chrs.join('') === arg ? chrs : null -} - -function resolveShort (arg, ...rest) { - const { types = {}, shorthands = {} } = rest.length ? rest.pop() : {} - const shortAbbr = rest[0] ?? abbrev(Object.keys(shorthands)) - const abbrevs = rest[1] ?? abbrev(Object.keys(types)) - - // handle single-char shorthands glommed together, like - // npm ls -glp, but only if there is one dash, and only if - // all of the chars are single-char shorthands, and it's - // not a match to some other abbrev. - arg = arg.replace(/^-+/, '') - - // if it's an exact known option, then don't go any further - if (abbrevs[arg] === arg) { - return null - } - - // if it's an exact known shortopt, same deal - if (shorthands[arg]) { - // make it an array, if it's a list of words - if (shorthands[arg] && !Array.isArray(shorthands[arg])) { - shorthands[arg] = shorthands[arg].split(/\s+/) - } - - return shorthands[arg] - } - - // first check to see if this arg is a set of single-char shorthands - const chrs = singleCharacters(arg, shorthands) - if (chrs) { - return chrs.map((c) => shorthands[c]).reduce((l, r) => l.concat(r), []) - } - - // if it's an arg abbrev, and not a literal shorthand, then prefer the arg - if (abbrevs[arg] && !shorthands[arg]) { - return null - } - - // if it's an abbr for a shorthand, then use that - if (shortAbbr[arg]) { - arg = shortAbbr[arg] - } - - // make it an array, if it's a list of words - if (shorthands[arg] && !Array.isArray(shorthands[arg])) { - shorthands[arg] = shorthands[arg].split(/\s+/) - } - - return shorthands[arg] -} - -module.exports = { - nopt, - clean, - parse, - validate, - resolveShort, - typeDefs: defaultTypeDefs, -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/nopt/lib/nopt.js b/deps/npm/node_modules/node-gyp/node_modules/nopt/lib/nopt.js deleted file mode 100644 index 37f01a08783f87..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/nopt/lib/nopt.js +++ /dev/null @@ -1,30 +0,0 @@ -const lib = require('./nopt-lib') -const defaultTypeDefs = require('./type-defs') - -// This is the version of nopt's API that requires setting typeDefs and invalidHandler -// on the required `nopt` object since it is a singleton. To not do a breaking change -// an API that requires all options be passed in is located in `nopt-lib.js` and -// exported here as lib. -// TODO(breaking): make API only work in non-singleton mode - -module.exports = exports = nopt -exports.clean = clean -exports.typeDefs = defaultTypeDefs -exports.lib = lib - -function nopt (types, shorthands, args = process.argv, slice = 2) { - return lib.nopt(args.slice(slice), { - types: types || {}, - shorthands: shorthands || {}, - typeDefs: exports.typeDefs, - invalidHandler: exports.invalidHandler, - }) -} - -function clean (data, types, typeDefs = exports.typeDefs) { - return lib.clean(data, { - types: types || {}, - typeDefs, - invalidHandler: exports.invalidHandler, - }) -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/nopt/lib/type-defs.js b/deps/npm/node_modules/node-gyp/node_modules/nopt/lib/type-defs.js deleted file mode 100644 index 608352ee248cc4..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/nopt/lib/type-defs.js +++ /dev/null @@ -1,91 +0,0 @@ -const url = require('url') -const path = require('path') -const Stream = require('stream').Stream -const os = require('os') -const debug = require('./debug') - -function validateString (data, k, val) { - data[k] = String(val) -} - -function validatePath (data, k, val) { - if (val === true) { - return false - } - if (val === null) { - return true - } - - val = String(val) - - const isWin = process.platform === 'win32' - const homePattern = isWin ? /^~(\/|\\)/ : /^~\// - const home = os.homedir() - - if (home && val.match(homePattern)) { - data[k] = path.resolve(home, val.slice(2)) - } else { - data[k] = path.resolve(val) - } - return true -} - -function validateNumber (data, k, val) { - debug('validate Number %j %j %j', k, val, isNaN(val)) - if (isNaN(val)) { - return false - } - data[k] = +val -} - -function validateDate (data, k, val) { - const s = Date.parse(val) - debug('validate Date %j %j %j', k, val, s) - if (isNaN(s)) { - return false - } - data[k] = new Date(val) -} - -function validateBoolean (data, k, val) { - if (typeof val === 'string') { - if (!isNaN(val)) { - val = !!(+val) - } else if (val === 'null' || val === 'false') { - val = false - } else { - val = true - } - } else { - val = !!val - } - data[k] = val -} - -function validateUrl (data, k, val) { - // Changing this would be a breaking change in the npm cli - /* eslint-disable-next-line node/no-deprecated-api */ - val = url.parse(String(val)) - if (!val.host) { - return false - } - data[k] = val.href -} - -function validateStream (data, k, val) { - if (!(val instanceof Stream)) { - return false - } - data[k] = val -} - -module.exports = { - String: { type: String, validate: validateString }, - Boolean: { type: Boolean, validate: validateBoolean }, - url: { type: url, validate: validateUrl }, - Number: { type: Number, validate: validateNumber }, - path: { type: path, validate: validatePath }, - Stream: { type: Stream, validate: validateStream }, - Date: { type: Date, validate: validateDate }, - Array: { type: Array }, -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/nopt/package.json b/deps/npm/node_modules/node-gyp/node_modules/nopt/package.json deleted file mode 100644 index 37b770ad487711..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/nopt/package.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "name": "nopt", - "version": "7.2.1", - "description": "Option parsing for Node, supporting types, shorthands, etc. Used by npm.", - "author": "GitHub Inc.", - "main": "lib/nopt.js", - "scripts": { - "test": "tap", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "postlint": "template-oss-check", - "template-oss-apply": "template-oss-apply --force", - "lintfix": "npm run lint -- --fix", - "snap": "tap", - "posttest": "npm run lint" - }, - "repository": { - "type": "git", - "url": "git+https://github.com/npm/nopt.git" - }, - "bin": { - "nopt": "bin/nopt.js" - }, - "license": "ISC", - "dependencies": { - "abbrev": "^2.0.0" - }, - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.22.0", - "tap": "^16.3.0" - }, - "tap": { - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - }, - "files": [ - "bin/", - "lib/" - ], - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "windowsCI": false, - "version": "4.22.0", - "publish": true - } -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/proc-log/LICENSE b/deps/npm/node_modules/node-gyp/node_modules/proc-log/LICENSE deleted file mode 100644 index 83837797202b70..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/proc-log/LICENSE +++ /dev/null @@ -1,15 +0,0 @@ -The ISC License - -Copyright (c) GitHub, Inc. - -Permission to use, copy, modify, and/or distribute this software for any -purpose with or without fee is hereby granted, provided that the above -copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR -IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/node-gyp/node_modules/proc-log/lib/index.js b/deps/npm/node_modules/node-gyp/node_modules/proc-log/lib/index.js deleted file mode 100644 index 86d90861078dab..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/proc-log/lib/index.js +++ /dev/null @@ -1,153 +0,0 @@ -const META = Symbol('proc-log.meta') -module.exports = { - META: META, - output: { - LEVELS: [ - 'standard', - 'error', - 'buffer', - 'flush', - ], - KEYS: { - standard: 'standard', - error: 'error', - buffer: 'buffer', - flush: 'flush', - }, - standard: function (...args) { - return process.emit('output', 'standard', ...args) - }, - error: function (...args) { - return process.emit('output', 'error', ...args) - }, - buffer: function (...args) { - return process.emit('output', 'buffer', ...args) - }, - flush: function (...args) { - return process.emit('output', 'flush', ...args) - }, - }, - log: { - LEVELS: [ - 'notice', - 'error', - 'warn', - 'info', - 'verbose', - 'http', - 'silly', - 'timing', - 'pause', - 'resume', - ], - KEYS: { - notice: 'notice', - error: 'error', - warn: 'warn', - info: 'info', - verbose: 'verbose', - http: 'http', - silly: 'silly', - timing: 'timing', - pause: 'pause', - resume: 'resume', - }, - error: function (...args) { - return process.emit('log', 'error', ...args) - }, - notice: function (...args) { - return process.emit('log', 'notice', ...args) - }, - warn: function (...args) { - return process.emit('log', 'warn', ...args) - }, - info: function (...args) { - return process.emit('log', 'info', ...args) - }, - verbose: function (...args) { - return process.emit('log', 'verbose', ...args) - }, - http: function (...args) { - return process.emit('log', 'http', ...args) - }, - silly: function (...args) { - return process.emit('log', 'silly', ...args) - }, - timing: function (...args) { - return process.emit('log', 'timing', ...args) - }, - pause: function () { - return process.emit('log', 'pause') - }, - resume: function () { - return process.emit('log', 'resume') - }, - }, - time: { - LEVELS: [ - 'start', - 'end', - ], - KEYS: { - start: 'start', - end: 'end', - }, - start: function (name, fn) { - process.emit('time', 'start', name) - function end () { - return process.emit('time', 'end', name) - } - if (typeof fn === 'function') { - const res = fn() - if (res && res.finally) { - return res.finally(end) - } - end() - return res - } - return end - }, - end: function (name) { - return process.emit('time', 'end', name) - }, - }, - input: { - LEVELS: [ - 'start', - 'end', - 'read', - ], - KEYS: { - start: 'start', - end: 'end', - read: 'read', - }, - start: function (fn) { - process.emit('input', 'start') - function end () { - return process.emit('input', 'end') - } - if (typeof fn === 'function') { - const res = fn() - if (res && res.finally) { - return res.finally(end) - } - end() - return res - } - return end - }, - end: function () { - return process.emit('input', 'end') - }, - read: function (...args) { - let resolve, reject - const promise = new Promise((_resolve, _reject) => { - resolve = _resolve - reject = _reject - }) - process.emit('input', 'read', resolve, reject, ...args) - return promise - }, - }, -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/proc-log/package.json b/deps/npm/node_modules/node-gyp/node_modules/proc-log/package.json deleted file mode 100644 index 4ab89102ecc9b5..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/proc-log/package.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "name": "proc-log", - "version": "4.2.0", - "files": [ - "bin/", - "lib/" - ], - "main": "lib/index.js", - "description": "just emit 'log' events on the process object", - "repository": { - "type": "git", - "url": "https://github.com/npm/proc-log.git" - }, - "author": "GitHub Inc.", - "license": "ISC", - "scripts": { - "test": "tap", - "snap": "tap", - "posttest": "npm run lint", - "postsnap": "eslint index.js test/*.js --fix", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "postlint": "template-oss-check", - "lintfix": "npm run lint -- --fix", - "template-oss-apply": "template-oss-apply --force" - }, - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.21.3", - "tap": "^16.0.1" - }, - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.21.3", - "publish": true - }, - "tap": { - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - } -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/ssri/LICENSE.md b/deps/npm/node_modules/node-gyp/node_modules/ssri/LICENSE.md deleted file mode 100644 index e335388869f50f..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/ssri/LICENSE.md +++ /dev/null @@ -1,16 +0,0 @@ -ISC License - -Copyright 2021 (c) npm, Inc. - -Permission to use, copy, modify, and/or distribute this software for -any purpose with or without fee is hereby granted, provided that the -above copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE COPYRIGHT HOLDER DISCLAIMS -ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE -COPYRIGHT HOLDER BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR -CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE -USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/node-gyp/node_modules/ssri/lib/index.js b/deps/npm/node_modules/node-gyp/node_modules/ssri/lib/index.js deleted file mode 100644 index 7d749ed480fb98..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/ssri/lib/index.js +++ /dev/null @@ -1,580 +0,0 @@ -'use strict' - -const crypto = require('crypto') -const { Minipass } = require('minipass') - -const SPEC_ALGORITHMS = ['sha512', 'sha384', 'sha256'] -const DEFAULT_ALGORITHMS = ['sha512'] - -// TODO: this should really be a hardcoded list of algorithms we support, -// rather than [a-z0-9]. -const BASE64_REGEX = /^[a-z0-9+/]+(?:=?=?)$/i -const SRI_REGEX = /^([a-z0-9]+)-([^?]+)([?\S*]*)$/ -const STRICT_SRI_REGEX = /^([a-z0-9]+)-([A-Za-z0-9+/=]{44,88})(\?[\x21-\x7E]*)?$/ -const VCHAR_REGEX = /^[\x21-\x7E]+$/ - -const getOptString = options => options?.length ? `?${options.join('?')}` : '' - -class IntegrityStream extends Minipass { - #emittedIntegrity - #emittedSize - #emittedVerified - - constructor (opts) { - super() - this.size = 0 - this.opts = opts - - // may be overridden later, but set now for class consistency - this.#getOptions() - - // options used for calculating stream. can't be changed. - if (opts?.algorithms) { - this.algorithms = [...opts.algorithms] - } else { - this.algorithms = [...DEFAULT_ALGORITHMS] - } - if (this.algorithm !== null && !this.algorithms.includes(this.algorithm)) { - this.algorithms.push(this.algorithm) - } - - this.hashes = this.algorithms.map(crypto.createHash) - } - - #getOptions () { - // For verification - this.sri = this.opts?.integrity ? parse(this.opts?.integrity, this.opts) : null - this.expectedSize = this.opts?.size - - if (!this.sri) { - this.algorithm = null - } else if (this.sri.isHash) { - this.goodSri = true - this.algorithm = this.sri.algorithm - } else { - this.goodSri = !this.sri.isEmpty() - this.algorithm = this.sri.pickAlgorithm(this.opts) - } - - this.digests = this.goodSri ? this.sri[this.algorithm] : null - this.optString = getOptString(this.opts?.options) - } - - on (ev, handler) { - if (ev === 'size' && this.#emittedSize) { - return handler(this.#emittedSize) - } - - if (ev === 'integrity' && this.#emittedIntegrity) { - return handler(this.#emittedIntegrity) - } - - if (ev === 'verified' && this.#emittedVerified) { - return handler(this.#emittedVerified) - } - - return super.on(ev, handler) - } - - emit (ev, data) { - if (ev === 'end') { - this.#onEnd() - } - return super.emit(ev, data) - } - - write (data) { - this.size += data.length - this.hashes.forEach(h => h.update(data)) - return super.write(data) - } - - #onEnd () { - if (!this.goodSri) { - this.#getOptions() - } - const newSri = parse(this.hashes.map((h, i) => { - return `${this.algorithms[i]}-${h.digest('base64')}${this.optString}` - }).join(' '), this.opts) - // Integrity verification mode - const match = this.goodSri && newSri.match(this.sri, this.opts) - if (typeof this.expectedSize === 'number' && this.size !== this.expectedSize) { - /* eslint-disable-next-line max-len */ - const err = new Error(`stream size mismatch when checking ${this.sri}.\n Wanted: ${this.expectedSize}\n Found: ${this.size}`) - err.code = 'EBADSIZE' - err.found = this.size - err.expected = this.expectedSize - err.sri = this.sri - this.emit('error', err) - } else if (this.sri && !match) { - /* eslint-disable-next-line max-len */ - const err = new Error(`${this.sri} integrity checksum failed when using ${this.algorithm}: wanted ${this.digests} but got ${newSri}. (${this.size} bytes)`) - err.code = 'EINTEGRITY' - err.found = newSri - err.expected = this.digests - err.algorithm = this.algorithm - err.sri = this.sri - this.emit('error', err) - } else { - this.#emittedSize = this.size - this.emit('size', this.size) - this.#emittedIntegrity = newSri - this.emit('integrity', newSri) - if (match) { - this.#emittedVerified = match - this.emit('verified', match) - } - } - } -} - -class Hash { - get isHash () { - return true - } - - constructor (hash, opts) { - const strict = opts?.strict - this.source = hash.trim() - - // set default values so that we make V8 happy to - // always see a familiar object template. - this.digest = '' - this.algorithm = '' - this.options = [] - - // 3.1. Integrity metadata (called "Hash" by ssri) - // https://w3c.github.io/webappsec-subresource-integrity/#integrity-metadata-description - const match = this.source.match( - strict - ? STRICT_SRI_REGEX - : SRI_REGEX - ) - if (!match) { - return - } - if (strict && !SPEC_ALGORITHMS.includes(match[1])) { - return - } - this.algorithm = match[1] - this.digest = match[2] - - const rawOpts = match[3] - if (rawOpts) { - this.options = rawOpts.slice(1).split('?') - } - } - - hexDigest () { - return this.digest && Buffer.from(this.digest, 'base64').toString('hex') - } - - toJSON () { - return this.toString() - } - - match (integrity, opts) { - const other = parse(integrity, opts) - if (!other) { - return false - } - if (other.isIntegrity) { - const algo = other.pickAlgorithm(opts, [this.algorithm]) - - if (!algo) { - return false - } - - const foundHash = other[algo].find(hash => hash.digest === this.digest) - - if (foundHash) { - return foundHash - } - - return false - } - return other.digest === this.digest ? other : false - } - - toString (opts) { - if (opts?.strict) { - // Strict mode enforces the standard as close to the foot of the - // letter as it can. - if (!( - // The spec has very restricted productions for algorithms. - // https://www.w3.org/TR/CSP2/#source-list-syntax - SPEC_ALGORITHMS.includes(this.algorithm) && - // Usually, if someone insists on using a "different" base64, we - // leave it as-is, since there's multiple standards, and the - // specified is not a URL-safe variant. - // https://www.w3.org/TR/CSP2/#base64_value - this.digest.match(BASE64_REGEX) && - // Option syntax is strictly visual chars. - // https://w3c.github.io/webappsec-subresource-integrity/#grammardef-option-expression - // https://tools.ietf.org/html/rfc5234#appendix-B.1 - this.options.every(opt => opt.match(VCHAR_REGEX)) - )) { - return '' - } - } - return `${this.algorithm}-${this.digest}${getOptString(this.options)}` - } -} - -function integrityHashToString (toString, sep, opts, hashes) { - const toStringIsNotEmpty = toString !== '' - - let shouldAddFirstSep = false - let complement = '' - - const lastIndex = hashes.length - 1 - - for (let i = 0; i < lastIndex; i++) { - const hashString = Hash.prototype.toString.call(hashes[i], opts) - - if (hashString) { - shouldAddFirstSep = true - - complement += hashString - complement += sep - } - } - - const finalHashString = Hash.prototype.toString.call(hashes[lastIndex], opts) - - if (finalHashString) { - shouldAddFirstSep = true - complement += finalHashString - } - - if (toStringIsNotEmpty && shouldAddFirstSep) { - return toString + sep + complement - } - - return toString + complement -} - -class Integrity { - get isIntegrity () { - return true - } - - toJSON () { - return this.toString() - } - - isEmpty () { - return Object.keys(this).length === 0 - } - - toString (opts) { - let sep = opts?.sep || ' ' - let toString = '' - - if (opts?.strict) { - // Entries must be separated by whitespace, according to spec. - sep = sep.replace(/\S+/g, ' ') - - for (const hash of SPEC_ALGORITHMS) { - if (this[hash]) { - toString = integrityHashToString(toString, sep, opts, this[hash]) - } - } - } else { - for (const hash of Object.keys(this)) { - toString = integrityHashToString(toString, sep, opts, this[hash]) - } - } - - return toString - } - - concat (integrity, opts) { - const other = typeof integrity === 'string' - ? integrity - : stringify(integrity, opts) - return parse(`${this.toString(opts)} ${other}`, opts) - } - - hexDigest () { - return parse(this, { single: true }).hexDigest() - } - - // add additional hashes to an integrity value, but prevent - // *changing* an existing integrity hash. - merge (integrity, opts) { - const other = parse(integrity, opts) - for (const algo in other) { - if (this[algo]) { - if (!this[algo].find(hash => - other[algo].find(otherhash => - hash.digest === otherhash.digest))) { - throw new Error('hashes do not match, cannot update integrity') - } - } else { - this[algo] = other[algo] - } - } - } - - match (integrity, opts) { - const other = parse(integrity, opts) - if (!other) { - return false - } - const algo = other.pickAlgorithm(opts, Object.keys(this)) - return ( - !!algo && - this[algo] && - other[algo] && - this[algo].find(hash => - other[algo].find(otherhash => - hash.digest === otherhash.digest - ) - ) - ) || false - } - - // Pick the highest priority algorithm present, optionally also limited to a - // set of hashes found in another integrity. When limiting it may return - // nothing. - pickAlgorithm (opts, hashes) { - const pickAlgorithm = opts?.pickAlgorithm || getPrioritizedHash - const keys = Object.keys(this).filter(k => { - if (hashes?.length) { - return hashes.includes(k) - } - return true - }) - if (keys.length) { - return keys.reduce((acc, algo) => pickAlgorithm(acc, algo) || acc) - } - // no intersection between this and hashes, - return null - } -} - -module.exports.parse = parse -function parse (sri, opts) { - if (!sri) { - return null - } - if (typeof sri === 'string') { - return _parse(sri, opts) - } else if (sri.algorithm && sri.digest) { - const fullSri = new Integrity() - fullSri[sri.algorithm] = [sri] - return _parse(stringify(fullSri, opts), opts) - } else { - return _parse(stringify(sri, opts), opts) - } -} - -function _parse (integrity, opts) { - // 3.4.3. Parse metadata - // https://w3c.github.io/webappsec-subresource-integrity/#parse-metadata - if (opts?.single) { - return new Hash(integrity, opts) - } - const hashes = integrity.trim().split(/\s+/).reduce((acc, string) => { - const hash = new Hash(string, opts) - if (hash.algorithm && hash.digest) { - const algo = hash.algorithm - if (!acc[algo]) { - acc[algo] = [] - } - acc[algo].push(hash) - } - return acc - }, new Integrity()) - return hashes.isEmpty() ? null : hashes -} - -module.exports.stringify = stringify -function stringify (obj, opts) { - if (obj.algorithm && obj.digest) { - return Hash.prototype.toString.call(obj, opts) - } else if (typeof obj === 'string') { - return stringify(parse(obj, opts), opts) - } else { - return Integrity.prototype.toString.call(obj, opts) - } -} - -module.exports.fromHex = fromHex -function fromHex (hexDigest, algorithm, opts) { - const optString = getOptString(opts?.options) - return parse( - `${algorithm}-${ - Buffer.from(hexDigest, 'hex').toString('base64') - }${optString}`, opts - ) -} - -module.exports.fromData = fromData -function fromData (data, opts) { - const algorithms = opts?.algorithms || [...DEFAULT_ALGORITHMS] - const optString = getOptString(opts?.options) - return algorithms.reduce((acc, algo) => { - const digest = crypto.createHash(algo).update(data).digest('base64') - const hash = new Hash( - `${algo}-${digest}${optString}`, - opts - ) - /* istanbul ignore else - it would be VERY strange if the string we - * just calculated with an algo did not have an algo or digest. - */ - if (hash.algorithm && hash.digest) { - const hashAlgo = hash.algorithm - if (!acc[hashAlgo]) { - acc[hashAlgo] = [] - } - acc[hashAlgo].push(hash) - } - return acc - }, new Integrity()) -} - -module.exports.fromStream = fromStream -function fromStream (stream, opts) { - const istream = integrityStream(opts) - return new Promise((resolve, reject) => { - stream.pipe(istream) - stream.on('error', reject) - istream.on('error', reject) - let sri - istream.on('integrity', s => { - sri = s - }) - istream.on('end', () => resolve(sri)) - istream.resume() - }) -} - -module.exports.checkData = checkData -function checkData (data, sri, opts) { - sri = parse(sri, opts) - if (!sri || !Object.keys(sri).length) { - if (opts?.error) { - throw Object.assign( - new Error('No valid integrity hashes to check against'), { - code: 'EINTEGRITY', - } - ) - } else { - return false - } - } - const algorithm = sri.pickAlgorithm(opts) - const digest = crypto.createHash(algorithm).update(data).digest('base64') - const newSri = parse({ algorithm, digest }) - const match = newSri.match(sri, opts) - opts = opts || {} - if (match || !(opts.error)) { - return match - } else if (typeof opts.size === 'number' && (data.length !== opts.size)) { - /* eslint-disable-next-line max-len */ - const err = new Error(`data size mismatch when checking ${sri}.\n Wanted: ${opts.size}\n Found: ${data.length}`) - err.code = 'EBADSIZE' - err.found = data.length - err.expected = opts.size - err.sri = sri - throw err - } else { - /* eslint-disable-next-line max-len */ - const err = new Error(`Integrity checksum failed when using ${algorithm}: Wanted ${sri}, but got ${newSri}. (${data.length} bytes)`) - err.code = 'EINTEGRITY' - err.found = newSri - err.expected = sri - err.algorithm = algorithm - err.sri = sri - throw err - } -} - -module.exports.checkStream = checkStream -function checkStream (stream, sri, opts) { - opts = opts || Object.create(null) - opts.integrity = sri - sri = parse(sri, opts) - if (!sri || !Object.keys(sri).length) { - return Promise.reject(Object.assign( - new Error('No valid integrity hashes to check against'), { - code: 'EINTEGRITY', - } - )) - } - const checker = integrityStream(opts) - return new Promise((resolve, reject) => { - stream.pipe(checker) - stream.on('error', reject) - checker.on('error', reject) - let verified - checker.on('verified', s => { - verified = s - }) - checker.on('end', () => resolve(verified)) - checker.resume() - }) -} - -module.exports.integrityStream = integrityStream -function integrityStream (opts = Object.create(null)) { - return new IntegrityStream(opts) -} - -module.exports.create = createIntegrity -function createIntegrity (opts) { - const algorithms = opts?.algorithms || [...DEFAULT_ALGORITHMS] - const optString = getOptString(opts?.options) - - const hashes = algorithms.map(crypto.createHash) - - return { - update: function (chunk, enc) { - hashes.forEach(h => h.update(chunk, enc)) - return this - }, - digest: function () { - const integrity = algorithms.reduce((acc, algo) => { - const digest = hashes.shift().digest('base64') - const hash = new Hash( - `${algo}-${digest}${optString}`, - opts - ) - /* istanbul ignore else - it would be VERY strange if the hash we - * just calculated with an algo did not have an algo or digest. - */ - if (hash.algorithm && hash.digest) { - const hashAlgo = hash.algorithm - if (!acc[hashAlgo]) { - acc[hashAlgo] = [] - } - acc[hashAlgo].push(hash) - } - return acc - }, new Integrity()) - - return integrity - }, - } -} - -const NODE_HASHES = crypto.getHashes() - -// This is a Best Effort™ at a reasonable priority for hash algos -const DEFAULT_PRIORITY = [ - 'md5', 'whirlpool', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512', - // TODO - it's unclear _which_ of these Node will actually use as its name - // for the algorithm, so we guesswork it based on the OpenSSL names. - 'sha3', - 'sha3-256', 'sha3-384', 'sha3-512', - 'sha3_256', 'sha3_384', 'sha3_512', -].filter(algo => NODE_HASHES.includes(algo)) - -function getPrioritizedHash (algo1, algo2) { - /* eslint-disable-next-line max-len */ - return DEFAULT_PRIORITY.indexOf(algo1.toLowerCase()) >= DEFAULT_PRIORITY.indexOf(algo2.toLowerCase()) - ? algo1 - : algo2 -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/ssri/package.json b/deps/npm/node_modules/node-gyp/node_modules/ssri/package.json deleted file mode 100644 index 28395414e4643c..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/ssri/package.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "name": "ssri", - "version": "10.0.6", - "description": "Standard Subresource Integrity library -- parses, serializes, generates, and verifies integrity metadata according to the SRI spec.", - "main": "lib/index.js", - "files": [ - "bin/", - "lib/" - ], - "scripts": { - "prerelease": "npm t", - "postrelease": "npm publish", - "posttest": "npm run lint", - "test": "tap", - "coverage": "tap", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "postlint": "template-oss-check", - "template-oss-apply": "template-oss-apply --force", - "lintfix": "npm run lint -- --fix", - "snap": "tap" - }, - "tap": { - "check-coverage": true, - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - }, - "repository": { - "type": "git", - "url": "git+https://github.com/npm/ssri.git" - }, - "keywords": [ - "w3c", - "web", - "security", - "integrity", - "checksum", - "hashing", - "subresource integrity", - "sri", - "sri hash", - "sri string", - "sri generator", - "html" - ], - "author": "GitHub Inc.", - "license": "ISC", - "dependencies": { - "minipass": "^7.0.3" - }, - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.22.0", - "tap": "^16.0.1" - }, - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.22.0", - "publish": "true" - } -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/nopt/LICENSE b/deps/npm/node_modules/node-gyp/node_modules/tar/LICENSE similarity index 100% rename from deps/npm/node_modules/node-gyp/node_modules/nopt/LICENSE rename to deps/npm/node_modules/node-gyp/node_modules/tar/LICENSE diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/create.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/create.js new file mode 100644 index 00000000000000..3190afc48318f9 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/create.js @@ -0,0 +1,83 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.create = void 0; +const fs_minipass_1 = require("@isaacs/fs-minipass"); +const node_path_1 = __importDefault(require("node:path")); +const list_js_1 = require("./list.js"); +const make_command_js_1 = require("./make-command.js"); +const pack_js_1 = require("./pack.js"); +const createFileSync = (opt, files) => { + const p = new pack_js_1.PackSync(opt); + const stream = new fs_minipass_1.WriteStreamSync(opt.file, { + mode: opt.mode || 0o666, + }); + p.pipe(stream); + addFilesSync(p, files); +}; +const createFile = (opt, files) => { + const p = new pack_js_1.Pack(opt); + const stream = new fs_minipass_1.WriteStream(opt.file, { + mode: opt.mode || 0o666, + }); + p.pipe(stream); + const promise = new Promise((res, rej) => { + stream.on('error', rej); + stream.on('close', res); + p.on('error', rej); + }); + addFilesAsync(p, files); + return promise; +}; +const addFilesSync = (p, files) => { + files.forEach(file => { + if (file.charAt(0) === '@') { + (0, list_js_1.list)({ + file: node_path_1.default.resolve(p.cwd, file.slice(1)), + sync: true, + noResume: true, + onReadEntry: entry => p.add(entry), + }); + } + else { + p.add(file); + } + }); + p.end(); +}; +const addFilesAsync = async (p, files) => { + for (let i = 0; i < files.length; i++) { + const file = String(files[i]); + if (file.charAt(0) === '@') { + await (0, list_js_1.list)({ + file: node_path_1.default.resolve(String(p.cwd), file.slice(1)), + noResume: true, + onReadEntry: entry => { + p.add(entry); + }, + }); + } + else { + p.add(file); + } + } + p.end(); +}; +const createSync = (opt, files) => { + const p = new pack_js_1.PackSync(opt); + addFilesSync(p, files); + return p; +}; +const createAsync = (opt, files) => { + const p = new pack_js_1.Pack(opt); + addFilesAsync(p, files); + return p; +}; +exports.create = (0, make_command_js_1.makeCommand)(createFileSync, createFile, createSync, createAsync, (_opt, files) => { + if (!files?.length) { + throw new TypeError('no paths specified to add to archive'); + } +}); +//# sourceMappingURL=create.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/cwd-error.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/cwd-error.js new file mode 100644 index 00000000000000..d703a7772be3a5 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/cwd-error.js @@ -0,0 +1,18 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.CwdError = void 0; +class CwdError extends Error { + path; + code; + syscall = 'chdir'; + constructor(path, code) { + super(`${code}: Cannot cd into '${path}'`); + this.path = path; + this.code = code; + } + get name() { + return 'CwdError'; + } +} +exports.CwdError = CwdError; +//# sourceMappingURL=cwd-error.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/extract.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/extract.js new file mode 100644 index 00000000000000..f848cbcbf779e8 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/extract.js @@ -0,0 +1,78 @@ +"use strict"; +var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + var desc = Object.getOwnPropertyDescriptor(m, k); + if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { + desc = { enumerable: true, get: function() { return m[k]; } }; + } + Object.defineProperty(o, k2, desc); +}) : (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + o[k2] = m[k]; +})); +var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { + Object.defineProperty(o, "default", { enumerable: true, value: v }); +}) : function(o, v) { + o["default"] = v; +}); +var __importStar = (this && this.__importStar) || function (mod) { + if (mod && mod.__esModule) return mod; + var result = {}; + if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); + __setModuleDefault(result, mod); + return result; +}; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.extract = void 0; +// tar -x +const fsm = __importStar(require("@isaacs/fs-minipass")); +const node_fs_1 = __importDefault(require("node:fs")); +const list_js_1 = require("./list.js"); +const make_command_js_1 = require("./make-command.js"); +const unpack_js_1 = require("./unpack.js"); +const extractFileSync = (opt) => { + const u = new unpack_js_1.UnpackSync(opt); + const file = opt.file; + const stat = node_fs_1.default.statSync(file); + // This trades a zero-byte read() syscall for a stat + // However, it will usually result in less memory allocation + const readSize = opt.maxReadSize || 16 * 1024 * 1024; + const stream = new fsm.ReadStreamSync(file, { + readSize: readSize, + size: stat.size, + }); + stream.pipe(u); +}; +const extractFile = (opt, _) => { + const u = new unpack_js_1.Unpack(opt); + const readSize = opt.maxReadSize || 16 * 1024 * 1024; + const file = opt.file; + const p = new Promise((resolve, reject) => { + u.on('error', reject); + u.on('close', resolve); + // This trades a zero-byte read() syscall for a stat + // However, it will usually result in less memory allocation + node_fs_1.default.stat(file, (er, stat) => { + if (er) { + reject(er); + } + else { + const stream = new fsm.ReadStream(file, { + readSize: readSize, + size: stat.size, + }); + stream.on('error', reject); + stream.pipe(u); + } + }); + }); + return p; +}; +exports.extract = (0, make_command_js_1.makeCommand)(extractFileSync, extractFile, opt => new unpack_js_1.UnpackSync(opt), opt => new unpack_js_1.Unpack(opt), (opt, files) => { + if (files?.length) + (0, list_js_1.filesFilter)(opt, files); +}); +//# sourceMappingURL=extract.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/get-write-flag.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/get-write-flag.js new file mode 100644 index 00000000000000..94add8f6b2231c --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/get-write-flag.js @@ -0,0 +1,29 @@ +"use strict"; +// Get the appropriate flag to use for creating files +// We use fmap on Windows platforms for files less than +// 512kb. This is a fairly low limit, but avoids making +// things slower in some cases. Since most of what this +// library is used for is extracting tarballs of many +// relatively small files in npm packages and the like, +// it can be a big boost on Windows platforms. +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.getWriteFlag = void 0; +const fs_1 = __importDefault(require("fs")); +const platform = process.env.__FAKE_PLATFORM__ || process.platform; +const isWindows = platform === 'win32'; +/* c8 ignore start */ +const { O_CREAT, O_TRUNC, O_WRONLY } = fs_1.default.constants; +const UV_FS_O_FILEMAP = Number(process.env.__FAKE_FS_O_FILENAME__) || + fs_1.default.constants.UV_FS_O_FILEMAP || + 0; +/* c8 ignore stop */ +const fMapEnabled = isWindows && !!UV_FS_O_FILEMAP; +const fMapLimit = 512 * 1024; +const fMapFlag = UV_FS_O_FILEMAP | O_TRUNC | O_CREAT | O_WRONLY; +exports.getWriteFlag = !fMapEnabled ? + () => 'w' + : (size) => (size < fMapLimit ? fMapFlag : 'w'); +//# sourceMappingURL=get-write-flag.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/header.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/header.js new file mode 100644 index 00000000000000..b3a48037b849ab --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/header.js @@ -0,0 +1,306 @@ +"use strict"; +// parse a 512-byte header block to a data object, or vice-versa +// encode returns `true` if a pax extended header is needed, because +// the data could not be faithfully encoded in a simple header. +// (Also, check header.needPax to see if it needs a pax header.) +var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + var desc = Object.getOwnPropertyDescriptor(m, k); + if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { + desc = { enumerable: true, get: function() { return m[k]; } }; + } + Object.defineProperty(o, k2, desc); +}) : (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + o[k2] = m[k]; +})); +var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { + Object.defineProperty(o, "default", { enumerable: true, value: v }); +}) : function(o, v) { + o["default"] = v; +}); +var __importStar = (this && this.__importStar) || function (mod) { + if (mod && mod.__esModule) return mod; + var result = {}; + if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); + __setModuleDefault(result, mod); + return result; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.Header = void 0; +const node_path_1 = require("node:path"); +const large = __importStar(require("./large-numbers.js")); +const types = __importStar(require("./types.js")); +class Header { + cksumValid = false; + needPax = false; + nullBlock = false; + block; + path; + mode; + uid; + gid; + size; + cksum; + #type = 'Unsupported'; + linkpath; + uname; + gname; + devmaj = 0; + devmin = 0; + atime; + ctime; + mtime; + charset; + comment; + constructor(data, off = 0, ex, gex) { + if (Buffer.isBuffer(data)) { + this.decode(data, off || 0, ex, gex); + } + else if (data) { + this.#slurp(data); + } + } + decode(buf, off, ex, gex) { + if (!off) { + off = 0; + } + if (!buf || !(buf.length >= off + 512)) { + throw new Error('need 512 bytes for header'); + } + this.path = decString(buf, off, 100); + this.mode = decNumber(buf, off + 100, 8); + this.uid = decNumber(buf, off + 108, 8); + this.gid = decNumber(buf, off + 116, 8); + this.size = decNumber(buf, off + 124, 12); + this.mtime = decDate(buf, off + 136, 12); + this.cksum = decNumber(buf, off + 148, 12); + // if we have extended or global extended headers, apply them now + // See https://github.com/npm/node-tar/pull/187 + // Apply global before local, so it overrides + if (gex) + this.#slurp(gex, true); + if (ex) + this.#slurp(ex); + // old tar versions marked dirs as a file with a trailing / + const t = decString(buf, off + 156, 1); + if (types.isCode(t)) { + this.#type = t || '0'; + } + if (this.#type === '0' && this.path.slice(-1) === '/') { + this.#type = '5'; + } + // tar implementations sometimes incorrectly put the stat(dir).size + // as the size in the tarball, even though Directory entries are + // not able to have any body at all. In the very rare chance that + // it actually DOES have a body, we weren't going to do anything with + // it anyway, and it'll just be a warning about an invalid header. + if (this.#type === '5') { + this.size = 0; + } + this.linkpath = decString(buf, off + 157, 100); + if (buf.subarray(off + 257, off + 265).toString() === + 'ustar\u000000') { + this.uname = decString(buf, off + 265, 32); + this.gname = decString(buf, off + 297, 32); + /* c8 ignore start */ + this.devmaj = decNumber(buf, off + 329, 8) ?? 0; + this.devmin = decNumber(buf, off + 337, 8) ?? 0; + /* c8 ignore stop */ + if (buf[off + 475] !== 0) { + // definitely a prefix, definitely >130 chars. + const prefix = decString(buf, off + 345, 155); + this.path = prefix + '/' + this.path; + } + else { + const prefix = decString(buf, off + 345, 130); + if (prefix) { + this.path = prefix + '/' + this.path; + } + this.atime = decDate(buf, off + 476, 12); + this.ctime = decDate(buf, off + 488, 12); + } + } + let sum = 8 * 0x20; + for (let i = off; i < off + 148; i++) { + sum += buf[i]; + } + for (let i = off + 156; i < off + 512; i++) { + sum += buf[i]; + } + this.cksumValid = sum === this.cksum; + if (this.cksum === undefined && sum === 8 * 0x20) { + this.nullBlock = true; + } + } + #slurp(ex, gex = false) { + Object.assign(this, Object.fromEntries(Object.entries(ex).filter(([k, v]) => { + // we slurp in everything except for the path attribute in + // a global extended header, because that's weird. Also, any + // null/undefined values are ignored. + return !(v === null || + v === undefined || + (k === 'path' && gex) || + (k === 'linkpath' && gex) || + k === 'global'); + }))); + } + encode(buf, off = 0) { + if (!buf) { + buf = this.block = Buffer.alloc(512); + } + if (this.#type === 'Unsupported') { + this.#type = '0'; + } + if (!(buf.length >= off + 512)) { + throw new Error('need 512 bytes for header'); + } + const prefixSize = this.ctime || this.atime ? 130 : 155; + const split = splitPrefix(this.path || '', prefixSize); + const path = split[0]; + const prefix = split[1]; + this.needPax = !!split[2]; + this.needPax = encString(buf, off, 100, path) || this.needPax; + this.needPax = + encNumber(buf, off + 100, 8, this.mode) || this.needPax; + this.needPax = + encNumber(buf, off + 108, 8, this.uid) || this.needPax; + this.needPax = + encNumber(buf, off + 116, 8, this.gid) || this.needPax; + this.needPax = + encNumber(buf, off + 124, 12, this.size) || this.needPax; + this.needPax = + encDate(buf, off + 136, 12, this.mtime) || this.needPax; + buf[off + 156] = this.#type.charCodeAt(0); + this.needPax = + encString(buf, off + 157, 100, this.linkpath) || this.needPax; + buf.write('ustar\u000000', off + 257, 8); + this.needPax = + encString(buf, off + 265, 32, this.uname) || this.needPax; + this.needPax = + encString(buf, off + 297, 32, this.gname) || this.needPax; + this.needPax = + encNumber(buf, off + 329, 8, this.devmaj) || this.needPax; + this.needPax = + encNumber(buf, off + 337, 8, this.devmin) || this.needPax; + this.needPax = + encString(buf, off + 345, prefixSize, prefix) || this.needPax; + if (buf[off + 475] !== 0) { + this.needPax = + encString(buf, off + 345, 155, prefix) || this.needPax; + } + else { + this.needPax = + encString(buf, off + 345, 130, prefix) || this.needPax; + this.needPax = + encDate(buf, off + 476, 12, this.atime) || this.needPax; + this.needPax = + encDate(buf, off + 488, 12, this.ctime) || this.needPax; + } + let sum = 8 * 0x20; + for (let i = off; i < off + 148; i++) { + sum += buf[i]; + } + for (let i = off + 156; i < off + 512; i++) { + sum += buf[i]; + } + this.cksum = sum; + encNumber(buf, off + 148, 8, this.cksum); + this.cksumValid = true; + return this.needPax; + } + get type() { + return (this.#type === 'Unsupported' ? + this.#type + : types.name.get(this.#type)); + } + get typeKey() { + return this.#type; + } + set type(type) { + const c = String(types.code.get(type)); + if (types.isCode(c) || c === 'Unsupported') { + this.#type = c; + } + else if (types.isCode(type)) { + this.#type = type; + } + else { + throw new TypeError('invalid entry type: ' + type); + } + } +} +exports.Header = Header; +const splitPrefix = (p, prefixSize) => { + const pathSize = 100; + let pp = p; + let prefix = ''; + let ret = undefined; + const root = node_path_1.posix.parse(p).root || '.'; + if (Buffer.byteLength(pp) < pathSize) { + ret = [pp, prefix, false]; + } + else { + // first set prefix to the dir, and path to the base + prefix = node_path_1.posix.dirname(pp); + pp = node_path_1.posix.basename(pp); + do { + if (Buffer.byteLength(pp) <= pathSize && + Buffer.byteLength(prefix) <= prefixSize) { + // both fit! + ret = [pp, prefix, false]; + } + else if (Buffer.byteLength(pp) > pathSize && + Buffer.byteLength(prefix) <= prefixSize) { + // prefix fits in prefix, but path doesn't fit in path + ret = [pp.slice(0, pathSize - 1), prefix, true]; + } + else { + // make path take a bit from prefix + pp = node_path_1.posix.join(node_path_1.posix.basename(prefix), pp); + prefix = node_path_1.posix.dirname(prefix); + } + } while (prefix !== root && ret === undefined); + // at this point, found no resolution, just truncate + if (!ret) { + ret = [p.slice(0, pathSize - 1), '', true]; + } + } + return ret; +}; +const decString = (buf, off, size) => buf + .subarray(off, off + size) + .toString('utf8') + .replace(/\0.*/, ''); +const decDate = (buf, off, size) => numToDate(decNumber(buf, off, size)); +const numToDate = (num) => num === undefined ? undefined : new Date(num * 1000); +const decNumber = (buf, off, size) => Number(buf[off]) & 0x80 ? + large.parse(buf.subarray(off, off + size)) + : decSmallNumber(buf, off, size); +const nanUndef = (value) => (isNaN(value) ? undefined : value); +const decSmallNumber = (buf, off, size) => nanUndef(parseInt(buf + .subarray(off, off + size) + .toString('utf8') + .replace(/\0.*$/, '') + .trim(), 8)); +// the maximum encodable as a null-terminated octal, by field size +const MAXNUM = { + 12: 0o77777777777, + 8: 0o7777777, +}; +const encNumber = (buf, off, size, num) => num === undefined ? false + : num > MAXNUM[size] || num < 0 ? + (large.encode(num, buf.subarray(off, off + size)), true) + : (encSmallNumber(buf, off, size, num), false); +const encSmallNumber = (buf, off, size, num) => buf.write(octalString(num, size), off, size, 'ascii'); +const octalString = (num, size) => padOctal(Math.floor(num).toString(8), size); +const padOctal = (str, size) => (str.length === size - 1 ? + str + : new Array(size - str.length - 1).join('0') + str + ' ') + '\0'; +const encDate = (buf, off, size, date) => date === undefined ? false : (encNumber(buf, off, size, date.getTime() / 1000)); +// enough to fill the longest string we've got +const NULLS = new Array(156).join('\0'); +// pad with nulls, return true if it's longer or non-ascii +const encString = (buf, off, size, str) => str === undefined ? false : ((buf.write(str + NULLS, off, size, 'utf8'), + str.length !== Buffer.byteLength(str) || str.length > size)); +//# sourceMappingURL=header.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/index.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/index.js new file mode 100644 index 00000000000000..e93ed5ad54aa6e --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/index.js @@ -0,0 +1,54 @@ +"use strict"; +var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + var desc = Object.getOwnPropertyDescriptor(m, k); + if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { + desc = { enumerable: true, get: function() { return m[k]; } }; + } + Object.defineProperty(o, k2, desc); +}) : (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + o[k2] = m[k]; +})); +var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { + Object.defineProperty(o, "default", { enumerable: true, value: v }); +}) : function(o, v) { + o["default"] = v; +}); +var __exportStar = (this && this.__exportStar) || function(m, exports) { + for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p); +}; +var __importStar = (this && this.__importStar) || function (mod) { + if (mod && mod.__esModule) return mod; + var result = {}; + if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); + __setModuleDefault(result, mod); + return result; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.u = exports.types = exports.r = exports.t = exports.x = exports.c = void 0; +__exportStar(require("./create.js"), exports); +var create_js_1 = require("./create.js"); +Object.defineProperty(exports, "c", { enumerable: true, get: function () { return create_js_1.create; } }); +__exportStar(require("./extract.js"), exports); +var extract_js_1 = require("./extract.js"); +Object.defineProperty(exports, "x", { enumerable: true, get: function () { return extract_js_1.extract; } }); +__exportStar(require("./header.js"), exports); +__exportStar(require("./list.js"), exports); +var list_js_1 = require("./list.js"); +Object.defineProperty(exports, "t", { enumerable: true, get: function () { return list_js_1.list; } }); +// classes +__exportStar(require("./pack.js"), exports); +__exportStar(require("./parse.js"), exports); +__exportStar(require("./pax.js"), exports); +__exportStar(require("./read-entry.js"), exports); +__exportStar(require("./replace.js"), exports); +var replace_js_1 = require("./replace.js"); +Object.defineProperty(exports, "r", { enumerable: true, get: function () { return replace_js_1.replace; } }); +exports.types = __importStar(require("./types.js")); +__exportStar(require("./unpack.js"), exports); +__exportStar(require("./update.js"), exports); +var update_js_1 = require("./update.js"); +Object.defineProperty(exports, "u", { enumerable: true, get: function () { return update_js_1.update; } }); +__exportStar(require("./write-entry.js"), exports); +//# sourceMappingURL=index.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/large-numbers.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/large-numbers.js new file mode 100644 index 00000000000000..5b07aa7f71b48d --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/large-numbers.js @@ -0,0 +1,99 @@ +"use strict"; +// Tar can encode large and negative numbers using a leading byte of +// 0xff for negative, and 0x80 for positive. +Object.defineProperty(exports, "__esModule", { value: true }); +exports.parse = exports.encode = void 0; +const encode = (num, buf) => { + if (!Number.isSafeInteger(num)) { + // The number is so large that javascript cannot represent it with integer + // precision. + throw Error('cannot encode number outside of javascript safe integer range'); + } + else if (num < 0) { + encodeNegative(num, buf); + } + else { + encodePositive(num, buf); + } + return buf; +}; +exports.encode = encode; +const encodePositive = (num, buf) => { + buf[0] = 0x80; + for (var i = buf.length; i > 1; i--) { + buf[i - 1] = num & 0xff; + num = Math.floor(num / 0x100); + } +}; +const encodeNegative = (num, buf) => { + buf[0] = 0xff; + var flipped = false; + num = num * -1; + for (var i = buf.length; i > 1; i--) { + var byte = num & 0xff; + num = Math.floor(num / 0x100); + if (flipped) { + buf[i - 1] = onesComp(byte); + } + else if (byte === 0) { + buf[i - 1] = 0; + } + else { + flipped = true; + buf[i - 1] = twosComp(byte); + } + } +}; +const parse = (buf) => { + const pre = buf[0]; + const value = pre === 0x80 ? pos(buf.subarray(1, buf.length)) + : pre === 0xff ? twos(buf) + : null; + if (value === null) { + throw Error('invalid base256 encoding'); + } + if (!Number.isSafeInteger(value)) { + // The number is so large that javascript cannot represent it with integer + // precision. + throw Error('parsed number outside of javascript safe integer range'); + } + return value; +}; +exports.parse = parse; +const twos = (buf) => { + var len = buf.length; + var sum = 0; + var flipped = false; + for (var i = len - 1; i > -1; i--) { + var byte = Number(buf[i]); + var f; + if (flipped) { + f = onesComp(byte); + } + else if (byte === 0) { + f = byte; + } + else { + flipped = true; + f = twosComp(byte); + } + if (f !== 0) { + sum -= f * Math.pow(256, len - i - 1); + } + } + return sum; +}; +const pos = (buf) => { + var len = buf.length; + var sum = 0; + for (var i = len - 1; i > -1; i--) { + var byte = Number(buf[i]); + if (byte !== 0) { + sum += byte * Math.pow(256, len - i - 1); + } + } + return sum; +}; +const onesComp = (byte) => (0xff ^ byte) & 0xff; +const twosComp = (byte) => ((0xff ^ byte) + 1) & 0xff; +//# sourceMappingURL=large-numbers.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/list.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/list.js new file mode 100644 index 00000000000000..3cd34bb4bad481 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/list.js @@ -0,0 +1,136 @@ +"use strict"; +var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + var desc = Object.getOwnPropertyDescriptor(m, k); + if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { + desc = { enumerable: true, get: function() { return m[k]; } }; + } + Object.defineProperty(o, k2, desc); +}) : (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + o[k2] = m[k]; +})); +var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { + Object.defineProperty(o, "default", { enumerable: true, value: v }); +}) : function(o, v) { + o["default"] = v; +}); +var __importStar = (this && this.__importStar) || function (mod) { + if (mod && mod.__esModule) return mod; + var result = {}; + if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); + __setModuleDefault(result, mod); + return result; +}; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.list = exports.filesFilter = void 0; +// tar -t +const fsm = __importStar(require("@isaacs/fs-minipass")); +const node_fs_1 = __importDefault(require("node:fs")); +const path_1 = require("path"); +const make_command_js_1 = require("./make-command.js"); +const parse_js_1 = require("./parse.js"); +const strip_trailing_slashes_js_1 = require("./strip-trailing-slashes.js"); +const onReadEntryFunction = (opt) => { + const onReadEntry = opt.onReadEntry; + opt.onReadEntry = + onReadEntry ? + e => { + onReadEntry(e); + e.resume(); + } + : e => e.resume(); +}; +// construct a filter that limits the file entries listed +// include child entries if a dir is included +const filesFilter = (opt, files) => { + const map = new Map(files.map(f => [(0, strip_trailing_slashes_js_1.stripTrailingSlashes)(f), true])); + const filter = opt.filter; + const mapHas = (file, r = '') => { + const root = r || (0, path_1.parse)(file).root || '.'; + let ret; + if (file === root) + ret = false; + else { + const m = map.get(file); + if (m !== undefined) { + ret = m; + } + else { + ret = mapHas((0, path_1.dirname)(file), root); + } + } + map.set(file, ret); + return ret; + }; + opt.filter = + filter ? + (file, entry) => filter(file, entry) && mapHas((0, strip_trailing_slashes_js_1.stripTrailingSlashes)(file)) + : file => mapHas((0, strip_trailing_slashes_js_1.stripTrailingSlashes)(file)); +}; +exports.filesFilter = filesFilter; +const listFileSync = (opt) => { + const p = new parse_js_1.Parser(opt); + const file = opt.file; + let fd; + try { + const stat = node_fs_1.default.statSync(file); + const readSize = opt.maxReadSize || 16 * 1024 * 1024; + if (stat.size < readSize) { + p.end(node_fs_1.default.readFileSync(file)); + } + else { + let pos = 0; + const buf = Buffer.allocUnsafe(readSize); + fd = node_fs_1.default.openSync(file, 'r'); + while (pos < stat.size) { + const bytesRead = node_fs_1.default.readSync(fd, buf, 0, readSize, pos); + pos += bytesRead; + p.write(buf.subarray(0, bytesRead)); + } + p.end(); + } + } + finally { + if (typeof fd === 'number') { + try { + node_fs_1.default.closeSync(fd); + /* c8 ignore next */ + } + catch (er) { } + } + } +}; +const listFile = (opt, _files) => { + const parse = new parse_js_1.Parser(opt); + const readSize = opt.maxReadSize || 16 * 1024 * 1024; + const file = opt.file; + const p = new Promise((resolve, reject) => { + parse.on('error', reject); + parse.on('end', resolve); + node_fs_1.default.stat(file, (er, stat) => { + if (er) { + reject(er); + } + else { + const stream = new fsm.ReadStream(file, { + readSize: readSize, + size: stat.size, + }); + stream.on('error', reject); + stream.pipe(parse); + } + }); + }); + return p; +}; +exports.list = (0, make_command_js_1.makeCommand)(listFileSync, listFile, opt => new parse_js_1.Parser(opt), opt => new parse_js_1.Parser(opt), (opt, files) => { + if (files?.length) + (0, exports.filesFilter)(opt, files); + if (!opt.noResume) + onReadEntryFunction(opt); +}); +//# sourceMappingURL=list.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/make-command.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/make-command.js new file mode 100644 index 00000000000000..1814319e78bc62 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/make-command.js @@ -0,0 +1,61 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.makeCommand = void 0; +const options_js_1 = require("./options.js"); +const makeCommand = (syncFile, asyncFile, syncNoFile, asyncNoFile, validate) => { + return Object.assign((opt_ = [], entries, cb) => { + if (Array.isArray(opt_)) { + entries = opt_; + opt_ = {}; + } + if (typeof entries === 'function') { + cb = entries; + entries = undefined; + } + if (!entries) { + entries = []; + } + else { + entries = Array.from(entries); + } + const opt = (0, options_js_1.dealias)(opt_); + validate?.(opt, entries); + if ((0, options_js_1.isSyncFile)(opt)) { + if (typeof cb === 'function') { + throw new TypeError('callback not supported for sync tar functions'); + } + return syncFile(opt, entries); + } + else if ((0, options_js_1.isAsyncFile)(opt)) { + const p = asyncFile(opt, entries); + // weirdness to make TS happy + const c = cb ? cb : undefined; + return c ? p.then(() => c(), c) : p; + } + else if ((0, options_js_1.isSyncNoFile)(opt)) { + if (typeof cb === 'function') { + throw new TypeError('callback not supported for sync tar functions'); + } + return syncNoFile(opt, entries); + } + else if ((0, options_js_1.isAsyncNoFile)(opt)) { + if (typeof cb === 'function') { + throw new TypeError('callback only supported with file option'); + } + return asyncNoFile(opt, entries); + /* c8 ignore start */ + } + else { + throw new Error('impossible options??'); + } + /* c8 ignore stop */ + }, { + syncFile, + asyncFile, + syncNoFile, + asyncNoFile, + validate, + }); +}; +exports.makeCommand = makeCommand; +//# sourceMappingURL=make-command.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/mkdir.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/mkdir.js new file mode 100644 index 00000000000000..2b13ecbab6723e --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/mkdir.js @@ -0,0 +1,209 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.mkdirSync = exports.mkdir = void 0; +const chownr_1 = require("chownr"); +const fs_1 = __importDefault(require("fs")); +const mkdirp_1 = require("mkdirp"); +const node_path_1 = __importDefault(require("node:path")); +const cwd_error_js_1 = require("./cwd-error.js"); +const normalize_windows_path_js_1 = require("./normalize-windows-path.js"); +const symlink_error_js_1 = require("./symlink-error.js"); +const cGet = (cache, key) => cache.get((0, normalize_windows_path_js_1.normalizeWindowsPath)(key)); +const cSet = (cache, key, val) => cache.set((0, normalize_windows_path_js_1.normalizeWindowsPath)(key), val); +const checkCwd = (dir, cb) => { + fs_1.default.stat(dir, (er, st) => { + if (er || !st.isDirectory()) { + er = new cwd_error_js_1.CwdError(dir, er?.code || 'ENOTDIR'); + } + cb(er); + }); +}; +/** + * Wrapper around mkdirp for tar's needs. + * + * The main purpose is to avoid creating directories if we know that + * they already exist (and track which ones exist for this purpose), + * and prevent entries from being extracted into symlinked folders, + * if `preservePaths` is not set. + */ +const mkdir = (dir, opt, cb) => { + dir = (0, normalize_windows_path_js_1.normalizeWindowsPath)(dir); + // if there's any overlap between mask and mode, + // then we'll need an explicit chmod + /* c8 ignore next */ + const umask = opt.umask ?? 0o22; + const mode = opt.mode | 0o0700; + const needChmod = (mode & umask) !== 0; + const uid = opt.uid; + const gid = opt.gid; + const doChown = typeof uid === 'number' && + typeof gid === 'number' && + (uid !== opt.processUid || gid !== opt.processGid); + const preserve = opt.preserve; + const unlink = opt.unlink; + const cache = opt.cache; + const cwd = (0, normalize_windows_path_js_1.normalizeWindowsPath)(opt.cwd); + const done = (er, created) => { + if (er) { + cb(er); + } + else { + cSet(cache, dir, true); + if (created && doChown) { + (0, chownr_1.chownr)(created, uid, gid, er => done(er)); + } + else if (needChmod) { + fs_1.default.chmod(dir, mode, cb); + } + else { + cb(); + } + } + }; + if (cache && cGet(cache, dir) === true) { + return done(); + } + if (dir === cwd) { + return checkCwd(dir, done); + } + if (preserve) { + return (0, mkdirp_1.mkdirp)(dir, { mode }).then(made => done(null, made ?? undefined), // oh, ts + done); + } + const sub = (0, normalize_windows_path_js_1.normalizeWindowsPath)(node_path_1.default.relative(cwd, dir)); + const parts = sub.split('/'); + mkdir_(cwd, parts, mode, cache, unlink, cwd, undefined, done); +}; +exports.mkdir = mkdir; +const mkdir_ = (base, parts, mode, cache, unlink, cwd, created, cb) => { + if (!parts.length) { + return cb(null, created); + } + const p = parts.shift(); + const part = (0, normalize_windows_path_js_1.normalizeWindowsPath)(node_path_1.default.resolve(base + '/' + p)); + if (cGet(cache, part)) { + return mkdir_(part, parts, mode, cache, unlink, cwd, created, cb); + } + fs_1.default.mkdir(part, mode, onmkdir(part, parts, mode, cache, unlink, cwd, created, cb)); +}; +const onmkdir = (part, parts, mode, cache, unlink, cwd, created, cb) => (er) => { + if (er) { + fs_1.default.lstat(part, (statEr, st) => { + if (statEr) { + statEr.path = + statEr.path && (0, normalize_windows_path_js_1.normalizeWindowsPath)(statEr.path); + cb(statEr); + } + else if (st.isDirectory()) { + mkdir_(part, parts, mode, cache, unlink, cwd, created, cb); + } + else if (unlink) { + fs_1.default.unlink(part, er => { + if (er) { + return cb(er); + } + fs_1.default.mkdir(part, mode, onmkdir(part, parts, mode, cache, unlink, cwd, created, cb)); + }); + } + else if (st.isSymbolicLink()) { + return cb(new symlink_error_js_1.SymlinkError(part, part + '/' + parts.join('/'))); + } + else { + cb(er); + } + }); + } + else { + created = created || part; + mkdir_(part, parts, mode, cache, unlink, cwd, created, cb); + } +}; +const checkCwdSync = (dir) => { + let ok = false; + let code = undefined; + try { + ok = fs_1.default.statSync(dir).isDirectory(); + } + catch (er) { + code = er?.code; + } + finally { + if (!ok) { + throw new cwd_error_js_1.CwdError(dir, code ?? 'ENOTDIR'); + } + } +}; +const mkdirSync = (dir, opt) => { + dir = (0, normalize_windows_path_js_1.normalizeWindowsPath)(dir); + // if there's any overlap between mask and mode, + // then we'll need an explicit chmod + /* c8 ignore next */ + const umask = opt.umask ?? 0o22; + const mode = opt.mode | 0o700; + const needChmod = (mode & umask) !== 0; + const uid = opt.uid; + const gid = opt.gid; + const doChown = typeof uid === 'number' && + typeof gid === 'number' && + (uid !== opt.processUid || gid !== opt.processGid); + const preserve = opt.preserve; + const unlink = opt.unlink; + const cache = opt.cache; + const cwd = (0, normalize_windows_path_js_1.normalizeWindowsPath)(opt.cwd); + const done = (created) => { + cSet(cache, dir, true); + if (created && doChown) { + (0, chownr_1.chownrSync)(created, uid, gid); + } + if (needChmod) { + fs_1.default.chmodSync(dir, mode); + } + }; + if (cache && cGet(cache, dir) === true) { + return done(); + } + if (dir === cwd) { + checkCwdSync(cwd); + return done(); + } + if (preserve) { + return done((0, mkdirp_1.mkdirpSync)(dir, mode) ?? undefined); + } + const sub = (0, normalize_windows_path_js_1.normalizeWindowsPath)(node_path_1.default.relative(cwd, dir)); + const parts = sub.split('/'); + let created = undefined; + for (let p = parts.shift(), part = cwd; p && (part += '/' + p); p = parts.shift()) { + part = (0, normalize_windows_path_js_1.normalizeWindowsPath)(node_path_1.default.resolve(part)); + if (cGet(cache, part)) { + continue; + } + try { + fs_1.default.mkdirSync(part, mode); + created = created || part; + cSet(cache, part, true); + } + catch (er) { + const st = fs_1.default.lstatSync(part); + if (st.isDirectory()) { + cSet(cache, part, true); + continue; + } + else if (unlink) { + fs_1.default.unlinkSync(part); + fs_1.default.mkdirSync(part, mode); + created = created || part; + cSet(cache, part, true); + continue; + } + else if (st.isSymbolicLink()) { + return new symlink_error_js_1.SymlinkError(part, part + '/' + parts.join('/')); + } + } + } + return done(created); +}; +exports.mkdirSync = mkdirSync; +//# sourceMappingURL=mkdir.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/mode-fix.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/mode-fix.js new file mode 100644 index 00000000000000..49dd727961d290 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/mode-fix.js @@ -0,0 +1,29 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.modeFix = void 0; +const modeFix = (mode, isDir, portable) => { + mode &= 0o7777; + // in portable mode, use the minimum reasonable umask + // if this system creates files with 0o664 by default + // (as some linux distros do), then we'll write the + // archive with 0o644 instead. Also, don't ever create + // a file that is not readable/writable by the owner. + if (portable) { + mode = (mode | 0o600) & ~0o22; + } + // if dirs are readable, then they should be listable + if (isDir) { + if (mode & 0o400) { + mode |= 0o100; + } + if (mode & 0o40) { + mode |= 0o10; + } + if (mode & 0o4) { + mode |= 0o1; + } + } + return mode; +}; +exports.modeFix = modeFix; +//# sourceMappingURL=mode-fix.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/normalize-unicode.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/normalize-unicode.js new file mode 100644 index 00000000000000..2f08ce46d98c4c --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/normalize-unicode.js @@ -0,0 +1,17 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.normalizeUnicode = void 0; +// warning: extremely hot code path. +// This has been meticulously optimized for use +// within npm install on large package trees. +// Do not edit without careful benchmarking. +const normalizeCache = Object.create(null); +const { hasOwnProperty } = Object.prototype; +const normalizeUnicode = (s) => { + if (!hasOwnProperty.call(normalizeCache, s)) { + normalizeCache[s] = s.normalize('NFD'); + } + return normalizeCache[s]; +}; +exports.normalizeUnicode = normalizeUnicode; +//# sourceMappingURL=normalize-unicode.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/normalize-windows-path.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/normalize-windows-path.js new file mode 100644 index 00000000000000..b0c7aaa9f2d175 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/normalize-windows-path.js @@ -0,0 +1,12 @@ +"use strict"; +// on windows, either \ or / are valid directory separators. +// on unix, \ is a valid character in filenames. +// so, on windows, and only on windows, we replace all \ chars with /, +// so that we can use / as our one and only directory separator char. +Object.defineProperty(exports, "__esModule", { value: true }); +exports.normalizeWindowsPath = void 0; +const platform = process.env.TESTING_TAR_FAKE_PLATFORM || process.platform; +exports.normalizeWindowsPath = platform !== 'win32' ? + (p) => p + : (p) => p && p.replace(/\\/g, '/'); +//# sourceMappingURL=normalize-windows-path.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/options.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/options.js new file mode 100644 index 00000000000000..4cd06505bc72b2 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/options.js @@ -0,0 +1,66 @@ +"use strict"; +// turn tar(1) style args like `C` into the more verbose things like `cwd` +Object.defineProperty(exports, "__esModule", { value: true }); +exports.dealias = exports.isNoFile = exports.isFile = exports.isAsync = exports.isSync = exports.isAsyncNoFile = exports.isSyncNoFile = exports.isAsyncFile = exports.isSyncFile = void 0; +const argmap = new Map([ + ['C', 'cwd'], + ['f', 'file'], + ['z', 'gzip'], + ['P', 'preservePaths'], + ['U', 'unlink'], + ['strip-components', 'strip'], + ['stripComponents', 'strip'], + ['keep-newer', 'newer'], + ['keepNewer', 'newer'], + ['keep-newer-files', 'newer'], + ['keepNewerFiles', 'newer'], + ['k', 'keep'], + ['keep-existing', 'keep'], + ['keepExisting', 'keep'], + ['m', 'noMtime'], + ['no-mtime', 'noMtime'], + ['p', 'preserveOwner'], + ['L', 'follow'], + ['h', 'follow'], + ['onentry', 'onReadEntry'], +]); +const isSyncFile = (o) => !!o.sync && !!o.file; +exports.isSyncFile = isSyncFile; +const isAsyncFile = (o) => !o.sync && !!o.file; +exports.isAsyncFile = isAsyncFile; +const isSyncNoFile = (o) => !!o.sync && !o.file; +exports.isSyncNoFile = isSyncNoFile; +const isAsyncNoFile = (o) => !o.sync && !o.file; +exports.isAsyncNoFile = isAsyncNoFile; +const isSync = (o) => !!o.sync; +exports.isSync = isSync; +const isAsync = (o) => !o.sync; +exports.isAsync = isAsync; +const isFile = (o) => !!o.file; +exports.isFile = isFile; +const isNoFile = (o) => !o.file; +exports.isNoFile = isNoFile; +const dealiasKey = (k) => { + const d = argmap.get(k); + if (d) + return d; + return k; +}; +const dealias = (opt = {}) => { + if (!opt) + return {}; + const result = {}; + for (const [key, v] of Object.entries(opt)) { + // TS doesn't know that aliases are going to always be the same type + const k = dealiasKey(key); + result[k] = v; + } + // affordance for deprecated noChmod -> chmod + if (result.chmod === undefined && result.noChmod === false) { + result.chmod = true; + } + delete result.noChmod; + return result; +}; +exports.dealias = dealias; +//# sourceMappingURL=options.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/pack.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/pack.js new file mode 100644 index 00000000000000..303e93063c2db4 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/pack.js @@ -0,0 +1,477 @@ +"use strict"; +// A readable tar stream creator +// Technically, this is a transform stream that you write paths into, +// and tar format comes out of. +// The `add()` method is like `write()` but returns this, +// and end() return `this` as well, so you can +// do `new Pack(opt).add('files').add('dir').end().pipe(output) +// You could also do something like: +// streamOfPaths().pipe(new Pack()).pipe(new fs.WriteStream('out.tar')) +var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + var desc = Object.getOwnPropertyDescriptor(m, k); + if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { + desc = { enumerable: true, get: function() { return m[k]; } }; + } + Object.defineProperty(o, k2, desc); +}) : (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + o[k2] = m[k]; +})); +var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { + Object.defineProperty(o, "default", { enumerable: true, value: v }); +}) : function(o, v) { + o["default"] = v; +}); +var __importStar = (this && this.__importStar) || function (mod) { + if (mod && mod.__esModule) return mod; + var result = {}; + if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); + __setModuleDefault(result, mod); + return result; +}; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.PackSync = exports.Pack = exports.PackJob = void 0; +const fs_1 = __importDefault(require("fs")); +const write_entry_js_1 = require("./write-entry.js"); +class PackJob { + path; + absolute; + entry; + stat; + readdir; + pending = false; + ignore = false; + piped = false; + constructor(path, absolute) { + this.path = path || './'; + this.absolute = absolute; + } +} +exports.PackJob = PackJob; +const minipass_1 = require("minipass"); +const zlib = __importStar(require("minizlib")); +const yallist_1 = require("yallist"); +const read_entry_js_1 = require("./read-entry.js"); +const warn_method_js_1 = require("./warn-method.js"); +const EOF = Buffer.alloc(1024); +const ONSTAT = Symbol('onStat'); +const ENDED = Symbol('ended'); +const QUEUE = Symbol('queue'); +const CURRENT = Symbol('current'); +const PROCESS = Symbol('process'); +const PROCESSING = Symbol('processing'); +const PROCESSJOB = Symbol('processJob'); +const JOBS = Symbol('jobs'); +const JOBDONE = Symbol('jobDone'); +const ADDFSENTRY = Symbol('addFSEntry'); +const ADDTARENTRY = Symbol('addTarEntry'); +const STAT = Symbol('stat'); +const READDIR = Symbol('readdir'); +const ONREADDIR = Symbol('onreaddir'); +const PIPE = Symbol('pipe'); +const ENTRY = Symbol('entry'); +const ENTRYOPT = Symbol('entryOpt'); +const WRITEENTRYCLASS = Symbol('writeEntryClass'); +const WRITE = Symbol('write'); +const ONDRAIN = Symbol('ondrain'); +const path_1 = __importDefault(require("path")); +const normalize_windows_path_js_1 = require("./normalize-windows-path.js"); +class Pack extends minipass_1.Minipass { + opt; + cwd; + maxReadSize; + preservePaths; + strict; + noPax; + prefix; + linkCache; + statCache; + file; + portable; + zip; + readdirCache; + noDirRecurse; + follow; + noMtime; + mtime; + filter; + jobs; + [WRITEENTRYCLASS]; + onWriteEntry; + [QUEUE]; + [JOBS] = 0; + [PROCESSING] = false; + [ENDED] = false; + constructor(opt = {}) { + //@ts-ignore + super(); + this.opt = opt; + this.file = opt.file || ''; + this.cwd = opt.cwd || process.cwd(); + this.maxReadSize = opt.maxReadSize; + this.preservePaths = !!opt.preservePaths; + this.strict = !!opt.strict; + this.noPax = !!opt.noPax; + this.prefix = (0, normalize_windows_path_js_1.normalizeWindowsPath)(opt.prefix || ''); + this.linkCache = opt.linkCache || new Map(); + this.statCache = opt.statCache || new Map(); + this.readdirCache = opt.readdirCache || new Map(); + this.onWriteEntry = opt.onWriteEntry; + this[WRITEENTRYCLASS] = write_entry_js_1.WriteEntry; + if (typeof opt.onwarn === 'function') { + this.on('warn', opt.onwarn); + } + this.portable = !!opt.portable; + if (opt.gzip || opt.brotli) { + if (opt.gzip && opt.brotli) { + throw new TypeError('gzip and brotli are mutually exclusive'); + } + if (opt.gzip) { + if (typeof opt.gzip !== 'object') { + opt.gzip = {}; + } + if (this.portable) { + opt.gzip.portable = true; + } + this.zip = new zlib.Gzip(opt.gzip); + } + if (opt.brotli) { + if (typeof opt.brotli !== 'object') { + opt.brotli = {}; + } + this.zip = new zlib.BrotliCompress(opt.brotli); + } + /* c8 ignore next */ + if (!this.zip) + throw new Error('impossible'); + const zip = this.zip; + zip.on('data', chunk => super.write(chunk)); + zip.on('end', () => super.end()); + zip.on('drain', () => this[ONDRAIN]()); + this.on('resume', () => zip.resume()); + } + else { + this.on('drain', this[ONDRAIN]); + } + this.noDirRecurse = !!opt.noDirRecurse; + this.follow = !!opt.follow; + this.noMtime = !!opt.noMtime; + if (opt.mtime) + this.mtime = opt.mtime; + this.filter = + typeof opt.filter === 'function' ? opt.filter : () => true; + this[QUEUE] = new yallist_1.Yallist(); + this[JOBS] = 0; + this.jobs = Number(opt.jobs) || 4; + this[PROCESSING] = false; + this[ENDED] = false; + } + [WRITE](chunk) { + return super.write(chunk); + } + add(path) { + this.write(path); + return this; + } + end(path, encoding, cb) { + /* c8 ignore start */ + if (typeof path === 'function') { + cb = path; + path = undefined; + } + if (typeof encoding === 'function') { + cb = encoding; + encoding = undefined; + } + /* c8 ignore stop */ + if (path) { + this.add(path); + } + this[ENDED] = true; + this[PROCESS](); + /* c8 ignore next */ + if (cb) + cb(); + return this; + } + write(path) { + if (this[ENDED]) { + throw new Error('write after end'); + } + if (path instanceof read_entry_js_1.ReadEntry) { + this[ADDTARENTRY](path); + } + else { + this[ADDFSENTRY](path); + } + return this.flowing; + } + [ADDTARENTRY](p) { + const absolute = (0, normalize_windows_path_js_1.normalizeWindowsPath)(path_1.default.resolve(this.cwd, p.path)); + // in this case, we don't have to wait for the stat + if (!this.filter(p.path, p)) { + p.resume(); + } + else { + const job = new PackJob(p.path, absolute); + job.entry = new write_entry_js_1.WriteEntryTar(p, this[ENTRYOPT](job)); + job.entry.on('end', () => this[JOBDONE](job)); + this[JOBS] += 1; + this[QUEUE].push(job); + } + this[PROCESS](); + } + [ADDFSENTRY](p) { + const absolute = (0, normalize_windows_path_js_1.normalizeWindowsPath)(path_1.default.resolve(this.cwd, p)); + this[QUEUE].push(new PackJob(p, absolute)); + this[PROCESS](); + } + [STAT](job) { + job.pending = true; + this[JOBS] += 1; + const stat = this.follow ? 'stat' : 'lstat'; + fs_1.default[stat](job.absolute, (er, stat) => { + job.pending = false; + this[JOBS] -= 1; + if (er) { + this.emit('error', er); + } + else { + this[ONSTAT](job, stat); + } + }); + } + [ONSTAT](job, stat) { + this.statCache.set(job.absolute, stat); + job.stat = stat; + // now we have the stat, we can filter it. + if (!this.filter(job.path, stat)) { + job.ignore = true; + } + this[PROCESS](); + } + [READDIR](job) { + job.pending = true; + this[JOBS] += 1; + fs_1.default.readdir(job.absolute, (er, entries) => { + job.pending = false; + this[JOBS] -= 1; + if (er) { + return this.emit('error', er); + } + this[ONREADDIR](job, entries); + }); + } + [ONREADDIR](job, entries) { + this.readdirCache.set(job.absolute, entries); + job.readdir = entries; + this[PROCESS](); + } + [PROCESS]() { + if (this[PROCESSING]) { + return; + } + this[PROCESSING] = true; + for (let w = this[QUEUE].head; !!w && this[JOBS] < this.jobs; w = w.next) { + this[PROCESSJOB](w.value); + if (w.value.ignore) { + const p = w.next; + this[QUEUE].removeNode(w); + w.next = p; + } + } + this[PROCESSING] = false; + if (this[ENDED] && !this[QUEUE].length && this[JOBS] === 0) { + if (this.zip) { + this.zip.end(EOF); + } + else { + super.write(EOF); + super.end(); + } + } + } + get [CURRENT]() { + return this[QUEUE] && this[QUEUE].head && this[QUEUE].head.value; + } + [JOBDONE](_job) { + this[QUEUE].shift(); + this[JOBS] -= 1; + this[PROCESS](); + } + [PROCESSJOB](job) { + if (job.pending) { + return; + } + if (job.entry) { + if (job === this[CURRENT] && !job.piped) { + this[PIPE](job); + } + return; + } + if (!job.stat) { + const sc = this.statCache.get(job.absolute); + if (sc) { + this[ONSTAT](job, sc); + } + else { + this[STAT](job); + } + } + if (!job.stat) { + return; + } + // filtered out! + if (job.ignore) { + return; + } + if (!this.noDirRecurse && + job.stat.isDirectory() && + !job.readdir) { + const rc = this.readdirCache.get(job.absolute); + if (rc) { + this[ONREADDIR](job, rc); + } + else { + this[READDIR](job); + } + if (!job.readdir) { + return; + } + } + // we know it doesn't have an entry, because that got checked above + job.entry = this[ENTRY](job); + if (!job.entry) { + job.ignore = true; + return; + } + if (job === this[CURRENT] && !job.piped) { + this[PIPE](job); + } + } + [ENTRYOPT](job) { + return { + onwarn: (code, msg, data) => this.warn(code, msg, data), + noPax: this.noPax, + cwd: this.cwd, + absolute: job.absolute, + preservePaths: this.preservePaths, + maxReadSize: this.maxReadSize, + strict: this.strict, + portable: this.portable, + linkCache: this.linkCache, + statCache: this.statCache, + noMtime: this.noMtime, + mtime: this.mtime, + prefix: this.prefix, + onWriteEntry: this.onWriteEntry, + }; + } + [ENTRY](job) { + this[JOBS] += 1; + try { + const e = new this[WRITEENTRYCLASS](job.path, this[ENTRYOPT](job)); + return e + .on('end', () => this[JOBDONE](job)) + .on('error', er => this.emit('error', er)); + } + catch (er) { + this.emit('error', er); + } + } + [ONDRAIN]() { + if (this[CURRENT] && this[CURRENT].entry) { + this[CURRENT].entry.resume(); + } + } + // like .pipe() but using super, because our write() is special + [PIPE](job) { + job.piped = true; + if (job.readdir) { + job.readdir.forEach(entry => { + const p = job.path; + const base = p === './' ? '' : p.replace(/\/*$/, '/'); + this[ADDFSENTRY](base + entry); + }); + } + const source = job.entry; + const zip = this.zip; + /* c8 ignore start */ + if (!source) + throw new Error('cannot pipe without source'); + /* c8 ignore stop */ + if (zip) { + source.on('data', chunk => { + if (!zip.write(chunk)) { + source.pause(); + } + }); + } + else { + source.on('data', chunk => { + if (!super.write(chunk)) { + source.pause(); + } + }); + } + } + pause() { + if (this.zip) { + this.zip.pause(); + } + return super.pause(); + } + warn(code, message, data = {}) { + (0, warn_method_js_1.warnMethod)(this, code, message, data); + } +} +exports.Pack = Pack; +class PackSync extends Pack { + sync = true; + constructor(opt) { + super(opt); + this[WRITEENTRYCLASS] = write_entry_js_1.WriteEntrySync; + } + // pause/resume are no-ops in sync streams. + pause() { } + resume() { } + [STAT](job) { + const stat = this.follow ? 'statSync' : 'lstatSync'; + this[ONSTAT](job, fs_1.default[stat](job.absolute)); + } + [READDIR](job) { + this[ONREADDIR](job, fs_1.default.readdirSync(job.absolute)); + } + // gotta get it all in this tick + [PIPE](job) { + const source = job.entry; + const zip = this.zip; + if (job.readdir) { + job.readdir.forEach(entry => { + const p = job.path; + const base = p === './' ? '' : p.replace(/\/*$/, '/'); + this[ADDFSENTRY](base + entry); + }); + } + /* c8 ignore start */ + if (!source) + throw new Error('Cannot pipe without source'); + /* c8 ignore stop */ + if (zip) { + source.on('data', chunk => { + zip.write(chunk); + }); + } + else { + source.on('data', chunk => { + super[WRITE](chunk); + }); + } + } +} +exports.PackSync = PackSync; +//# sourceMappingURL=pack.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/package.json b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/package.json new file mode 100644 index 00000000000000..5bbefffbabee39 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/package.json @@ -0,0 +1,3 @@ +{ + "type": "commonjs" +} diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/parse.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/parse.js new file mode 100644 index 00000000000000..1f7e5fd65e869f --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/parse.js @@ -0,0 +1,599 @@ +"use strict"; +// this[BUFFER] is the remainder of a chunk if we're waiting for +// the full 512 bytes of a header to come in. We will Buffer.concat() +// it to the next write(), which is a mem copy, but a small one. +// +// this[QUEUE] is a Yallist of entries that haven't been emitted +// yet this can only get filled up if the user keeps write()ing after +// a write() returns false, or does a write() with more than one entry +// +// We don't buffer chunks, we always parse them and either create an +// entry, or push it into the active entry. The ReadEntry class knows +// to throw data away if .ignore=true +// +// Shift entry off the buffer when it emits 'end', and emit 'entry' for +// the next one in the list. +// +// At any time, we're pushing body chunks into the entry at WRITEENTRY, +// and waiting for 'end' on the entry at READENTRY +// +// ignored entries get .resume() called on them straight away +Object.defineProperty(exports, "__esModule", { value: true }); +exports.Parser = void 0; +const events_1 = require("events"); +const minizlib_1 = require("minizlib"); +const yallist_1 = require("yallist"); +const header_js_1 = require("./header.js"); +const pax_js_1 = require("./pax.js"); +const read_entry_js_1 = require("./read-entry.js"); +const warn_method_js_1 = require("./warn-method.js"); +const maxMetaEntrySize = 1024 * 1024; +const gzipHeader = Buffer.from([0x1f, 0x8b]); +const STATE = Symbol('state'); +const WRITEENTRY = Symbol('writeEntry'); +const READENTRY = Symbol('readEntry'); +const NEXTENTRY = Symbol('nextEntry'); +const PROCESSENTRY = Symbol('processEntry'); +const EX = Symbol('extendedHeader'); +const GEX = Symbol('globalExtendedHeader'); +const META = Symbol('meta'); +const EMITMETA = Symbol('emitMeta'); +const BUFFER = Symbol('buffer'); +const QUEUE = Symbol('queue'); +const ENDED = Symbol('ended'); +const EMITTEDEND = Symbol('emittedEnd'); +const EMIT = Symbol('emit'); +const UNZIP = Symbol('unzip'); +const CONSUMECHUNK = Symbol('consumeChunk'); +const CONSUMECHUNKSUB = Symbol('consumeChunkSub'); +const CONSUMEBODY = Symbol('consumeBody'); +const CONSUMEMETA = Symbol('consumeMeta'); +const CONSUMEHEADER = Symbol('consumeHeader'); +const CONSUMING = Symbol('consuming'); +const BUFFERCONCAT = Symbol('bufferConcat'); +const MAYBEEND = Symbol('maybeEnd'); +const WRITING = Symbol('writing'); +const ABORTED = Symbol('aborted'); +const DONE = Symbol('onDone'); +const SAW_VALID_ENTRY = Symbol('sawValidEntry'); +const SAW_NULL_BLOCK = Symbol('sawNullBlock'); +const SAW_EOF = Symbol('sawEOF'); +const CLOSESTREAM = Symbol('closeStream'); +const noop = () => true; +class Parser extends events_1.EventEmitter { + file; + strict; + maxMetaEntrySize; + filter; + brotli; + writable = true; + readable = false; + [QUEUE] = new yallist_1.Yallist(); + [BUFFER]; + [READENTRY]; + [WRITEENTRY]; + [STATE] = 'begin'; + [META] = ''; + [EX]; + [GEX]; + [ENDED] = false; + [UNZIP]; + [ABORTED] = false; + [SAW_VALID_ENTRY]; + [SAW_NULL_BLOCK] = false; + [SAW_EOF] = false; + [WRITING] = false; + [CONSUMING] = false; + [EMITTEDEND] = false; + constructor(opt = {}) { + super(); + this.file = opt.file || ''; + // these BADARCHIVE errors can't be detected early. listen on DONE. + this.on(DONE, () => { + if (this[STATE] === 'begin' || + this[SAW_VALID_ENTRY] === false) { + // either less than 1 block of data, or all entries were invalid. + // Either way, probably not even a tarball. + this.warn('TAR_BAD_ARCHIVE', 'Unrecognized archive format'); + } + }); + if (opt.ondone) { + this.on(DONE, opt.ondone); + } + else { + this.on(DONE, () => { + this.emit('prefinish'); + this.emit('finish'); + this.emit('end'); + }); + } + this.strict = !!opt.strict; + this.maxMetaEntrySize = opt.maxMetaEntrySize || maxMetaEntrySize; + this.filter = typeof opt.filter === 'function' ? opt.filter : noop; + // Unlike gzip, brotli doesn't have any magic bytes to identify it + // Users need to explicitly tell us they're extracting a brotli file + // Or we infer from the file extension + const isTBR = opt.file && + (opt.file.endsWith('.tar.br') || opt.file.endsWith('.tbr')); + // if it's a tbr file it MIGHT be brotli, but we don't know until + // we look at it and verify it's not a valid tar file. + this.brotli = + !opt.gzip && opt.brotli !== undefined ? opt.brotli + : isTBR ? undefined + : false; + // have to set this so that streams are ok piping into it + this.on('end', () => this[CLOSESTREAM]()); + if (typeof opt.onwarn === 'function') { + this.on('warn', opt.onwarn); + } + if (typeof opt.onReadEntry === 'function') { + this.on('entry', opt.onReadEntry); + } + } + warn(code, message, data = {}) { + (0, warn_method_js_1.warnMethod)(this, code, message, data); + } + [CONSUMEHEADER](chunk, position) { + if (this[SAW_VALID_ENTRY] === undefined) { + this[SAW_VALID_ENTRY] = false; + } + let header; + try { + header = new header_js_1.Header(chunk, position, this[EX], this[GEX]); + } + catch (er) { + return this.warn('TAR_ENTRY_INVALID', er); + } + if (header.nullBlock) { + if (this[SAW_NULL_BLOCK]) { + this[SAW_EOF] = true; + // ending an archive with no entries. pointless, but legal. + if (this[STATE] === 'begin') { + this[STATE] = 'header'; + } + this[EMIT]('eof'); + } + else { + this[SAW_NULL_BLOCK] = true; + this[EMIT]('nullBlock'); + } + } + else { + this[SAW_NULL_BLOCK] = false; + if (!header.cksumValid) { + this.warn('TAR_ENTRY_INVALID', 'checksum failure', { header }); + } + else if (!header.path) { + this.warn('TAR_ENTRY_INVALID', 'path is required', { header }); + } + else { + const type = header.type; + if (/^(Symbolic)?Link$/.test(type) && !header.linkpath) { + this.warn('TAR_ENTRY_INVALID', 'linkpath required', { + header, + }); + } + else if (!/^(Symbolic)?Link$/.test(type) && + !/^(Global)?ExtendedHeader$/.test(type) && + header.linkpath) { + this.warn('TAR_ENTRY_INVALID', 'linkpath forbidden', { + header, + }); + } + else { + const entry = (this[WRITEENTRY] = new read_entry_js_1.ReadEntry(header, this[EX], this[GEX])); + // we do this for meta & ignored entries as well, because they + // are still valid tar, or else we wouldn't know to ignore them + if (!this[SAW_VALID_ENTRY]) { + if (entry.remain) { + // this might be the one! + const onend = () => { + if (!entry.invalid) { + this[SAW_VALID_ENTRY] = true; + } + }; + entry.on('end', onend); + } + else { + this[SAW_VALID_ENTRY] = true; + } + } + if (entry.meta) { + if (entry.size > this.maxMetaEntrySize) { + entry.ignore = true; + this[EMIT]('ignoredEntry', entry); + this[STATE] = 'ignore'; + entry.resume(); + } + else if (entry.size > 0) { + this[META] = ''; + entry.on('data', c => (this[META] += c)); + this[STATE] = 'meta'; + } + } + else { + this[EX] = undefined; + entry.ignore = + entry.ignore || !this.filter(entry.path, entry); + if (entry.ignore) { + // probably valid, just not something we care about + this[EMIT]('ignoredEntry', entry); + this[STATE] = entry.remain ? 'ignore' : 'header'; + entry.resume(); + } + else { + if (entry.remain) { + this[STATE] = 'body'; + } + else { + this[STATE] = 'header'; + entry.end(); + } + if (!this[READENTRY]) { + this[QUEUE].push(entry); + this[NEXTENTRY](); + } + else { + this[QUEUE].push(entry); + } + } + } + } + } + } + } + [CLOSESTREAM]() { + queueMicrotask(() => this.emit('close')); + } + [PROCESSENTRY](entry) { + let go = true; + if (!entry) { + this[READENTRY] = undefined; + go = false; + } + else if (Array.isArray(entry)) { + const [ev, ...args] = entry; + this.emit(ev, ...args); + } + else { + this[READENTRY] = entry; + this.emit('entry', entry); + if (!entry.emittedEnd) { + entry.on('end', () => this[NEXTENTRY]()); + go = false; + } + } + return go; + } + [NEXTENTRY]() { + do { } while (this[PROCESSENTRY](this[QUEUE].shift())); + if (!this[QUEUE].length) { + // At this point, there's nothing in the queue, but we may have an + // entry which is being consumed (readEntry). + // If we don't, then we definitely can handle more data. + // If we do, and either it's flowing, or it has never had any data + // written to it, then it needs more. + // The only other possibility is that it has returned false from a + // write() call, so we wait for the next drain to continue. + const re = this[READENTRY]; + const drainNow = !re || re.flowing || re.size === re.remain; + if (drainNow) { + if (!this[WRITING]) { + this.emit('drain'); + } + } + else { + re.once('drain', () => this.emit('drain')); + } + } + } + [CONSUMEBODY](chunk, position) { + // write up to but no more than writeEntry.blockRemain + const entry = this[WRITEENTRY]; + /* c8 ignore start */ + if (!entry) { + throw new Error('attempt to consume body without entry??'); + } + const br = entry.blockRemain ?? 0; + /* c8 ignore stop */ + const c = br >= chunk.length && position === 0 ? + chunk + : chunk.subarray(position, position + br); + entry.write(c); + if (!entry.blockRemain) { + this[STATE] = 'header'; + this[WRITEENTRY] = undefined; + entry.end(); + } + return c.length; + } + [CONSUMEMETA](chunk, position) { + const entry = this[WRITEENTRY]; + const ret = this[CONSUMEBODY](chunk, position); + // if we finished, then the entry is reset + if (!this[WRITEENTRY] && entry) { + this[EMITMETA](entry); + } + return ret; + } + [EMIT](ev, data, extra) { + if (!this[QUEUE].length && !this[READENTRY]) { + this.emit(ev, data, extra); + } + else { + this[QUEUE].push([ev, data, extra]); + } + } + [EMITMETA](entry) { + this[EMIT]('meta', this[META]); + switch (entry.type) { + case 'ExtendedHeader': + case 'OldExtendedHeader': + this[EX] = pax_js_1.Pax.parse(this[META], this[EX], false); + break; + case 'GlobalExtendedHeader': + this[GEX] = pax_js_1.Pax.parse(this[META], this[GEX], true); + break; + case 'NextFileHasLongPath': + case 'OldGnuLongPath': { + const ex = this[EX] ?? Object.create(null); + this[EX] = ex; + ex.path = this[META].replace(/\0.*/, ''); + break; + } + case 'NextFileHasLongLinkpath': { + const ex = this[EX] || Object.create(null); + this[EX] = ex; + ex.linkpath = this[META].replace(/\0.*/, ''); + break; + } + /* c8 ignore start */ + default: + throw new Error('unknown meta: ' + entry.type); + /* c8 ignore stop */ + } + } + abort(error) { + this[ABORTED] = true; + this.emit('abort', error); + // always throws, even in non-strict mode + this.warn('TAR_ABORT', error, { recoverable: false }); + } + write(chunk, encoding, cb) { + if (typeof encoding === 'function') { + cb = encoding; + encoding = undefined; + } + if (typeof chunk === 'string') { + chunk = Buffer.from(chunk, + /* c8 ignore next */ + typeof encoding === 'string' ? encoding : 'utf8'); + } + if (this[ABORTED]) { + /* c8 ignore next */ + cb?.(); + return false; + } + // first write, might be gzipped + const needSniff = this[UNZIP] === undefined || + (this.brotli === undefined && this[UNZIP] === false); + if (needSniff && chunk) { + if (this[BUFFER]) { + chunk = Buffer.concat([this[BUFFER], chunk]); + this[BUFFER] = undefined; + } + if (chunk.length < gzipHeader.length) { + this[BUFFER] = chunk; + /* c8 ignore next */ + cb?.(); + return true; + } + // look for gzip header + for (let i = 0; this[UNZIP] === undefined && i < gzipHeader.length; i++) { + if (chunk[i] !== gzipHeader[i]) { + this[UNZIP] = false; + } + } + const maybeBrotli = this.brotli === undefined; + if (this[UNZIP] === false && maybeBrotli) { + // read the first header to see if it's a valid tar file. If so, + // we can safely assume that it's not actually brotli, despite the + // .tbr or .tar.br file extension. + // if we ended before getting a full chunk, yes, def brotli + if (chunk.length < 512) { + if (this[ENDED]) { + this.brotli = true; + } + else { + this[BUFFER] = chunk; + /* c8 ignore next */ + cb?.(); + return true; + } + } + else { + // if it's tar, it's pretty reliably not brotli, chances of + // that happening are astronomical. + try { + new header_js_1.Header(chunk.subarray(0, 512)); + this.brotli = false; + } + catch (_) { + this.brotli = true; + } + } + } + if (this[UNZIP] === undefined || + (this[UNZIP] === false && this.brotli)) { + const ended = this[ENDED]; + this[ENDED] = false; + this[UNZIP] = + this[UNZIP] === undefined ? + new minizlib_1.Unzip({}) + : new minizlib_1.BrotliDecompress({}); + this[UNZIP].on('data', chunk => this[CONSUMECHUNK](chunk)); + this[UNZIP].on('error', er => this.abort(er)); + this[UNZIP].on('end', () => { + this[ENDED] = true; + this[CONSUMECHUNK](); + }); + this[WRITING] = true; + const ret = !!this[UNZIP][ended ? 'end' : 'write'](chunk); + this[WRITING] = false; + cb?.(); + return ret; + } + } + this[WRITING] = true; + if (this[UNZIP]) { + this[UNZIP].write(chunk); + } + else { + this[CONSUMECHUNK](chunk); + } + this[WRITING] = false; + // return false if there's a queue, or if the current entry isn't flowing + const ret = this[QUEUE].length ? false + : this[READENTRY] ? this[READENTRY].flowing + : true; + // if we have no queue, then that means a clogged READENTRY + if (!ret && !this[QUEUE].length) { + this[READENTRY]?.once('drain', () => this.emit('drain')); + } + /* c8 ignore next */ + cb?.(); + return ret; + } + [BUFFERCONCAT](c) { + if (c && !this[ABORTED]) { + this[BUFFER] = + this[BUFFER] ? Buffer.concat([this[BUFFER], c]) : c; + } + } + [MAYBEEND]() { + if (this[ENDED] && + !this[EMITTEDEND] && + !this[ABORTED] && + !this[CONSUMING]) { + this[EMITTEDEND] = true; + const entry = this[WRITEENTRY]; + if (entry && entry.blockRemain) { + // truncated, likely a damaged file + const have = this[BUFFER] ? this[BUFFER].length : 0; + this.warn('TAR_BAD_ARCHIVE', `Truncated input (needed ${entry.blockRemain} more bytes, only ${have} available)`, { entry }); + if (this[BUFFER]) { + entry.write(this[BUFFER]); + } + entry.end(); + } + this[EMIT](DONE); + } + } + [CONSUMECHUNK](chunk) { + if (this[CONSUMING] && chunk) { + this[BUFFERCONCAT](chunk); + } + else if (!chunk && !this[BUFFER]) { + this[MAYBEEND](); + } + else if (chunk) { + this[CONSUMING] = true; + if (this[BUFFER]) { + this[BUFFERCONCAT](chunk); + const c = this[BUFFER]; + this[BUFFER] = undefined; + this[CONSUMECHUNKSUB](c); + } + else { + this[CONSUMECHUNKSUB](chunk); + } + while (this[BUFFER] && + this[BUFFER]?.length >= 512 && + !this[ABORTED] && + !this[SAW_EOF]) { + const c = this[BUFFER]; + this[BUFFER] = undefined; + this[CONSUMECHUNKSUB](c); + } + this[CONSUMING] = false; + } + if (!this[BUFFER] || this[ENDED]) { + this[MAYBEEND](); + } + } + [CONSUMECHUNKSUB](chunk) { + // we know that we are in CONSUMING mode, so anything written goes into + // the buffer. Advance the position and put any remainder in the buffer. + let position = 0; + const length = chunk.length; + while (position + 512 <= length && + !this[ABORTED] && + !this[SAW_EOF]) { + switch (this[STATE]) { + case 'begin': + case 'header': + this[CONSUMEHEADER](chunk, position); + position += 512; + break; + case 'ignore': + case 'body': + position += this[CONSUMEBODY](chunk, position); + break; + case 'meta': + position += this[CONSUMEMETA](chunk, position); + break; + /* c8 ignore start */ + default: + throw new Error('invalid state: ' + this[STATE]); + /* c8 ignore stop */ + } + } + if (position < length) { + if (this[BUFFER]) { + this[BUFFER] = Buffer.concat([ + chunk.subarray(position), + this[BUFFER], + ]); + } + else { + this[BUFFER] = chunk.subarray(position); + } + } + } + end(chunk, encoding, cb) { + if (typeof chunk === 'function') { + cb = chunk; + encoding = undefined; + chunk = undefined; + } + if (typeof encoding === 'function') { + cb = encoding; + encoding = undefined; + } + if (typeof chunk === 'string') { + chunk = Buffer.from(chunk, encoding); + } + if (cb) + this.once('finish', cb); + if (!this[ABORTED]) { + if (this[UNZIP]) { + /* c8 ignore start */ + if (chunk) + this[UNZIP].write(chunk); + /* c8 ignore stop */ + this[UNZIP].end(); + } + else { + this[ENDED] = true; + if (this.brotli === undefined) + chunk = chunk || Buffer.alloc(0); + if (chunk) + this.write(chunk); + this[MAYBEEND](); + } + } + return this; + } +} +exports.Parser = Parser; +//# sourceMappingURL=parse.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/path-reservations.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/path-reservations.js new file mode 100644 index 00000000000000..9ff391c44092c7 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/path-reservations.js @@ -0,0 +1,170 @@ +"use strict"; +// A path exclusive reservation system +// reserve([list, of, paths], fn) +// When the fn is first in line for all its paths, it +// is called with a cb that clears the reservation. +// +// Used by async unpack to avoid clobbering paths in use, +// while still allowing maximal safe parallelization. +Object.defineProperty(exports, "__esModule", { value: true }); +exports.PathReservations = void 0; +const node_path_1 = require("node:path"); +const normalize_unicode_js_1 = require("./normalize-unicode.js"); +const strip_trailing_slashes_js_1 = require("./strip-trailing-slashes.js"); +const platform = process.env.TESTING_TAR_FAKE_PLATFORM || process.platform; +const isWindows = platform === 'win32'; +// return a set of parent dirs for a given path +// '/a/b/c/d' -> ['/', '/a', '/a/b', '/a/b/c', '/a/b/c/d'] +const getDirs = (path) => { + const dirs = path + .split('/') + .slice(0, -1) + .reduce((set, path) => { + const s = set[set.length - 1]; + if (s !== undefined) { + path = (0, node_path_1.join)(s, path); + } + set.push(path || '/'); + return set; + }, []); + return dirs; +}; +class PathReservations { + // path => [function or Set] + // A Set object means a directory reservation + // A fn is a direct reservation on that path + #queues = new Map(); + // fn => {paths:[path,...], dirs:[path, ...]} + #reservations = new Map(); + // functions currently running + #running = new Set(); + reserve(paths, fn) { + paths = + isWindows ? + ['win32 parallelization disabled'] + : paths.map(p => { + // don't need normPath, because we skip this entirely for windows + return (0, strip_trailing_slashes_js_1.stripTrailingSlashes)((0, node_path_1.join)((0, normalize_unicode_js_1.normalizeUnicode)(p))).toLowerCase(); + }); + const dirs = new Set(paths.map(path => getDirs(path)).reduce((a, b) => a.concat(b))); + this.#reservations.set(fn, { dirs, paths }); + for (const p of paths) { + const q = this.#queues.get(p); + if (!q) { + this.#queues.set(p, [fn]); + } + else { + q.push(fn); + } + } + for (const dir of dirs) { + const q = this.#queues.get(dir); + if (!q) { + this.#queues.set(dir, [new Set([fn])]); + } + else { + const l = q[q.length - 1]; + if (l instanceof Set) { + l.add(fn); + } + else { + q.push(new Set([fn])); + } + } + } + return this.#run(fn); + } + // return the queues for each path the function cares about + // fn => {paths, dirs} + #getQueues(fn) { + const res = this.#reservations.get(fn); + /* c8 ignore start */ + if (!res) { + throw new Error('function does not have any path reservations'); + } + /* c8 ignore stop */ + return { + paths: res.paths.map((path) => this.#queues.get(path)), + dirs: [...res.dirs].map(path => this.#queues.get(path)), + }; + } + // check if fn is first in line for all its paths, and is + // included in the first set for all its dir queues + check(fn) { + const { paths, dirs } = this.#getQueues(fn); + return (paths.every(q => q && q[0] === fn) && + dirs.every(q => q && q[0] instanceof Set && q[0].has(fn))); + } + // run the function if it's first in line and not already running + #run(fn) { + if (this.#running.has(fn) || !this.check(fn)) { + return false; + } + this.#running.add(fn); + fn(() => this.#clear(fn)); + return true; + } + #clear(fn) { + if (!this.#running.has(fn)) { + return false; + } + const res = this.#reservations.get(fn); + /* c8 ignore start */ + if (!res) { + throw new Error('invalid reservation'); + } + /* c8 ignore stop */ + const { paths, dirs } = res; + const next = new Set(); + for (const path of paths) { + const q = this.#queues.get(path); + /* c8 ignore start */ + if (!q || q?.[0] !== fn) { + continue; + } + /* c8 ignore stop */ + const q0 = q[1]; + if (!q0) { + this.#queues.delete(path); + continue; + } + q.shift(); + if (typeof q0 === 'function') { + next.add(q0); + } + else { + for (const f of q0) { + next.add(f); + } + } + } + for (const dir of dirs) { + const q = this.#queues.get(dir); + const q0 = q?.[0]; + /* c8 ignore next - type safety only */ + if (!q || !(q0 instanceof Set)) + continue; + if (q0.size === 1 && q.length === 1) { + this.#queues.delete(dir); + continue; + } + else if (q0.size === 1) { + q.shift(); + // next one must be a function, + // or else the Set would've been reused + const n = q[0]; + if (typeof n === 'function') { + next.add(n); + } + } + else { + q0.delete(fn); + } + } + this.#running.delete(fn); + next.forEach(fn => this.#run(fn)); + return true; + } +} +exports.PathReservations = PathReservations; +//# sourceMappingURL=path-reservations.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/pax.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/pax.js new file mode 100644 index 00000000000000..d30c0f3efbe9ea --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/pax.js @@ -0,0 +1,158 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.Pax = void 0; +const node_path_1 = require("node:path"); +const header_js_1 = require("./header.js"); +class Pax { + atime; + mtime; + ctime; + charset; + comment; + gid; + uid; + gname; + uname; + linkpath; + dev; + ino; + nlink; + path; + size; + mode; + global; + constructor(obj, global = false) { + this.atime = obj.atime; + this.charset = obj.charset; + this.comment = obj.comment; + this.ctime = obj.ctime; + this.dev = obj.dev; + this.gid = obj.gid; + this.global = global; + this.gname = obj.gname; + this.ino = obj.ino; + this.linkpath = obj.linkpath; + this.mtime = obj.mtime; + this.nlink = obj.nlink; + this.path = obj.path; + this.size = obj.size; + this.uid = obj.uid; + this.uname = obj.uname; + } + encode() { + const body = this.encodeBody(); + if (body === '') { + return Buffer.allocUnsafe(0); + } + const bodyLen = Buffer.byteLength(body); + // round up to 512 bytes + // add 512 for header + const bufLen = 512 * Math.ceil(1 + bodyLen / 512); + const buf = Buffer.allocUnsafe(bufLen); + // 0-fill the header section, it might not hit every field + for (let i = 0; i < 512; i++) { + buf[i] = 0; + } + new header_js_1.Header({ + // XXX split the path + // then the path should be PaxHeader + basename, but less than 99, + // prepend with the dirname + /* c8 ignore start */ + path: ('PaxHeader/' + (0, node_path_1.basename)(this.path ?? '')).slice(0, 99), + /* c8 ignore stop */ + mode: this.mode || 0o644, + uid: this.uid, + gid: this.gid, + size: bodyLen, + mtime: this.mtime, + type: this.global ? 'GlobalExtendedHeader' : 'ExtendedHeader', + linkpath: '', + uname: this.uname || '', + gname: this.gname || '', + devmaj: 0, + devmin: 0, + atime: this.atime, + ctime: this.ctime, + }).encode(buf); + buf.write(body, 512, bodyLen, 'utf8'); + // null pad after the body + for (let i = bodyLen + 512; i < buf.length; i++) { + buf[i] = 0; + } + return buf; + } + encodeBody() { + return (this.encodeField('path') + + this.encodeField('ctime') + + this.encodeField('atime') + + this.encodeField('dev') + + this.encodeField('ino') + + this.encodeField('nlink') + + this.encodeField('charset') + + this.encodeField('comment') + + this.encodeField('gid') + + this.encodeField('gname') + + this.encodeField('linkpath') + + this.encodeField('mtime') + + this.encodeField('size') + + this.encodeField('uid') + + this.encodeField('uname')); + } + encodeField(field) { + if (this[field] === undefined) { + return ''; + } + const r = this[field]; + const v = r instanceof Date ? r.getTime() / 1000 : r; + const s = ' ' + + (field === 'dev' || field === 'ino' || field === 'nlink' ? + 'SCHILY.' + : '') + + field + + '=' + + v + + '\n'; + const byteLen = Buffer.byteLength(s); + // the digits includes the length of the digits in ascii base-10 + // so if it's 9 characters, then adding 1 for the 9 makes it 10 + // which makes it 11 chars. + let digits = Math.floor(Math.log(byteLen) / Math.log(10)) + 1; + if (byteLen + digits >= Math.pow(10, digits)) { + digits += 1; + } + const len = digits + byteLen; + return len + s; + } + static parse(str, ex, g = false) { + return new Pax(merge(parseKV(str), ex), g); + } +} +exports.Pax = Pax; +const merge = (a, b) => b ? Object.assign({}, b, a) : a; +const parseKV = (str) => str + .replace(/\n$/, '') + .split('\n') + .reduce(parseKVLine, Object.create(null)); +const parseKVLine = (set, line) => { + const n = parseInt(line, 10); + // XXX Values with \n in them will fail this. + // Refactor to not be a naive line-by-line parse. + if (n !== Buffer.byteLength(line) + 1) { + return set; + } + line = line.slice((n + ' ').length); + const kv = line.split('='); + const r = kv.shift(); + if (!r) { + return set; + } + const k = r.replace(/^SCHILY\.(dev|ino|nlink)/, '$1'); + const v = kv.join('='); + set[k] = + /^([A-Z]+\.)?([mac]|birth|creation)time$/.test(k) ? + new Date(Number(v) * 1000) + : /^[0-9]+$/.test(v) ? +v + : v; + return set; +}; +//# sourceMappingURL=pax.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/read-entry.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/read-entry.js new file mode 100644 index 00000000000000..15e2d55c938a43 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/read-entry.js @@ -0,0 +1,140 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.ReadEntry = void 0; +const minipass_1 = require("minipass"); +const normalize_windows_path_js_1 = require("./normalize-windows-path.js"); +class ReadEntry extends minipass_1.Minipass { + extended; + globalExtended; + header; + startBlockSize; + blockRemain; + remain; + type; + meta = false; + ignore = false; + path; + mode; + uid; + gid; + uname; + gname; + size = 0; + mtime; + atime; + ctime; + linkpath; + dev; + ino; + nlink; + invalid = false; + absolute; + unsupported = false; + constructor(header, ex, gex) { + super({}); + // read entries always start life paused. this is to avoid the + // situation where Minipass's auto-ending empty streams results + // in an entry ending before we're ready for it. + this.pause(); + this.extended = ex; + this.globalExtended = gex; + this.header = header; + /* c8 ignore start */ + this.remain = header.size ?? 0; + /* c8 ignore stop */ + this.startBlockSize = 512 * Math.ceil(this.remain / 512); + this.blockRemain = this.startBlockSize; + this.type = header.type; + switch (this.type) { + case 'File': + case 'OldFile': + case 'Link': + case 'SymbolicLink': + case 'CharacterDevice': + case 'BlockDevice': + case 'Directory': + case 'FIFO': + case 'ContiguousFile': + case 'GNUDumpDir': + break; + case 'NextFileHasLongLinkpath': + case 'NextFileHasLongPath': + case 'OldGnuLongPath': + case 'GlobalExtendedHeader': + case 'ExtendedHeader': + case 'OldExtendedHeader': + this.meta = true; + break; + // NOTE: gnutar and bsdtar treat unrecognized types as 'File' + // it may be worth doing the same, but with a warning. + default: + this.ignore = true; + } + /* c8 ignore start */ + if (!header.path) { + throw new Error('no path provided for tar.ReadEntry'); + } + /* c8 ignore stop */ + this.path = (0, normalize_windows_path_js_1.normalizeWindowsPath)(header.path); + this.mode = header.mode; + if (this.mode) { + this.mode = this.mode & 0o7777; + } + this.uid = header.uid; + this.gid = header.gid; + this.uname = header.uname; + this.gname = header.gname; + this.size = this.remain; + this.mtime = header.mtime; + this.atime = header.atime; + this.ctime = header.ctime; + /* c8 ignore start */ + this.linkpath = + header.linkpath ? + (0, normalize_windows_path_js_1.normalizeWindowsPath)(header.linkpath) + : undefined; + /* c8 ignore stop */ + this.uname = header.uname; + this.gname = header.gname; + if (ex) { + this.#slurp(ex); + } + if (gex) { + this.#slurp(gex, true); + } + } + write(data) { + const writeLen = data.length; + if (writeLen > this.blockRemain) { + throw new Error('writing more to entry than is appropriate'); + } + const r = this.remain; + const br = this.blockRemain; + this.remain = Math.max(0, r - writeLen); + this.blockRemain = Math.max(0, br - writeLen); + if (this.ignore) { + return true; + } + if (r >= writeLen) { + return super.write(data); + } + // r < writeLen + return super.write(data.subarray(0, r)); + } + #slurp(ex, gex = false) { + if (ex.path) + ex.path = (0, normalize_windows_path_js_1.normalizeWindowsPath)(ex.path); + if (ex.linkpath) + ex.linkpath = (0, normalize_windows_path_js_1.normalizeWindowsPath)(ex.linkpath); + Object.assign(this, Object.fromEntries(Object.entries(ex).filter(([k, v]) => { + // we slurp in everything except for the path attribute in + // a global extended header, because that's weird. Also, any + // null/undefined values are ignored. + return !(v === null || + v === undefined || + (k === 'path' && gex)); + }))); + } +} +exports.ReadEntry = ReadEntry; +//# sourceMappingURL=read-entry.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/replace.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/replace.js new file mode 100644 index 00000000000000..22eff246d4d75f --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/replace.js @@ -0,0 +1,231 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.replace = void 0; +// tar -r +const fs_minipass_1 = require("@isaacs/fs-minipass"); +const node_fs_1 = __importDefault(require("node:fs")); +const node_path_1 = __importDefault(require("node:path")); +const header_js_1 = require("./header.js"); +const list_js_1 = require("./list.js"); +const make_command_js_1 = require("./make-command.js"); +const options_js_1 = require("./options.js"); +const pack_js_1 = require("./pack.js"); +// starting at the head of the file, read a Header +// If the checksum is invalid, that's our position to start writing +// If it is, jump forward by the specified size (round up to 512) +// and try again. +// Write the new Pack stream starting there. +const replaceSync = (opt, files) => { + const p = new pack_js_1.PackSync(opt); + let threw = true; + let fd; + let position; + try { + try { + fd = node_fs_1.default.openSync(opt.file, 'r+'); + } + catch (er) { + if (er?.code === 'ENOENT') { + fd = node_fs_1.default.openSync(opt.file, 'w+'); + } + else { + throw er; + } + } + const st = node_fs_1.default.fstatSync(fd); + const headBuf = Buffer.alloc(512); + POSITION: for (position = 0; position < st.size; position += 512) { + for (let bufPos = 0, bytes = 0; bufPos < 512; bufPos += bytes) { + bytes = node_fs_1.default.readSync(fd, headBuf, bufPos, headBuf.length - bufPos, position + bufPos); + if (position === 0 && + headBuf[0] === 0x1f && + headBuf[1] === 0x8b) { + throw new Error('cannot append to compressed archives'); + } + if (!bytes) { + break POSITION; + } + } + const h = new header_js_1.Header(headBuf); + if (!h.cksumValid) { + break; + } + const entryBlockSize = 512 * Math.ceil((h.size || 0) / 512); + if (position + entryBlockSize + 512 > st.size) { + break; + } + // the 512 for the header we just parsed will be added as well + // also jump ahead all the blocks for the body + position += entryBlockSize; + if (opt.mtimeCache && h.mtime) { + opt.mtimeCache.set(String(h.path), h.mtime); + } + } + threw = false; + streamSync(opt, p, position, fd, files); + } + finally { + if (threw) { + try { + node_fs_1.default.closeSync(fd); + } + catch (er) { } + } + } +}; +const streamSync = (opt, p, position, fd, files) => { + const stream = new fs_minipass_1.WriteStreamSync(opt.file, { + fd: fd, + start: position, + }); + p.pipe(stream); + addFilesSync(p, files); +}; +const replaceAsync = (opt, files) => { + files = Array.from(files); + const p = new pack_js_1.Pack(opt); + const getPos = (fd, size, cb_) => { + const cb = (er, pos) => { + if (er) { + node_fs_1.default.close(fd, _ => cb_(er)); + } + else { + cb_(null, pos); + } + }; + let position = 0; + if (size === 0) { + return cb(null, 0); + } + let bufPos = 0; + const headBuf = Buffer.alloc(512); + const onread = (er, bytes) => { + if (er || typeof bytes === 'undefined') { + return cb(er); + } + bufPos += bytes; + if (bufPos < 512 && bytes) { + return node_fs_1.default.read(fd, headBuf, bufPos, headBuf.length - bufPos, position + bufPos, onread); + } + if (position === 0 && + headBuf[0] === 0x1f && + headBuf[1] === 0x8b) { + return cb(new Error('cannot append to compressed archives')); + } + // truncated header + if (bufPos < 512) { + return cb(null, position); + } + const h = new header_js_1.Header(headBuf); + if (!h.cksumValid) { + return cb(null, position); + } + /* c8 ignore next */ + const entryBlockSize = 512 * Math.ceil((h.size ?? 0) / 512); + if (position + entryBlockSize + 512 > size) { + return cb(null, position); + } + position += entryBlockSize + 512; + if (position >= size) { + return cb(null, position); + } + if (opt.mtimeCache && h.mtime) { + opt.mtimeCache.set(String(h.path), h.mtime); + } + bufPos = 0; + node_fs_1.default.read(fd, headBuf, 0, 512, position, onread); + }; + node_fs_1.default.read(fd, headBuf, 0, 512, position, onread); + }; + const promise = new Promise((resolve, reject) => { + p.on('error', reject); + let flag = 'r+'; + const onopen = (er, fd) => { + if (er && er.code === 'ENOENT' && flag === 'r+') { + flag = 'w+'; + return node_fs_1.default.open(opt.file, flag, onopen); + } + if (er || !fd) { + return reject(er); + } + node_fs_1.default.fstat(fd, (er, st) => { + if (er) { + return node_fs_1.default.close(fd, () => reject(er)); + } + getPos(fd, st.size, (er, position) => { + if (er) { + return reject(er); + } + const stream = new fs_minipass_1.WriteStream(opt.file, { + fd: fd, + start: position, + }); + p.pipe(stream); + stream.on('error', reject); + stream.on('close', resolve); + addFilesAsync(p, files); + }); + }); + }; + node_fs_1.default.open(opt.file, flag, onopen); + }); + return promise; +}; +const addFilesSync = (p, files) => { + files.forEach(file => { + if (file.charAt(0) === '@') { + (0, list_js_1.list)({ + file: node_path_1.default.resolve(p.cwd, file.slice(1)), + sync: true, + noResume: true, + onReadEntry: entry => p.add(entry), + }); + } + else { + p.add(file); + } + }); + p.end(); +}; +const addFilesAsync = async (p, files) => { + for (let i = 0; i < files.length; i++) { + const file = String(files[i]); + if (file.charAt(0) === '@') { + await (0, list_js_1.list)({ + file: node_path_1.default.resolve(String(p.cwd), file.slice(1)), + noResume: true, + onReadEntry: entry => p.add(entry), + }); + } + else { + p.add(file); + } + } + p.end(); +}; +exports.replace = (0, make_command_js_1.makeCommand)(replaceSync, replaceAsync, +/* c8 ignore start */ +() => { + throw new TypeError('file is required'); +}, () => { + throw new TypeError('file is required'); +}, +/* c8 ignore stop */ +(opt, entries) => { + if (!(0, options_js_1.isFile)(opt)) { + throw new TypeError('file is required'); + } + if (opt.gzip || + opt.brotli || + opt.file.endsWith('.br') || + opt.file.endsWith('.tbr')) { + throw new TypeError('cannot append to compressed archives'); + } + if (!entries?.length) { + throw new TypeError('no paths specified to add/replace'); + } +}); +//# sourceMappingURL=replace.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/strip-absolute-path.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/strip-absolute-path.js new file mode 100644 index 00000000000000..bb7639c35a1104 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/strip-absolute-path.js @@ -0,0 +1,29 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.stripAbsolutePath = void 0; +// unix absolute paths are also absolute on win32, so we use this for both +const node_path_1 = require("node:path"); +const { isAbsolute, parse } = node_path_1.win32; +// returns [root, stripped] +// Note that windows will think that //x/y/z/a has a "root" of //x/y, and in +// those cases, we want to sanitize it to x/y/z/a, not z/a, so we strip / +// explicitly if it's the first character. +// drive-specific relative paths on Windows get their root stripped off even +// though they are not absolute, so `c:../foo` becomes ['c:', '../foo'] +const stripAbsolutePath = (path) => { + let r = ''; + let parsed = parse(path); + while (isAbsolute(path) || parsed.root) { + // windows will think that //x/y/z has a "root" of //x/y/ + // but strip the //?/C:/ off of //?/C:/path + const root = path.charAt(0) === '/' && path.slice(0, 4) !== '//?/' ? + '/' + : parsed.root; + path = path.slice(root.length); + r += root; + parsed = parse(path); + } + return [r, path]; +}; +exports.stripAbsolutePath = stripAbsolutePath; +//# sourceMappingURL=strip-absolute-path.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/strip-trailing-slashes.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/strip-trailing-slashes.js new file mode 100644 index 00000000000000..6fa74ad6a4ac93 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/strip-trailing-slashes.js @@ -0,0 +1,18 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.stripTrailingSlashes = void 0; +// warning: extremely hot code path. +// This has been meticulously optimized for use +// within npm install on large package trees. +// Do not edit without careful benchmarking. +const stripTrailingSlashes = (str) => { + let i = str.length - 1; + let slashesStart = -1; + while (i > -1 && str.charAt(i) === '/') { + slashesStart = i; + i--; + } + return slashesStart === -1 ? str : str.slice(0, slashesStart); +}; +exports.stripTrailingSlashes = stripTrailingSlashes; +//# sourceMappingURL=strip-trailing-slashes.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/symlink-error.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/symlink-error.js new file mode 100644 index 00000000000000..cc19ac1a2e3c6b --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/symlink-error.js @@ -0,0 +1,19 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.SymlinkError = void 0; +class SymlinkError extends Error { + path; + symlink; + syscall = 'symlink'; + code = 'TAR_SYMLINK_ERROR'; + constructor(symlink, path) { + super('TAR_SYMLINK_ERROR: Cannot extract through symbolic link'); + this.symlink = symlink; + this.path = path; + } + get name() { + return 'SymlinkError'; + } +} +exports.SymlinkError = SymlinkError; +//# sourceMappingURL=symlink-error.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/types.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/types.js new file mode 100644 index 00000000000000..cb9b684e843b72 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/types.js @@ -0,0 +1,50 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.code = exports.name = exports.isName = exports.isCode = void 0; +const isCode = (c) => exports.name.has(c); +exports.isCode = isCode; +const isName = (c) => exports.code.has(c); +exports.isName = isName; +// map types from key to human-friendly name +exports.name = new Map([ + ['0', 'File'], + // same as File + ['', 'OldFile'], + ['1', 'Link'], + ['2', 'SymbolicLink'], + // Devices and FIFOs aren't fully supported + // they are parsed, but skipped when unpacking + ['3', 'CharacterDevice'], + ['4', 'BlockDevice'], + ['5', 'Directory'], + ['6', 'FIFO'], + // same as File + ['7', 'ContiguousFile'], + // pax headers + ['g', 'GlobalExtendedHeader'], + ['x', 'ExtendedHeader'], + // vendor-specific stuff + // skip + ['A', 'SolarisACL'], + // like 5, but with data, which should be skipped + ['D', 'GNUDumpDir'], + // metadata only, skip + ['I', 'Inode'], + // data = link path of next file + ['K', 'NextFileHasLongLinkpath'], + // data = path of next file + ['L', 'NextFileHasLongPath'], + // skip + ['M', 'ContinuationFile'], + // like L + ['N', 'OldGnuLongPath'], + // skip + ['S', 'SparseFile'], + // skip + ['V', 'TapeVolumeHeader'], + // like x + ['X', 'OldExtendedHeader'], +]); +// map the other direction +exports.code = new Map(Array.from(exports.name).map(kv => [kv[1], kv[0]])); +//# sourceMappingURL=types.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/unpack.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/unpack.js new file mode 100644 index 00000000000000..edf8acbb18c408 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/unpack.js @@ -0,0 +1,919 @@ +"use strict"; +// the PEND/UNPEND stuff tracks whether we're ready to emit end/close yet. +// but the path reservations are required to avoid race conditions where +// parallelized unpack ops may mess with one another, due to dependencies +// (like a Link depending on its target) or destructive operations (like +// clobbering an fs object to create one of a different type.) +var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + var desc = Object.getOwnPropertyDescriptor(m, k); + if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { + desc = { enumerable: true, get: function() { return m[k]; } }; + } + Object.defineProperty(o, k2, desc); +}) : (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + o[k2] = m[k]; +})); +var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { + Object.defineProperty(o, "default", { enumerable: true, value: v }); +}) : function(o, v) { + o["default"] = v; +}); +var __importStar = (this && this.__importStar) || function (mod) { + if (mod && mod.__esModule) return mod; + var result = {}; + if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); + __setModuleDefault(result, mod); + return result; +}; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.UnpackSync = exports.Unpack = void 0; +const fsm = __importStar(require("@isaacs/fs-minipass")); +const node_assert_1 = __importDefault(require("node:assert")); +const node_crypto_1 = require("node:crypto"); +const node_fs_1 = __importDefault(require("node:fs")); +const node_path_1 = __importDefault(require("node:path")); +const get_write_flag_js_1 = require("./get-write-flag.js"); +const mkdir_js_1 = require("./mkdir.js"); +const normalize_unicode_js_1 = require("./normalize-unicode.js"); +const normalize_windows_path_js_1 = require("./normalize-windows-path.js"); +const parse_js_1 = require("./parse.js"); +const strip_absolute_path_js_1 = require("./strip-absolute-path.js"); +const strip_trailing_slashes_js_1 = require("./strip-trailing-slashes.js"); +const wc = __importStar(require("./winchars.js")); +const path_reservations_js_1 = require("./path-reservations.js"); +const ONENTRY = Symbol('onEntry'); +const CHECKFS = Symbol('checkFs'); +const CHECKFS2 = Symbol('checkFs2'); +const PRUNECACHE = Symbol('pruneCache'); +const ISREUSABLE = Symbol('isReusable'); +const MAKEFS = Symbol('makeFs'); +const FILE = Symbol('file'); +const DIRECTORY = Symbol('directory'); +const LINK = Symbol('link'); +const SYMLINK = Symbol('symlink'); +const HARDLINK = Symbol('hardlink'); +const UNSUPPORTED = Symbol('unsupported'); +const CHECKPATH = Symbol('checkPath'); +const MKDIR = Symbol('mkdir'); +const ONERROR = Symbol('onError'); +const PENDING = Symbol('pending'); +const PEND = Symbol('pend'); +const UNPEND = Symbol('unpend'); +const ENDED = Symbol('ended'); +const MAYBECLOSE = Symbol('maybeClose'); +const SKIP = Symbol('skip'); +const DOCHOWN = Symbol('doChown'); +const UID = Symbol('uid'); +const GID = Symbol('gid'); +const CHECKED_CWD = Symbol('checkedCwd'); +const platform = process.env.TESTING_TAR_FAKE_PLATFORM || process.platform; +const isWindows = platform === 'win32'; +const DEFAULT_MAX_DEPTH = 1024; +// Unlinks on Windows are not atomic. +// +// This means that if you have a file entry, followed by another +// file entry with an identical name, and you cannot re-use the file +// (because it's a hardlink, or because unlink:true is set, or it's +// Windows, which does not have useful nlink values), then the unlink +// will be committed to the disk AFTER the new file has been written +// over the old one, deleting the new file. +// +// To work around this, on Windows systems, we rename the file and then +// delete the renamed file. It's a sloppy kludge, but frankly, I do not +// know of a better way to do this, given windows' non-atomic unlink +// semantics. +// +// See: https://github.com/npm/node-tar/issues/183 +/* c8 ignore start */ +const unlinkFile = (path, cb) => { + if (!isWindows) { + return node_fs_1.default.unlink(path, cb); + } + const name = path + '.DELETE.' + (0, node_crypto_1.randomBytes)(16).toString('hex'); + node_fs_1.default.rename(path, name, er => { + if (er) { + return cb(er); + } + node_fs_1.default.unlink(name, cb); + }); +}; +/* c8 ignore stop */ +/* c8 ignore start */ +const unlinkFileSync = (path) => { + if (!isWindows) { + return node_fs_1.default.unlinkSync(path); + } + const name = path + '.DELETE.' + (0, node_crypto_1.randomBytes)(16).toString('hex'); + node_fs_1.default.renameSync(path, name); + node_fs_1.default.unlinkSync(name); +}; +/* c8 ignore stop */ +// this.gid, entry.gid, this.processUid +const uint32 = (a, b, c) => a !== undefined && a === a >>> 0 ? a + : b !== undefined && b === b >>> 0 ? b + : c; +// clear the cache if it's a case-insensitive unicode-squashing match. +// we can't know if the current file system is case-sensitive or supports +// unicode fully, so we check for similarity on the maximally compatible +// representation. Err on the side of pruning, since all it's doing is +// preventing lstats, and it's not the end of the world if we get a false +// positive. +// Note that on windows, we always drop the entire cache whenever a +// symbolic link is encountered, because 8.3 filenames are impossible +// to reason about, and collisions are hazards rather than just failures. +const cacheKeyNormalize = (path) => (0, strip_trailing_slashes_js_1.stripTrailingSlashes)((0, normalize_windows_path_js_1.normalizeWindowsPath)((0, normalize_unicode_js_1.normalizeUnicode)(path))).toLowerCase(); +// remove all cache entries matching ${abs}/** +const pruneCache = (cache, abs) => { + abs = cacheKeyNormalize(abs); + for (const path of cache.keys()) { + const pnorm = cacheKeyNormalize(path); + if (pnorm === abs || pnorm.indexOf(abs + '/') === 0) { + cache.delete(path); + } + } +}; +const dropCache = (cache) => { + for (const key of cache.keys()) { + cache.delete(key); + } +}; +class Unpack extends parse_js_1.Parser { + [ENDED] = false; + [CHECKED_CWD] = false; + [PENDING] = 0; + reservations = new path_reservations_js_1.PathReservations(); + transform; + writable = true; + readable = false; + dirCache; + uid; + gid; + setOwner; + preserveOwner; + processGid; + processUid; + maxDepth; + forceChown; + win32; + newer; + keep; + noMtime; + preservePaths; + unlink; + cwd; + strip; + processUmask; + umask; + dmode; + fmode; + chmod; + constructor(opt = {}) { + opt.ondone = () => { + this[ENDED] = true; + this[MAYBECLOSE](); + }; + super(opt); + this.transform = opt.transform; + this.dirCache = opt.dirCache || new Map(); + this.chmod = !!opt.chmod; + if (typeof opt.uid === 'number' || typeof opt.gid === 'number') { + // need both or neither + if (typeof opt.uid !== 'number' || + typeof opt.gid !== 'number') { + throw new TypeError('cannot set owner without number uid and gid'); + } + if (opt.preserveOwner) { + throw new TypeError('cannot preserve owner in archive and also set owner explicitly'); + } + this.uid = opt.uid; + this.gid = opt.gid; + this.setOwner = true; + } + else { + this.uid = undefined; + this.gid = undefined; + this.setOwner = false; + } + // default true for root + if (opt.preserveOwner === undefined && + typeof opt.uid !== 'number') { + this.preserveOwner = !!(process.getuid && process.getuid() === 0); + } + else { + this.preserveOwner = !!opt.preserveOwner; + } + this.processUid = + (this.preserveOwner || this.setOwner) && process.getuid ? + process.getuid() + : undefined; + this.processGid = + (this.preserveOwner || this.setOwner) && process.getgid ? + process.getgid() + : undefined; + // prevent excessively deep nesting of subfolders + // set to `Infinity` to remove this restriction + this.maxDepth = + typeof opt.maxDepth === 'number' ? + opt.maxDepth + : DEFAULT_MAX_DEPTH; + // mostly just for testing, but useful in some cases. + // Forcibly trigger a chown on every entry, no matter what + this.forceChown = opt.forceChown === true; + // turn > this[ONENTRY](entry)); + } + // a bad or damaged archive is a warning for Parser, but an error + // when extracting. Mark those errors as unrecoverable, because + // the Unpack contract cannot be met. + warn(code, msg, data = {}) { + if (code === 'TAR_BAD_ARCHIVE' || code === 'TAR_ABORT') { + data.recoverable = false; + } + return super.warn(code, msg, data); + } + [MAYBECLOSE]() { + if (this[ENDED] && this[PENDING] === 0) { + this.emit('prefinish'); + this.emit('finish'); + this.emit('end'); + } + } + [CHECKPATH](entry) { + const p = (0, normalize_windows_path_js_1.normalizeWindowsPath)(entry.path); + const parts = p.split('/'); + if (this.strip) { + if (parts.length < this.strip) { + return false; + } + if (entry.type === 'Link') { + const linkparts = (0, normalize_windows_path_js_1.normalizeWindowsPath)(String(entry.linkpath)).split('/'); + if (linkparts.length >= this.strip) { + entry.linkpath = linkparts.slice(this.strip).join('/'); + } + else { + return false; + } + } + parts.splice(0, this.strip); + entry.path = parts.join('/'); + } + if (isFinite(this.maxDepth) && parts.length > this.maxDepth) { + this.warn('TAR_ENTRY_ERROR', 'path excessively deep', { + entry, + path: p, + depth: parts.length, + maxDepth: this.maxDepth, + }); + return false; + } + if (!this.preservePaths) { + if (parts.includes('..') || + /* c8 ignore next */ + (isWindows && /^[a-z]:\.\.$/i.test(parts[0] ?? ''))) { + this.warn('TAR_ENTRY_ERROR', `path contains '..'`, { + entry, + path: p, + }); + return false; + } + // strip off the root + const [root, stripped] = (0, strip_absolute_path_js_1.stripAbsolutePath)(p); + if (root) { + entry.path = String(stripped); + this.warn('TAR_ENTRY_INFO', `stripping ${root} from absolute path`, { + entry, + path: p, + }); + } + } + if (node_path_1.default.isAbsolute(entry.path)) { + entry.absolute = (0, normalize_windows_path_js_1.normalizeWindowsPath)(node_path_1.default.resolve(entry.path)); + } + else { + entry.absolute = (0, normalize_windows_path_js_1.normalizeWindowsPath)(node_path_1.default.resolve(this.cwd, entry.path)); + } + // if we somehow ended up with a path that escapes the cwd, and we are + // not in preservePaths mode, then something is fishy! This should have + // been prevented above, so ignore this for coverage. + /* c8 ignore start - defense in depth */ + if (!this.preservePaths && + typeof entry.absolute === 'string' && + entry.absolute.indexOf(this.cwd + '/') !== 0 && + entry.absolute !== this.cwd) { + this.warn('TAR_ENTRY_ERROR', 'path escaped extraction target', { + entry, + path: (0, normalize_windows_path_js_1.normalizeWindowsPath)(entry.path), + resolvedPath: entry.absolute, + cwd: this.cwd, + }); + return false; + } + /* c8 ignore stop */ + // an archive can set properties on the extraction directory, but it + // may not replace the cwd with a different kind of thing entirely. + if (entry.absolute === this.cwd && + entry.type !== 'Directory' && + entry.type !== 'GNUDumpDir') { + return false; + } + // only encode : chars that aren't drive letter indicators + if (this.win32) { + const { root: aRoot } = node_path_1.default.win32.parse(String(entry.absolute)); + entry.absolute = + aRoot + wc.encode(String(entry.absolute).slice(aRoot.length)); + const { root: pRoot } = node_path_1.default.win32.parse(entry.path); + entry.path = pRoot + wc.encode(entry.path.slice(pRoot.length)); + } + return true; + } + [ONENTRY](entry) { + if (!this[CHECKPATH](entry)) { + return entry.resume(); + } + node_assert_1.default.equal(typeof entry.absolute, 'string'); + switch (entry.type) { + case 'Directory': + case 'GNUDumpDir': + if (entry.mode) { + entry.mode = entry.mode | 0o700; + } + // eslint-disable-next-line no-fallthrough + case 'File': + case 'OldFile': + case 'ContiguousFile': + case 'Link': + case 'SymbolicLink': + return this[CHECKFS](entry); + case 'CharacterDevice': + case 'BlockDevice': + case 'FIFO': + default: + return this[UNSUPPORTED](entry); + } + } + [ONERROR](er, entry) { + // Cwd has to exist, or else nothing works. That's serious. + // Other errors are warnings, which raise the error in strict + // mode, but otherwise continue on. + if (er.name === 'CwdError') { + this.emit('error', er); + } + else { + this.warn('TAR_ENTRY_ERROR', er, { entry }); + this[UNPEND](); + entry.resume(); + } + } + [MKDIR](dir, mode, cb) { + (0, mkdir_js_1.mkdir)((0, normalize_windows_path_js_1.normalizeWindowsPath)(dir), { + uid: this.uid, + gid: this.gid, + processUid: this.processUid, + processGid: this.processGid, + umask: this.processUmask, + preserve: this.preservePaths, + unlink: this.unlink, + cache: this.dirCache, + cwd: this.cwd, + mode: mode, + }, cb); + } + [DOCHOWN](entry) { + // in preserve owner mode, chown if the entry doesn't match process + // in set owner mode, chown if setting doesn't match process + return (this.forceChown || + (this.preserveOwner && + ((typeof entry.uid === 'number' && + entry.uid !== this.processUid) || + (typeof entry.gid === 'number' && + entry.gid !== this.processGid))) || + (typeof this.uid === 'number' && + this.uid !== this.processUid) || + (typeof this.gid === 'number' && this.gid !== this.processGid)); + } + [UID](entry) { + return uint32(this.uid, entry.uid, this.processUid); + } + [GID](entry) { + return uint32(this.gid, entry.gid, this.processGid); + } + [FILE](entry, fullyDone) { + const mode = typeof entry.mode === 'number' ? + entry.mode & 0o7777 + : this.fmode; + const stream = new fsm.WriteStream(String(entry.absolute), { + // slight lie, but it can be numeric flags + flags: (0, get_write_flag_js_1.getWriteFlag)(entry.size), + mode: mode, + autoClose: false, + }); + stream.on('error', (er) => { + if (stream.fd) { + node_fs_1.default.close(stream.fd, () => { }); + } + // flush all the data out so that we aren't left hanging + // if the error wasn't actually fatal. otherwise the parse + // is blocked, and we never proceed. + stream.write = () => true; + this[ONERROR](er, entry); + fullyDone(); + }); + let actions = 1; + const done = (er) => { + if (er) { + /* c8 ignore start - we should always have a fd by now */ + if (stream.fd) { + node_fs_1.default.close(stream.fd, () => { }); + } + /* c8 ignore stop */ + this[ONERROR](er, entry); + fullyDone(); + return; + } + if (--actions === 0) { + if (stream.fd !== undefined) { + node_fs_1.default.close(stream.fd, er => { + if (er) { + this[ONERROR](er, entry); + } + else { + this[UNPEND](); + } + fullyDone(); + }); + } + } + }; + stream.on('finish', () => { + // if futimes fails, try utimes + // if utimes fails, fail with the original error + // same for fchown/chown + const abs = String(entry.absolute); + const fd = stream.fd; + if (typeof fd === 'number' && entry.mtime && !this.noMtime) { + actions++; + const atime = entry.atime || new Date(); + const mtime = entry.mtime; + node_fs_1.default.futimes(fd, atime, mtime, er => er ? + node_fs_1.default.utimes(abs, atime, mtime, er2 => done(er2 && er)) + : done()); + } + if (typeof fd === 'number' && this[DOCHOWN](entry)) { + actions++; + const uid = this[UID](entry); + const gid = this[GID](entry); + if (typeof uid === 'number' && typeof gid === 'number') { + node_fs_1.default.fchown(fd, uid, gid, er => er ? + node_fs_1.default.chown(abs, uid, gid, er2 => done(er2 && er)) + : done()); + } + } + done(); + }); + const tx = this.transform ? this.transform(entry) || entry : entry; + if (tx !== entry) { + tx.on('error', (er) => { + this[ONERROR](er, entry); + fullyDone(); + }); + entry.pipe(tx); + } + tx.pipe(stream); + } + [DIRECTORY](entry, fullyDone) { + const mode = typeof entry.mode === 'number' ? + entry.mode & 0o7777 + : this.dmode; + this[MKDIR](String(entry.absolute), mode, er => { + if (er) { + this[ONERROR](er, entry); + fullyDone(); + return; + } + let actions = 1; + const done = () => { + if (--actions === 0) { + fullyDone(); + this[UNPEND](); + entry.resume(); + } + }; + if (entry.mtime && !this.noMtime) { + actions++; + node_fs_1.default.utimes(String(entry.absolute), entry.atime || new Date(), entry.mtime, done); + } + if (this[DOCHOWN](entry)) { + actions++; + node_fs_1.default.chown(String(entry.absolute), Number(this[UID](entry)), Number(this[GID](entry)), done); + } + done(); + }); + } + [UNSUPPORTED](entry) { + entry.unsupported = true; + this.warn('TAR_ENTRY_UNSUPPORTED', `unsupported entry type: ${entry.type}`, { entry }); + entry.resume(); + } + [SYMLINK](entry, done) { + this[LINK](entry, String(entry.linkpath), 'symlink', done); + } + [HARDLINK](entry, done) { + const linkpath = (0, normalize_windows_path_js_1.normalizeWindowsPath)(node_path_1.default.resolve(this.cwd, String(entry.linkpath))); + this[LINK](entry, linkpath, 'link', done); + } + [PEND]() { + this[PENDING]++; + } + [UNPEND]() { + this[PENDING]--; + this[MAYBECLOSE](); + } + [SKIP](entry) { + this[UNPEND](); + entry.resume(); + } + // Check if we can reuse an existing filesystem entry safely and + // overwrite it, rather than unlinking and recreating + // Windows doesn't report a useful nlink, so we just never reuse entries + [ISREUSABLE](entry, st) { + return (entry.type === 'File' && + !this.unlink && + st.isFile() && + st.nlink <= 1 && + !isWindows); + } + // check if a thing is there, and if so, try to clobber it + [CHECKFS](entry) { + this[PEND](); + const paths = [entry.path]; + if (entry.linkpath) { + paths.push(entry.linkpath); + } + this.reservations.reserve(paths, done => this[CHECKFS2](entry, done)); + } + [PRUNECACHE](entry) { + // if we are not creating a directory, and the path is in the dirCache, + // then that means we are about to delete the directory we created + // previously, and it is no longer going to be a directory, and neither + // is any of its children. + // If a symbolic link is encountered, all bets are off. There is no + // reasonable way to sanitize the cache in such a way we will be able to + // avoid having filesystem collisions. If this happens with a non-symlink + // entry, it'll just fail to unpack, but a symlink to a directory, using an + // 8.3 shortname or certain unicode attacks, can evade detection and lead + // to arbitrary writes to anywhere on the system. + if (entry.type === 'SymbolicLink') { + dropCache(this.dirCache); + } + else if (entry.type !== 'Directory') { + pruneCache(this.dirCache, String(entry.absolute)); + } + } + [CHECKFS2](entry, fullyDone) { + this[PRUNECACHE](entry); + const done = (er) => { + this[PRUNECACHE](entry); + fullyDone(er); + }; + const checkCwd = () => { + this[MKDIR](this.cwd, this.dmode, er => { + if (er) { + this[ONERROR](er, entry); + done(); + return; + } + this[CHECKED_CWD] = true; + start(); + }); + }; + const start = () => { + if (entry.absolute !== this.cwd) { + const parent = (0, normalize_windows_path_js_1.normalizeWindowsPath)(node_path_1.default.dirname(String(entry.absolute))); + if (parent !== this.cwd) { + return this[MKDIR](parent, this.dmode, er => { + if (er) { + this[ONERROR](er, entry); + done(); + return; + } + afterMakeParent(); + }); + } + } + afterMakeParent(); + }; + const afterMakeParent = () => { + node_fs_1.default.lstat(String(entry.absolute), (lstatEr, st) => { + if (st && + (this.keep || + /* c8 ignore next */ + (this.newer && st.mtime > (entry.mtime ?? st.mtime)))) { + this[SKIP](entry); + done(); + return; + } + if (lstatEr || this[ISREUSABLE](entry, st)) { + return this[MAKEFS](null, entry, done); + } + if (st.isDirectory()) { + if (entry.type === 'Directory') { + const needChmod = this.chmod && + entry.mode && + (st.mode & 0o7777) !== entry.mode; + const afterChmod = (er) => this[MAKEFS](er ?? null, entry, done); + if (!needChmod) { + return afterChmod(); + } + return node_fs_1.default.chmod(String(entry.absolute), Number(entry.mode), afterChmod); + } + // Not a dir entry, have to remove it. + // NB: the only way to end up with an entry that is the cwd + // itself, in such a way that == does not detect, is a + // tricky windows absolute path with UNC or 8.3 parts (and + // preservePaths:true, or else it will have been stripped). + // In that case, the user has opted out of path protections + // explicitly, so if they blow away the cwd, c'est la vie. + if (entry.absolute !== this.cwd) { + return node_fs_1.default.rmdir(String(entry.absolute), (er) => this[MAKEFS](er ?? null, entry, done)); + } + } + // not a dir, and not reusable + // don't remove if the cwd, we want that error + if (entry.absolute === this.cwd) { + return this[MAKEFS](null, entry, done); + } + unlinkFile(String(entry.absolute), er => this[MAKEFS](er ?? null, entry, done)); + }); + }; + if (this[CHECKED_CWD]) { + start(); + } + else { + checkCwd(); + } + } + [MAKEFS](er, entry, done) { + if (er) { + this[ONERROR](er, entry); + done(); + return; + } + switch (entry.type) { + case 'File': + case 'OldFile': + case 'ContiguousFile': + return this[FILE](entry, done); + case 'Link': + return this[HARDLINK](entry, done); + case 'SymbolicLink': + return this[SYMLINK](entry, done); + case 'Directory': + case 'GNUDumpDir': + return this[DIRECTORY](entry, done); + } + } + [LINK](entry, linkpath, link, done) { + // XXX: get the type ('symlink' or 'junction') for windows + node_fs_1.default[link](linkpath, String(entry.absolute), er => { + if (er) { + this[ONERROR](er, entry); + } + else { + this[UNPEND](); + entry.resume(); + } + done(); + }); + } +} +exports.Unpack = Unpack; +const callSync = (fn) => { + try { + return [null, fn()]; + } + catch (er) { + return [er, null]; + } +}; +class UnpackSync extends Unpack { + sync = true; + [MAKEFS](er, entry) { + return super[MAKEFS](er, entry, () => { }); + } + [CHECKFS](entry) { + this[PRUNECACHE](entry); + if (!this[CHECKED_CWD]) { + const er = this[MKDIR](this.cwd, this.dmode); + if (er) { + return this[ONERROR](er, entry); + } + this[CHECKED_CWD] = true; + } + // don't bother to make the parent if the current entry is the cwd, + // we've already checked it. + if (entry.absolute !== this.cwd) { + const parent = (0, normalize_windows_path_js_1.normalizeWindowsPath)(node_path_1.default.dirname(String(entry.absolute))); + if (parent !== this.cwd) { + const mkParent = this[MKDIR](parent, this.dmode); + if (mkParent) { + return this[ONERROR](mkParent, entry); + } + } + } + const [lstatEr, st] = callSync(() => node_fs_1.default.lstatSync(String(entry.absolute))); + if (st && + (this.keep || + /* c8 ignore next */ + (this.newer && st.mtime > (entry.mtime ?? st.mtime)))) { + return this[SKIP](entry); + } + if (lstatEr || this[ISREUSABLE](entry, st)) { + return this[MAKEFS](null, entry); + } + if (st.isDirectory()) { + if (entry.type === 'Directory') { + const needChmod = this.chmod && + entry.mode && + (st.mode & 0o7777) !== entry.mode; + const [er] = needChmod ? + callSync(() => { + node_fs_1.default.chmodSync(String(entry.absolute), Number(entry.mode)); + }) + : []; + return this[MAKEFS](er, entry); + } + // not a dir entry, have to remove it + const [er] = callSync(() => node_fs_1.default.rmdirSync(String(entry.absolute))); + this[MAKEFS](er, entry); + } + // not a dir, and not reusable. + // don't remove if it's the cwd, since we want that error. + const [er] = entry.absolute === this.cwd ? + [] + : callSync(() => unlinkFileSync(String(entry.absolute))); + this[MAKEFS](er, entry); + } + [FILE](entry, done) { + const mode = typeof entry.mode === 'number' ? + entry.mode & 0o7777 + : this.fmode; + const oner = (er) => { + let closeError; + try { + node_fs_1.default.closeSync(fd); + } + catch (e) { + closeError = e; + } + if (er || closeError) { + this[ONERROR](er || closeError, entry); + } + done(); + }; + let fd; + try { + fd = node_fs_1.default.openSync(String(entry.absolute), (0, get_write_flag_js_1.getWriteFlag)(entry.size), mode); + } + catch (er) { + return oner(er); + } + const tx = this.transform ? this.transform(entry) || entry : entry; + if (tx !== entry) { + tx.on('error', (er) => this[ONERROR](er, entry)); + entry.pipe(tx); + } + tx.on('data', (chunk) => { + try { + node_fs_1.default.writeSync(fd, chunk, 0, chunk.length); + } + catch (er) { + oner(er); + } + }); + tx.on('end', () => { + let er = null; + // try both, falling futimes back to utimes + // if either fails, handle the first error + if (entry.mtime && !this.noMtime) { + const atime = entry.atime || new Date(); + const mtime = entry.mtime; + try { + node_fs_1.default.futimesSync(fd, atime, mtime); + } + catch (futimeser) { + try { + node_fs_1.default.utimesSync(String(entry.absolute), atime, mtime); + } + catch (utimeser) { + er = futimeser; + } + } + } + if (this[DOCHOWN](entry)) { + const uid = this[UID](entry); + const gid = this[GID](entry); + try { + node_fs_1.default.fchownSync(fd, Number(uid), Number(gid)); + } + catch (fchowner) { + try { + node_fs_1.default.chownSync(String(entry.absolute), Number(uid), Number(gid)); + } + catch (chowner) { + er = er || fchowner; + } + } + } + oner(er); + }); + } + [DIRECTORY](entry, done) { + const mode = typeof entry.mode === 'number' ? + entry.mode & 0o7777 + : this.dmode; + const er = this[MKDIR](String(entry.absolute), mode); + if (er) { + this[ONERROR](er, entry); + done(); + return; + } + if (entry.mtime && !this.noMtime) { + try { + node_fs_1.default.utimesSync(String(entry.absolute), entry.atime || new Date(), entry.mtime); + /* c8 ignore next */ + } + catch (er) { } + } + if (this[DOCHOWN](entry)) { + try { + node_fs_1.default.chownSync(String(entry.absolute), Number(this[UID](entry)), Number(this[GID](entry))); + } + catch (er) { } + } + done(); + entry.resume(); + } + [MKDIR](dir, mode) { + try { + return (0, mkdir_js_1.mkdirSync)((0, normalize_windows_path_js_1.normalizeWindowsPath)(dir), { + uid: this.uid, + gid: this.gid, + processUid: this.processUid, + processGid: this.processGid, + umask: this.processUmask, + preserve: this.preservePaths, + unlink: this.unlink, + cache: this.dirCache, + cwd: this.cwd, + mode: mode, + }); + } + catch (er) { + return er; + } + } + [LINK](entry, linkpath, link, done) { + const ls = `${link}Sync`; + try { + node_fs_1.default[ls](linkpath, String(entry.absolute)); + done(); + entry.resume(); + } + catch (er) { + return this[ONERROR](er, entry); + } + } +} +exports.UnpackSync = UnpackSync; +//# sourceMappingURL=unpack.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/update.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/update.js new file mode 100644 index 00000000000000..7687896f4bfeeb --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/update.js @@ -0,0 +1,33 @@ +"use strict"; +// tar -u +Object.defineProperty(exports, "__esModule", { value: true }); +exports.update = void 0; +const make_command_js_1 = require("./make-command.js"); +const replace_js_1 = require("./replace.js"); +// just call tar.r with the filter and mtimeCache +exports.update = (0, make_command_js_1.makeCommand)(replace_js_1.replace.syncFile, replace_js_1.replace.asyncFile, replace_js_1.replace.syncNoFile, replace_js_1.replace.asyncNoFile, (opt, entries = []) => { + replace_js_1.replace.validate?.(opt, entries); + mtimeFilter(opt); +}); +const mtimeFilter = (opt) => { + const filter = opt.filter; + if (!opt.mtimeCache) { + opt.mtimeCache = new Map(); + } + opt.filter = + filter ? + (path, stat) => filter(path, stat) && + !( + /* c8 ignore start */ + ((opt.mtimeCache?.get(path) ?? stat.mtime ?? 0) > + (stat.mtime ?? 0)) + /* c8 ignore stop */ + ) + : (path, stat) => !( + /* c8 ignore start */ + ((opt.mtimeCache?.get(path) ?? stat.mtime ?? 0) > + (stat.mtime ?? 0)) + /* c8 ignore stop */ + ); +}; +//# sourceMappingURL=update.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/warn-method.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/warn-method.js new file mode 100644 index 00000000000000..f25502776e36a3 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/warn-method.js @@ -0,0 +1,31 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.warnMethod = void 0; +const warnMethod = (self, code, message, data = {}) => { + if (self.file) { + data.file = self.file; + } + if (self.cwd) { + data.cwd = self.cwd; + } + data.code = + (message instanceof Error && + message.code) || + code; + data.tarCode = code; + if (!self.strict && data.recoverable !== false) { + if (message instanceof Error) { + data = Object.assign(message, data); + message = message.message; + } + self.emit('warn', code, message, data); + } + else if (message instanceof Error) { + self.emit('error', Object.assign(message, data)); + } + else { + self.emit('error', Object.assign(new Error(`${code}: ${message}`), data)); + } +}; +exports.warnMethod = warnMethod; +//# sourceMappingURL=warn-method.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/winchars.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/winchars.js new file mode 100644 index 00000000000000..c0a4405812929e --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/winchars.js @@ -0,0 +1,14 @@ +"use strict"; +// When writing files on Windows, translate the characters to their +// 0xf000 higher-encoded versions. +Object.defineProperty(exports, "__esModule", { value: true }); +exports.decode = exports.encode = void 0; +const raw = ['|', '<', '>', '?', ':']; +const win = raw.map(char => String.fromCharCode(0xf000 + char.charCodeAt(0))); +const toWin = new Map(raw.map((char, i) => [char, win[i]])); +const toRaw = new Map(win.map((char, i) => [char, raw[i]])); +const encode = (s) => raw.reduce((s, c) => s.split(c).join(toWin.get(c)), s); +exports.encode = encode; +const decode = (s) => win.reduce((s, c) => s.split(c).join(toRaw.get(c)), s); +exports.decode = decode; +//# sourceMappingURL=winchars.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/write-entry.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/write-entry.js new file mode 100644 index 00000000000000..45b7efeb795027 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/commonjs/write-entry.js @@ -0,0 +1,689 @@ +"use strict"; +var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + var desc = Object.getOwnPropertyDescriptor(m, k); + if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { + desc = { enumerable: true, get: function() { return m[k]; } }; + } + Object.defineProperty(o, k2, desc); +}) : (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + o[k2] = m[k]; +})); +var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { + Object.defineProperty(o, "default", { enumerable: true, value: v }); +}) : function(o, v) { + o["default"] = v; +}); +var __importStar = (this && this.__importStar) || function (mod) { + if (mod && mod.__esModule) return mod; + var result = {}; + if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); + __setModuleDefault(result, mod); + return result; +}; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.WriteEntryTar = exports.WriteEntrySync = exports.WriteEntry = void 0; +const fs_1 = __importDefault(require("fs")); +const minipass_1 = require("minipass"); +const path_1 = __importDefault(require("path")); +const header_js_1 = require("./header.js"); +const mode_fix_js_1 = require("./mode-fix.js"); +const normalize_windows_path_js_1 = require("./normalize-windows-path.js"); +const options_js_1 = require("./options.js"); +const pax_js_1 = require("./pax.js"); +const strip_absolute_path_js_1 = require("./strip-absolute-path.js"); +const strip_trailing_slashes_js_1 = require("./strip-trailing-slashes.js"); +const warn_method_js_1 = require("./warn-method.js"); +const winchars = __importStar(require("./winchars.js")); +const prefixPath = (path, prefix) => { + if (!prefix) { + return (0, normalize_windows_path_js_1.normalizeWindowsPath)(path); + } + path = (0, normalize_windows_path_js_1.normalizeWindowsPath)(path).replace(/^\.(\/|$)/, ''); + return (0, strip_trailing_slashes_js_1.stripTrailingSlashes)(prefix) + '/' + path; +}; +const maxReadSize = 16 * 1024 * 1024; +const PROCESS = Symbol('process'); +const FILE = Symbol('file'); +const DIRECTORY = Symbol('directory'); +const SYMLINK = Symbol('symlink'); +const HARDLINK = Symbol('hardlink'); +const HEADER = Symbol('header'); +const READ = Symbol('read'); +const LSTAT = Symbol('lstat'); +const ONLSTAT = Symbol('onlstat'); +const ONREAD = Symbol('onread'); +const ONREADLINK = Symbol('onreadlink'); +const OPENFILE = Symbol('openfile'); +const ONOPENFILE = Symbol('onopenfile'); +const CLOSE = Symbol('close'); +const MODE = Symbol('mode'); +const AWAITDRAIN = Symbol('awaitDrain'); +const ONDRAIN = Symbol('ondrain'); +const PREFIX = Symbol('prefix'); +class WriteEntry extends minipass_1.Minipass { + path; + portable; + myuid = (process.getuid && process.getuid()) || 0; + // until node has builtin pwnam functions, this'll have to do + myuser = process.env.USER || ''; + maxReadSize; + linkCache; + statCache; + preservePaths; + cwd; + strict; + mtime; + noPax; + noMtime; + prefix; + fd; + blockLen = 0; + blockRemain = 0; + buf; + pos = 0; + remain = 0; + length = 0; + offset = 0; + win32; + absolute; + header; + type; + linkpath; + stat; + onWriteEntry; + #hadError = false; + constructor(p, opt_ = {}) { + const opt = (0, options_js_1.dealias)(opt_); + super(); + this.path = (0, normalize_windows_path_js_1.normalizeWindowsPath)(p); + // suppress atime, ctime, uid, gid, uname, gname + this.portable = !!opt.portable; + this.maxReadSize = opt.maxReadSize || maxReadSize; + this.linkCache = opt.linkCache || new Map(); + this.statCache = opt.statCache || new Map(); + this.preservePaths = !!opt.preservePaths; + this.cwd = (0, normalize_windows_path_js_1.normalizeWindowsPath)(opt.cwd || process.cwd()); + this.strict = !!opt.strict; + this.noPax = !!opt.noPax; + this.noMtime = !!opt.noMtime; + this.mtime = opt.mtime; + this.prefix = + opt.prefix ? (0, normalize_windows_path_js_1.normalizeWindowsPath)(opt.prefix) : undefined; + this.onWriteEntry = opt.onWriteEntry; + if (typeof opt.onwarn === 'function') { + this.on('warn', opt.onwarn); + } + let pathWarn = false; + if (!this.preservePaths) { + const [root, stripped] = (0, strip_absolute_path_js_1.stripAbsolutePath)(this.path); + if (root && typeof stripped === 'string') { + this.path = stripped; + pathWarn = root; + } + } + this.win32 = !!opt.win32 || process.platform === 'win32'; + if (this.win32) { + // force the \ to / normalization, since we might not *actually* + // be on windows, but want \ to be considered a path separator. + this.path = winchars.decode(this.path.replace(/\\/g, '/')); + p = p.replace(/\\/g, '/'); + } + this.absolute = (0, normalize_windows_path_js_1.normalizeWindowsPath)(opt.absolute || path_1.default.resolve(this.cwd, p)); + if (this.path === '') { + this.path = './'; + } + if (pathWarn) { + this.warn('TAR_ENTRY_INFO', `stripping ${pathWarn} from absolute path`, { + entry: this, + path: pathWarn + this.path, + }); + } + const cs = this.statCache.get(this.absolute); + if (cs) { + this[ONLSTAT](cs); + } + else { + this[LSTAT](); + } + } + warn(code, message, data = {}) { + return (0, warn_method_js_1.warnMethod)(this, code, message, data); + } + emit(ev, ...data) { + if (ev === 'error') { + this.#hadError = true; + } + return super.emit(ev, ...data); + } + [LSTAT]() { + fs_1.default.lstat(this.absolute, (er, stat) => { + if (er) { + return this.emit('error', er); + } + this[ONLSTAT](stat); + }); + } + [ONLSTAT](stat) { + this.statCache.set(this.absolute, stat); + this.stat = stat; + if (!stat.isFile()) { + stat.size = 0; + } + this.type = getType(stat); + this.emit('stat', stat); + this[PROCESS](); + } + [PROCESS]() { + switch (this.type) { + case 'File': + return this[FILE](); + case 'Directory': + return this[DIRECTORY](); + case 'SymbolicLink': + return this[SYMLINK](); + // unsupported types are ignored. + default: + return this.end(); + } + } + [MODE](mode) { + return (0, mode_fix_js_1.modeFix)(mode, this.type === 'Directory', this.portable); + } + [PREFIX](path) { + return prefixPath(path, this.prefix); + } + [HEADER]() { + /* c8 ignore start */ + if (!this.stat) { + throw new Error('cannot write header before stat'); + } + /* c8 ignore stop */ + if (this.type === 'Directory' && this.portable) { + this.noMtime = true; + } + this.onWriteEntry?.(this); + this.header = new header_js_1.Header({ + path: this[PREFIX](this.path), + // only apply the prefix to hard links. + linkpath: this.type === 'Link' && this.linkpath !== undefined ? + this[PREFIX](this.linkpath) + : this.linkpath, + // only the permissions and setuid/setgid/sticky bitflags + // not the higher-order bits that specify file type + mode: this[MODE](this.stat.mode), + uid: this.portable ? undefined : this.stat.uid, + gid: this.portable ? undefined : this.stat.gid, + size: this.stat.size, + mtime: this.noMtime ? undefined : this.mtime || this.stat.mtime, + /* c8 ignore next */ + type: this.type === 'Unsupported' ? undefined : this.type, + uname: this.portable ? undefined + : this.stat.uid === this.myuid ? this.myuser + : '', + atime: this.portable ? undefined : this.stat.atime, + ctime: this.portable ? undefined : this.stat.ctime, + }); + if (this.header.encode() && !this.noPax) { + super.write(new pax_js_1.Pax({ + atime: this.portable ? undefined : this.header.atime, + ctime: this.portable ? undefined : this.header.ctime, + gid: this.portable ? undefined : this.header.gid, + mtime: this.noMtime ? undefined : (this.mtime || this.header.mtime), + path: this[PREFIX](this.path), + linkpath: this.type === 'Link' && this.linkpath !== undefined ? + this[PREFIX](this.linkpath) + : this.linkpath, + size: this.header.size, + uid: this.portable ? undefined : this.header.uid, + uname: this.portable ? undefined : this.header.uname, + dev: this.portable ? undefined : this.stat.dev, + ino: this.portable ? undefined : this.stat.ino, + nlink: this.portable ? undefined : this.stat.nlink, + }).encode()); + } + const block = this.header?.block; + /* c8 ignore start */ + if (!block) { + throw new Error('failed to encode header'); + } + /* c8 ignore stop */ + super.write(block); + } + [DIRECTORY]() { + /* c8 ignore start */ + if (!this.stat) { + throw new Error('cannot create directory entry without stat'); + } + /* c8 ignore stop */ + if (this.path.slice(-1) !== '/') { + this.path += '/'; + } + this.stat.size = 0; + this[HEADER](); + this.end(); + } + [SYMLINK]() { + fs_1.default.readlink(this.absolute, (er, linkpath) => { + if (er) { + return this.emit('error', er); + } + this[ONREADLINK](linkpath); + }); + } + [ONREADLINK](linkpath) { + this.linkpath = (0, normalize_windows_path_js_1.normalizeWindowsPath)(linkpath); + this[HEADER](); + this.end(); + } + [HARDLINK](linkpath) { + /* c8 ignore start */ + if (!this.stat) { + throw new Error('cannot create link entry without stat'); + } + /* c8 ignore stop */ + this.type = 'Link'; + this.linkpath = (0, normalize_windows_path_js_1.normalizeWindowsPath)(path_1.default.relative(this.cwd, linkpath)); + this.stat.size = 0; + this[HEADER](); + this.end(); + } + [FILE]() { + /* c8 ignore start */ + if (!this.stat) { + throw new Error('cannot create file entry without stat'); + } + /* c8 ignore stop */ + if (this.stat.nlink > 1) { + const linkKey = `${this.stat.dev}:${this.stat.ino}`; + const linkpath = this.linkCache.get(linkKey); + if (linkpath?.indexOf(this.cwd) === 0) { + return this[HARDLINK](linkpath); + } + this.linkCache.set(linkKey, this.absolute); + } + this[HEADER](); + if (this.stat.size === 0) { + return this.end(); + } + this[OPENFILE](); + } + [OPENFILE]() { + fs_1.default.open(this.absolute, 'r', (er, fd) => { + if (er) { + return this.emit('error', er); + } + this[ONOPENFILE](fd); + }); + } + [ONOPENFILE](fd) { + this.fd = fd; + if (this.#hadError) { + return this[CLOSE](); + } + /* c8 ignore start */ + if (!this.stat) { + throw new Error('should stat before calling onopenfile'); + } + /* c8 ignore start */ + this.blockLen = 512 * Math.ceil(this.stat.size / 512); + this.blockRemain = this.blockLen; + const bufLen = Math.min(this.blockLen, this.maxReadSize); + this.buf = Buffer.allocUnsafe(bufLen); + this.offset = 0; + this.pos = 0; + this.remain = this.stat.size; + this.length = this.buf.length; + this[READ](); + } + [READ]() { + const { fd, buf, offset, length, pos } = this; + if (fd === undefined || buf === undefined) { + throw new Error('cannot read file without first opening'); + } + fs_1.default.read(fd, buf, offset, length, pos, (er, bytesRead) => { + if (er) { + // ignoring the error from close(2) is a bad practice, but at + // this point we already have an error, don't need another one + return this[CLOSE](() => this.emit('error', er)); + } + this[ONREAD](bytesRead); + }); + } + /* c8 ignore start */ + [CLOSE](cb = () => { }) { + /* c8 ignore stop */ + if (this.fd !== undefined) + fs_1.default.close(this.fd, cb); + } + [ONREAD](bytesRead) { + if (bytesRead <= 0 && this.remain > 0) { + const er = Object.assign(new Error('encountered unexpected EOF'), { + path: this.absolute, + syscall: 'read', + code: 'EOF', + }); + return this[CLOSE](() => this.emit('error', er)); + } + if (bytesRead > this.remain) { + const er = Object.assign(new Error('did not encounter expected EOF'), { + path: this.absolute, + syscall: 'read', + code: 'EOF', + }); + return this[CLOSE](() => this.emit('error', er)); + } + /* c8 ignore start */ + if (!this.buf) { + throw new Error('should have created buffer prior to reading'); + } + /* c8 ignore stop */ + // null out the rest of the buffer, if we could fit the block padding + // at the end of this loop, we've incremented bytesRead and this.remain + // to be incremented up to the blockRemain level, as if we had expected + // to get a null-padded file, and read it until the end. then we will + // decrement both remain and blockRemain by bytesRead, and know that we + // reached the expected EOF, without any null buffer to append. + if (bytesRead === this.remain) { + for (let i = bytesRead; i < this.length && bytesRead < this.blockRemain; i++) { + this.buf[i + this.offset] = 0; + bytesRead++; + this.remain++; + } + } + const chunk = this.offset === 0 && bytesRead === this.buf.length ? + this.buf + : this.buf.subarray(this.offset, this.offset + bytesRead); + const flushed = this.write(chunk); + if (!flushed) { + this[AWAITDRAIN](() => this[ONDRAIN]()); + } + else { + this[ONDRAIN](); + } + } + [AWAITDRAIN](cb) { + this.once('drain', cb); + } + write(chunk, encoding, cb) { + /* c8 ignore start - just junk to comply with NodeJS.WritableStream */ + if (typeof encoding === 'function') { + cb = encoding; + encoding = undefined; + } + if (typeof chunk === 'string') { + chunk = Buffer.from(chunk, typeof encoding === 'string' ? encoding : 'utf8'); + } + /* c8 ignore stop */ + if (this.blockRemain < chunk.length) { + const er = Object.assign(new Error('writing more data than expected'), { + path: this.absolute, + }); + return this.emit('error', er); + } + this.remain -= chunk.length; + this.blockRemain -= chunk.length; + this.pos += chunk.length; + this.offset += chunk.length; + return super.write(chunk, null, cb); + } + [ONDRAIN]() { + if (!this.remain) { + if (this.blockRemain) { + super.write(Buffer.alloc(this.blockRemain)); + } + return this[CLOSE](er => er ? this.emit('error', er) : this.end()); + } + /* c8 ignore start */ + if (!this.buf) { + throw new Error('buffer lost somehow in ONDRAIN'); + } + /* c8 ignore stop */ + if (this.offset >= this.length) { + // if we only have a smaller bit left to read, alloc a smaller buffer + // otherwise, keep it the same length it was before. + this.buf = Buffer.allocUnsafe(Math.min(this.blockRemain, this.buf.length)); + this.offset = 0; + } + this.length = this.buf.length - this.offset; + this[READ](); + } +} +exports.WriteEntry = WriteEntry; +class WriteEntrySync extends WriteEntry { + sync = true; + [LSTAT]() { + this[ONLSTAT](fs_1.default.lstatSync(this.absolute)); + } + [SYMLINK]() { + this[ONREADLINK](fs_1.default.readlinkSync(this.absolute)); + } + [OPENFILE]() { + this[ONOPENFILE](fs_1.default.openSync(this.absolute, 'r')); + } + [READ]() { + let threw = true; + try { + const { fd, buf, offset, length, pos } = this; + /* c8 ignore start */ + if (fd === undefined || buf === undefined) { + throw new Error('fd and buf must be set in READ method'); + } + /* c8 ignore stop */ + const bytesRead = fs_1.default.readSync(fd, buf, offset, length, pos); + this[ONREAD](bytesRead); + threw = false; + } + finally { + // ignoring the error from close(2) is a bad practice, but at + // this point we already have an error, don't need another one + if (threw) { + try { + this[CLOSE](() => { }); + } + catch (er) { } + } + } + } + [AWAITDRAIN](cb) { + cb(); + } + /* c8 ignore start */ + [CLOSE](cb = () => { }) { + /* c8 ignore stop */ + if (this.fd !== undefined) + fs_1.default.closeSync(this.fd); + cb(); + } +} +exports.WriteEntrySync = WriteEntrySync; +class WriteEntryTar extends minipass_1.Minipass { + blockLen = 0; + blockRemain = 0; + buf = 0; + pos = 0; + remain = 0; + length = 0; + preservePaths; + portable; + strict; + noPax; + noMtime; + readEntry; + type; + prefix; + path; + mode; + uid; + gid; + uname; + gname; + header; + mtime; + atime; + ctime; + linkpath; + size; + onWriteEntry; + warn(code, message, data = {}) { + return (0, warn_method_js_1.warnMethod)(this, code, message, data); + } + constructor(readEntry, opt_ = {}) { + const opt = (0, options_js_1.dealias)(opt_); + super(); + this.preservePaths = !!opt.preservePaths; + this.portable = !!opt.portable; + this.strict = !!opt.strict; + this.noPax = !!opt.noPax; + this.noMtime = !!opt.noMtime; + this.onWriteEntry = opt.onWriteEntry; + this.readEntry = readEntry; + const { type } = readEntry; + /* c8 ignore start */ + if (type === 'Unsupported') { + throw new Error('writing entry that should be ignored'); + } + /* c8 ignore stop */ + this.type = type; + if (this.type === 'Directory' && this.portable) { + this.noMtime = true; + } + this.prefix = opt.prefix; + this.path = (0, normalize_windows_path_js_1.normalizeWindowsPath)(readEntry.path); + this.mode = + readEntry.mode !== undefined ? + this[MODE](readEntry.mode) + : undefined; + this.uid = this.portable ? undefined : readEntry.uid; + this.gid = this.portable ? undefined : readEntry.gid; + this.uname = this.portable ? undefined : readEntry.uname; + this.gname = this.portable ? undefined : readEntry.gname; + this.size = readEntry.size; + this.mtime = + this.noMtime ? undefined : opt.mtime || readEntry.mtime; + this.atime = this.portable ? undefined : readEntry.atime; + this.ctime = this.portable ? undefined : readEntry.ctime; + this.linkpath = + readEntry.linkpath !== undefined ? + (0, normalize_windows_path_js_1.normalizeWindowsPath)(readEntry.linkpath) + : undefined; + if (typeof opt.onwarn === 'function') { + this.on('warn', opt.onwarn); + } + let pathWarn = false; + if (!this.preservePaths) { + const [root, stripped] = (0, strip_absolute_path_js_1.stripAbsolutePath)(this.path); + if (root && typeof stripped === 'string') { + this.path = stripped; + pathWarn = root; + } + } + this.remain = readEntry.size; + this.blockRemain = readEntry.startBlockSize; + this.onWriteEntry?.(this); + this.header = new header_js_1.Header({ + path: this[PREFIX](this.path), + linkpath: this.type === 'Link' && this.linkpath !== undefined ? + this[PREFIX](this.linkpath) + : this.linkpath, + // only the permissions and setuid/setgid/sticky bitflags + // not the higher-order bits that specify file type + mode: this.mode, + uid: this.portable ? undefined : this.uid, + gid: this.portable ? undefined : this.gid, + size: this.size, + mtime: this.noMtime ? undefined : this.mtime, + type: this.type, + uname: this.portable ? undefined : this.uname, + atime: this.portable ? undefined : this.atime, + ctime: this.portable ? undefined : this.ctime, + }); + if (pathWarn) { + this.warn('TAR_ENTRY_INFO', `stripping ${pathWarn} from absolute path`, { + entry: this, + path: pathWarn + this.path, + }); + } + if (this.header.encode() && !this.noPax) { + super.write(new pax_js_1.Pax({ + atime: this.portable ? undefined : this.atime, + ctime: this.portable ? undefined : this.ctime, + gid: this.portable ? undefined : this.gid, + mtime: this.noMtime ? undefined : this.mtime, + path: this[PREFIX](this.path), + linkpath: this.type === 'Link' && this.linkpath !== undefined ? + this[PREFIX](this.linkpath) + : this.linkpath, + size: this.size, + uid: this.portable ? undefined : this.uid, + uname: this.portable ? undefined : this.uname, + dev: this.portable ? undefined : this.readEntry.dev, + ino: this.portable ? undefined : this.readEntry.ino, + nlink: this.portable ? undefined : this.readEntry.nlink, + }).encode()); + } + const b = this.header?.block; + /* c8 ignore start */ + if (!b) + throw new Error('failed to encode header'); + /* c8 ignore stop */ + super.write(b); + readEntry.pipe(this); + } + [PREFIX](path) { + return prefixPath(path, this.prefix); + } + [MODE](mode) { + return (0, mode_fix_js_1.modeFix)(mode, this.type === 'Directory', this.portable); + } + write(chunk, encoding, cb) { + /* c8 ignore start - just junk to comply with NodeJS.WritableStream */ + if (typeof encoding === 'function') { + cb = encoding; + encoding = undefined; + } + if (typeof chunk === 'string') { + chunk = Buffer.from(chunk, typeof encoding === 'string' ? encoding : 'utf8'); + } + /* c8 ignore stop */ + const writeLen = chunk.length; + if (writeLen > this.blockRemain) { + throw new Error('writing more to entry than is appropriate'); + } + this.blockRemain -= writeLen; + return super.write(chunk, cb); + } + end(chunk, encoding, cb) { + if (this.blockRemain) { + super.write(Buffer.alloc(this.blockRemain)); + } + /* c8 ignore start - just junk to comply with NodeJS.WritableStream */ + if (typeof chunk === 'function') { + cb = chunk; + encoding = undefined; + chunk = undefined; + } + if (typeof encoding === 'function') { + cb = encoding; + encoding = undefined; + } + if (typeof chunk === 'string') { + chunk = Buffer.from(chunk, encoding ?? 'utf8'); + } + if (cb) + this.once('finish', cb); + chunk ? super.end(chunk, cb) : super.end(cb); + /* c8 ignore stop */ + return this; + } +} +exports.WriteEntryTar = WriteEntryTar; +const getType = (stat) => stat.isFile() ? 'File' + : stat.isDirectory() ? 'Directory' + : stat.isSymbolicLink() ? 'SymbolicLink' + : 'Unsupported'; +//# sourceMappingURL=write-entry.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/create.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/create.js new file mode 100644 index 00000000000000..512a9911d70d5b --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/create.js @@ -0,0 +1,77 @@ +import { WriteStream, WriteStreamSync } from '@isaacs/fs-minipass'; +import path from 'node:path'; +import { list } from './list.js'; +import { makeCommand } from './make-command.js'; +import { Pack, PackSync } from './pack.js'; +const createFileSync = (opt, files) => { + const p = new PackSync(opt); + const stream = new WriteStreamSync(opt.file, { + mode: opt.mode || 0o666, + }); + p.pipe(stream); + addFilesSync(p, files); +}; +const createFile = (opt, files) => { + const p = new Pack(opt); + const stream = new WriteStream(opt.file, { + mode: opt.mode || 0o666, + }); + p.pipe(stream); + const promise = new Promise((res, rej) => { + stream.on('error', rej); + stream.on('close', res); + p.on('error', rej); + }); + addFilesAsync(p, files); + return promise; +}; +const addFilesSync = (p, files) => { + files.forEach(file => { + if (file.charAt(0) === '@') { + list({ + file: path.resolve(p.cwd, file.slice(1)), + sync: true, + noResume: true, + onReadEntry: entry => p.add(entry), + }); + } + else { + p.add(file); + } + }); + p.end(); +}; +const addFilesAsync = async (p, files) => { + for (let i = 0; i < files.length; i++) { + const file = String(files[i]); + if (file.charAt(0) === '@') { + await list({ + file: path.resolve(String(p.cwd), file.slice(1)), + noResume: true, + onReadEntry: entry => { + p.add(entry); + }, + }); + } + else { + p.add(file); + } + } + p.end(); +}; +const createSync = (opt, files) => { + const p = new PackSync(opt); + addFilesSync(p, files); + return p; +}; +const createAsync = (opt, files) => { + const p = new Pack(opt); + addFilesAsync(p, files); + return p; +}; +export const create = makeCommand(createFileSync, createFile, createSync, createAsync, (_opt, files) => { + if (!files?.length) { + throw new TypeError('no paths specified to add to archive'); + } +}); +//# sourceMappingURL=create.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/cwd-error.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/cwd-error.js new file mode 100644 index 00000000000000..289a066b8e0317 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/cwd-error.js @@ -0,0 +1,14 @@ +export class CwdError extends Error { + path; + code; + syscall = 'chdir'; + constructor(path, code) { + super(`${code}: Cannot cd into '${path}'`); + this.path = path; + this.code = code; + } + get name() { + return 'CwdError'; + } +} +//# sourceMappingURL=cwd-error.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/extract.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/extract.js new file mode 100644 index 00000000000000..2274feef26e78f --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/extract.js @@ -0,0 +1,49 @@ +// tar -x +import * as fsm from '@isaacs/fs-minipass'; +import fs from 'node:fs'; +import { filesFilter } from './list.js'; +import { makeCommand } from './make-command.js'; +import { Unpack, UnpackSync } from './unpack.js'; +const extractFileSync = (opt) => { + const u = new UnpackSync(opt); + const file = opt.file; + const stat = fs.statSync(file); + // This trades a zero-byte read() syscall for a stat + // However, it will usually result in less memory allocation + const readSize = opt.maxReadSize || 16 * 1024 * 1024; + const stream = new fsm.ReadStreamSync(file, { + readSize: readSize, + size: stat.size, + }); + stream.pipe(u); +}; +const extractFile = (opt, _) => { + const u = new Unpack(opt); + const readSize = opt.maxReadSize || 16 * 1024 * 1024; + const file = opt.file; + const p = new Promise((resolve, reject) => { + u.on('error', reject); + u.on('close', resolve); + // This trades a zero-byte read() syscall for a stat + // However, it will usually result in less memory allocation + fs.stat(file, (er, stat) => { + if (er) { + reject(er); + } + else { + const stream = new fsm.ReadStream(file, { + readSize: readSize, + size: stat.size, + }); + stream.on('error', reject); + stream.pipe(u); + } + }); + }); + return p; +}; +export const extract = makeCommand(extractFileSync, extractFile, opt => new UnpackSync(opt), opt => new Unpack(opt), (opt, files) => { + if (files?.length) + filesFilter(opt, files); +}); +//# sourceMappingURL=extract.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/get-write-flag.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/get-write-flag.js new file mode 100644 index 00000000000000..2c7f3e8b28fdaf --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/get-write-flag.js @@ -0,0 +1,23 @@ +// Get the appropriate flag to use for creating files +// We use fmap on Windows platforms for files less than +// 512kb. This is a fairly low limit, but avoids making +// things slower in some cases. Since most of what this +// library is used for is extracting tarballs of many +// relatively small files in npm packages and the like, +// it can be a big boost on Windows platforms. +import fs from 'fs'; +const platform = process.env.__FAKE_PLATFORM__ || process.platform; +const isWindows = platform === 'win32'; +/* c8 ignore start */ +const { O_CREAT, O_TRUNC, O_WRONLY } = fs.constants; +const UV_FS_O_FILEMAP = Number(process.env.__FAKE_FS_O_FILENAME__) || + fs.constants.UV_FS_O_FILEMAP || + 0; +/* c8 ignore stop */ +const fMapEnabled = isWindows && !!UV_FS_O_FILEMAP; +const fMapLimit = 512 * 1024; +const fMapFlag = UV_FS_O_FILEMAP | O_TRUNC | O_CREAT | O_WRONLY; +export const getWriteFlag = !fMapEnabled ? + () => 'w' + : (size) => (size < fMapLimit ? fMapFlag : 'w'); +//# sourceMappingURL=get-write-flag.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/header.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/header.js new file mode 100644 index 00000000000000..e15192b14b16e1 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/header.js @@ -0,0 +1,279 @@ +// parse a 512-byte header block to a data object, or vice-versa +// encode returns `true` if a pax extended header is needed, because +// the data could not be faithfully encoded in a simple header. +// (Also, check header.needPax to see if it needs a pax header.) +import { posix as pathModule } from 'node:path'; +import * as large from './large-numbers.js'; +import * as types from './types.js'; +export class Header { + cksumValid = false; + needPax = false; + nullBlock = false; + block; + path; + mode; + uid; + gid; + size; + cksum; + #type = 'Unsupported'; + linkpath; + uname; + gname; + devmaj = 0; + devmin = 0; + atime; + ctime; + mtime; + charset; + comment; + constructor(data, off = 0, ex, gex) { + if (Buffer.isBuffer(data)) { + this.decode(data, off || 0, ex, gex); + } + else if (data) { + this.#slurp(data); + } + } + decode(buf, off, ex, gex) { + if (!off) { + off = 0; + } + if (!buf || !(buf.length >= off + 512)) { + throw new Error('need 512 bytes for header'); + } + this.path = decString(buf, off, 100); + this.mode = decNumber(buf, off + 100, 8); + this.uid = decNumber(buf, off + 108, 8); + this.gid = decNumber(buf, off + 116, 8); + this.size = decNumber(buf, off + 124, 12); + this.mtime = decDate(buf, off + 136, 12); + this.cksum = decNumber(buf, off + 148, 12); + // if we have extended or global extended headers, apply them now + // See https://github.com/npm/node-tar/pull/187 + // Apply global before local, so it overrides + if (gex) + this.#slurp(gex, true); + if (ex) + this.#slurp(ex); + // old tar versions marked dirs as a file with a trailing / + const t = decString(buf, off + 156, 1); + if (types.isCode(t)) { + this.#type = t || '0'; + } + if (this.#type === '0' && this.path.slice(-1) === '/') { + this.#type = '5'; + } + // tar implementations sometimes incorrectly put the stat(dir).size + // as the size in the tarball, even though Directory entries are + // not able to have any body at all. In the very rare chance that + // it actually DOES have a body, we weren't going to do anything with + // it anyway, and it'll just be a warning about an invalid header. + if (this.#type === '5') { + this.size = 0; + } + this.linkpath = decString(buf, off + 157, 100); + if (buf.subarray(off + 257, off + 265).toString() === + 'ustar\u000000') { + this.uname = decString(buf, off + 265, 32); + this.gname = decString(buf, off + 297, 32); + /* c8 ignore start */ + this.devmaj = decNumber(buf, off + 329, 8) ?? 0; + this.devmin = decNumber(buf, off + 337, 8) ?? 0; + /* c8 ignore stop */ + if (buf[off + 475] !== 0) { + // definitely a prefix, definitely >130 chars. + const prefix = decString(buf, off + 345, 155); + this.path = prefix + '/' + this.path; + } + else { + const prefix = decString(buf, off + 345, 130); + if (prefix) { + this.path = prefix + '/' + this.path; + } + this.atime = decDate(buf, off + 476, 12); + this.ctime = decDate(buf, off + 488, 12); + } + } + let sum = 8 * 0x20; + for (let i = off; i < off + 148; i++) { + sum += buf[i]; + } + for (let i = off + 156; i < off + 512; i++) { + sum += buf[i]; + } + this.cksumValid = sum === this.cksum; + if (this.cksum === undefined && sum === 8 * 0x20) { + this.nullBlock = true; + } + } + #slurp(ex, gex = false) { + Object.assign(this, Object.fromEntries(Object.entries(ex).filter(([k, v]) => { + // we slurp in everything except for the path attribute in + // a global extended header, because that's weird. Also, any + // null/undefined values are ignored. + return !(v === null || + v === undefined || + (k === 'path' && gex) || + (k === 'linkpath' && gex) || + k === 'global'); + }))); + } + encode(buf, off = 0) { + if (!buf) { + buf = this.block = Buffer.alloc(512); + } + if (this.#type === 'Unsupported') { + this.#type = '0'; + } + if (!(buf.length >= off + 512)) { + throw new Error('need 512 bytes for header'); + } + const prefixSize = this.ctime || this.atime ? 130 : 155; + const split = splitPrefix(this.path || '', prefixSize); + const path = split[0]; + const prefix = split[1]; + this.needPax = !!split[2]; + this.needPax = encString(buf, off, 100, path) || this.needPax; + this.needPax = + encNumber(buf, off + 100, 8, this.mode) || this.needPax; + this.needPax = + encNumber(buf, off + 108, 8, this.uid) || this.needPax; + this.needPax = + encNumber(buf, off + 116, 8, this.gid) || this.needPax; + this.needPax = + encNumber(buf, off + 124, 12, this.size) || this.needPax; + this.needPax = + encDate(buf, off + 136, 12, this.mtime) || this.needPax; + buf[off + 156] = this.#type.charCodeAt(0); + this.needPax = + encString(buf, off + 157, 100, this.linkpath) || this.needPax; + buf.write('ustar\u000000', off + 257, 8); + this.needPax = + encString(buf, off + 265, 32, this.uname) || this.needPax; + this.needPax = + encString(buf, off + 297, 32, this.gname) || this.needPax; + this.needPax = + encNumber(buf, off + 329, 8, this.devmaj) || this.needPax; + this.needPax = + encNumber(buf, off + 337, 8, this.devmin) || this.needPax; + this.needPax = + encString(buf, off + 345, prefixSize, prefix) || this.needPax; + if (buf[off + 475] !== 0) { + this.needPax = + encString(buf, off + 345, 155, prefix) || this.needPax; + } + else { + this.needPax = + encString(buf, off + 345, 130, prefix) || this.needPax; + this.needPax = + encDate(buf, off + 476, 12, this.atime) || this.needPax; + this.needPax = + encDate(buf, off + 488, 12, this.ctime) || this.needPax; + } + let sum = 8 * 0x20; + for (let i = off; i < off + 148; i++) { + sum += buf[i]; + } + for (let i = off + 156; i < off + 512; i++) { + sum += buf[i]; + } + this.cksum = sum; + encNumber(buf, off + 148, 8, this.cksum); + this.cksumValid = true; + return this.needPax; + } + get type() { + return (this.#type === 'Unsupported' ? + this.#type + : types.name.get(this.#type)); + } + get typeKey() { + return this.#type; + } + set type(type) { + const c = String(types.code.get(type)); + if (types.isCode(c) || c === 'Unsupported') { + this.#type = c; + } + else if (types.isCode(type)) { + this.#type = type; + } + else { + throw new TypeError('invalid entry type: ' + type); + } + } +} +const splitPrefix = (p, prefixSize) => { + const pathSize = 100; + let pp = p; + let prefix = ''; + let ret = undefined; + const root = pathModule.parse(p).root || '.'; + if (Buffer.byteLength(pp) < pathSize) { + ret = [pp, prefix, false]; + } + else { + // first set prefix to the dir, and path to the base + prefix = pathModule.dirname(pp); + pp = pathModule.basename(pp); + do { + if (Buffer.byteLength(pp) <= pathSize && + Buffer.byteLength(prefix) <= prefixSize) { + // both fit! + ret = [pp, prefix, false]; + } + else if (Buffer.byteLength(pp) > pathSize && + Buffer.byteLength(prefix) <= prefixSize) { + // prefix fits in prefix, but path doesn't fit in path + ret = [pp.slice(0, pathSize - 1), prefix, true]; + } + else { + // make path take a bit from prefix + pp = pathModule.join(pathModule.basename(prefix), pp); + prefix = pathModule.dirname(prefix); + } + } while (prefix !== root && ret === undefined); + // at this point, found no resolution, just truncate + if (!ret) { + ret = [p.slice(0, pathSize - 1), '', true]; + } + } + return ret; +}; +const decString = (buf, off, size) => buf + .subarray(off, off + size) + .toString('utf8') + .replace(/\0.*/, ''); +const decDate = (buf, off, size) => numToDate(decNumber(buf, off, size)); +const numToDate = (num) => num === undefined ? undefined : new Date(num * 1000); +const decNumber = (buf, off, size) => Number(buf[off]) & 0x80 ? + large.parse(buf.subarray(off, off + size)) + : decSmallNumber(buf, off, size); +const nanUndef = (value) => (isNaN(value) ? undefined : value); +const decSmallNumber = (buf, off, size) => nanUndef(parseInt(buf + .subarray(off, off + size) + .toString('utf8') + .replace(/\0.*$/, '') + .trim(), 8)); +// the maximum encodable as a null-terminated octal, by field size +const MAXNUM = { + 12: 0o77777777777, + 8: 0o7777777, +}; +const encNumber = (buf, off, size, num) => num === undefined ? false + : num > MAXNUM[size] || num < 0 ? + (large.encode(num, buf.subarray(off, off + size)), true) + : (encSmallNumber(buf, off, size, num), false); +const encSmallNumber = (buf, off, size, num) => buf.write(octalString(num, size), off, size, 'ascii'); +const octalString = (num, size) => padOctal(Math.floor(num).toString(8), size); +const padOctal = (str, size) => (str.length === size - 1 ? + str + : new Array(size - str.length - 1).join('0') + str + ' ') + '\0'; +const encDate = (buf, off, size, date) => date === undefined ? false : (encNumber(buf, off, size, date.getTime() / 1000)); +// enough to fill the longest string we've got +const NULLS = new Array(156).join('\0'); +// pad with nulls, return true if it's longer or non-ascii +const encString = (buf, off, size, str) => str === undefined ? false : ((buf.write(str + NULLS, off, size, 'utf8'), + str.length !== Buffer.byteLength(str) || str.length > size)); +//# sourceMappingURL=header.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/index.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/index.js new file mode 100644 index 00000000000000..1bac6415c8d732 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/index.js @@ -0,0 +1,20 @@ +export * from './create.js'; +export { create as c } from './create.js'; +export * from './extract.js'; +export { extract as x } from './extract.js'; +export * from './header.js'; +export * from './list.js'; +export { list as t } from './list.js'; +// classes +export * from './pack.js'; +export * from './parse.js'; +export * from './pax.js'; +export * from './read-entry.js'; +export * from './replace.js'; +export { replace as r } from './replace.js'; +export * as types from './types.js'; +export * from './unpack.js'; +export * from './update.js'; +export { update as u } from './update.js'; +export * from './write-entry.js'; +//# sourceMappingURL=index.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/large-numbers.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/large-numbers.js new file mode 100644 index 00000000000000..4f2f7e5f14fc1b --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/large-numbers.js @@ -0,0 +1,94 @@ +// Tar can encode large and negative numbers using a leading byte of +// 0xff for negative, and 0x80 for positive. +export const encode = (num, buf) => { + if (!Number.isSafeInteger(num)) { + // The number is so large that javascript cannot represent it with integer + // precision. + throw Error('cannot encode number outside of javascript safe integer range'); + } + else if (num < 0) { + encodeNegative(num, buf); + } + else { + encodePositive(num, buf); + } + return buf; +}; +const encodePositive = (num, buf) => { + buf[0] = 0x80; + for (var i = buf.length; i > 1; i--) { + buf[i - 1] = num & 0xff; + num = Math.floor(num / 0x100); + } +}; +const encodeNegative = (num, buf) => { + buf[0] = 0xff; + var flipped = false; + num = num * -1; + for (var i = buf.length; i > 1; i--) { + var byte = num & 0xff; + num = Math.floor(num / 0x100); + if (flipped) { + buf[i - 1] = onesComp(byte); + } + else if (byte === 0) { + buf[i - 1] = 0; + } + else { + flipped = true; + buf[i - 1] = twosComp(byte); + } + } +}; +export const parse = (buf) => { + const pre = buf[0]; + const value = pre === 0x80 ? pos(buf.subarray(1, buf.length)) + : pre === 0xff ? twos(buf) + : null; + if (value === null) { + throw Error('invalid base256 encoding'); + } + if (!Number.isSafeInteger(value)) { + // The number is so large that javascript cannot represent it with integer + // precision. + throw Error('parsed number outside of javascript safe integer range'); + } + return value; +}; +const twos = (buf) => { + var len = buf.length; + var sum = 0; + var flipped = false; + for (var i = len - 1; i > -1; i--) { + var byte = Number(buf[i]); + var f; + if (flipped) { + f = onesComp(byte); + } + else if (byte === 0) { + f = byte; + } + else { + flipped = true; + f = twosComp(byte); + } + if (f !== 0) { + sum -= f * Math.pow(256, len - i - 1); + } + } + return sum; +}; +const pos = (buf) => { + var len = buf.length; + var sum = 0; + for (var i = len - 1; i > -1; i--) { + var byte = Number(buf[i]); + if (byte !== 0) { + sum += byte * Math.pow(256, len - i - 1); + } + } + return sum; +}; +const onesComp = (byte) => (0xff ^ byte) & 0xff; +const twosComp = (byte) => ((0xff ^ byte) + 1) & 0xff; +//# sourceMappingURL=large-numbers.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/list.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/list.js new file mode 100644 index 00000000000000..f49068400b6c92 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/list.js @@ -0,0 +1,106 @@ +// tar -t +import * as fsm from '@isaacs/fs-minipass'; +import fs from 'node:fs'; +import { dirname, parse } from 'path'; +import { makeCommand } from './make-command.js'; +import { Parser } from './parse.js'; +import { stripTrailingSlashes } from './strip-trailing-slashes.js'; +const onReadEntryFunction = (opt) => { + const onReadEntry = opt.onReadEntry; + opt.onReadEntry = + onReadEntry ? + e => { + onReadEntry(e); + e.resume(); + } + : e => e.resume(); +}; +// construct a filter that limits the file entries listed +// include child entries if a dir is included +export const filesFilter = (opt, files) => { + const map = new Map(files.map(f => [stripTrailingSlashes(f), true])); + const filter = opt.filter; + const mapHas = (file, r = '') => { + const root = r || parse(file).root || '.'; + let ret; + if (file === root) + ret = false; + else { + const m = map.get(file); + if (m !== undefined) { + ret = m; + } + else { + ret = mapHas(dirname(file), root); + } + } + map.set(file, ret); + return ret; + }; + opt.filter = + filter ? + (file, entry) => filter(file, entry) && mapHas(stripTrailingSlashes(file)) + : file => mapHas(stripTrailingSlashes(file)); +}; +const listFileSync = (opt) => { + const p = new Parser(opt); + const file = opt.file; + let fd; + try { + const stat = fs.statSync(file); + const readSize = opt.maxReadSize || 16 * 1024 * 1024; + if (stat.size < readSize) { + p.end(fs.readFileSync(file)); + } + else { + let pos = 0; + const buf = Buffer.allocUnsafe(readSize); + fd = fs.openSync(file, 'r'); + while (pos < stat.size) { + const bytesRead = fs.readSync(fd, buf, 0, readSize, pos); + pos += bytesRead; + p.write(buf.subarray(0, bytesRead)); + } + p.end(); + } + } + finally { + if (typeof fd === 'number') { + try { + fs.closeSync(fd); + /* c8 ignore next */ + } + catch (er) { } + } + } +}; +const listFile = (opt, _files) => { + const parse = new Parser(opt); + const readSize = opt.maxReadSize || 16 * 1024 * 1024; + const file = opt.file; + const p = new Promise((resolve, reject) => { + parse.on('error', reject); + parse.on('end', resolve); + fs.stat(file, (er, stat) => { + if (er) { + reject(er); + } + else { + const stream = new fsm.ReadStream(file, { + readSize: readSize, + size: stat.size, + }); + stream.on('error', reject); + stream.pipe(parse); + } + }); + }); + return p; +}; +export const list = makeCommand(listFileSync, listFile, opt => new Parser(opt), opt => new Parser(opt), (opt, files) => { + if (files?.length) + filesFilter(opt, files); + if (!opt.noResume) + onReadEntryFunction(opt); +}); +//# sourceMappingURL=list.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/make-command.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/make-command.js new file mode 100644 index 00000000000000..f2f737bca78fd7 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/make-command.js @@ -0,0 +1,57 @@ +import { dealias, isAsyncFile, isAsyncNoFile, isSyncFile, isSyncNoFile, } from './options.js'; +export const makeCommand = (syncFile, asyncFile, syncNoFile, asyncNoFile, validate) => { + return Object.assign((opt_ = [], entries, cb) => { + if (Array.isArray(opt_)) { + entries = opt_; + opt_ = {}; + } + if (typeof entries === 'function') { + cb = entries; + entries = undefined; + } + if (!entries) { + entries = []; + } + else { + entries = Array.from(entries); + } + const opt = dealias(opt_); + validate?.(opt, entries); + if (isSyncFile(opt)) { + if (typeof cb === 'function') { + throw new TypeError('callback not supported for sync tar functions'); + } + return syncFile(opt, entries); + } + else if (isAsyncFile(opt)) { + const p = asyncFile(opt, entries); + // weirdness to make TS happy + const c = cb ? cb : undefined; + return c ? p.then(() => c(), c) : p; + } + else if (isSyncNoFile(opt)) { + if (typeof cb === 'function') { + throw new TypeError('callback not supported for sync tar functions'); + } + return syncNoFile(opt, entries); + } + else if (isAsyncNoFile(opt)) { + if (typeof cb === 'function') { + throw new TypeError('callback only supported with file option'); + } + return asyncNoFile(opt, entries); + /* c8 ignore start */ + } + else { + throw new Error('impossible options??'); + } + /* c8 ignore stop */ + }, { + syncFile, + asyncFile, + syncNoFile, + asyncNoFile, + validate, + }); +}; +//# sourceMappingURL=make-command.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/mkdir.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/mkdir.js new file mode 100644 index 00000000000000..13498ef0082f0b --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/mkdir.js @@ -0,0 +1,201 @@ +import { chownr, chownrSync } from 'chownr'; +import fs from 'fs'; +import { mkdirp, mkdirpSync } from 'mkdirp'; +import path from 'node:path'; +import { CwdError } from './cwd-error.js'; +import { normalizeWindowsPath } from './normalize-windows-path.js'; +import { SymlinkError } from './symlink-error.js'; +const cGet = (cache, key) => cache.get(normalizeWindowsPath(key)); +const cSet = (cache, key, val) => cache.set(normalizeWindowsPath(key), val); +const checkCwd = (dir, cb) => { + fs.stat(dir, (er, st) => { + if (er || !st.isDirectory()) { + er = new CwdError(dir, er?.code || 'ENOTDIR'); + } + cb(er); + }); +}; +/** + * Wrapper around mkdirp for tar's needs. + * + * The main purpose is to avoid creating directories if we know that + * they already exist (and track which ones exist for this purpose), + * and prevent entries from being extracted into symlinked folders, + * if `preservePaths` is not set. + */ +export const mkdir = (dir, opt, cb) => { + dir = normalizeWindowsPath(dir); + // if there's any overlap between mask and mode, + // then we'll need an explicit chmod + /* c8 ignore next */ + const umask = opt.umask ?? 0o22; + const mode = opt.mode | 0o0700; + const needChmod = (mode & umask) !== 0; + const uid = opt.uid; + const gid = opt.gid; + const doChown = typeof uid === 'number' && + typeof gid === 'number' && + (uid !== opt.processUid || gid !== opt.processGid); + const preserve = opt.preserve; + const unlink = opt.unlink; + const cache = opt.cache; + const cwd = normalizeWindowsPath(opt.cwd); + const done = (er, created) => { + if (er) { + cb(er); + } + else { + cSet(cache, dir, true); + if (created && doChown) { + chownr(created, uid, gid, er => done(er)); + } + else if (needChmod) { + fs.chmod(dir, mode, cb); + } + else { + cb(); + } + } + }; + if (cache && cGet(cache, dir) === true) { + return done(); + } + if (dir === cwd) { + return checkCwd(dir, done); + } + if (preserve) { + return mkdirp(dir, { mode }).then(made => done(null, made ?? undefined), // oh, ts + done); + } + const sub = normalizeWindowsPath(path.relative(cwd, dir)); + const parts = sub.split('/'); + mkdir_(cwd, parts, mode, cache, unlink, cwd, undefined, done); +}; +const mkdir_ = (base, parts, mode, cache, unlink, cwd, created, cb) => { + if (!parts.length) { + return cb(null, created); + } + const p = parts.shift(); + const part = normalizeWindowsPath(path.resolve(base + '/' + p)); + if (cGet(cache, part)) { + return mkdir_(part, parts, mode, cache, unlink, cwd, created, cb); + } + fs.mkdir(part, mode, onmkdir(part, parts, mode, cache, unlink, cwd, created, cb)); +}; +const onmkdir = (part, parts, mode, cache, unlink, cwd, created, cb) => (er) => { + if (er) { + fs.lstat(part, (statEr, st) => { + if (statEr) { + statEr.path = + statEr.path && normalizeWindowsPath(statEr.path); + cb(statEr); + } + else if (st.isDirectory()) { + mkdir_(part, parts, mode, cache, unlink, cwd, created, cb); + } + else if (unlink) { + fs.unlink(part, er => { + if (er) { + return cb(er); + } + fs.mkdir(part, mode, onmkdir(part, parts, mode, cache, unlink, cwd, created, cb)); + }); + } + else if (st.isSymbolicLink()) { + return cb(new SymlinkError(part, part + '/' + parts.join('/'))); + } + else { + cb(er); + } + }); + } + else { + created = created || part; + mkdir_(part, parts, mode, cache, unlink, cwd, created, cb); + } +}; +const checkCwdSync = (dir) => { + let ok = false; + let code = undefined; + try { + ok = fs.statSync(dir).isDirectory(); + } + catch (er) { + code = er?.code; + } + finally { + if (!ok) { + throw new CwdError(dir, code ?? 'ENOTDIR'); + } + } +}; +export const mkdirSync = (dir, opt) => { + dir = normalizeWindowsPath(dir); + // if there's any overlap between mask and mode, + // then we'll need an explicit chmod + /* c8 ignore next */ + const umask = opt.umask ?? 0o22; + const mode = opt.mode | 0o700; + const needChmod = (mode & umask) !== 0; + const uid = opt.uid; + const gid = opt.gid; + const doChown = typeof uid === 'number' && + typeof gid === 'number' && + (uid !== opt.processUid || gid !== opt.processGid); + const preserve = opt.preserve; + const unlink = opt.unlink; + const cache = opt.cache; + const cwd = normalizeWindowsPath(opt.cwd); + const done = (created) => { + cSet(cache, dir, true); + if (created && doChown) { + chownrSync(created, uid, gid); + } + if (needChmod) { + fs.chmodSync(dir, mode); + } + }; + if (cache && cGet(cache, dir) === true) { + return done(); + } + if (dir === cwd) { + checkCwdSync(cwd); + return done(); + } + if (preserve) { + return done(mkdirpSync(dir, mode) ?? undefined); + } + const sub = normalizeWindowsPath(path.relative(cwd, dir)); + const parts = sub.split('/'); + let created = undefined; + for (let p = parts.shift(), part = cwd; p && (part += '/' + p); p = parts.shift()) { + part = normalizeWindowsPath(path.resolve(part)); + if (cGet(cache, part)) { + continue; + } + try { + fs.mkdirSync(part, mode); + created = created || part; + cSet(cache, part, true); + } + catch (er) { + const st = fs.lstatSync(part); + if (st.isDirectory()) { + cSet(cache, part, true); + continue; + } + else if (unlink) { + fs.unlinkSync(part); + fs.mkdirSync(part, mode); + created = created || part; + cSet(cache, part, true); + continue; + } + else if (st.isSymbolicLink()) { + return new SymlinkError(part, part + '/' + parts.join('/')); + } + } + } + return done(created); +}; +//# sourceMappingURL=mkdir.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/mode-fix.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/mode-fix.js new file mode 100644 index 00000000000000..5fd3bb88c1cb25 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/mode-fix.js @@ -0,0 +1,25 @@ +export const modeFix = (mode, isDir, portable) => { + mode &= 0o7777; + // in portable mode, use the minimum reasonable umask + // if this system creates files with 0o664 by default + // (as some linux distros do), then we'll write the + // archive with 0o644 instead. Also, don't ever create + // a file that is not readable/writable by the owner. + if (portable) { + mode = (mode | 0o600) & ~0o22; + } + // if dirs are readable, then they should be listable + if (isDir) { + if (mode & 0o400) { + mode |= 0o100; + } + if (mode & 0o40) { + mode |= 0o10; + } + if (mode & 0o4) { + mode |= 0o1; + } + } + return mode; +}; +//# sourceMappingURL=mode-fix.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/normalize-unicode.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/normalize-unicode.js new file mode 100644 index 00000000000000..94e5095476d6e0 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/normalize-unicode.js @@ -0,0 +1,13 @@ +// warning: extremely hot code path. +// This has been meticulously optimized for use +// within npm install on large package trees. +// Do not edit without careful benchmarking. +const normalizeCache = Object.create(null); +const { hasOwnProperty } = Object.prototype; +export const normalizeUnicode = (s) => { + if (!hasOwnProperty.call(normalizeCache, s)) { + normalizeCache[s] = s.normalize('NFD'); + } + return normalizeCache[s]; +}; +//# sourceMappingURL=normalize-unicode.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/normalize-windows-path.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/normalize-windows-path.js new file mode 100644 index 00000000000000..2d97d2b884e627 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/normalize-windows-path.js @@ -0,0 +1,9 @@ +// on windows, either \ or / are valid directory separators. +// on unix, \ is a valid character in filenames. +// so, on windows, and only on windows, we replace all \ chars with /, +// so that we can use / as our one and only directory separator char. +const platform = process.env.TESTING_TAR_FAKE_PLATFORM || process.platform; +export const normalizeWindowsPath = platform !== 'win32' ? + (p) => p + : (p) => p && p.replace(/\\/g, '/'); +//# sourceMappingURL=normalize-windows-path.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/options.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/options.js new file mode 100644 index 00000000000000..a006d36c23c923 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/options.js @@ -0,0 +1,54 @@ +// turn tar(1) style args like `C` into the more verbose things like `cwd` +const argmap = new Map([ + ['C', 'cwd'], + ['f', 'file'], + ['z', 'gzip'], + ['P', 'preservePaths'], + ['U', 'unlink'], + ['strip-components', 'strip'], + ['stripComponents', 'strip'], + ['keep-newer', 'newer'], + ['keepNewer', 'newer'], + ['keep-newer-files', 'newer'], + ['keepNewerFiles', 'newer'], + ['k', 'keep'], + ['keep-existing', 'keep'], + ['keepExisting', 'keep'], + ['m', 'noMtime'], + ['no-mtime', 'noMtime'], + ['p', 'preserveOwner'], + ['L', 'follow'], + ['h', 'follow'], + ['onentry', 'onReadEntry'], +]); +export const isSyncFile = (o) => !!o.sync && !!o.file; +export const isAsyncFile = (o) => !o.sync && !!o.file; +export const isSyncNoFile = (o) => !!o.sync && !o.file; +export const isAsyncNoFile = (o) => !o.sync && !o.file; +export const isSync = (o) => !!o.sync; +export const isAsync = (o) => !o.sync; +export const isFile = (o) => !!o.file; +export const isNoFile = (o) => !o.file; +const dealiasKey = (k) => { + const d = argmap.get(k); + if (d) + return d; + return k; +}; +export const dealias = (opt = {}) => { + if (!opt) + return {}; + const result = {}; + for (const [key, v] of Object.entries(opt)) { + // TS doesn't know that aliases are going to always be the same type + const k = dealiasKey(key); + result[k] = v; + } + // affordance for deprecated noChmod -> chmod + if (result.chmod === undefined && result.noChmod === false) { + result.chmod = true; + } + delete result.noChmod; + return result; +}; +//# sourceMappingURL=options.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/pack.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/pack.js new file mode 100644 index 00000000000000..f59f32f94201fa --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/pack.js @@ -0,0 +1,445 @@ +// A readable tar stream creator +// Technically, this is a transform stream that you write paths into, +// and tar format comes out of. +// The `add()` method is like `write()` but returns this, +// and end() return `this` as well, so you can +// do `new Pack(opt).add('files').add('dir').end().pipe(output) +// You could also do something like: +// streamOfPaths().pipe(new Pack()).pipe(new fs.WriteStream('out.tar')) +import fs from 'fs'; +import { WriteEntry, WriteEntrySync, WriteEntryTar, } from './write-entry.js'; +export class PackJob { + path; + absolute; + entry; + stat; + readdir; + pending = false; + ignore = false; + piped = false; + constructor(path, absolute) { + this.path = path || './'; + this.absolute = absolute; + } +} +import { Minipass } from 'minipass'; +import * as zlib from 'minizlib'; +import { Yallist } from 'yallist'; +import { ReadEntry } from './read-entry.js'; +import { warnMethod, } from './warn-method.js'; +const EOF = Buffer.alloc(1024); +const ONSTAT = Symbol('onStat'); +const ENDED = Symbol('ended'); +const QUEUE = Symbol('queue'); +const CURRENT = Symbol('current'); +const PROCESS = Symbol('process'); +const PROCESSING = Symbol('processing'); +const PROCESSJOB = Symbol('processJob'); +const JOBS = Symbol('jobs'); +const JOBDONE = Symbol('jobDone'); +const ADDFSENTRY = Symbol('addFSEntry'); +const ADDTARENTRY = Symbol('addTarEntry'); +const STAT = Symbol('stat'); +const READDIR = Symbol('readdir'); +const ONREADDIR = Symbol('onreaddir'); +const PIPE = Symbol('pipe'); +const ENTRY = Symbol('entry'); +const ENTRYOPT = Symbol('entryOpt'); +const WRITEENTRYCLASS = Symbol('writeEntryClass'); +const WRITE = Symbol('write'); +const ONDRAIN = Symbol('ondrain'); +import path from 'path'; +import { normalizeWindowsPath } from './normalize-windows-path.js'; +export class Pack extends Minipass { + opt; + cwd; + maxReadSize; + preservePaths; + strict; + noPax; + prefix; + linkCache; + statCache; + file; + portable; + zip; + readdirCache; + noDirRecurse; + follow; + noMtime; + mtime; + filter; + jobs; + [WRITEENTRYCLASS]; + onWriteEntry; + [QUEUE]; + [JOBS] = 0; + [PROCESSING] = false; + [ENDED] = false; + constructor(opt = {}) { + //@ts-ignore + super(); + this.opt = opt; + this.file = opt.file || ''; + this.cwd = opt.cwd || process.cwd(); + this.maxReadSize = opt.maxReadSize; + this.preservePaths = !!opt.preservePaths; + this.strict = !!opt.strict; + this.noPax = !!opt.noPax; + this.prefix = normalizeWindowsPath(opt.prefix || ''); + this.linkCache = opt.linkCache || new Map(); + this.statCache = opt.statCache || new Map(); + this.readdirCache = opt.readdirCache || new Map(); + this.onWriteEntry = opt.onWriteEntry; + this[WRITEENTRYCLASS] = WriteEntry; + if (typeof opt.onwarn === 'function') { + this.on('warn', opt.onwarn); + } + this.portable = !!opt.portable; + if (opt.gzip || opt.brotli) { + if (opt.gzip && opt.brotli) { + throw new TypeError('gzip and brotli are mutually exclusive'); + } + if (opt.gzip) { + if (typeof opt.gzip !== 'object') { + opt.gzip = {}; + } + if (this.portable) { + opt.gzip.portable = true; + } + this.zip = new zlib.Gzip(opt.gzip); + } + if (opt.brotli) { + if (typeof opt.brotli !== 'object') { + opt.brotli = {}; + } + this.zip = new zlib.BrotliCompress(opt.brotli); + } + /* c8 ignore next */ + if (!this.zip) + throw new Error('impossible'); + const zip = this.zip; + zip.on('data', chunk => super.write(chunk)); + zip.on('end', () => super.end()); + zip.on('drain', () => this[ONDRAIN]()); + this.on('resume', () => zip.resume()); + } + else { + this.on('drain', this[ONDRAIN]); + } + this.noDirRecurse = !!opt.noDirRecurse; + this.follow = !!opt.follow; + this.noMtime = !!opt.noMtime; + if (opt.mtime) + this.mtime = opt.mtime; + this.filter = + typeof opt.filter === 'function' ? opt.filter : () => true; + this[QUEUE] = new Yallist(); + this[JOBS] = 0; + this.jobs = Number(opt.jobs) || 4; + this[PROCESSING] = false; + this[ENDED] = false; + } + [WRITE](chunk) { + return super.write(chunk); + } + add(path) { + this.write(path); + return this; + } + end(path, encoding, cb) { + /* c8 ignore start */ + if (typeof path === 'function') { + cb = path; + path = undefined; + } + if (typeof encoding === 'function') { + cb = encoding; + encoding = undefined; + } + /* c8 ignore stop */ + if (path) { + this.add(path); + } + this[ENDED] = true; + this[PROCESS](); + /* c8 ignore next */ + if (cb) + cb(); + return this; + } + write(path) { + if (this[ENDED]) { + throw new Error('write after end'); + } + if (path instanceof ReadEntry) { + this[ADDTARENTRY](path); + } + else { + this[ADDFSENTRY](path); + } + return this.flowing; + } + [ADDTARENTRY](p) { + const absolute = normalizeWindowsPath(path.resolve(this.cwd, p.path)); + // in this case, we don't have to wait for the stat + if (!this.filter(p.path, p)) { + p.resume(); + } + else { + const job = new PackJob(p.path, absolute); + job.entry = new WriteEntryTar(p, this[ENTRYOPT](job)); + job.entry.on('end', () => this[JOBDONE](job)); + this[JOBS] += 1; + this[QUEUE].push(job); + } + this[PROCESS](); + } + [ADDFSENTRY](p) { + const absolute = normalizeWindowsPath(path.resolve(this.cwd, p)); + this[QUEUE].push(new PackJob(p, absolute)); + this[PROCESS](); + } + [STAT](job) { + job.pending = true; + this[JOBS] += 1; + const stat = this.follow ? 'stat' : 'lstat'; + fs[stat](job.absolute, (er, stat) => { + job.pending = false; + this[JOBS] -= 1; + if (er) { + this.emit('error', er); + } + else { + this[ONSTAT](job, stat); + } + }); + } + [ONSTAT](job, stat) { + this.statCache.set(job.absolute, stat); + job.stat = stat; + // now we have the stat, we can filter it. + if (!this.filter(job.path, stat)) { + job.ignore = true; + } + this[PROCESS](); + } + [READDIR](job) { + job.pending = true; + this[JOBS] += 1; + fs.readdir(job.absolute, (er, entries) => { + job.pending = false; + this[JOBS] -= 1; + if (er) { + return this.emit('error', er); + } + this[ONREADDIR](job, entries); + }); + } + [ONREADDIR](job, entries) { + this.readdirCache.set(job.absolute, entries); + job.readdir = entries; + this[PROCESS](); + } + [PROCESS]() { + if (this[PROCESSING]) { + return; + } + this[PROCESSING] = true; + for (let w = this[QUEUE].head; !!w && this[JOBS] < this.jobs; w = w.next) { + this[PROCESSJOB](w.value); + if (w.value.ignore) { + const p = w.next; + this[QUEUE].removeNode(w); + w.next = p; + } + } + this[PROCESSING] = false; + if (this[ENDED] && !this[QUEUE].length && this[JOBS] === 0) { + if (this.zip) { + this.zip.end(EOF); + } + else { + super.write(EOF); + super.end(); + } + } + } + get [CURRENT]() { + return this[QUEUE] && this[QUEUE].head && this[QUEUE].head.value; + } + [JOBDONE](_job) { + this[QUEUE].shift(); + this[JOBS] -= 1; + this[PROCESS](); + } + [PROCESSJOB](job) { + if (job.pending) { + return; + } + if (job.entry) { + if (job === this[CURRENT] && !job.piped) { + this[PIPE](job); + } + return; + } + if (!job.stat) { + const sc = this.statCache.get(job.absolute); + if (sc) { + this[ONSTAT](job, sc); + } + else { + this[STAT](job); + } + } + if (!job.stat) { + return; + } + // filtered out! + if (job.ignore) { + return; + } + if (!this.noDirRecurse && + job.stat.isDirectory() && + !job.readdir) { + const rc = this.readdirCache.get(job.absolute); + if (rc) { + this[ONREADDIR](job, rc); + } + else { + this[READDIR](job); + } + if (!job.readdir) { + return; + } + } + // we know it doesn't have an entry, because that got checked above + job.entry = this[ENTRY](job); + if (!job.entry) { + job.ignore = true; + return; + } + if (job === this[CURRENT] && !job.piped) { + this[PIPE](job); + } + } + [ENTRYOPT](job) { + return { + onwarn: (code, msg, data) => this.warn(code, msg, data), + noPax: this.noPax, + cwd: this.cwd, + absolute: job.absolute, + preservePaths: this.preservePaths, + maxReadSize: this.maxReadSize, + strict: this.strict, + portable: this.portable, + linkCache: this.linkCache, + statCache: this.statCache, + noMtime: this.noMtime, + mtime: this.mtime, + prefix: this.prefix, + onWriteEntry: this.onWriteEntry, + }; + } + [ENTRY](job) { + this[JOBS] += 1; + try { + const e = new this[WRITEENTRYCLASS](job.path, this[ENTRYOPT](job)); + return e + .on('end', () => this[JOBDONE](job)) + .on('error', er => this.emit('error', er)); + } + catch (er) { + this.emit('error', er); + } + } + [ONDRAIN]() { + if (this[CURRENT] && this[CURRENT].entry) { + this[CURRENT].entry.resume(); + } + } + // like .pipe() but using super, because our write() is special + [PIPE](job) { + job.piped = true; + if (job.readdir) { + job.readdir.forEach(entry => { + const p = job.path; + const base = p === './' ? '' : p.replace(/\/*$/, '/'); + this[ADDFSENTRY](base + entry); + }); + } + const source = job.entry; + const zip = this.zip; + /* c8 ignore start */ + if (!source) + throw new Error('cannot pipe without source'); + /* c8 ignore stop */ + if (zip) { + source.on('data', chunk => { + if (!zip.write(chunk)) { + source.pause(); + } + }); + } + else { + source.on('data', chunk => { + if (!super.write(chunk)) { + source.pause(); + } + }); + } + } + pause() { + if (this.zip) { + this.zip.pause(); + } + return super.pause(); + } + warn(code, message, data = {}) { + warnMethod(this, code, message, data); + } +} +export class PackSync extends Pack { + sync = true; + constructor(opt) { + super(opt); + this[WRITEENTRYCLASS] = WriteEntrySync; + } + // pause/resume are no-ops in sync streams. + pause() { } + resume() { } + [STAT](job) { + const stat = this.follow ? 'statSync' : 'lstatSync'; + this[ONSTAT](job, fs[stat](job.absolute)); + } + [READDIR](job) { + this[ONREADDIR](job, fs.readdirSync(job.absolute)); + } + // gotta get it all in this tick + [PIPE](job) { + const source = job.entry; + const zip = this.zip; + if (job.readdir) { + job.readdir.forEach(entry => { + const p = job.path; + const base = p === './' ? '' : p.replace(/\/*$/, '/'); + this[ADDFSENTRY](base + entry); + }); + } + /* c8 ignore start */ + if (!source) + throw new Error('Cannot pipe without source'); + /* c8 ignore stop */ + if (zip) { + source.on('data', chunk => { + zip.write(chunk); + }); + } + else { + source.on('data', chunk => { + super[WRITE](chunk); + }); + } + } +} +//# sourceMappingURL=pack.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/package.json b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/package.json new file mode 100644 index 00000000000000..3dbc1ca591c055 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/package.json @@ -0,0 +1,3 @@ +{ + "type": "module" +} diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/parse.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/parse.js new file mode 100644 index 00000000000000..f2c802e6eef04d --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/parse.js @@ -0,0 +1,595 @@ +// this[BUFFER] is the remainder of a chunk if we're waiting for +// the full 512 bytes of a header to come in. We will Buffer.concat() +// it to the next write(), which is a mem copy, but a small one. +// +// this[QUEUE] is a Yallist of entries that haven't been emitted +// yet this can only get filled up if the user keeps write()ing after +// a write() returns false, or does a write() with more than one entry +// +// We don't buffer chunks, we always parse them and either create an +// entry, or push it into the active entry. The ReadEntry class knows +// to throw data away if .ignore=true +// +// Shift entry off the buffer when it emits 'end', and emit 'entry' for +// the next one in the list. +// +// At any time, we're pushing body chunks into the entry at WRITEENTRY, +// and waiting for 'end' on the entry at READENTRY +// +// ignored entries get .resume() called on them straight away +import { EventEmitter as EE } from 'events'; +import { BrotliDecompress, Unzip } from 'minizlib'; +import { Yallist } from 'yallist'; +import { Header } from './header.js'; +import { Pax } from './pax.js'; +import { ReadEntry } from './read-entry.js'; +import { warnMethod, } from './warn-method.js'; +const maxMetaEntrySize = 1024 * 1024; +const gzipHeader = Buffer.from([0x1f, 0x8b]); +const STATE = Symbol('state'); +const WRITEENTRY = Symbol('writeEntry'); +const READENTRY = Symbol('readEntry'); +const NEXTENTRY = Symbol('nextEntry'); +const PROCESSENTRY = Symbol('processEntry'); +const EX = Symbol('extendedHeader'); +const GEX = Symbol('globalExtendedHeader'); +const META = Symbol('meta'); +const EMITMETA = Symbol('emitMeta'); +const BUFFER = Symbol('buffer'); +const QUEUE = Symbol('queue'); +const ENDED = Symbol('ended'); +const EMITTEDEND = Symbol('emittedEnd'); +const EMIT = Symbol('emit'); +const UNZIP = Symbol('unzip'); +const CONSUMECHUNK = Symbol('consumeChunk'); +const CONSUMECHUNKSUB = Symbol('consumeChunkSub'); +const CONSUMEBODY = Symbol('consumeBody'); +const CONSUMEMETA = Symbol('consumeMeta'); +const CONSUMEHEADER = Symbol('consumeHeader'); +const CONSUMING = Symbol('consuming'); +const BUFFERCONCAT = Symbol('bufferConcat'); +const MAYBEEND = Symbol('maybeEnd'); +const WRITING = Symbol('writing'); +const ABORTED = Symbol('aborted'); +const DONE = Symbol('onDone'); +const SAW_VALID_ENTRY = Symbol('sawValidEntry'); +const SAW_NULL_BLOCK = Symbol('sawNullBlock'); +const SAW_EOF = Symbol('sawEOF'); +const CLOSESTREAM = Symbol('closeStream'); +const noop = () => true; +export class Parser extends EE { + file; + strict; + maxMetaEntrySize; + filter; + brotli; + writable = true; + readable = false; + [QUEUE] = new Yallist(); + [BUFFER]; + [READENTRY]; + [WRITEENTRY]; + [STATE] = 'begin'; + [META] = ''; + [EX]; + [GEX]; + [ENDED] = false; + [UNZIP]; + [ABORTED] = false; + [SAW_VALID_ENTRY]; + [SAW_NULL_BLOCK] = false; + [SAW_EOF] = false; + [WRITING] = false; + [CONSUMING] = false; + [EMITTEDEND] = false; + constructor(opt = {}) { + super(); + this.file = opt.file || ''; + // these BADARCHIVE errors can't be detected early. listen on DONE. + this.on(DONE, () => { + if (this[STATE] === 'begin' || + this[SAW_VALID_ENTRY] === false) { + // either less than 1 block of data, or all entries were invalid. + // Either way, probably not even a tarball. + this.warn('TAR_BAD_ARCHIVE', 'Unrecognized archive format'); + } + }); + if (opt.ondone) { + this.on(DONE, opt.ondone); + } + else { + this.on(DONE, () => { + this.emit('prefinish'); + this.emit('finish'); + this.emit('end'); + }); + } + this.strict = !!opt.strict; + this.maxMetaEntrySize = opt.maxMetaEntrySize || maxMetaEntrySize; + this.filter = typeof opt.filter === 'function' ? opt.filter : noop; + // Unlike gzip, brotli doesn't have any magic bytes to identify it + // Users need to explicitly tell us they're extracting a brotli file + // Or we infer from the file extension + const isTBR = opt.file && + (opt.file.endsWith('.tar.br') || opt.file.endsWith('.tbr')); + // if it's a tbr file it MIGHT be brotli, but we don't know until + // we look at it and verify it's not a valid tar file. + this.brotli = + !opt.gzip && opt.brotli !== undefined ? opt.brotli + : isTBR ? undefined + : false; + // have to set this so that streams are ok piping into it + this.on('end', () => this[CLOSESTREAM]()); + if (typeof opt.onwarn === 'function') { + this.on('warn', opt.onwarn); + } + if (typeof opt.onReadEntry === 'function') { + this.on('entry', opt.onReadEntry); + } + } + warn(code, message, data = {}) { + warnMethod(this, code, message, data); + } + [CONSUMEHEADER](chunk, position) { + if (this[SAW_VALID_ENTRY] === undefined) { + this[SAW_VALID_ENTRY] = false; + } + let header; + try { + header = new Header(chunk, position, this[EX], this[GEX]); + } + catch (er) { + return this.warn('TAR_ENTRY_INVALID', er); + } + if (header.nullBlock) { + if (this[SAW_NULL_BLOCK]) { + this[SAW_EOF] = true; + // ending an archive with no entries. pointless, but legal. + if (this[STATE] === 'begin') { + this[STATE] = 'header'; + } + this[EMIT]('eof'); + } + else { + this[SAW_NULL_BLOCK] = true; + this[EMIT]('nullBlock'); + } + } + else { + this[SAW_NULL_BLOCK] = false; + if (!header.cksumValid) { + this.warn('TAR_ENTRY_INVALID', 'checksum failure', { header }); + } + else if (!header.path) { + this.warn('TAR_ENTRY_INVALID', 'path is required', { header }); + } + else { + const type = header.type; + if (/^(Symbolic)?Link$/.test(type) && !header.linkpath) { + this.warn('TAR_ENTRY_INVALID', 'linkpath required', { + header, + }); + } + else if (!/^(Symbolic)?Link$/.test(type) && + !/^(Global)?ExtendedHeader$/.test(type) && + header.linkpath) { + this.warn('TAR_ENTRY_INVALID', 'linkpath forbidden', { + header, + }); + } + else { + const entry = (this[WRITEENTRY] = new ReadEntry(header, this[EX], this[GEX])); + // we do this for meta & ignored entries as well, because they + // are still valid tar, or else we wouldn't know to ignore them + if (!this[SAW_VALID_ENTRY]) { + if (entry.remain) { + // this might be the one! + const onend = () => { + if (!entry.invalid) { + this[SAW_VALID_ENTRY] = true; + } + }; + entry.on('end', onend); + } + else { + this[SAW_VALID_ENTRY] = true; + } + } + if (entry.meta) { + if (entry.size > this.maxMetaEntrySize) { + entry.ignore = true; + this[EMIT]('ignoredEntry', entry); + this[STATE] = 'ignore'; + entry.resume(); + } + else if (entry.size > 0) { + this[META] = ''; + entry.on('data', c => (this[META] += c)); + this[STATE] = 'meta'; + } + } + else { + this[EX] = undefined; + entry.ignore = + entry.ignore || !this.filter(entry.path, entry); + if (entry.ignore) { + // probably valid, just not something we care about + this[EMIT]('ignoredEntry', entry); + this[STATE] = entry.remain ? 'ignore' : 'header'; + entry.resume(); + } + else { + if (entry.remain) { + this[STATE] = 'body'; + } + else { + this[STATE] = 'header'; + entry.end(); + } + if (!this[READENTRY]) { + this[QUEUE].push(entry); + this[NEXTENTRY](); + } + else { + this[QUEUE].push(entry); + } + } + } + } + } + } + } + [CLOSESTREAM]() { + queueMicrotask(() => this.emit('close')); + } + [PROCESSENTRY](entry) { + let go = true; + if (!entry) { + this[READENTRY] = undefined; + go = false; + } + else if (Array.isArray(entry)) { + const [ev, ...args] = entry; + this.emit(ev, ...args); + } + else { + this[READENTRY] = entry; + this.emit('entry', entry); + if (!entry.emittedEnd) { + entry.on('end', () => this[NEXTENTRY]()); + go = false; + } + } + return go; + } + [NEXTENTRY]() { + do { } while (this[PROCESSENTRY](this[QUEUE].shift())); + if (!this[QUEUE].length) { + // At this point, there's nothing in the queue, but we may have an + // entry which is being consumed (readEntry). + // If we don't, then we definitely can handle more data. + // If we do, and either it's flowing, or it has never had any data + // written to it, then it needs more. + // The only other possibility is that it has returned false from a + // write() call, so we wait for the next drain to continue. + const re = this[READENTRY]; + const drainNow = !re || re.flowing || re.size === re.remain; + if (drainNow) { + if (!this[WRITING]) { + this.emit('drain'); + } + } + else { + re.once('drain', () => this.emit('drain')); + } + } + } + [CONSUMEBODY](chunk, position) { + // write up to but no more than writeEntry.blockRemain + const entry = this[WRITEENTRY]; + /* c8 ignore start */ + if (!entry) { + throw new Error('attempt to consume body without entry??'); + } + const br = entry.blockRemain ?? 0; + /* c8 ignore stop */ + const c = br >= chunk.length && position === 0 ? + chunk + : chunk.subarray(position, position + br); + entry.write(c); + if (!entry.blockRemain) { + this[STATE] = 'header'; + this[WRITEENTRY] = undefined; + entry.end(); + } + return c.length; + } + [CONSUMEMETA](chunk, position) { + const entry = this[WRITEENTRY]; + const ret = this[CONSUMEBODY](chunk, position); + // if we finished, then the entry is reset + if (!this[WRITEENTRY] && entry) { + this[EMITMETA](entry); + } + return ret; + } + [EMIT](ev, data, extra) { + if (!this[QUEUE].length && !this[READENTRY]) { + this.emit(ev, data, extra); + } + else { + this[QUEUE].push([ev, data, extra]); + } + } + [EMITMETA](entry) { + this[EMIT]('meta', this[META]); + switch (entry.type) { + case 'ExtendedHeader': + case 'OldExtendedHeader': + this[EX] = Pax.parse(this[META], this[EX], false); + break; + case 'GlobalExtendedHeader': + this[GEX] = Pax.parse(this[META], this[GEX], true); + break; + case 'NextFileHasLongPath': + case 'OldGnuLongPath': { + const ex = this[EX] ?? Object.create(null); + this[EX] = ex; + ex.path = this[META].replace(/\0.*/, ''); + break; + } + case 'NextFileHasLongLinkpath': { + const ex = this[EX] || Object.create(null); + this[EX] = ex; + ex.linkpath = this[META].replace(/\0.*/, ''); + break; + } + /* c8 ignore start */ + default: + throw new Error('unknown meta: ' + entry.type); + /* c8 ignore stop */ + } + } + abort(error) { + this[ABORTED] = true; + this.emit('abort', error); + // always throws, even in non-strict mode + this.warn('TAR_ABORT', error, { recoverable: false }); + } + write(chunk, encoding, cb) { + if (typeof encoding === 'function') { + cb = encoding; + encoding = undefined; + } + if (typeof chunk === 'string') { + chunk = Buffer.from(chunk, + /* c8 ignore next */ + typeof encoding === 'string' ? encoding : 'utf8'); + } + if (this[ABORTED]) { + /* c8 ignore next */ + cb?.(); + return false; + } + // first write, might be gzipped + const needSniff = this[UNZIP] === undefined || + (this.brotli === undefined && this[UNZIP] === false); + if (needSniff && chunk) { + if (this[BUFFER]) { + chunk = Buffer.concat([this[BUFFER], chunk]); + this[BUFFER] = undefined; + } + if (chunk.length < gzipHeader.length) { + this[BUFFER] = chunk; + /* c8 ignore next */ + cb?.(); + return true; + } + // look for gzip header + for (let i = 0; this[UNZIP] === undefined && i < gzipHeader.length; i++) { + if (chunk[i] !== gzipHeader[i]) { + this[UNZIP] = false; + } + } + const maybeBrotli = this.brotli === undefined; + if (this[UNZIP] === false && maybeBrotli) { + // read the first header to see if it's a valid tar file. If so, + // we can safely assume that it's not actually brotli, despite the + // .tbr or .tar.br file extension. + // if we ended before getting a full chunk, yes, def brotli + if (chunk.length < 512) { + if (this[ENDED]) { + this.brotli = true; + } + else { + this[BUFFER] = chunk; + /* c8 ignore next */ + cb?.(); + return true; + } + } + else { + // if it's tar, it's pretty reliably not brotli, chances of + // that happening are astronomical. + try { + new Header(chunk.subarray(0, 512)); + this.brotli = false; + } + catch (_) { + this.brotli = true; + } + } + } + if (this[UNZIP] === undefined || + (this[UNZIP] === false && this.brotli)) { + const ended = this[ENDED]; + this[ENDED] = false; + this[UNZIP] = + this[UNZIP] === undefined ? + new Unzip({}) + : new BrotliDecompress({}); + this[UNZIP].on('data', chunk => this[CONSUMECHUNK](chunk)); + this[UNZIP].on('error', er => this.abort(er)); + this[UNZIP].on('end', () => { + this[ENDED] = true; + this[CONSUMECHUNK](); + }); + this[WRITING] = true; + const ret = !!this[UNZIP][ended ? 'end' : 'write'](chunk); + this[WRITING] = false; + cb?.(); + return ret; + } + } + this[WRITING] = true; + if (this[UNZIP]) { + this[UNZIP].write(chunk); + } + else { + this[CONSUMECHUNK](chunk); + } + this[WRITING] = false; + // return false if there's a queue, or if the current entry isn't flowing + const ret = this[QUEUE].length ? false + : this[READENTRY] ? this[READENTRY].flowing + : true; + // if we have no queue, then that means a clogged READENTRY + if (!ret && !this[QUEUE].length) { + this[READENTRY]?.once('drain', () => this.emit('drain')); + } + /* c8 ignore next */ + cb?.(); + return ret; + } + [BUFFERCONCAT](c) { + if (c && !this[ABORTED]) { + this[BUFFER] = + this[BUFFER] ? Buffer.concat([this[BUFFER], c]) : c; + } + } + [MAYBEEND]() { + if (this[ENDED] && + !this[EMITTEDEND] && + !this[ABORTED] && + !this[CONSUMING]) { + this[EMITTEDEND] = true; + const entry = this[WRITEENTRY]; + if (entry && entry.blockRemain) { + // truncated, likely a damaged file + const have = this[BUFFER] ? this[BUFFER].length : 0; + this.warn('TAR_BAD_ARCHIVE', `Truncated input (needed ${entry.blockRemain} more bytes, only ${have} available)`, { entry }); + if (this[BUFFER]) { + entry.write(this[BUFFER]); + } + entry.end(); + } + this[EMIT](DONE); + } + } + [CONSUMECHUNK](chunk) { + if (this[CONSUMING] && chunk) { + this[BUFFERCONCAT](chunk); + } + else if (!chunk && !this[BUFFER]) { + this[MAYBEEND](); + } + else if (chunk) { + this[CONSUMING] = true; + if (this[BUFFER]) { + this[BUFFERCONCAT](chunk); + const c = this[BUFFER]; + this[BUFFER] = undefined; + this[CONSUMECHUNKSUB](c); + } + else { + this[CONSUMECHUNKSUB](chunk); + } + while (this[BUFFER] && + this[BUFFER]?.length >= 512 && + !this[ABORTED] && + !this[SAW_EOF]) { + const c = this[BUFFER]; + this[BUFFER] = undefined; + this[CONSUMECHUNKSUB](c); + } + this[CONSUMING] = false; + } + if (!this[BUFFER] || this[ENDED]) { + this[MAYBEEND](); + } + } + [CONSUMECHUNKSUB](chunk) { + // we know that we are in CONSUMING mode, so anything written goes into + // the buffer. Advance the position and put any remainder in the buffer. + let position = 0; + const length = chunk.length; + while (position + 512 <= length && + !this[ABORTED] && + !this[SAW_EOF]) { + switch (this[STATE]) { + case 'begin': + case 'header': + this[CONSUMEHEADER](chunk, position); + position += 512; + break; + case 'ignore': + case 'body': + position += this[CONSUMEBODY](chunk, position); + break; + case 'meta': + position += this[CONSUMEMETA](chunk, position); + break; + /* c8 ignore start */ + default: + throw new Error('invalid state: ' + this[STATE]); + /* c8 ignore stop */ + } + } + if (position < length) { + if (this[BUFFER]) { + this[BUFFER] = Buffer.concat([ + chunk.subarray(position), + this[BUFFER], + ]); + } + else { + this[BUFFER] = chunk.subarray(position); + } + } + } + end(chunk, encoding, cb) { + if (typeof chunk === 'function') { + cb = chunk; + encoding = undefined; + chunk = undefined; + } + if (typeof encoding === 'function') { + cb = encoding; + encoding = undefined; + } + if (typeof chunk === 'string') { + chunk = Buffer.from(chunk, encoding); + } + if (cb) + this.once('finish', cb); + if (!this[ABORTED]) { + if (this[UNZIP]) { + /* c8 ignore start */ + if (chunk) + this[UNZIP].write(chunk); + /* c8 ignore stop */ + this[UNZIP].end(); + } + else { + this[ENDED] = true; + if (this.brotli === undefined) + chunk = chunk || Buffer.alloc(0); + if (chunk) + this.write(chunk); + this[MAYBEEND](); + } + } + return this; + } +} +//# sourceMappingURL=parse.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/path-reservations.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/path-reservations.js new file mode 100644 index 00000000000000..e63b9c91e9a808 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/path-reservations.js @@ -0,0 +1,166 @@ +// A path exclusive reservation system +// reserve([list, of, paths], fn) +// When the fn is first in line for all its paths, it +// is called with a cb that clears the reservation. +// +// Used by async unpack to avoid clobbering paths in use, +// while still allowing maximal safe parallelization. +import { join } from 'node:path'; +import { normalizeUnicode } from './normalize-unicode.js'; +import { stripTrailingSlashes } from './strip-trailing-slashes.js'; +const platform = process.env.TESTING_TAR_FAKE_PLATFORM || process.platform; +const isWindows = platform === 'win32'; +// return a set of parent dirs for a given path +// '/a/b/c/d' -> ['/', '/a', '/a/b', '/a/b/c', '/a/b/c/d'] +const getDirs = (path) => { + const dirs = path + .split('/') + .slice(0, -1) + .reduce((set, path) => { + const s = set[set.length - 1]; + if (s !== undefined) { + path = join(s, path); + } + set.push(path || '/'); + return set; + }, []); + return dirs; +}; +export class PathReservations { + // path => [function or Set] + // A Set object means a directory reservation + // A fn is a direct reservation on that path + #queues = new Map(); + // fn => {paths:[path,...], dirs:[path, ...]} + #reservations = new Map(); + // functions currently running + #running = new Set(); + reserve(paths, fn) { + paths = + isWindows ? + ['win32 parallelization disabled'] + : paths.map(p => { + // don't need normPath, because we skip this entirely for windows + return stripTrailingSlashes(join(normalizeUnicode(p))).toLowerCase(); + }); + const dirs = new Set(paths.map(path => getDirs(path)).reduce((a, b) => a.concat(b))); + this.#reservations.set(fn, { dirs, paths }); + for (const p of paths) { + const q = this.#queues.get(p); + if (!q) { + this.#queues.set(p, [fn]); + } + else { + q.push(fn); + } + } + for (const dir of dirs) { + const q = this.#queues.get(dir); + if (!q) { + this.#queues.set(dir, [new Set([fn])]); + } + else { + const l = q[q.length - 1]; + if (l instanceof Set) { + l.add(fn); + } + else { + q.push(new Set([fn])); + } + } + } + return this.#run(fn); + } + // return the queues for each path the function cares about + // fn => {paths, dirs} + #getQueues(fn) { + const res = this.#reservations.get(fn); + /* c8 ignore start */ + if (!res) { + throw new Error('function does not have any path reservations'); + } + /* c8 ignore stop */ + return { + paths: res.paths.map((path) => this.#queues.get(path)), + dirs: [...res.dirs].map(path => this.#queues.get(path)), + }; + } + // check if fn is first in line for all its paths, and is + // included in the first set for all its dir queues + check(fn) { + const { paths, dirs } = this.#getQueues(fn); + return (paths.every(q => q && q[0] === fn) && + dirs.every(q => q && q[0] instanceof Set && q[0].has(fn))); + } + // run the function if it's first in line and not already running + #run(fn) { + if (this.#running.has(fn) || !this.check(fn)) { + return false; + } + this.#running.add(fn); + fn(() => this.#clear(fn)); + return true; + } + #clear(fn) { + if (!this.#running.has(fn)) { + return false; + } + const res = this.#reservations.get(fn); + /* c8 ignore start */ + if (!res) { + throw new Error('invalid reservation'); + } + /* c8 ignore stop */ + const { paths, dirs } = res; + const next = new Set(); + for (const path of paths) { + const q = this.#queues.get(path); + /* c8 ignore start */ + if (!q || q?.[0] !== fn) { + continue; + } + /* c8 ignore stop */ + const q0 = q[1]; + if (!q0) { + this.#queues.delete(path); + continue; + } + q.shift(); + if (typeof q0 === 'function') { + next.add(q0); + } + else { + for (const f of q0) { + next.add(f); + } + } + } + for (const dir of dirs) { + const q = this.#queues.get(dir); + const q0 = q?.[0]; + /* c8 ignore next - type safety only */ + if (!q || !(q0 instanceof Set)) + continue; + if (q0.size === 1 && q.length === 1) { + this.#queues.delete(dir); + continue; + } + else if (q0.size === 1) { + q.shift(); + // next one must be a function, + // or else the Set would've been reused + const n = q[0]; + if (typeof n === 'function') { + next.add(n); + } + } + else { + q0.delete(fn); + } + } + this.#running.delete(fn); + next.forEach(fn => this.#run(fn)); + return true; + } +} +//# sourceMappingURL=path-reservations.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/pax.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/pax.js new file mode 100644 index 00000000000000..832808f344da53 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/pax.js @@ -0,0 +1,154 @@ +import { basename } from 'node:path'; +import { Header } from './header.js'; +export class Pax { + atime; + mtime; + ctime; + charset; + comment; + gid; + uid; + gname; + uname; + linkpath; + dev; + ino; + nlink; + path; + size; + mode; + global; + constructor(obj, global = false) { + this.atime = obj.atime; + this.charset = obj.charset; + this.comment = obj.comment; + this.ctime = obj.ctime; + this.dev = obj.dev; + this.gid = obj.gid; + this.global = global; + this.gname = obj.gname; + this.ino = obj.ino; + this.linkpath = obj.linkpath; + this.mtime = obj.mtime; + this.nlink = obj.nlink; + this.path = obj.path; + this.size = obj.size; + this.uid = obj.uid; + this.uname = obj.uname; + } + encode() { + const body = this.encodeBody(); + if (body === '') { + return Buffer.allocUnsafe(0); + } + const bodyLen = Buffer.byteLength(body); + // round up to 512 bytes + // add 512 for header + const bufLen = 512 * Math.ceil(1 + bodyLen / 512); + const buf = Buffer.allocUnsafe(bufLen); + // 0-fill the header section, it might not hit every field + for (let i = 0; i < 512; i++) { + buf[i] = 0; + } + new Header({ + // XXX split the path + // then the path should be PaxHeader + basename, but less than 99, + // prepend with the dirname + /* c8 ignore start */ + path: ('PaxHeader/' + basename(this.path ?? '')).slice(0, 99), + /* c8 ignore stop */ + mode: this.mode || 0o644, + uid: this.uid, + gid: this.gid, + size: bodyLen, + mtime: this.mtime, + type: this.global ? 'GlobalExtendedHeader' : 'ExtendedHeader', + linkpath: '', + uname: this.uname || '', + gname: this.gname || '', + devmaj: 0, + devmin: 0, + atime: this.atime, + ctime: this.ctime, + }).encode(buf); + buf.write(body, 512, bodyLen, 'utf8'); + // null pad after the body + for (let i = bodyLen + 512; i < buf.length; i++) { + buf[i] = 0; + } + return buf; + } + encodeBody() { + return (this.encodeField('path') + + this.encodeField('ctime') + + this.encodeField('atime') + + this.encodeField('dev') + + this.encodeField('ino') + + this.encodeField('nlink') + + this.encodeField('charset') + + this.encodeField('comment') + + this.encodeField('gid') + + this.encodeField('gname') + + this.encodeField('linkpath') + + this.encodeField('mtime') + + this.encodeField('size') + + this.encodeField('uid') + + this.encodeField('uname')); + } + encodeField(field) { + if (this[field] === undefined) { + return ''; + } + const r = this[field]; + const v = r instanceof Date ? r.getTime() / 1000 : r; + const s = ' ' + + (field === 'dev' || field === 'ino' || field === 'nlink' ? + 'SCHILY.' + : '') + + field + + '=' + + v + + '\n'; + const byteLen = Buffer.byteLength(s); + // the digits includes the length of the digits in ascii base-10 + // so if it's 9 characters, then adding 1 for the 9 makes it 10 + // which makes it 11 chars. + let digits = Math.floor(Math.log(byteLen) / Math.log(10)) + 1; + if (byteLen + digits >= Math.pow(10, digits)) { + digits += 1; + } + const len = digits + byteLen; + return len + s; + } + static parse(str, ex, g = false) { + return new Pax(merge(parseKV(str), ex), g); + } +} +const merge = (a, b) => b ? Object.assign({}, b, a) : a; +const parseKV = (str) => str + .replace(/\n$/, '') + .split('\n') + .reduce(parseKVLine, Object.create(null)); +const parseKVLine = (set, line) => { + const n = parseInt(line, 10); + // XXX Values with \n in them will fail this. + // Refactor to not be a naive line-by-line parse. + if (n !== Buffer.byteLength(line) + 1) { + return set; + } + line = line.slice((n + ' ').length); + const kv = line.split('='); + const r = kv.shift(); + if (!r) { + return set; + } + const k = r.replace(/^SCHILY\.(dev|ino|nlink)/, '$1'); + const v = kv.join('='); + set[k] = + /^([A-Z]+\.)?([mac]|birth|creation)time$/.test(k) ? + new Date(Number(v) * 1000) + : /^[0-9]+$/.test(v) ? +v + : v; + return set; +}; +//# sourceMappingURL=pax.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/read-entry.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/read-entry.js new file mode 100644 index 00000000000000..23cc673e610879 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/read-entry.js @@ -0,0 +1,136 @@ +import { Minipass } from 'minipass'; +import { normalizeWindowsPath } from './normalize-windows-path.js'; +export class ReadEntry extends Minipass { + extended; + globalExtended; + header; + startBlockSize; + blockRemain; + remain; + type; + meta = false; + ignore = false; + path; + mode; + uid; + gid; + uname; + gname; + size = 0; + mtime; + atime; + ctime; + linkpath; + dev; + ino; + nlink; + invalid = false; + absolute; + unsupported = false; + constructor(header, ex, gex) { + super({}); + // read entries always start life paused. this is to avoid the + // situation where Minipass's auto-ending empty streams results + // in an entry ending before we're ready for it. + this.pause(); + this.extended = ex; + this.globalExtended = gex; + this.header = header; + /* c8 ignore start */ + this.remain = header.size ?? 0; + /* c8 ignore stop */ + this.startBlockSize = 512 * Math.ceil(this.remain / 512); + this.blockRemain = this.startBlockSize; + this.type = header.type; + switch (this.type) { + case 'File': + case 'OldFile': + case 'Link': + case 'SymbolicLink': + case 'CharacterDevice': + case 'BlockDevice': + case 'Directory': + case 'FIFO': + case 'ContiguousFile': + case 'GNUDumpDir': + break; + case 'NextFileHasLongLinkpath': + case 'NextFileHasLongPath': + case 'OldGnuLongPath': + case 'GlobalExtendedHeader': + case 'ExtendedHeader': + case 'OldExtendedHeader': + this.meta = true; + break; + // NOTE: gnutar and bsdtar treat unrecognized types as 'File' + // it may be worth doing the same, but with a warning. + default: + this.ignore = true; + } + /* c8 ignore start */ + if (!header.path) { + throw new Error('no path provided for tar.ReadEntry'); + } + /* c8 ignore stop */ + this.path = normalizeWindowsPath(header.path); + this.mode = header.mode; + if (this.mode) { + this.mode = this.mode & 0o7777; + } + this.uid = header.uid; + this.gid = header.gid; + this.uname = header.uname; + this.gname = header.gname; + this.size = this.remain; + this.mtime = header.mtime; + this.atime = header.atime; + this.ctime = header.ctime; + /* c8 ignore start */ + this.linkpath = + header.linkpath ? + normalizeWindowsPath(header.linkpath) + : undefined; + /* c8 ignore stop */ + this.uname = header.uname; + this.gname = header.gname; + if (ex) { + this.#slurp(ex); + } + if (gex) { + this.#slurp(gex, true); + } + } + write(data) { + const writeLen = data.length; + if (writeLen > this.blockRemain) { + throw new Error('writing more to entry than is appropriate'); + } + const r = this.remain; + const br = this.blockRemain; + this.remain = Math.max(0, r - writeLen); + this.blockRemain = Math.max(0, br - writeLen); + if (this.ignore) { + return true; + } + if (r >= writeLen) { + return super.write(data); + } + // r < writeLen + return super.write(data.subarray(0, r)); + } + #slurp(ex, gex = false) { + if (ex.path) + ex.path = normalizeWindowsPath(ex.path); + if (ex.linkpath) + ex.linkpath = normalizeWindowsPath(ex.linkpath); + Object.assign(this, Object.fromEntries(Object.entries(ex).filter(([k, v]) => { + // we slurp in everything except for the path attribute in + // a global extended header, because that's weird. Also, any + // null/undefined values are ignored. + return !(v === null || + v === undefined || + (k === 'path' && gex)); + }))); + } +} +//# sourceMappingURL=read-entry.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/replace.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/replace.js new file mode 100644 index 00000000000000..c461a4c7d8b63c --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/replace.js @@ -0,0 +1,225 @@ +// tar -r +import { WriteStream, WriteStreamSync } from '@isaacs/fs-minipass'; +import fs from 'node:fs'; +import path from 'node:path'; +import { Header } from './header.js'; +import { list } from './list.js'; +import { makeCommand } from './make-command.js'; +import { isFile, } from './options.js'; +import { Pack, PackSync } from './pack.js'; +// starting at the head of the file, read a Header +// If the checksum is invalid, that's our position to start writing +// If it is, jump forward by the specified size (round up to 512) +// and try again. +// Write the new Pack stream starting there. +const replaceSync = (opt, files) => { + const p = new PackSync(opt); + let threw = true; + let fd; + let position; + try { + try { + fd = fs.openSync(opt.file, 'r+'); + } + catch (er) { + if (er?.code === 'ENOENT') { + fd = fs.openSync(opt.file, 'w+'); + } + else { + throw er; + } + } + const st = fs.fstatSync(fd); + const headBuf = Buffer.alloc(512); + POSITION: for (position = 0; position < st.size; position += 512) { + for (let bufPos = 0, bytes = 0; bufPos < 512; bufPos += bytes) { + bytes = fs.readSync(fd, headBuf, bufPos, headBuf.length - bufPos, position + bufPos); + if (position === 0 && + headBuf[0] === 0x1f && + headBuf[1] === 0x8b) { + throw new Error('cannot append to compressed archives'); + } + if (!bytes) { + break POSITION; + } + } + const h = new Header(headBuf); + if (!h.cksumValid) { + break; + } + const entryBlockSize = 512 * Math.ceil((h.size || 0) / 512); + if (position + entryBlockSize + 512 > st.size) { + break; + } + // the 512 for the header we just parsed will be added as well + // also jump ahead all the blocks for the body + position += entryBlockSize; + if (opt.mtimeCache && h.mtime) { + opt.mtimeCache.set(String(h.path), h.mtime); + } + } + threw = false; + streamSync(opt, p, position, fd, files); + } + finally { + if (threw) { + try { + fs.closeSync(fd); + } + catch (er) { } + } + } +}; +const streamSync = (opt, p, position, fd, files) => { + const stream = new WriteStreamSync(opt.file, { + fd: fd, + start: position, + }); + p.pipe(stream); + addFilesSync(p, files); +}; +const replaceAsync = (opt, files) => { + files = Array.from(files); + const p = new Pack(opt); + const getPos = (fd, size, cb_) => { + const cb = (er, pos) => { + if (er) { + fs.close(fd, _ => cb_(er)); + } + else { + cb_(null, pos); + } + }; + let position = 0; + if (size === 0) { + return cb(null, 0); + } + let bufPos = 0; + const headBuf = Buffer.alloc(512); + const onread = (er, bytes) => { + if (er || typeof bytes === 'undefined') { + return cb(er); + } + bufPos += bytes; + if (bufPos < 512 && bytes) { + return fs.read(fd, headBuf, bufPos, headBuf.length - bufPos, position + bufPos, onread); + } + if (position === 0 && + headBuf[0] === 0x1f && + headBuf[1] === 0x8b) { + return cb(new Error('cannot append to compressed archives')); + } + // truncated header + if (bufPos < 512) { + return cb(null, position); + } + const h = new Header(headBuf); + if (!h.cksumValid) { + return cb(null, position); + } + /* c8 ignore next */ + const entryBlockSize = 512 * Math.ceil((h.size ?? 0) / 512); + if (position + entryBlockSize + 512 > size) { + return cb(null, position); + } + position += entryBlockSize + 512; + if (position >= size) { + return cb(null, position); + } + if (opt.mtimeCache && h.mtime) { + opt.mtimeCache.set(String(h.path), h.mtime); + } + bufPos = 0; + fs.read(fd, headBuf, 0, 512, position, onread); + }; + fs.read(fd, headBuf, 0, 512, position, onread); + }; + const promise = new Promise((resolve, reject) => { + p.on('error', reject); + let flag = 'r+'; + const onopen = (er, fd) => { + if (er && er.code === 'ENOENT' && flag === 'r+') { + flag = 'w+'; + return fs.open(opt.file, flag, onopen); + } + if (er || !fd) { + return reject(er); + } + fs.fstat(fd, (er, st) => { + if (er) { + return fs.close(fd, () => reject(er)); + } + getPos(fd, st.size, (er, position) => { + if (er) { + return reject(er); + } + const stream = new WriteStream(opt.file, { + fd: fd, + start: position, + }); + p.pipe(stream); + stream.on('error', reject); + stream.on('close', resolve); + addFilesAsync(p, files); + }); + }); + }; + fs.open(opt.file, flag, onopen); + }); + return promise; +}; +const addFilesSync = (p, files) => { + files.forEach(file => { + if (file.charAt(0) === '@') { + list({ + file: path.resolve(p.cwd, file.slice(1)), + sync: true, + noResume: true, + onReadEntry: entry => p.add(entry), + }); + } + else { + p.add(file); + } + }); + p.end(); +}; +const addFilesAsync = async (p, files) => { + for (let i = 0; i < files.length; i++) { + const file = String(files[i]); + if (file.charAt(0) === '@') { + await list({ + file: path.resolve(String(p.cwd), file.slice(1)), + noResume: true, + onReadEntry: entry => p.add(entry), + }); + } + else { + p.add(file); + } + } + p.end(); +}; +export const replace = makeCommand(replaceSync, replaceAsync, +/* c8 ignore start */ +() => { + throw new TypeError('file is required'); +}, () => { + throw new TypeError('file is required'); +}, +/* c8 ignore stop */ +(opt, entries) => { + if (!isFile(opt)) { + throw new TypeError('file is required'); + } + if (opt.gzip || + opt.brotli || + opt.file.endsWith('.br') || + opt.file.endsWith('.tbr')) { + throw new TypeError('cannot append to compressed archives'); + } + if (!entries?.length) { + throw new TypeError('no paths specified to add/replace'); + } +}); +//# sourceMappingURL=replace.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/strip-absolute-path.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/strip-absolute-path.js new file mode 100644 index 00000000000000..cce5ff80b00db3 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/strip-absolute-path.js @@ -0,0 +1,25 @@ +// unix absolute paths are also absolute on win32, so we use this for both +import { win32 } from 'node:path'; +const { isAbsolute, parse } = win32; +// returns [root, stripped] +// Note that windows will think that //x/y/z/a has a "root" of //x/y, and in +// those cases, we want to sanitize it to x/y/z/a, not z/a, so we strip / +// explicitly if it's the first character. +// drive-specific relative paths on Windows get their root stripped off even +// though they are not absolute, so `c:../foo` becomes ['c:', '../foo'] +export const stripAbsolutePath = (path) => { + let r = ''; + let parsed = parse(path); + while (isAbsolute(path) || parsed.root) { + // windows will think that //x/y/z has a "root" of //x/y/ + // but strip the //?/C:/ off of //?/C:/path + const root = path.charAt(0) === '/' && path.slice(0, 4) !== '//?/' ? + '/' + : parsed.root; + path = path.slice(root.length); + r += root; + parsed = parse(path); + } + return [r, path]; +}; +//# sourceMappingURL=strip-absolute-path.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/strip-trailing-slashes.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/strip-trailing-slashes.js new file mode 100644 index 00000000000000..ace4218a7547bf --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/strip-trailing-slashes.js @@ -0,0 +1,14 @@ +// warning: extremely hot code path. +// This has been meticulously optimized for use +// within npm install on large package trees. +// Do not edit without careful benchmarking. +export const stripTrailingSlashes = (str) => { + let i = str.length - 1; + let slashesStart = -1; + while (i > -1 && str.charAt(i) === '/') { + slashesStart = i; + i--; + } + return slashesStart === -1 ? str : str.slice(0, slashesStart); +}; +//# sourceMappingURL=strip-trailing-slashes.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/symlink-error.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/symlink-error.js new file mode 100644 index 00000000000000..d31766e2e0afa0 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/symlink-error.js @@ -0,0 +1,15 @@ +export class SymlinkError extends Error { + path; + symlink; + syscall = 'symlink'; + code = 'TAR_SYMLINK_ERROR'; + constructor(symlink, path) { + super('TAR_SYMLINK_ERROR: Cannot extract through symbolic link'); + this.symlink = symlink; + this.path = path; + } + get name() { + return 'SymlinkError'; + } +} +//# sourceMappingURL=symlink-error.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/types.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/types.js new file mode 100644 index 00000000000000..27b982ae1e0922 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/types.js @@ -0,0 +1,45 @@ +export const isCode = (c) => name.has(c); +export const isName = (c) => code.has(c); +// map types from key to human-friendly name +export const name = new Map([ + ['0', 'File'], + // same as File + ['', 'OldFile'], + ['1', 'Link'], + ['2', 'SymbolicLink'], + // Devices and FIFOs aren't fully supported + // they are parsed, but skipped when unpacking + ['3', 'CharacterDevice'], + ['4', 'BlockDevice'], + ['5', 'Directory'], + ['6', 'FIFO'], + // same as File + ['7', 'ContiguousFile'], + // pax headers + ['g', 'GlobalExtendedHeader'], + ['x', 'ExtendedHeader'], + // vendor-specific stuff + // skip + ['A', 'SolarisACL'], + // like 5, but with data, which should be skipped + ['D', 'GNUDumpDir'], + // metadata only, skip + ['I', 'Inode'], + // data = link path of next file + ['K', 'NextFileHasLongLinkpath'], + // data = path of next file + ['L', 'NextFileHasLongPath'], + // skip + ['M', 'ContinuationFile'], + // like L + ['N', 'OldGnuLongPath'], + // skip + ['S', 'SparseFile'], + // skip + ['V', 'TapeVolumeHeader'], + // like x + ['X', 'OldExtendedHeader'], +]); +// map the other direction +export const code = new Map(Array.from(name).map(kv => [kv[1], kv[0]])); +//# sourceMappingURL=types.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/unpack.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/unpack.js new file mode 100644 index 00000000000000..6e744cfc1a6f9f --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/unpack.js @@ -0,0 +1,888 @@ +// the PEND/UNPEND stuff tracks whether we're ready to emit end/close yet. +// but the path reservations are required to avoid race conditions where +// parallelized unpack ops may mess with one another, due to dependencies +// (like a Link depending on its target) or destructive operations (like +// clobbering an fs object to create one of a different type.) +import * as fsm from '@isaacs/fs-minipass'; +import assert from 'node:assert'; +import { randomBytes } from 'node:crypto'; +import fs from 'node:fs'; +import path from 'node:path'; +import { getWriteFlag } from './get-write-flag.js'; +import { mkdir, mkdirSync } from './mkdir.js'; +import { normalizeUnicode } from './normalize-unicode.js'; +import { normalizeWindowsPath } from './normalize-windows-path.js'; +import { Parser } from './parse.js'; +import { stripAbsolutePath } from './strip-absolute-path.js'; +import { stripTrailingSlashes } from './strip-trailing-slashes.js'; +import * as wc from './winchars.js'; +import { PathReservations } from './path-reservations.js'; +const ONENTRY = Symbol('onEntry'); +const CHECKFS = Symbol('checkFs'); +const CHECKFS2 = Symbol('checkFs2'); +const PRUNECACHE = Symbol('pruneCache'); +const ISREUSABLE = Symbol('isReusable'); +const MAKEFS = Symbol('makeFs'); +const FILE = Symbol('file'); +const DIRECTORY = Symbol('directory'); +const LINK = Symbol('link'); +const SYMLINK = Symbol('symlink'); +const HARDLINK = Symbol('hardlink'); +const UNSUPPORTED = Symbol('unsupported'); +const CHECKPATH = Symbol('checkPath'); +const MKDIR = Symbol('mkdir'); +const ONERROR = Symbol('onError'); +const PENDING = Symbol('pending'); +const PEND = Symbol('pend'); +const UNPEND = Symbol('unpend'); +const ENDED = Symbol('ended'); +const MAYBECLOSE = Symbol('maybeClose'); +const SKIP = Symbol('skip'); +const DOCHOWN = Symbol('doChown'); +const UID = Symbol('uid'); +const GID = Symbol('gid'); +const CHECKED_CWD = Symbol('checkedCwd'); +const platform = process.env.TESTING_TAR_FAKE_PLATFORM || process.platform; +const isWindows = platform === 'win32'; +const DEFAULT_MAX_DEPTH = 1024; +// Unlinks on Windows are not atomic. +// +// This means that if you have a file entry, followed by another +// file entry with an identical name, and you cannot re-use the file +// (because it's a hardlink, or because unlink:true is set, or it's +// Windows, which does not have useful nlink values), then the unlink +// will be committed to the disk AFTER the new file has been written +// over the old one, deleting the new file. +// +// To work around this, on Windows systems, we rename the file and then +// delete the renamed file. It's a sloppy kludge, but frankly, I do not +// know of a better way to do this, given windows' non-atomic unlink +// semantics. +// +// See: https://github.com/npm/node-tar/issues/183 +/* c8 ignore start */ +const unlinkFile = (path, cb) => { + if (!isWindows) { + return fs.unlink(path, cb); + } + const name = path + '.DELETE.' + randomBytes(16).toString('hex'); + fs.rename(path, name, er => { + if (er) { + return cb(er); + } + fs.unlink(name, cb); + }); +}; +/* c8 ignore stop */ +/* c8 ignore start */ +const unlinkFileSync = (path) => { + if (!isWindows) { + return fs.unlinkSync(path); + } + const name = path + '.DELETE.' + randomBytes(16).toString('hex'); + fs.renameSync(path, name); + fs.unlinkSync(name); +}; +/* c8 ignore stop */ +// this.gid, entry.gid, this.processUid +const uint32 = (a, b, c) => a !== undefined && a === a >>> 0 ? a + : b !== undefined && b === b >>> 0 ? b + : c; +// clear the cache if it's a case-insensitive unicode-squashing match. +// we can't know if the current file system is case-sensitive or supports +// unicode fully, so we check for similarity on the maximally compatible +// representation. Err on the side of pruning, since all it's doing is +// preventing lstats, and it's not the end of the world if we get a false +// positive. +// Note that on windows, we always drop the entire cache whenever a +// symbolic link is encountered, because 8.3 filenames are impossible +// to reason about, and collisions are hazards rather than just failures. +const cacheKeyNormalize = (path) => stripTrailingSlashes(normalizeWindowsPath(normalizeUnicode(path))).toLowerCase(); +// remove all cache entries matching ${abs}/** +const pruneCache = (cache, abs) => { + abs = cacheKeyNormalize(abs); + for (const path of cache.keys()) { + const pnorm = cacheKeyNormalize(path); + if (pnorm === abs || pnorm.indexOf(abs + '/') === 0) { + cache.delete(path); + } + } +}; +const dropCache = (cache) => { + for (const key of cache.keys()) { + cache.delete(key); + } +}; +export class Unpack extends Parser { + [ENDED] = false; + [CHECKED_CWD] = false; + [PENDING] = 0; + reservations = new PathReservations(); + transform; + writable = true; + readable = false; + dirCache; + uid; + gid; + setOwner; + preserveOwner; + processGid; + processUid; + maxDepth; + forceChown; + win32; + newer; + keep; + noMtime; + preservePaths; + unlink; + cwd; + strip; + processUmask; + umask; + dmode; + fmode; + chmod; + constructor(opt = {}) { + opt.ondone = () => { + this[ENDED] = true; + this[MAYBECLOSE](); + }; + super(opt); + this.transform = opt.transform; + this.dirCache = opt.dirCache || new Map(); + this.chmod = !!opt.chmod; + if (typeof opt.uid === 'number' || typeof opt.gid === 'number') { + // need both or neither + if (typeof opt.uid !== 'number' || + typeof opt.gid !== 'number') { + throw new TypeError('cannot set owner without number uid and gid'); + } + if (opt.preserveOwner) { + throw new TypeError('cannot preserve owner in archive and also set owner explicitly'); + } + this.uid = opt.uid; + this.gid = opt.gid; + this.setOwner = true; + } + else { + this.uid = undefined; + this.gid = undefined; + this.setOwner = false; + } + // default true for root + if (opt.preserveOwner === undefined && + typeof opt.uid !== 'number') { + this.preserveOwner = !!(process.getuid && process.getuid() === 0); + } + else { + this.preserveOwner = !!opt.preserveOwner; + } + this.processUid = + (this.preserveOwner || this.setOwner) && process.getuid ? + process.getuid() + : undefined; + this.processGid = + (this.preserveOwner || this.setOwner) && process.getgid ? + process.getgid() + : undefined; + // prevent excessively deep nesting of subfolders + // set to `Infinity` to remove this restriction + this.maxDepth = + typeof opt.maxDepth === 'number' ? + opt.maxDepth + : DEFAULT_MAX_DEPTH; + // mostly just for testing, but useful in some cases. + // Forcibly trigger a chown on every entry, no matter what + this.forceChown = opt.forceChown === true; + // turn > this[ONENTRY](entry)); + } + // a bad or damaged archive is a warning for Parser, but an error + // when extracting. Mark those errors as unrecoverable, because + // the Unpack contract cannot be met. + warn(code, msg, data = {}) { + if (code === 'TAR_BAD_ARCHIVE' || code === 'TAR_ABORT') { + data.recoverable = false; + } + return super.warn(code, msg, data); + } + [MAYBECLOSE]() { + if (this[ENDED] && this[PENDING] === 0) { + this.emit('prefinish'); + this.emit('finish'); + this.emit('end'); + } + } + [CHECKPATH](entry) { + const p = normalizeWindowsPath(entry.path); + const parts = p.split('/'); + if (this.strip) { + if (parts.length < this.strip) { + return false; + } + if (entry.type === 'Link') { + const linkparts = normalizeWindowsPath(String(entry.linkpath)).split('/'); + if (linkparts.length >= this.strip) { + entry.linkpath = linkparts.slice(this.strip).join('/'); + } + else { + return false; + } + } + parts.splice(0, this.strip); + entry.path = parts.join('/'); + } + if (isFinite(this.maxDepth) && parts.length > this.maxDepth) { + this.warn('TAR_ENTRY_ERROR', 'path excessively deep', { + entry, + path: p, + depth: parts.length, + maxDepth: this.maxDepth, + }); + return false; + } + if (!this.preservePaths) { + if (parts.includes('..') || + /* c8 ignore next */ + (isWindows && /^[a-z]:\.\.$/i.test(parts[0] ?? ''))) { + this.warn('TAR_ENTRY_ERROR', `path contains '..'`, { + entry, + path: p, + }); + return false; + } + // strip off the root + const [root, stripped] = stripAbsolutePath(p); + if (root) { + entry.path = String(stripped); + this.warn('TAR_ENTRY_INFO', `stripping ${root} from absolute path`, { + entry, + path: p, + }); + } + } + if (path.isAbsolute(entry.path)) { + entry.absolute = normalizeWindowsPath(path.resolve(entry.path)); + } + else { + entry.absolute = normalizeWindowsPath(path.resolve(this.cwd, entry.path)); + } + // if we somehow ended up with a path that escapes the cwd, and we are + // not in preservePaths mode, then something is fishy! This should have + // been prevented above, so ignore this for coverage. + /* c8 ignore start - defense in depth */ + if (!this.preservePaths && + typeof entry.absolute === 'string' && + entry.absolute.indexOf(this.cwd + '/') !== 0 && + entry.absolute !== this.cwd) { + this.warn('TAR_ENTRY_ERROR', 'path escaped extraction target', { + entry, + path: normalizeWindowsPath(entry.path), + resolvedPath: entry.absolute, + cwd: this.cwd, + }); + return false; + } + /* c8 ignore stop */ + // an archive can set properties on the extraction directory, but it + // may not replace the cwd with a different kind of thing entirely. + if (entry.absolute === this.cwd && + entry.type !== 'Directory' && + entry.type !== 'GNUDumpDir') { + return false; + } + // only encode : chars that aren't drive letter indicators + if (this.win32) { + const { root: aRoot } = path.win32.parse(String(entry.absolute)); + entry.absolute = + aRoot + wc.encode(String(entry.absolute).slice(aRoot.length)); + const { root: pRoot } = path.win32.parse(entry.path); + entry.path = pRoot + wc.encode(entry.path.slice(pRoot.length)); + } + return true; + } + [ONENTRY](entry) { + if (!this[CHECKPATH](entry)) { + return entry.resume(); + } + assert.equal(typeof entry.absolute, 'string'); + switch (entry.type) { + case 'Directory': + case 'GNUDumpDir': + if (entry.mode) { + entry.mode = entry.mode | 0o700; + } + // eslint-disable-next-line no-fallthrough + case 'File': + case 'OldFile': + case 'ContiguousFile': + case 'Link': + case 'SymbolicLink': + return this[CHECKFS](entry); + case 'CharacterDevice': + case 'BlockDevice': + case 'FIFO': + default: + return this[UNSUPPORTED](entry); + } + } + [ONERROR](er, entry) { + // Cwd has to exist, or else nothing works. That's serious. + // Other errors are warnings, which raise the error in strict + // mode, but otherwise continue on. + if (er.name === 'CwdError') { + this.emit('error', er); + } + else { + this.warn('TAR_ENTRY_ERROR', er, { entry }); + this[UNPEND](); + entry.resume(); + } + } + [MKDIR](dir, mode, cb) { + mkdir(normalizeWindowsPath(dir), { + uid: this.uid, + gid: this.gid, + processUid: this.processUid, + processGid: this.processGid, + umask: this.processUmask, + preserve: this.preservePaths, + unlink: this.unlink, + cache: this.dirCache, + cwd: this.cwd, + mode: mode, + }, cb); + } + [DOCHOWN](entry) { + // in preserve owner mode, chown if the entry doesn't match process + // in set owner mode, chown if setting doesn't match process + return (this.forceChown || + (this.preserveOwner && + ((typeof entry.uid === 'number' && + entry.uid !== this.processUid) || + (typeof entry.gid === 'number' && + entry.gid !== this.processGid))) || + (typeof this.uid === 'number' && + this.uid !== this.processUid) || + (typeof this.gid === 'number' && this.gid !== this.processGid)); + } + [UID](entry) { + return uint32(this.uid, entry.uid, this.processUid); + } + [GID](entry) { + return uint32(this.gid, entry.gid, this.processGid); + } + [FILE](entry, fullyDone) { + const mode = typeof entry.mode === 'number' ? + entry.mode & 0o7777 + : this.fmode; + const stream = new fsm.WriteStream(String(entry.absolute), { + // slight lie, but it can be numeric flags + flags: getWriteFlag(entry.size), + mode: mode, + autoClose: false, + }); + stream.on('error', (er) => { + if (stream.fd) { + fs.close(stream.fd, () => { }); + } + // flush all the data out so that we aren't left hanging + // if the error wasn't actually fatal. otherwise the parse + // is blocked, and we never proceed. + stream.write = () => true; + this[ONERROR](er, entry); + fullyDone(); + }); + let actions = 1; + const done = (er) => { + if (er) { + /* c8 ignore start - we should always have a fd by now */ + if (stream.fd) { + fs.close(stream.fd, () => { }); + } + /* c8 ignore stop */ + this[ONERROR](er, entry); + fullyDone(); + return; + } + if (--actions === 0) { + if (stream.fd !== undefined) { + fs.close(stream.fd, er => { + if (er) { + this[ONERROR](er, entry); + } + else { + this[UNPEND](); + } + fullyDone(); + }); + } + } + }; + stream.on('finish', () => { + // if futimes fails, try utimes + // if utimes fails, fail with the original error + // same for fchown/chown + const abs = String(entry.absolute); + const fd = stream.fd; + if (typeof fd === 'number' && entry.mtime && !this.noMtime) { + actions++; + const atime = entry.atime || new Date(); + const mtime = entry.mtime; + fs.futimes(fd, atime, mtime, er => er ? + fs.utimes(abs, atime, mtime, er2 => done(er2 && er)) + : done()); + } + if (typeof fd === 'number' && this[DOCHOWN](entry)) { + actions++; + const uid = this[UID](entry); + const gid = this[GID](entry); + if (typeof uid === 'number' && typeof gid === 'number') { + fs.fchown(fd, uid, gid, er => er ? + fs.chown(abs, uid, gid, er2 => done(er2 && er)) + : done()); + } + } + done(); + }); + const tx = this.transform ? this.transform(entry) || entry : entry; + if (tx !== entry) { + tx.on('error', (er) => { + this[ONERROR](er, entry); + fullyDone(); + }); + entry.pipe(tx); + } + tx.pipe(stream); + } + [DIRECTORY](entry, fullyDone) { + const mode = typeof entry.mode === 'number' ? + entry.mode & 0o7777 + : this.dmode; + this[MKDIR](String(entry.absolute), mode, er => { + if (er) { + this[ONERROR](er, entry); + fullyDone(); + return; + } + let actions = 1; + const done = () => { + if (--actions === 0) { + fullyDone(); + this[UNPEND](); + entry.resume(); + } + }; + if (entry.mtime && !this.noMtime) { + actions++; + fs.utimes(String(entry.absolute), entry.atime || new Date(), entry.mtime, done); + } + if (this[DOCHOWN](entry)) { + actions++; + fs.chown(String(entry.absolute), Number(this[UID](entry)), Number(this[GID](entry)), done); + } + done(); + }); + } + [UNSUPPORTED](entry) { + entry.unsupported = true; + this.warn('TAR_ENTRY_UNSUPPORTED', `unsupported entry type: ${entry.type}`, { entry }); + entry.resume(); + } + [SYMLINK](entry, done) { + this[LINK](entry, String(entry.linkpath), 'symlink', done); + } + [HARDLINK](entry, done) { + const linkpath = normalizeWindowsPath(path.resolve(this.cwd, String(entry.linkpath))); + this[LINK](entry, linkpath, 'link', done); + } + [PEND]() { + this[PENDING]++; + } + [UNPEND]() { + this[PENDING]--; + this[MAYBECLOSE](); + } + [SKIP](entry) { + this[UNPEND](); + entry.resume(); + } + // Check if we can reuse an existing filesystem entry safely and + // overwrite it, rather than unlinking and recreating + // Windows doesn't report a useful nlink, so we just never reuse entries + [ISREUSABLE](entry, st) { + return (entry.type === 'File' && + !this.unlink && + st.isFile() && + st.nlink <= 1 && + !isWindows); + } + // check if a thing is there, and if so, try to clobber it + [CHECKFS](entry) { + this[PEND](); + const paths = [entry.path]; + if (entry.linkpath) { + paths.push(entry.linkpath); + } + this.reservations.reserve(paths, done => this[CHECKFS2](entry, done)); + } + [PRUNECACHE](entry) { + // if we are not creating a directory, and the path is in the dirCache, + // then that means we are about to delete the directory we created + // previously, and it is no longer going to be a directory, and neither + // is any of its children. + // If a symbolic link is encountered, all bets are off. There is no + // reasonable way to sanitize the cache in such a way we will be able to + // avoid having filesystem collisions. If this happens with a non-symlink + // entry, it'll just fail to unpack, but a symlink to a directory, using an + // 8.3 shortname or certain unicode attacks, can evade detection and lead + // to arbitrary writes to anywhere on the system. + if (entry.type === 'SymbolicLink') { + dropCache(this.dirCache); + } + else if (entry.type !== 'Directory') { + pruneCache(this.dirCache, String(entry.absolute)); + } + } + [CHECKFS2](entry, fullyDone) { + this[PRUNECACHE](entry); + const done = (er) => { + this[PRUNECACHE](entry); + fullyDone(er); + }; + const checkCwd = () => { + this[MKDIR](this.cwd, this.dmode, er => { + if (er) { + this[ONERROR](er, entry); + done(); + return; + } + this[CHECKED_CWD] = true; + start(); + }); + }; + const start = () => { + if (entry.absolute !== this.cwd) { + const parent = normalizeWindowsPath(path.dirname(String(entry.absolute))); + if (parent !== this.cwd) { + return this[MKDIR](parent, this.dmode, er => { + if (er) { + this[ONERROR](er, entry); + done(); + return; + } + afterMakeParent(); + }); + } + } + afterMakeParent(); + }; + const afterMakeParent = () => { + fs.lstat(String(entry.absolute), (lstatEr, st) => { + if (st && + (this.keep || + /* c8 ignore next */ + (this.newer && st.mtime > (entry.mtime ?? st.mtime)))) { + this[SKIP](entry); + done(); + return; + } + if (lstatEr || this[ISREUSABLE](entry, st)) { + return this[MAKEFS](null, entry, done); + } + if (st.isDirectory()) { + if (entry.type === 'Directory') { + const needChmod = this.chmod && + entry.mode && + (st.mode & 0o7777) !== entry.mode; + const afterChmod = (er) => this[MAKEFS](er ?? null, entry, done); + if (!needChmod) { + return afterChmod(); + } + return fs.chmod(String(entry.absolute), Number(entry.mode), afterChmod); + } + // Not a dir entry, have to remove it. + // NB: the only way to end up with an entry that is the cwd + // itself, in such a way that == does not detect, is a + // tricky windows absolute path with UNC or 8.3 parts (and + // preservePaths:true, or else it will have been stripped). + // In that case, the user has opted out of path protections + // explicitly, so if they blow away the cwd, c'est la vie. + if (entry.absolute !== this.cwd) { + return fs.rmdir(String(entry.absolute), (er) => this[MAKEFS](er ?? null, entry, done)); + } + } + // not a dir, and not reusable + // don't remove if the cwd, we want that error + if (entry.absolute === this.cwd) { + return this[MAKEFS](null, entry, done); + } + unlinkFile(String(entry.absolute), er => this[MAKEFS](er ?? null, entry, done)); + }); + }; + if (this[CHECKED_CWD]) { + start(); + } + else { + checkCwd(); + } + } + [MAKEFS](er, entry, done) { + if (er) { + this[ONERROR](er, entry); + done(); + return; + } + switch (entry.type) { + case 'File': + case 'OldFile': + case 'ContiguousFile': + return this[FILE](entry, done); + case 'Link': + return this[HARDLINK](entry, done); + case 'SymbolicLink': + return this[SYMLINK](entry, done); + case 'Directory': + case 'GNUDumpDir': + return this[DIRECTORY](entry, done); + } + } + [LINK](entry, linkpath, link, done) { + // XXX: get the type ('symlink' or 'junction') for windows + fs[link](linkpath, String(entry.absolute), er => { + if (er) { + this[ONERROR](er, entry); + } + else { + this[UNPEND](); + entry.resume(); + } + done(); + }); + } +} +const callSync = (fn) => { + try { + return [null, fn()]; + } + catch (er) { + return [er, null]; + } +}; +export class UnpackSync extends Unpack { + sync = true; + [MAKEFS](er, entry) { + return super[MAKEFS](er, entry, () => { }); + } + [CHECKFS](entry) { + this[PRUNECACHE](entry); + if (!this[CHECKED_CWD]) { + const er = this[MKDIR](this.cwd, this.dmode); + if (er) { + return this[ONERROR](er, entry); + } + this[CHECKED_CWD] = true; + } + // don't bother to make the parent if the current entry is the cwd, + // we've already checked it. + if (entry.absolute !== this.cwd) { + const parent = normalizeWindowsPath(path.dirname(String(entry.absolute))); + if (parent !== this.cwd) { + const mkParent = this[MKDIR](parent, this.dmode); + if (mkParent) { + return this[ONERROR](mkParent, entry); + } + } + } + const [lstatEr, st] = callSync(() => fs.lstatSync(String(entry.absolute))); + if (st && + (this.keep || + /* c8 ignore next */ + (this.newer && st.mtime > (entry.mtime ?? st.mtime)))) { + return this[SKIP](entry); + } + if (lstatEr || this[ISREUSABLE](entry, st)) { + return this[MAKEFS](null, entry); + } + if (st.isDirectory()) { + if (entry.type === 'Directory') { + const needChmod = this.chmod && + entry.mode && + (st.mode & 0o7777) !== entry.mode; + const [er] = needChmod ? + callSync(() => { + fs.chmodSync(String(entry.absolute), Number(entry.mode)); + }) + : []; + return this[MAKEFS](er, entry); + } + // not a dir entry, have to remove it + const [er] = callSync(() => fs.rmdirSync(String(entry.absolute))); + this[MAKEFS](er, entry); + } + // not a dir, and not reusable. + // don't remove if it's the cwd, since we want that error. + const [er] = entry.absolute === this.cwd ? + [] + : callSync(() => unlinkFileSync(String(entry.absolute))); + this[MAKEFS](er, entry); + } + [FILE](entry, done) { + const mode = typeof entry.mode === 'number' ? + entry.mode & 0o7777 + : this.fmode; + const oner = (er) => { + let closeError; + try { + fs.closeSync(fd); + } + catch (e) { + closeError = e; + } + if (er || closeError) { + this[ONERROR](er || closeError, entry); + } + done(); + }; + let fd; + try { + fd = fs.openSync(String(entry.absolute), getWriteFlag(entry.size), mode); + } + catch (er) { + return oner(er); + } + const tx = this.transform ? this.transform(entry) || entry : entry; + if (tx !== entry) { + tx.on('error', (er) => this[ONERROR](er, entry)); + entry.pipe(tx); + } + tx.on('data', (chunk) => { + try { + fs.writeSync(fd, chunk, 0, chunk.length); + } + catch (er) { + oner(er); + } + }); + tx.on('end', () => { + let er = null; + // try both, falling futimes back to utimes + // if either fails, handle the first error + if (entry.mtime && !this.noMtime) { + const atime = entry.atime || new Date(); + const mtime = entry.mtime; + try { + fs.futimesSync(fd, atime, mtime); + } + catch (futimeser) { + try { + fs.utimesSync(String(entry.absolute), atime, mtime); + } + catch (utimeser) { + er = futimeser; + } + } + } + if (this[DOCHOWN](entry)) { + const uid = this[UID](entry); + const gid = this[GID](entry); + try { + fs.fchownSync(fd, Number(uid), Number(gid)); + } + catch (fchowner) { + try { + fs.chownSync(String(entry.absolute), Number(uid), Number(gid)); + } + catch (chowner) { + er = er || fchowner; + } + } + } + oner(er); + }); + } + [DIRECTORY](entry, done) { + const mode = typeof entry.mode === 'number' ? + entry.mode & 0o7777 + : this.dmode; + const er = this[MKDIR](String(entry.absolute), mode); + if (er) { + this[ONERROR](er, entry); + done(); + return; + } + if (entry.mtime && !this.noMtime) { + try { + fs.utimesSync(String(entry.absolute), entry.atime || new Date(), entry.mtime); + /* c8 ignore next */ + } + catch (er) { } + } + if (this[DOCHOWN](entry)) { + try { + fs.chownSync(String(entry.absolute), Number(this[UID](entry)), Number(this[GID](entry))); + } + catch (er) { } + } + done(); + entry.resume(); + } + [MKDIR](dir, mode) { + try { + return mkdirSync(normalizeWindowsPath(dir), { + uid: this.uid, + gid: this.gid, + processUid: this.processUid, + processGid: this.processGid, + umask: this.processUmask, + preserve: this.preservePaths, + unlink: this.unlink, + cache: this.dirCache, + cwd: this.cwd, + mode: mode, + }); + } + catch (er) { + return er; + } + } + [LINK](entry, linkpath, link, done) { + const ls = `${link}Sync`; + try { + fs[ls](linkpath, String(entry.absolute)); + done(); + entry.resume(); + } + catch (er) { + return this[ONERROR](er, entry); + } + } +} +//# sourceMappingURL=unpack.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/update.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/update.js new file mode 100644 index 00000000000000..21398e9766663d --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/update.js @@ -0,0 +1,30 @@ +// tar -u +import { makeCommand } from './make-command.js'; +import { replace as r } from './replace.js'; +// just call tar.r with the filter and mtimeCache +export const update = makeCommand(r.syncFile, r.asyncFile, r.syncNoFile, r.asyncNoFile, (opt, entries = []) => { + r.validate?.(opt, entries); + mtimeFilter(opt); +}); +const mtimeFilter = (opt) => { + const filter = opt.filter; + if (!opt.mtimeCache) { + opt.mtimeCache = new Map(); + } + opt.filter = + filter ? + (path, stat) => filter(path, stat) && + !( + /* c8 ignore start */ + ((opt.mtimeCache?.get(path) ?? stat.mtime ?? 0) > + (stat.mtime ?? 0)) + /* c8 ignore stop */ + ) + : (path, stat) => !( + /* c8 ignore start */ + ((opt.mtimeCache?.get(path) ?? stat.mtime ?? 0) > + (stat.mtime ?? 0)) + /* c8 ignore stop */ + ); +}; +//# sourceMappingURL=update.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/warn-method.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/warn-method.js new file mode 100644 index 00000000000000..13e798afefc85e --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/warn-method.js @@ -0,0 +1,27 @@ +export const warnMethod = (self, code, message, data = {}) => { + if (self.file) { + data.file = self.file; + } + if (self.cwd) { + data.cwd = self.cwd; + } + data.code = + (message instanceof Error && + message.code) || + code; + data.tarCode = code; + if (!self.strict && data.recoverable !== false) { + if (message instanceof Error) { + data = Object.assign(message, data); + message = message.message; + } + self.emit('warn', code, message, data); + } + else if (message instanceof Error) { + self.emit('error', Object.assign(message, data)); + } + else { + self.emit('error', Object.assign(new Error(`${code}: ${message}`), data)); + } +}; +//# sourceMappingURL=warn-method.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/winchars.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/winchars.js new file mode 100644 index 00000000000000..c41eb86d69a4bb --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/winchars.js @@ -0,0 +1,9 @@ +// When writing files on Windows, translate the characters to their +// 0xf000 higher-encoded versions. +const raw = ['|', '<', '>', '?', ':']; +const win = raw.map(char => String.fromCharCode(0xf000 + char.charCodeAt(0))); +const toWin = new Map(raw.map((char, i) => [char, win[i]])); +const toRaw = new Map(win.map((char, i) => [char, raw[i]])); +export const encode = (s) => raw.reduce((s, c) => s.split(c).join(toWin.get(c)), s); +export const decode = (s) => win.reduce((s, c) => s.split(c).join(toRaw.get(c)), s); +//# sourceMappingURL=winchars.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/write-entry.js b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/write-entry.js new file mode 100644 index 00000000000000..9028cd676b4cd2 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/dist/esm/write-entry.js @@ -0,0 +1,657 @@ +import fs from 'fs'; +import { Minipass } from 'minipass'; +import path from 'path'; +import { Header } from './header.js'; +import { modeFix } from './mode-fix.js'; +import { normalizeWindowsPath } from './normalize-windows-path.js'; +import { dealias, } from './options.js'; +import { Pax } from './pax.js'; +import { stripAbsolutePath } from './strip-absolute-path.js'; +import { stripTrailingSlashes } from './strip-trailing-slashes.js'; +import { warnMethod, } from './warn-method.js'; +import * as winchars from './winchars.js'; +const prefixPath = (path, prefix) => { + if (!prefix) { + return normalizeWindowsPath(path); + } + path = normalizeWindowsPath(path).replace(/^\.(\/|$)/, ''); + return stripTrailingSlashes(prefix) + '/' + path; +}; +const maxReadSize = 16 * 1024 * 1024; +const PROCESS = Symbol('process'); +const FILE = Symbol('file'); +const DIRECTORY = Symbol('directory'); +const SYMLINK = Symbol('symlink'); +const HARDLINK = Symbol('hardlink'); +const HEADER = Symbol('header'); +const READ = Symbol('read'); +const LSTAT = Symbol('lstat'); +const ONLSTAT = Symbol('onlstat'); +const ONREAD = Symbol('onread'); +const ONREADLINK = Symbol('onreadlink'); +const OPENFILE = Symbol('openfile'); +const ONOPENFILE = Symbol('onopenfile'); +const CLOSE = Symbol('close'); +const MODE = Symbol('mode'); +const AWAITDRAIN = Symbol('awaitDrain'); +const ONDRAIN = Symbol('ondrain'); +const PREFIX = Symbol('prefix'); +export class WriteEntry extends Minipass { + path; + portable; + myuid = (process.getuid && process.getuid()) || 0; + // until node has builtin pwnam functions, this'll have to do + myuser = process.env.USER || ''; + maxReadSize; + linkCache; + statCache; + preservePaths; + cwd; + strict; + mtime; + noPax; + noMtime; + prefix; + fd; + blockLen = 0; + blockRemain = 0; + buf; + pos = 0; + remain = 0; + length = 0; + offset = 0; + win32; + absolute; + header; + type; + linkpath; + stat; + onWriteEntry; + #hadError = false; + constructor(p, opt_ = {}) { + const opt = dealias(opt_); + super(); + this.path = normalizeWindowsPath(p); + // suppress atime, ctime, uid, gid, uname, gname + this.portable = !!opt.portable; + this.maxReadSize = opt.maxReadSize || maxReadSize; + this.linkCache = opt.linkCache || new Map(); + this.statCache = opt.statCache || new Map(); + this.preservePaths = !!opt.preservePaths; + this.cwd = normalizeWindowsPath(opt.cwd || process.cwd()); + this.strict = !!opt.strict; + this.noPax = !!opt.noPax; + this.noMtime = !!opt.noMtime; + this.mtime = opt.mtime; + this.prefix = + opt.prefix ? normalizeWindowsPath(opt.prefix) : undefined; + this.onWriteEntry = opt.onWriteEntry; + if (typeof opt.onwarn === 'function') { + this.on('warn', opt.onwarn); + } + let pathWarn = false; + if (!this.preservePaths) { + const [root, stripped] = stripAbsolutePath(this.path); + if (root && typeof stripped === 'string') { + this.path = stripped; + pathWarn = root; + } + } + this.win32 = !!opt.win32 || process.platform === 'win32'; + if (this.win32) { + // force the \ to / normalization, since we might not *actually* + // be on windows, but want \ to be considered a path separator. + this.path = winchars.decode(this.path.replace(/\\/g, '/')); + p = p.replace(/\\/g, '/'); + } + this.absolute = normalizeWindowsPath(opt.absolute || path.resolve(this.cwd, p)); + if (this.path === '') { + this.path = './'; + } + if (pathWarn) { + this.warn('TAR_ENTRY_INFO', `stripping ${pathWarn} from absolute path`, { + entry: this, + path: pathWarn + this.path, + }); + } + const cs = this.statCache.get(this.absolute); + if (cs) { + this[ONLSTAT](cs); + } + else { + this[LSTAT](); + } + } + warn(code, message, data = {}) { + return warnMethod(this, code, message, data); + } + emit(ev, ...data) { + if (ev === 'error') { + this.#hadError = true; + } + return super.emit(ev, ...data); + } + [LSTAT]() { + fs.lstat(this.absolute, (er, stat) => { + if (er) { + return this.emit('error', er); + } + this[ONLSTAT](stat); + }); + } + [ONLSTAT](stat) { + this.statCache.set(this.absolute, stat); + this.stat = stat; + if (!stat.isFile()) { + stat.size = 0; + } + this.type = getType(stat); + this.emit('stat', stat); + this[PROCESS](); + } + [PROCESS]() { + switch (this.type) { + case 'File': + return this[FILE](); + case 'Directory': + return this[DIRECTORY](); + case 'SymbolicLink': + return this[SYMLINK](); + // unsupported types are ignored. + default: + return this.end(); + } + } + [MODE](mode) { + return modeFix(mode, this.type === 'Directory', this.portable); + } + [PREFIX](path) { + return prefixPath(path, this.prefix); + } + [HEADER]() { + /* c8 ignore start */ + if (!this.stat) { + throw new Error('cannot write header before stat'); + } + /* c8 ignore stop */ + if (this.type === 'Directory' && this.portable) { + this.noMtime = true; + } + this.onWriteEntry?.(this); + this.header = new Header({ + path: this[PREFIX](this.path), + // only apply the prefix to hard links. + linkpath: this.type === 'Link' && this.linkpath !== undefined ? + this[PREFIX](this.linkpath) + : this.linkpath, + // only the permissions and setuid/setgid/sticky bitflags + // not the higher-order bits that specify file type + mode: this[MODE](this.stat.mode), + uid: this.portable ? undefined : this.stat.uid, + gid: this.portable ? undefined : this.stat.gid, + size: this.stat.size, + mtime: this.noMtime ? undefined : this.mtime || this.stat.mtime, + /* c8 ignore next */ + type: this.type === 'Unsupported' ? undefined : this.type, + uname: this.portable ? undefined + : this.stat.uid === this.myuid ? this.myuser + : '', + atime: this.portable ? undefined : this.stat.atime, + ctime: this.portable ? undefined : this.stat.ctime, + }); + if (this.header.encode() && !this.noPax) { + super.write(new Pax({ + atime: this.portable ? undefined : this.header.atime, + ctime: this.portable ? undefined : this.header.ctime, + gid: this.portable ? undefined : this.header.gid, + mtime: this.noMtime ? undefined : (this.mtime || this.header.mtime), + path: this[PREFIX](this.path), + linkpath: this.type === 'Link' && this.linkpath !== undefined ? + this[PREFIX](this.linkpath) + : this.linkpath, + size: this.header.size, + uid: this.portable ? undefined : this.header.uid, + uname: this.portable ? undefined : this.header.uname, + dev: this.portable ? undefined : this.stat.dev, + ino: this.portable ? undefined : this.stat.ino, + nlink: this.portable ? undefined : this.stat.nlink, + }).encode()); + } + const block = this.header?.block; + /* c8 ignore start */ + if (!block) { + throw new Error('failed to encode header'); + } + /* c8 ignore stop */ + super.write(block); + } + [DIRECTORY]() { + /* c8 ignore start */ + if (!this.stat) { + throw new Error('cannot create directory entry without stat'); + } + /* c8 ignore stop */ + if (this.path.slice(-1) !== '/') { + this.path += '/'; + } + this.stat.size = 0; + this[HEADER](); + this.end(); + } + [SYMLINK]() { + fs.readlink(this.absolute, (er, linkpath) => { + if (er) { + return this.emit('error', er); + } + this[ONREADLINK](linkpath); + }); + } + [ONREADLINK](linkpath) { + this.linkpath = normalizeWindowsPath(linkpath); + this[HEADER](); + this.end(); + } + [HARDLINK](linkpath) { + /* c8 ignore start */ + if (!this.stat) { + throw new Error('cannot create link entry without stat'); + } + /* c8 ignore stop */ + this.type = 'Link'; + this.linkpath = normalizeWindowsPath(path.relative(this.cwd, linkpath)); + this.stat.size = 0; + this[HEADER](); + this.end(); + } + [FILE]() { + /* c8 ignore start */ + if (!this.stat) { + throw new Error('cannot create file entry without stat'); + } + /* c8 ignore stop */ + if (this.stat.nlink > 1) { + const linkKey = `${this.stat.dev}:${this.stat.ino}`; + const linkpath = this.linkCache.get(linkKey); + if (linkpath?.indexOf(this.cwd) === 0) { + return this[HARDLINK](linkpath); + } + this.linkCache.set(linkKey, this.absolute); + } + this[HEADER](); + if (this.stat.size === 0) { + return this.end(); + } + this[OPENFILE](); + } + [OPENFILE]() { + fs.open(this.absolute, 'r', (er, fd) => { + if (er) { + return this.emit('error', er); + } + this[ONOPENFILE](fd); + }); + } + [ONOPENFILE](fd) { + this.fd = fd; + if (this.#hadError) { + return this[CLOSE](); + } + /* c8 ignore start */ + if (!this.stat) { + throw new Error('should stat before calling onopenfile'); + } + /* c8 ignore start */ + this.blockLen = 512 * Math.ceil(this.stat.size / 512); + this.blockRemain = this.blockLen; + const bufLen = Math.min(this.blockLen, this.maxReadSize); + this.buf = Buffer.allocUnsafe(bufLen); + this.offset = 0; + this.pos = 0; + this.remain = this.stat.size; + this.length = this.buf.length; + this[READ](); + } + [READ]() { + const { fd, buf, offset, length, pos } = this; + if (fd === undefined || buf === undefined) { + throw new Error('cannot read file without first opening'); + } + fs.read(fd, buf, offset, length, pos, (er, bytesRead) => { + if (er) { + // ignoring the error from close(2) is a bad practice, but at + // this point we already have an error, don't need another one + return this[CLOSE](() => this.emit('error', er)); + } + this[ONREAD](bytesRead); + }); + } + /* c8 ignore start */ + [CLOSE](cb = () => { }) { + /* c8 ignore stop */ + if (this.fd !== undefined) + fs.close(this.fd, cb); + } + [ONREAD](bytesRead) { + if (bytesRead <= 0 && this.remain > 0) { + const er = Object.assign(new Error('encountered unexpected EOF'), { + path: this.absolute, + syscall: 'read', + code: 'EOF', + }); + return this[CLOSE](() => this.emit('error', er)); + } + if (bytesRead > this.remain) { + const er = Object.assign(new Error('did not encounter expected EOF'), { + path: this.absolute, + syscall: 'read', + code: 'EOF', + }); + return this[CLOSE](() => this.emit('error', er)); + } + /* c8 ignore start */ + if (!this.buf) { + throw new Error('should have created buffer prior to reading'); + } + /* c8 ignore stop */ + // null out the rest of the buffer, if we could fit the block padding + // at the end of this loop, we've incremented bytesRead and this.remain + // to be incremented up to the blockRemain level, as if we had expected + // to get a null-padded file, and read it until the end. then we will + // decrement both remain and blockRemain by bytesRead, and know that we + // reached the expected EOF, without any null buffer to append. + if (bytesRead === this.remain) { + for (let i = bytesRead; i < this.length && bytesRead < this.blockRemain; i++) { + this.buf[i + this.offset] = 0; + bytesRead++; + this.remain++; + } + } + const chunk = this.offset === 0 && bytesRead === this.buf.length ? + this.buf + : this.buf.subarray(this.offset, this.offset + bytesRead); + const flushed = this.write(chunk); + if (!flushed) { + this[AWAITDRAIN](() => this[ONDRAIN]()); + } + else { + this[ONDRAIN](); + } + } + [AWAITDRAIN](cb) { + this.once('drain', cb); + } + write(chunk, encoding, cb) { + /* c8 ignore start - just junk to comply with NodeJS.WritableStream */ + if (typeof encoding === 'function') { + cb = encoding; + encoding = undefined; + } + if (typeof chunk === 'string') { + chunk = Buffer.from(chunk, typeof encoding === 'string' ? encoding : 'utf8'); + } + /* c8 ignore stop */ + if (this.blockRemain < chunk.length) { + const er = Object.assign(new Error('writing more data than expected'), { + path: this.absolute, + }); + return this.emit('error', er); + } + this.remain -= chunk.length; + this.blockRemain -= chunk.length; + this.pos += chunk.length; + this.offset += chunk.length; + return super.write(chunk, null, cb); + } + [ONDRAIN]() { + if (!this.remain) { + if (this.blockRemain) { + super.write(Buffer.alloc(this.blockRemain)); + } + return this[CLOSE](er => er ? this.emit('error', er) : this.end()); + } + /* c8 ignore start */ + if (!this.buf) { + throw new Error('buffer lost somehow in ONDRAIN'); + } + /* c8 ignore stop */ + if (this.offset >= this.length) { + // if we only have a smaller bit left to read, alloc a smaller buffer + // otherwise, keep it the same length it was before. + this.buf = Buffer.allocUnsafe(Math.min(this.blockRemain, this.buf.length)); + this.offset = 0; + } + this.length = this.buf.length - this.offset; + this[READ](); + } +} +export class WriteEntrySync extends WriteEntry { + sync = true; + [LSTAT]() { + this[ONLSTAT](fs.lstatSync(this.absolute)); + } + [SYMLINK]() { + this[ONREADLINK](fs.readlinkSync(this.absolute)); + } + [OPENFILE]() { + this[ONOPENFILE](fs.openSync(this.absolute, 'r')); + } + [READ]() { + let threw = true; + try { + const { fd, buf, offset, length, pos } = this; + /* c8 ignore start */ + if (fd === undefined || buf === undefined) { + throw new Error('fd and buf must be set in READ method'); + } + /* c8 ignore stop */ + const bytesRead = fs.readSync(fd, buf, offset, length, pos); + this[ONREAD](bytesRead); + threw = false; + } + finally { + // ignoring the error from close(2) is a bad practice, but at + // this point we already have an error, don't need another one + if (threw) { + try { + this[CLOSE](() => { }); + } + catch (er) { } + } + } + } + [AWAITDRAIN](cb) { + cb(); + } + /* c8 ignore start */ + [CLOSE](cb = () => { }) { + /* c8 ignore stop */ + if (this.fd !== undefined) + fs.closeSync(this.fd); + cb(); + } +} +export class WriteEntryTar extends Minipass { + blockLen = 0; + blockRemain = 0; + buf = 0; + pos = 0; + remain = 0; + length = 0; + preservePaths; + portable; + strict; + noPax; + noMtime; + readEntry; + type; + prefix; + path; + mode; + uid; + gid; + uname; + gname; + header; + mtime; + atime; + ctime; + linkpath; + size; + onWriteEntry; + warn(code, message, data = {}) { + return warnMethod(this, code, message, data); + } + constructor(readEntry, opt_ = {}) { + const opt = dealias(opt_); + super(); + this.preservePaths = !!opt.preservePaths; + this.portable = !!opt.portable; + this.strict = !!opt.strict; + this.noPax = !!opt.noPax; + this.noMtime = !!opt.noMtime; + this.onWriteEntry = opt.onWriteEntry; + this.readEntry = readEntry; + const { type } = readEntry; + /* c8 ignore start */ + if (type === 'Unsupported') { + throw new Error('writing entry that should be ignored'); + } + /* c8 ignore stop */ + this.type = type; + if (this.type === 'Directory' && this.portable) { + this.noMtime = true; + } + this.prefix = opt.prefix; + this.path = normalizeWindowsPath(readEntry.path); + this.mode = + readEntry.mode !== undefined ? + this[MODE](readEntry.mode) + : undefined; + this.uid = this.portable ? undefined : readEntry.uid; + this.gid = this.portable ? undefined : readEntry.gid; + this.uname = this.portable ? undefined : readEntry.uname; + this.gname = this.portable ? undefined : readEntry.gname; + this.size = readEntry.size; + this.mtime = + this.noMtime ? undefined : opt.mtime || readEntry.mtime; + this.atime = this.portable ? undefined : readEntry.atime; + this.ctime = this.portable ? undefined : readEntry.ctime; + this.linkpath = + readEntry.linkpath !== undefined ? + normalizeWindowsPath(readEntry.linkpath) + : undefined; + if (typeof opt.onwarn === 'function') { + this.on('warn', opt.onwarn); + } + let pathWarn = false; + if (!this.preservePaths) { + const [root, stripped] = stripAbsolutePath(this.path); + if (root && typeof stripped === 'string') { + this.path = stripped; + pathWarn = root; + } + } + this.remain = readEntry.size; + this.blockRemain = readEntry.startBlockSize; + this.onWriteEntry?.(this); + this.header = new Header({ + path: this[PREFIX](this.path), + linkpath: this.type === 'Link' && this.linkpath !== undefined ? + this[PREFIX](this.linkpath) + : this.linkpath, + // only the permissions and setuid/setgid/sticky bitflags + // not the higher-order bits that specify file type + mode: this.mode, + uid: this.portable ? undefined : this.uid, + gid: this.portable ? undefined : this.gid, + size: this.size, + mtime: this.noMtime ? undefined : this.mtime, + type: this.type, + uname: this.portable ? undefined : this.uname, + atime: this.portable ? undefined : this.atime, + ctime: this.portable ? undefined : this.ctime, + }); + if (pathWarn) { + this.warn('TAR_ENTRY_INFO', `stripping ${pathWarn} from absolute path`, { + entry: this, + path: pathWarn + this.path, + }); + } + if (this.header.encode() && !this.noPax) { + super.write(new Pax({ + atime: this.portable ? undefined : this.atime, + ctime: this.portable ? undefined : this.ctime, + gid: this.portable ? undefined : this.gid, + mtime: this.noMtime ? undefined : this.mtime, + path: this[PREFIX](this.path), + linkpath: this.type === 'Link' && this.linkpath !== undefined ? + this[PREFIX](this.linkpath) + : this.linkpath, + size: this.size, + uid: this.portable ? undefined : this.uid, + uname: this.portable ? undefined : this.uname, + dev: this.portable ? undefined : this.readEntry.dev, + ino: this.portable ? undefined : this.readEntry.ino, + nlink: this.portable ? undefined : this.readEntry.nlink, + }).encode()); + } + const b = this.header?.block; + /* c8 ignore start */ + if (!b) + throw new Error('failed to encode header'); + /* c8 ignore stop */ + super.write(b); + readEntry.pipe(this); + } + [PREFIX](path) { + return prefixPath(path, this.prefix); + } + [MODE](mode) { + return modeFix(mode, this.type === 'Directory', this.portable); + } + write(chunk, encoding, cb) { + /* c8 ignore start - just junk to comply with NodeJS.WritableStream */ + if (typeof encoding === 'function') { + cb = encoding; + encoding = undefined; + } + if (typeof chunk === 'string') { + chunk = Buffer.from(chunk, typeof encoding === 'string' ? encoding : 'utf8'); + } + /* c8 ignore stop */ + const writeLen = chunk.length; + if (writeLen > this.blockRemain) { + throw new Error('writing more to entry than is appropriate'); + } + this.blockRemain -= writeLen; + return super.write(chunk, cb); + } + end(chunk, encoding, cb) { + if (this.blockRemain) { + super.write(Buffer.alloc(this.blockRemain)); + } + /* c8 ignore start - just junk to comply with NodeJS.WritableStream */ + if (typeof chunk === 'function') { + cb = chunk; + encoding = undefined; + chunk = undefined; + } + if (typeof encoding === 'function') { + cb = encoding; + encoding = undefined; + } + if (typeof chunk === 'string') { + chunk = Buffer.from(chunk, encoding ?? 'utf8'); + } + if (cb) + this.once('finish', cb); + chunk ? super.end(chunk, cb) : super.end(cb); + /* c8 ignore stop */ + return this; + } +} +const getType = (stat) => stat.isFile() ? 'File' + : stat.isDirectory() ? 'Directory' + : stat.isSymbolicLink() ? 'SymbolicLink' + : 'Unsupported'; +//# sourceMappingURL=write-entry.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/tar/package.json b/deps/npm/node_modules/node-gyp/node_modules/tar/package.json new file mode 100644 index 00000000000000..0283103ee9eaf9 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/tar/package.json @@ -0,0 +1,325 @@ +{ + "author": "Isaac Z. Schlueter", + "name": "tar", + "description": "tar for node", + "version": "7.4.3", + "repository": { + "type": "git", + "url": "https://github.com/isaacs/node-tar.git" + }, + "scripts": { + "genparse": "node scripts/generate-parse-fixtures.js", + "snap": "tap", + "test": "tap", + "pretest": "npm run prepare", + "presnap": "npm run prepare", + "prepare": "tshy", + "preversion": "npm test", + "postversion": "npm publish", + "prepublishOnly": "git push origin --follow-tags", + "format": "prettier --write . --log-level warn", + "typedoc": "typedoc --tsconfig .tshy/esm.json ./src/*.ts" + }, + "dependencies": { + "@isaacs/fs-minipass": "^4.0.0", + "chownr": "^3.0.0", + "minipass": "^7.1.2", + "minizlib": "^3.0.1", + "mkdirp": "^3.0.1", + "yallist": "^5.0.0" + }, + "devDependencies": { + "chmodr": "^1.2.0", + "end-of-stream": "^1.4.3", + "events-to-array": "^2.0.3", + "mutate-fs": "^2.1.1", + "nock": "^13.5.4", + "prettier": "^3.2.5", + "rimraf": "^5.0.5", + "tap": "^18.7.2", + "tshy": "^1.13.1", + "typedoc": "^0.25.13" + }, + "license": "ISC", + "engines": { + "node": ">=18" + }, + "files": [ + "dist" + ], + "tap": { + "coverage-map": "map.js", + "timeout": 0, + "typecheck": true + }, + "prettier": { + "experimentalTernaries": true, + "semi": false, + "printWidth": 70, + "tabWidth": 2, + "useTabs": false, + "singleQuote": true, + "jsxSingleQuote": false, + "bracketSameLine": true, + "arrowParens": "avoid", + "endOfLine": "lf" + }, + "tshy": { + "exports": { + "./package.json": "./package.json", + ".": "./src/index.ts", + "./c": "./src/create.ts", + "./create": "./src/create.ts", + "./replace": "./src/create.ts", + "./r": "./src/create.ts", + "./list": "./src/list.ts", + "./t": "./src/list.ts", + "./update": "./src/update.ts", + "./u": "./src/update.ts", + "./extract": "./src/extract.ts", + "./x": "./src/extract.ts", + "./pack": "./src/pack.ts", + "./unpack": "./src/unpack.ts", + "./parse": "./src/parse.ts", + "./read-entry": "./src/read-entry.ts", + "./write-entry": "./src/write-entry.ts", + "./header": "./src/header.ts", + "./pax": "./src/pax.ts", + "./types": "./src/types.ts" + } + }, + "exports": { + "./package.json": "./package.json", + ".": { + "import": { + "source": "./src/index.ts", + "types": "./dist/esm/index.d.ts", + "default": "./dist/esm/index.js" + }, + "require": { + "source": "./src/index.ts", + "types": "./dist/commonjs/index.d.ts", + "default": "./dist/commonjs/index.js" + } + }, + "./c": { + "import": { + "source": "./src/create.ts", + "types": "./dist/esm/create.d.ts", + "default": "./dist/esm/create.js" + }, + "require": { + "source": "./src/create.ts", + "types": "./dist/commonjs/create.d.ts", + "default": "./dist/commonjs/create.js" + } + }, + "./create": { + "import": { + "source": "./src/create.ts", + "types": "./dist/esm/create.d.ts", + "default": "./dist/esm/create.js" + }, + "require": { + "source": "./src/create.ts", + "types": "./dist/commonjs/create.d.ts", + "default": "./dist/commonjs/create.js" + } + }, + "./replace": { + "import": { + "source": "./src/create.ts", + "types": "./dist/esm/create.d.ts", + "default": "./dist/esm/create.js" + }, + "require": { + "source": "./src/create.ts", + "types": "./dist/commonjs/create.d.ts", + "default": "./dist/commonjs/create.js" + } + }, + "./r": { + "import": { + "source": "./src/create.ts", + "types": "./dist/esm/create.d.ts", + "default": "./dist/esm/create.js" + }, + "require": { + "source": "./src/create.ts", + "types": "./dist/commonjs/create.d.ts", + "default": "./dist/commonjs/create.js" + } + }, + "./list": { + "import": { + "source": "./src/list.ts", + "types": "./dist/esm/list.d.ts", + "default": "./dist/esm/list.js" + }, + "require": { + "source": "./src/list.ts", + "types": "./dist/commonjs/list.d.ts", + "default": "./dist/commonjs/list.js" + } + }, + "./t": { + "import": { + "source": "./src/list.ts", + "types": "./dist/esm/list.d.ts", + "default": "./dist/esm/list.js" + }, + "require": { + "source": "./src/list.ts", + "types": "./dist/commonjs/list.d.ts", + "default": "./dist/commonjs/list.js" + } + }, + "./update": { + "import": { + "source": "./src/update.ts", + "types": "./dist/esm/update.d.ts", + "default": "./dist/esm/update.js" + }, + "require": { + "source": "./src/update.ts", + "types": "./dist/commonjs/update.d.ts", + "default": "./dist/commonjs/update.js" + } + }, + "./u": { + "import": { + "source": "./src/update.ts", + "types": "./dist/esm/update.d.ts", + "default": "./dist/esm/update.js" + }, + "require": { + "source": "./src/update.ts", + "types": "./dist/commonjs/update.d.ts", + "default": "./dist/commonjs/update.js" + } + }, + "./extract": { + "import": { + "source": "./src/extract.ts", + "types": "./dist/esm/extract.d.ts", + "default": "./dist/esm/extract.js" + }, + "require": { + "source": "./src/extract.ts", + "types": "./dist/commonjs/extract.d.ts", + "default": "./dist/commonjs/extract.js" + } + }, + "./x": { + "import": { + "source": "./src/extract.ts", + "types": "./dist/esm/extract.d.ts", + "default": "./dist/esm/extract.js" + }, + "require": { + "source": "./src/extract.ts", + "types": "./dist/commonjs/extract.d.ts", + "default": "./dist/commonjs/extract.js" + } + }, + "./pack": { + "import": { + "source": "./src/pack.ts", + "types": "./dist/esm/pack.d.ts", + "default": "./dist/esm/pack.js" + }, + "require": { + "source": "./src/pack.ts", + "types": "./dist/commonjs/pack.d.ts", + "default": "./dist/commonjs/pack.js" + } + }, + "./unpack": { + "import": { + "source": "./src/unpack.ts", + "types": "./dist/esm/unpack.d.ts", + "default": "./dist/esm/unpack.js" + }, + "require": { + "source": "./src/unpack.ts", + "types": "./dist/commonjs/unpack.d.ts", + "default": "./dist/commonjs/unpack.js" + } + }, + "./parse": { + "import": { + "source": "./src/parse.ts", + "types": "./dist/esm/parse.d.ts", + "default": "./dist/esm/parse.js" + }, + "require": { + "source": "./src/parse.ts", + "types": "./dist/commonjs/parse.d.ts", + "default": "./dist/commonjs/parse.js" + } + }, + "./read-entry": { + "import": { + "source": "./src/read-entry.ts", + "types": "./dist/esm/read-entry.d.ts", + "default": "./dist/esm/read-entry.js" + }, + "require": { + "source": "./src/read-entry.ts", + "types": "./dist/commonjs/read-entry.d.ts", + "default": "./dist/commonjs/read-entry.js" + } + }, + "./write-entry": { + "import": { + "source": "./src/write-entry.ts", + "types": "./dist/esm/write-entry.d.ts", + "default": "./dist/esm/write-entry.js" + }, + "require": { + "source": "./src/write-entry.ts", + "types": "./dist/commonjs/write-entry.d.ts", + "default": "./dist/commonjs/write-entry.js" + } + }, + "./header": { + "import": { + "source": "./src/header.ts", + "types": "./dist/esm/header.d.ts", + "default": "./dist/esm/header.js" + }, + "require": { + "source": "./src/header.ts", + "types": "./dist/commonjs/header.d.ts", + "default": "./dist/commonjs/header.js" + } + }, + "./pax": { + "import": { + "source": "./src/pax.ts", + "types": "./dist/esm/pax.d.ts", + "default": "./dist/esm/pax.js" + }, + "require": { + "source": "./src/pax.ts", + "types": "./dist/commonjs/pax.d.ts", + "default": "./dist/commonjs/pax.js" + } + }, + "./types": { + "import": { + "source": "./src/types.ts", + "types": "./dist/esm/types.d.ts", + "default": "./dist/esm/types.js" + }, + "require": { + "source": "./src/types.ts", + "types": "./dist/commonjs/types.d.ts", + "default": "./dist/commonjs/types.js" + } + } + }, + "type": "module", + "main": "./dist/commonjs/index.js", + "types": "./dist/commonjs/index.d.ts" +} diff --git a/deps/npm/node_modules/node-gyp/node_modules/unique-filename/LICENSE b/deps/npm/node_modules/node-gyp/node_modules/unique-filename/LICENSE deleted file mode 100644 index 69619c125ea7ef..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/unique-filename/LICENSE +++ /dev/null @@ -1,5 +0,0 @@ -Copyright npm, Inc - -Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/node-gyp/node_modules/unique-filename/lib/index.js b/deps/npm/node_modules/node-gyp/node_modules/unique-filename/lib/index.js deleted file mode 100644 index d067d2e709809a..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/unique-filename/lib/index.js +++ /dev/null @@ -1,7 +0,0 @@ -var path = require('path') - -var uniqueSlug = require('unique-slug') - -module.exports = function (filepath, prefix, uniq) { - return path.join(filepath, (prefix ? prefix + '-' : '') + uniqueSlug(uniq)) -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/unique-filename/package.json b/deps/npm/node_modules/node-gyp/node_modules/unique-filename/package.json deleted file mode 100644 index b2fbf0666489a6..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/unique-filename/package.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "name": "unique-filename", - "version": "3.0.0", - "description": "Generate a unique filename for use in temporary directories or caches.", - "main": "lib/index.js", - "scripts": { - "test": "tap", - "lint": "eslint \"**/*.js\"", - "postlint": "template-oss-check", - "template-oss-apply": "template-oss-apply --force", - "lintfix": "npm run lint -- --fix", - "snap": "tap", - "posttest": "npm run lint" - }, - "repository": { - "type": "git", - "url": "https://github.com/npm/unique-filename.git" - }, - "keywords": [], - "author": "GitHub Inc.", - "license": "ISC", - "bugs": { - "url": "https://github.com/iarna/unique-filename/issues" - }, - "homepage": "https://github.com/iarna/unique-filename", - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.5.1", - "tap": "^16.3.0" - }, - "dependencies": { - "unique-slug": "^4.0.0" - }, - "files": [ - "bin/", - "lib/" - ], - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.5.1" - }, - "tap": { - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - } -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/unique-slug/LICENSE b/deps/npm/node_modules/node-gyp/node_modules/unique-slug/LICENSE deleted file mode 100644 index 7953647e7760b8..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/unique-slug/LICENSE +++ /dev/null @@ -1,15 +0,0 @@ -The ISC License - -Copyright npm, Inc - -Permission to use, copy, modify, and/or distribute this software for any -purpose with or without fee is hereby granted, provided that the above -copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR -IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/node-gyp/node_modules/unique-slug/lib/index.js b/deps/npm/node_modules/node-gyp/node_modules/unique-slug/lib/index.js deleted file mode 100644 index 1bac84d95d7307..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/unique-slug/lib/index.js +++ /dev/null @@ -1,11 +0,0 @@ -'use strict' -var MurmurHash3 = require('imurmurhash') - -module.exports = function (uniq) { - if (uniq) { - var hash = new MurmurHash3(uniq) - return ('00000000' + hash.result().toString(16)).slice(-8) - } else { - return (Math.random().toString(16) + '0000000').slice(2, 10) - } -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/unique-slug/package.json b/deps/npm/node_modules/node-gyp/node_modules/unique-slug/package.json deleted file mode 100644 index 33732cdbb42859..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/unique-slug/package.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "name": "unique-slug", - "version": "4.0.0", - "description": "Generate a unique character string suitible for use in files and URLs.", - "main": "lib/index.js", - "scripts": { - "test": "tap", - "lint": "eslint \"**/*.js\"", - "postlint": "template-oss-check", - "template-oss-apply": "template-oss-apply --force", - "lintfix": "npm run lint -- --fix", - "snap": "tap", - "posttest": "npm run lint" - }, - "keywords": [], - "author": "GitHub Inc.", - "license": "ISC", - "devDependencies": { - "@npmcli/eslint-config": "^3.1.0", - "@npmcli/template-oss": "4.5.1", - "tap": "^16.3.0" - }, - "repository": { - "type": "git", - "url": "https://github.com/npm/unique-slug.git" - }, - "dependencies": { - "imurmurhash": "^0.1.4" - }, - "files": [ - "bin/", - "lib/" - ], - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.5.1" - }, - "tap": { - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - } -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/which/LICENSE b/deps/npm/node_modules/node-gyp/node_modules/which/LICENSE deleted file mode 100644 index 19129e315fe593..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/which/LICENSE +++ /dev/null @@ -1,15 +0,0 @@ -The ISC License - -Copyright (c) Isaac Z. Schlueter and Contributors - -Permission to use, copy, modify, and/or distribute this software for any -purpose with or without fee is hereby granted, provided that the above -copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR -IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/node-gyp/node_modules/which/README.md b/deps/npm/node_modules/node-gyp/node_modules/which/README.md deleted file mode 100644 index 942bf1e0932b89..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/which/README.md +++ /dev/null @@ -1,51 +0,0 @@ -# which - -Like the unix `which` utility. - -Finds the first instance of a specified executable in the PATH -environment variable. Does not cache the results, so `hash -r` is not -needed when the PATH changes. - -## USAGE - -```javascript -const which = require('which') - -// async usage -// rejects if not found -const resolved = await which('node') - -// if nothrow option is used, returns null if not found -const resolvedOrNull = await which('node', { nothrow: true }) - -// sync usage -// throws if not found -const resolved = which.sync('node') - -// if nothrow option is used, returns null if not found -const resolvedOrNull = which.sync('node', { nothrow: true }) - -// Pass options to override the PATH and PATHEXT environment vars. -await which('node', { path: someOtherPath, pathExt: somePathExt }) -``` - -## CLI USAGE - -Just like the BSD `which(1)` binary but using `node-which`. - -``` -usage: node-which [-as] program ... -``` - -You can learn more about why the binary is `node-which` and not `which` -[here](https://github.com/npm/node-which/pull/67) - -## OPTIONS - -You may pass an options object as the second argument. - -- `path`: Use instead of the `PATH` environment variable. -- `pathExt`: Use instead of the `PATHEXT` environment variable. -- `all`: Return all matches, instead of just the first one. Note that - this means the function returns an array of strings instead of a - single string. diff --git a/deps/npm/node_modules/node-gyp/node_modules/which/bin/which.js b/deps/npm/node_modules/node-gyp/node_modules/which/bin/which.js deleted file mode 100755 index 6df16f21acf930..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/which/bin/which.js +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env node - -const which = require('../lib') -const argv = process.argv.slice(2) - -const usage = (err) => { - if (err) { - console.error(`which: ${err}`) - } - console.error('usage: which [-as] program ...') - process.exit(1) -} - -if (!argv.length) { - return usage() -} - -let dashdash = false -const [commands, flags] = argv.reduce((acc, arg) => { - if (dashdash || arg === '--') { - dashdash = true - return acc - } - - if (!/^-/.test(arg)) { - acc[0].push(arg) - return acc - } - - for (const flag of arg.slice(1).split('')) { - if (flag === 's') { - acc[1].silent = true - } else if (flag === 'a') { - acc[1].all = true - } else { - usage(`illegal option -- ${flag}`) - } - } - - return acc -}, [[], {}]) - -for (const command of commands) { - try { - const res = which.sync(command, { all: flags.all }) - if (!flags.silent) { - console.log([].concat(res).join('\n')) - } - } catch (err) { - process.exitCode = 1 - } -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/which/lib/index.js b/deps/npm/node_modules/node-gyp/node_modules/which/lib/index.js deleted file mode 100644 index 2fd358baf888fd..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/which/lib/index.js +++ /dev/null @@ -1,111 +0,0 @@ -const { isexe, sync: isexeSync } = require('isexe') -const { join, delimiter, sep, posix } = require('path') - -const isWindows = process.platform === 'win32' - -// used to check for slashed in commands passed in. always checks for the posix -// seperator on all platforms, and checks for the current separator when not on -// a posix platform. don't use the isWindows check for this since that is mocked -// in tests but we still need the code to actually work when called. that is also -// why it is ignored from coverage. -/* istanbul ignore next */ -const rSlash = new RegExp(`[${posix.sep}${sep === posix.sep ? '' : sep}]`.replace(/(\\)/g, '\\$1')) -const rRel = new RegExp(`^\\.${rSlash.source}`) - -const getNotFoundError = (cmd) => - Object.assign(new Error(`not found: ${cmd}`), { code: 'ENOENT' }) - -const getPathInfo = (cmd, { - path: optPath = process.env.PATH, - pathExt: optPathExt = process.env.PATHEXT, - delimiter: optDelimiter = delimiter, -}) => { - // If it has a slash, then we don't bother searching the pathenv. - // just check the file itself, and that's it. - const pathEnv = cmd.match(rSlash) ? [''] : [ - // windows always checks the cwd first - ...(isWindows ? [process.cwd()] : []), - ...(optPath || /* istanbul ignore next: very unusual */ '').split(optDelimiter), - ] - - if (isWindows) { - const pathExtExe = optPathExt || - ['.EXE', '.CMD', '.BAT', '.COM'].join(optDelimiter) - const pathExt = pathExtExe.split(optDelimiter).flatMap((item) => [item, item.toLowerCase()]) - if (cmd.includes('.') && pathExt[0] !== '') { - pathExt.unshift('') - } - return { pathEnv, pathExt, pathExtExe } - } - - return { pathEnv, pathExt: [''] } -} - -const getPathPart = (raw, cmd) => { - const pathPart = /^".*"$/.test(raw) ? raw.slice(1, -1) : raw - const prefix = !pathPart && rRel.test(cmd) ? cmd.slice(0, 2) : '' - return prefix + join(pathPart, cmd) -} - -const which = async (cmd, opt = {}) => { - const { pathEnv, pathExt, pathExtExe } = getPathInfo(cmd, opt) - const found = [] - - for (const envPart of pathEnv) { - const p = getPathPart(envPart, cmd) - - for (const ext of pathExt) { - const withExt = p + ext - const is = await isexe(withExt, { pathExt: pathExtExe, ignoreErrors: true }) - if (is) { - if (!opt.all) { - return withExt - } - found.push(withExt) - } - } - } - - if (opt.all && found.length) { - return found - } - - if (opt.nothrow) { - return null - } - - throw getNotFoundError(cmd) -} - -const whichSync = (cmd, opt = {}) => { - const { pathEnv, pathExt, pathExtExe } = getPathInfo(cmd, opt) - const found = [] - - for (const pathEnvPart of pathEnv) { - const p = getPathPart(pathEnvPart, cmd) - - for (const ext of pathExt) { - const withExt = p + ext - const is = isexeSync(withExt, { pathExt: pathExtExe, ignoreErrors: true }) - if (is) { - if (!opt.all) { - return withExt - } - found.push(withExt) - } - } - } - - if (opt.all && found.length) { - return found - } - - if (opt.nothrow) { - return null - } - - throw getNotFoundError(cmd) -} - -module.exports = which -which.sync = whichSync diff --git a/deps/npm/node_modules/node-gyp/node_modules/which/package.json b/deps/npm/node_modules/node-gyp/node_modules/which/package.json deleted file mode 100644 index 515bfb22ca0e1e..00000000000000 --- a/deps/npm/node_modules/node-gyp/node_modules/which/package.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "author": "GitHub Inc.", - "name": "which", - "description": "Like which(1) unix command. Find the first instance of an executable in the PATH.", - "version": "4.0.0", - "repository": { - "type": "git", - "url": "https://github.com/npm/node-which.git" - }, - "main": "lib/index.js", - "bin": { - "node-which": "./bin/which.js" - }, - "license": "ISC", - "dependencies": { - "isexe": "^3.1.1" - }, - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.18.0", - "tap": "^16.3.0" - }, - "scripts": { - "test": "tap", - "lint": "eslint \"**/*.js\"", - "postlint": "template-oss-check", - "template-oss-apply": "template-oss-apply --force", - "lintfix": "npm run lint -- --fix", - "snap": "tap", - "posttest": "npm run lint" - }, - "files": [ - "bin/", - "lib/" - ], - "tap": { - "check-coverage": true, - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - }, - "engines": { - "node": "^16.13.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "ciVersions": [ - "16.13.0", - "16.x", - "18.0.0", - "18.x" - ], - "version": "4.18.0", - "publish": "true" - } -} diff --git a/deps/npm/node_modules/node-gyp/node_modules/yallist/LICENSE.md b/deps/npm/node_modules/node-gyp/node_modules/yallist/LICENSE.md new file mode 100644 index 00000000000000..881248b6d7f0ca --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/yallist/LICENSE.md @@ -0,0 +1,63 @@ +All packages under `src/` are licensed according to the terms in +their respective `LICENSE` or `LICENSE.md` files. + +The remainder of this project is licensed under the Blue Oak +Model License, as follows: + +----- + +# Blue Oak Model License + +Version 1.0.0 + +## Purpose + +This license gives everyone as much permission to work with +this software as possible, while protecting contributors +from liability. + +## Acceptance + +In order to receive this license, you must agree to its +rules. The rules of this license are both obligations +under that agreement and conditions to your license. +You must not do anything with this software that triggers +a rule that you cannot or will not follow. + +## Copyright + +Each contributor licenses you to do everything with this +software that would otherwise infringe that contributor's +copyright in it. + +## Notices + +You must ensure that everyone who gets a copy of +any part of this software from you, with or without +changes, also gets the text of this license or a link to +. + +## Excuse + +If anyone notifies you in writing that you have not +complied with [Notices](#notices), you can keep your +license by taking all practical steps to comply within 30 +days after the notice. If you do not do so, your license +ends immediately. + +## Patent + +Each contributor licenses you to do everything with this +software that would otherwise infringe any patent claims +they can license or become able to license. + +## Reliability + +No contributor can revoke this license. + +## No Liability + +***As far as the law allows, this software comes as is, +without any warranty or condition, and no contributor +will be liable to anyone for any damages related to this +software or this license, under any kind of legal claim.*** diff --git a/deps/npm/node_modules/node-gyp/node_modules/yallist/dist/commonjs/index.js b/deps/npm/node_modules/node-gyp/node_modules/yallist/dist/commonjs/index.js new file mode 100644 index 00000000000000..c1e1e4741689d9 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/yallist/dist/commonjs/index.js @@ -0,0 +1,384 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.Node = exports.Yallist = void 0; +class Yallist { + tail; + head; + length = 0; + static create(list = []) { + return new Yallist(list); + } + constructor(list = []) { + for (const item of list) { + this.push(item); + } + } + *[Symbol.iterator]() { + for (let walker = this.head; walker; walker = walker.next) { + yield walker.value; + } + } + removeNode(node) { + if (node.list !== this) { + throw new Error('removing node which does not belong to this list'); + } + const next = node.next; + const prev = node.prev; + if (next) { + next.prev = prev; + } + if (prev) { + prev.next = next; + } + if (node === this.head) { + this.head = next; + } + if (node === this.tail) { + this.tail = prev; + } + this.length--; + node.next = undefined; + node.prev = undefined; + node.list = undefined; + return next; + } + unshiftNode(node) { + if (node === this.head) { + return; + } + if (node.list) { + node.list.removeNode(node); + } + const head = this.head; + node.list = this; + node.next = head; + if (head) { + head.prev = node; + } + this.head = node; + if (!this.tail) { + this.tail = node; + } + this.length++; + } + pushNode(node) { + if (node === this.tail) { + return; + } + if (node.list) { + node.list.removeNode(node); + } + const tail = this.tail; + node.list = this; + node.prev = tail; + if (tail) { + tail.next = node; + } + this.tail = node; + if (!this.head) { + this.head = node; + } + this.length++; + } + push(...args) { + for (let i = 0, l = args.length; i < l; i++) { + push(this, args[i]); + } + return this.length; + } + unshift(...args) { + for (var i = 0, l = args.length; i < l; i++) { + unshift(this, args[i]); + } + return this.length; + } + pop() { + if (!this.tail) { + return undefined; + } + const res = this.tail.value; + const t = this.tail; + this.tail = this.tail.prev; + if (this.tail) { + this.tail.next = undefined; + } + else { + this.head = undefined; + } + t.list = undefined; + this.length--; + return res; + } + shift() { + if (!this.head) { + return undefined; + } + const res = this.head.value; + const h = this.head; + this.head = this.head.next; + if (this.head) { + this.head.prev = undefined; + } + else { + this.tail = undefined; + } + h.list = undefined; + this.length--; + return res; + } + forEach(fn, thisp) { + thisp = thisp || this; + for (let walker = this.head, i = 0; !!walker; i++) { + fn.call(thisp, walker.value, i, this); + walker = walker.next; + } + } + forEachReverse(fn, thisp) { + thisp = thisp || this; + for (let walker = this.tail, i = this.length - 1; !!walker; i--) { + fn.call(thisp, walker.value, i, this); + walker = walker.prev; + } + } + get(n) { + let i = 0; + let walker = this.head; + for (; !!walker && i < n; i++) { + walker = walker.next; + } + if (i === n && !!walker) { + return walker.value; + } + } + getReverse(n) { + let i = 0; + let walker = this.tail; + for (; !!walker && i < n; i++) { + // abort out of the list early if we hit a cycle + walker = walker.prev; + } + if (i === n && !!walker) { + return walker.value; + } + } + map(fn, thisp) { + thisp = thisp || this; + const res = new Yallist(); + for (let walker = this.head; !!walker;) { + res.push(fn.call(thisp, walker.value, this)); + walker = walker.next; + } + return res; + } + mapReverse(fn, thisp) { + thisp = thisp || this; + var res = new Yallist(); + for (let walker = this.tail; !!walker;) { + res.push(fn.call(thisp, walker.value, this)); + walker = walker.prev; + } + return res; + } + reduce(fn, initial) { + let acc; + let walker = this.head; + if (arguments.length > 1) { + acc = initial; + } + else if (this.head) { + walker = this.head.next; + acc = this.head.value; + } + else { + throw new TypeError('Reduce of empty list with no initial value'); + } + for (var i = 0; !!walker; i++) { + acc = fn(acc, walker.value, i); + walker = walker.next; + } + return acc; + } + reduceReverse(fn, initial) { + let acc; + let walker = this.tail; + if (arguments.length > 1) { + acc = initial; + } + else if (this.tail) { + walker = this.tail.prev; + acc = this.tail.value; + } + else { + throw new TypeError('Reduce of empty list with no initial value'); + } + for (let i = this.length - 1; !!walker; i--) { + acc = fn(acc, walker.value, i); + walker = walker.prev; + } + return acc; + } + toArray() { + const arr = new Array(this.length); + for (let i = 0, walker = this.head; !!walker; i++) { + arr[i] = walker.value; + walker = walker.next; + } + return arr; + } + toArrayReverse() { + const arr = new Array(this.length); + for (let i = 0, walker = this.tail; !!walker; i++) { + arr[i] = walker.value; + walker = walker.prev; + } + return arr; + } + slice(from = 0, to = this.length) { + if (to < 0) { + to += this.length; + } + if (from < 0) { + from += this.length; + } + const ret = new Yallist(); + if (to < from || to < 0) { + return ret; + } + if (from < 0) { + from = 0; + } + if (to > this.length) { + to = this.length; + } + let walker = this.head; + let i = 0; + for (i = 0; !!walker && i < from; i++) { + walker = walker.next; + } + for (; !!walker && i < to; i++, walker = walker.next) { + ret.push(walker.value); + } + return ret; + } + sliceReverse(from = 0, to = this.length) { + if (to < 0) { + to += this.length; + } + if (from < 0) { + from += this.length; + } + const ret = new Yallist(); + if (to < from || to < 0) { + return ret; + } + if (from < 0) { + from = 0; + } + if (to > this.length) { + to = this.length; + } + let i = this.length; + let walker = this.tail; + for (; !!walker && i > to; i--) { + walker = walker.prev; + } + for (; !!walker && i > from; i--, walker = walker.prev) { + ret.push(walker.value); + } + return ret; + } + splice(start, deleteCount = 0, ...nodes) { + if (start > this.length) { + start = this.length - 1; + } + if (start < 0) { + start = this.length + start; + } + let walker = this.head; + for (let i = 0; !!walker && i < start; i++) { + walker = walker.next; + } + const ret = []; + for (let i = 0; !!walker && i < deleteCount; i++) { + ret.push(walker.value); + walker = this.removeNode(walker); + } + if (!walker) { + walker = this.tail; + } + else if (walker !== this.tail) { + walker = walker.prev; + } + for (const v of nodes) { + walker = insertAfter(this, walker, v); + } + return ret; + } + reverse() { + const head = this.head; + const tail = this.tail; + for (let walker = head; !!walker; walker = walker.prev) { + const p = walker.prev; + walker.prev = walker.next; + walker.next = p; + } + this.head = tail; + this.tail = head; + return this; + } +} +exports.Yallist = Yallist; +// insertAfter undefined means "make the node the new head of list" +function insertAfter(self, node, value) { + const prev = node; + const next = node ? node.next : self.head; + const inserted = new Node(value, prev, next, self); + if (inserted.next === undefined) { + self.tail = inserted; + } + if (inserted.prev === undefined) { + self.head = inserted; + } + self.length++; + return inserted; +} +function push(self, item) { + self.tail = new Node(item, self.tail, undefined, self); + if (!self.head) { + self.head = self.tail; + } + self.length++; +} +function unshift(self, item) { + self.head = new Node(item, undefined, self.head, self); + if (!self.tail) { + self.tail = self.head; + } + self.length++; +} +class Node { + list; + next; + prev; + value; + constructor(value, prev, next, list) { + this.list = list; + this.value = value; + if (prev) { + prev.next = this; + this.prev = prev; + } + else { + this.prev = undefined; + } + if (next) { + next.prev = this; + this.next = next; + } + else { + this.next = undefined; + } + } +} +exports.Node = Node; +//# sourceMappingURL=index.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/yallist/dist/commonjs/package.json b/deps/npm/node_modules/node-gyp/node_modules/yallist/dist/commonjs/package.json new file mode 100644 index 00000000000000..5bbefffbabee39 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/yallist/dist/commonjs/package.json @@ -0,0 +1,3 @@ +{ + "type": "commonjs" +} diff --git a/deps/npm/node_modules/node-gyp/node_modules/yallist/dist/esm/index.js b/deps/npm/node_modules/node-gyp/node_modules/yallist/dist/esm/index.js new file mode 100644 index 00000000000000..3d81c5113b93a8 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/yallist/dist/esm/index.js @@ -0,0 +1,379 @@ +export class Yallist { + tail; + head; + length = 0; + static create(list = []) { + return new Yallist(list); + } + constructor(list = []) { + for (const item of list) { + this.push(item); + } + } + *[Symbol.iterator]() { + for (let walker = this.head; walker; walker = walker.next) { + yield walker.value; + } + } + removeNode(node) { + if (node.list !== this) { + throw new Error('removing node which does not belong to this list'); + } + const next = node.next; + const prev = node.prev; + if (next) { + next.prev = prev; + } + if (prev) { + prev.next = next; + } + if (node === this.head) { + this.head = next; + } + if (node === this.tail) { + this.tail = prev; + } + this.length--; + node.next = undefined; + node.prev = undefined; + node.list = undefined; + return next; + } + unshiftNode(node) { + if (node === this.head) { + return; + } + if (node.list) { + node.list.removeNode(node); + } + const head = this.head; + node.list = this; + node.next = head; + if (head) { + head.prev = node; + } + this.head = node; + if (!this.tail) { + this.tail = node; + } + this.length++; + } + pushNode(node) { + if (node === this.tail) { + return; + } + if (node.list) { + node.list.removeNode(node); + } + const tail = this.tail; + node.list = this; + node.prev = tail; + if (tail) { + tail.next = node; + } + this.tail = node; + if (!this.head) { + this.head = node; + } + this.length++; + } + push(...args) { + for (let i = 0, l = args.length; i < l; i++) { + push(this, args[i]); + } + return this.length; + } + unshift(...args) { + for (var i = 0, l = args.length; i < l; i++) { + unshift(this, args[i]); + } + return this.length; + } + pop() { + if (!this.tail) { + return undefined; + } + const res = this.tail.value; + const t = this.tail; + this.tail = this.tail.prev; + if (this.tail) { + this.tail.next = undefined; + } + else { + this.head = undefined; + } + t.list = undefined; + this.length--; + return res; + } + shift() { + if (!this.head) { + return undefined; + } + const res = this.head.value; + const h = this.head; + this.head = this.head.next; + if (this.head) { + this.head.prev = undefined; + } + else { + this.tail = undefined; + } + h.list = undefined; + this.length--; + return res; + } + forEach(fn, thisp) { + thisp = thisp || this; + for (let walker = this.head, i = 0; !!walker; i++) { + fn.call(thisp, walker.value, i, this); + walker = walker.next; + } + } + forEachReverse(fn, thisp) { + thisp = thisp || this; + for (let walker = this.tail, i = this.length - 1; !!walker; i--) { + fn.call(thisp, walker.value, i, this); + walker = walker.prev; + } + } + get(n) { + let i = 0; + let walker = this.head; + for (; !!walker && i < n; i++) { + walker = walker.next; + } + if (i === n && !!walker) { + return walker.value; + } + } + getReverse(n) { + let i = 0; + let walker = this.tail; + for (; !!walker && i < n; i++) { + // abort out of the list early if we hit a cycle + walker = walker.prev; + } + if (i === n && !!walker) { + return walker.value; + } + } + map(fn, thisp) { + thisp = thisp || this; + const res = new Yallist(); + for (let walker = this.head; !!walker;) { + res.push(fn.call(thisp, walker.value, this)); + walker = walker.next; + } + return res; + } + mapReverse(fn, thisp) { + thisp = thisp || this; + var res = new Yallist(); + for (let walker = this.tail; !!walker;) { + res.push(fn.call(thisp, walker.value, this)); + walker = walker.prev; + } + return res; + } + reduce(fn, initial) { + let acc; + let walker = this.head; + if (arguments.length > 1) { + acc = initial; + } + else if (this.head) { + walker = this.head.next; + acc = this.head.value; + } + else { + throw new TypeError('Reduce of empty list with no initial value'); + } + for (var i = 0; !!walker; i++) { + acc = fn(acc, walker.value, i); + walker = walker.next; + } + return acc; + } + reduceReverse(fn, initial) { + let acc; + let walker = this.tail; + if (arguments.length > 1) { + acc = initial; + } + else if (this.tail) { + walker = this.tail.prev; + acc = this.tail.value; + } + else { + throw new TypeError('Reduce of empty list with no initial value'); + } + for (let i = this.length - 1; !!walker; i--) { + acc = fn(acc, walker.value, i); + walker = walker.prev; + } + return acc; + } + toArray() { + const arr = new Array(this.length); + for (let i = 0, walker = this.head; !!walker; i++) { + arr[i] = walker.value; + walker = walker.next; + } + return arr; + } + toArrayReverse() { + const arr = new Array(this.length); + for (let i = 0, walker = this.tail; !!walker; i++) { + arr[i] = walker.value; + walker = walker.prev; + } + return arr; + } + slice(from = 0, to = this.length) { + if (to < 0) { + to += this.length; + } + if (from < 0) { + from += this.length; + } + const ret = new Yallist(); + if (to < from || to < 0) { + return ret; + } + if (from < 0) { + from = 0; + } + if (to > this.length) { + to = this.length; + } + let walker = this.head; + let i = 0; + for (i = 0; !!walker && i < from; i++) { + walker = walker.next; + } + for (; !!walker && i < to; i++, walker = walker.next) { + ret.push(walker.value); + } + return ret; + } + sliceReverse(from = 0, to = this.length) { + if (to < 0) { + to += this.length; + } + if (from < 0) { + from += this.length; + } + const ret = new Yallist(); + if (to < from || to < 0) { + return ret; + } + if (from < 0) { + from = 0; + } + if (to > this.length) { + to = this.length; + } + let i = this.length; + let walker = this.tail; + for (; !!walker && i > to; i--) { + walker = walker.prev; + } + for (; !!walker && i > from; i--, walker = walker.prev) { + ret.push(walker.value); + } + return ret; + } + splice(start, deleteCount = 0, ...nodes) { + if (start > this.length) { + start = this.length - 1; + } + if (start < 0) { + start = this.length + start; + } + let walker = this.head; + for (let i = 0; !!walker && i < start; i++) { + walker = walker.next; + } + const ret = []; + for (let i = 0; !!walker && i < deleteCount; i++) { + ret.push(walker.value); + walker = this.removeNode(walker); + } + if (!walker) { + walker = this.tail; + } + else if (walker !== this.tail) { + walker = walker.prev; + } + for (const v of nodes) { + walker = insertAfter(this, walker, v); + } + return ret; + } + reverse() { + const head = this.head; + const tail = this.tail; + for (let walker = head; !!walker; walker = walker.prev) { + const p = walker.prev; + walker.prev = walker.next; + walker.next = p; + } + this.head = tail; + this.tail = head; + return this; + } +} +// insertAfter undefined means "make the node the new head of list" +function insertAfter(self, node, value) { + const prev = node; + const next = node ? node.next : self.head; + const inserted = new Node(value, prev, next, self); + if (inserted.next === undefined) { + self.tail = inserted; + } + if (inserted.prev === undefined) { + self.head = inserted; + } + self.length++; + return inserted; +} +function push(self, item) { + self.tail = new Node(item, self.tail, undefined, self); + if (!self.head) { + self.head = self.tail; + } + self.length++; +} +function unshift(self, item) { + self.head = new Node(item, undefined, self.head, self); + if (!self.tail) { + self.tail = self.head; + } + self.length++; +} +export class Node { + list; + next; + prev; + value; + constructor(value, prev, next, list) { + this.list = list; + this.value = value; + if (prev) { + prev.next = this; + this.prev = prev; + } + else { + this.prev = undefined; + } + if (next) { + next.prev = this; + this.next = next; + } + else { + this.next = undefined; + } + } +} +//# sourceMappingURL=index.js.map \ No newline at end of file diff --git a/deps/npm/node_modules/node-gyp/node_modules/yallist/dist/esm/package.json b/deps/npm/node_modules/node-gyp/node_modules/yallist/dist/esm/package.json new file mode 100644 index 00000000000000..3dbc1ca591c055 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/yallist/dist/esm/package.json @@ -0,0 +1,3 @@ +{ + "type": "module" +} diff --git a/deps/npm/node_modules/node-gyp/node_modules/yallist/package.json b/deps/npm/node_modules/node-gyp/node_modules/yallist/package.json new file mode 100644 index 00000000000000..2f5247808bbea8 --- /dev/null +++ b/deps/npm/node_modules/node-gyp/node_modules/yallist/package.json @@ -0,0 +1,68 @@ +{ + "name": "yallist", + "version": "5.0.0", + "description": "Yet Another Linked List", + "files": [ + "dist" + ], + "devDependencies": { + "prettier": "^3.2.5", + "tap": "^18.7.2", + "tshy": "^1.13.1", + "typedoc": "^0.25.13" + }, + "scripts": { + "preversion": "npm test", + "postversion": "npm publish", + "prepublishOnly": "git push origin --follow-tags", + "prepare": "tshy", + "pretest": "npm run prepare", + "presnap": "npm run prepare", + "test": "tap", + "snap": "tap", + "format": "prettier --write . --loglevel warn --ignore-path ../../.prettierignore --cache", + "typedoc": "typedoc" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/isaacs/yallist.git" + }, + "author": "Isaac Z. Schlueter (http://blog.izs.me/)", + "license": "BlueOak-1.0.0", + "tshy": { + "exports": { + "./package.json": "./package.json", + ".": "./src/index.ts" + } + }, + "exports": { + "./package.json": "./package.json", + ".": { + "import": { + "types": "./dist/esm/index.d.ts", + "default": "./dist/esm/index.js" + }, + "require": { + "types": "./dist/commonjs/index.d.ts", + "default": "./dist/commonjs/index.js" + } + } + }, + "main": "./dist/commonjs/index.js", + "types": "./dist/commonjs/index.d.ts", + "type": "module", + "prettier": { + "semi": false, + "printWidth": 70, + "tabWidth": 2, + "useTabs": false, + "singleQuote": true, + "jsxSingleQuote": false, + "bracketSameLine": true, + "arrowParens": "avoid", + "endOfLine": "lf" + }, + "engines": { + "node": ">=18" + } +} diff --git a/deps/npm/node_modules/node-gyp/package.json b/deps/npm/node_modules/node-gyp/package.json index 8e2ea42510dd14..4a1cfb0eb1a283 100644 --- a/deps/npm/node_modules/node-gyp/package.json +++ b/deps/npm/node_modules/node-gyp/package.json @@ -11,7 +11,7 @@ "bindings", "gyp" ], - "version": "10.2.0", + "version": "11.0.0", "installVersion": 11, "author": "Nathan Rajlich (http://tootallnate.net)", "repository": { @@ -26,26 +26,27 @@ "exponential-backoff": "^3.1.1", "glob": "^10.3.10", "graceful-fs": "^4.2.6", - "make-fetch-happen": "^13.0.0", - "nopt": "^7.0.0", - "proc-log": "^4.1.0", + "make-fetch-happen": "^14.0.3", + "nopt": "^8.0.0", + "proc-log": "^5.0.0", "semver": "^7.3.5", - "tar": "^6.2.1", - "which": "^4.0.0" + "tar": "^7.4.3", + "which": "^5.0.0" }, "engines": { - "node": "^16.14.0 || >=18.0.0" + "node": "^18.17.0 || >=20.5.0" }, "devDependencies": { "bindings": "^1.5.0", "cross-env": "^7.0.3", - "mocha": "^10.2.0", + "eslint": "^9.16.0", + "mocha": "^11.0.1", "nan": "^2.14.2", - "require-inject": "^1.4.4", - "standard": "^17.0.0" + "neostandard": "^0.11.9", + "require-inject": "^1.4.4" }, "scripts": { - "lint": "standard \"*/*.js\" \"test/**/*.js\" \".github/**/*.js\"", + "lint": "eslint \"*/*.js\" \"test/**/*.js\" \".github/**/*.js\"", "test": "cross-env NODE_GYP_NULL_LOGGER=true mocha --timeout 15000 test/test-download.js test/test-*" } } diff --git a/deps/npm/node_modules/npm-install-checks/lib/current-env.js b/deps/npm/node_modules/npm-install-checks/lib/current-env.js index 9babde1f277ff1..31f154aac59b32 100644 --- a/deps/npm/node_modules/npm-install-checks/lib/current-env.js +++ b/deps/npm/node_modules/npm-install-checks/lib/current-env.js @@ -1,5 +1,6 @@ const process = require('node:process') const nodeOs = require('node:os') +const fs = require('node:fs') function isMusl (file) { return file.includes('libc.musl-') || file.includes('ld-musl-') @@ -13,12 +14,23 @@ function cpu () { return process.arch } -function libc (osName) { - // this is to make it faster on non linux machines - if (osName !== 'linux') { +const LDD_PATH = '/usr/bin/ldd' +function getFamilyFromFilesystem () { + try { + const content = fs.readFileSync(LDD_PATH, 'utf-8') + if (content.includes('musl')) { + return 'musl' + } + if (content.includes('GNU C Library')) { + return 'glibc' + } + return null + } catch { return undefined } - let family +} + +function getFamilyFromReport () { const originalExclude = process.report.excludeNetwork process.report.excludeNetwork = true const report = process.report.getReport() @@ -27,6 +39,22 @@ function libc (osName) { family = 'glibc' } else if (Array.isArray(report.sharedObjects) && report.sharedObjects.some(isMusl)) { family = 'musl' + } else { + family = null + } + return family +} + +let family +function libc (osName) { + if (osName !== 'linux') { + return undefined + } + if (family === undefined) { + family = getFamilyFromFilesystem() + if (family === undefined) { + family = getFamilyFromReport() + } } return family } diff --git a/deps/npm/node_modules/npm-install-checks/package.json b/deps/npm/node_modules/npm-install-checks/package.json index e9e69575a6dc6d..967f5f659b2fac 100644 --- a/deps/npm/node_modules/npm-install-checks/package.json +++ b/deps/npm/node_modules/npm-install-checks/package.json @@ -1,6 +1,6 @@ { "name": "npm-install-checks", - "version": "7.1.0", + "version": "7.1.1", "description": "Check the engines and platform fields in package.json", "main": "lib/index.js", "dependencies": { @@ -8,7 +8,7 @@ }, "devDependencies": { "@npmcli/eslint-config": "^5.0.0", - "@npmcli/template-oss": "4.23.3", + "@npmcli/template-oss": "4.23.4", "tap": "^16.0.1" }, "scripts": { @@ -40,7 +40,7 @@ "author": "GitHub Inc.", "templateOSS": { "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.23.3", + "version": "4.23.4", "publish": "true" }, "tap": { diff --git a/deps/npm/node_modules/npm-registry-fetch/lib/check-response.js b/deps/npm/node_modules/npm-registry-fetch/lib/check-response.js index 65eea2963b0b4c..2f183082ab2ce2 100644 --- a/deps/npm/node_modules/npm-registry-fetch/lib/check-response.js +++ b/deps/npm/node_modules/npm-registry-fetch/lib/check-response.js @@ -48,10 +48,18 @@ function logRequest (method, res, startTime) { const cacheStr = cacheStatus ? ` (cache ${cacheStatus})` : '' const urlStr = cleanUrl(res.url) - log.http( - 'fetch', - `${method.toUpperCase()} ${res.status} ${urlStr} ${elapsedTime}ms${attemptStr}${cacheStr}` - ) + // If make-fetch-happen reports a cache hit, then there was no fetch + if (cacheStatus === 'hit') { + log.http( + 'cache', + `${urlStr} ${elapsedTime}ms${attemptStr}${cacheStr}` + ) + } else { + log.http( + 'fetch', + `${method.toUpperCase()} ${res.status} ${urlStr} ${elapsedTime}ms${attemptStr}${cacheStr}` + ) + } } function checkErrors (method, res, startTime, opts) { diff --git a/deps/npm/node_modules/npm-registry-fetch/package.json b/deps/npm/node_modules/npm-registry-fetch/package.json index 559473b964aaa5..bd7a79d35e26ac 100644 --- a/deps/npm/node_modules/npm-registry-fetch/package.json +++ b/deps/npm/node_modules/npm-registry-fetch/package.json @@ -1,6 +1,6 @@ { "name": "npm-registry-fetch", - "version": "18.0.1", + "version": "18.0.2", "description": "Fetch-based http client for use with npm registry APIs", "main": "lib", "files": [ @@ -42,8 +42,8 @@ }, "devDependencies": { "@npmcli/eslint-config": "^5.0.0", - "@npmcli/template-oss": "4.23.3", - "cacache": "^18.0.0", + "@npmcli/template-oss": "4.23.4", + "cacache": "^19.0.1", "nock": "^13.2.4", "require-inject": "^1.4.4", "ssri": "^12.0.0", @@ -62,7 +62,7 @@ }, "templateOSS": { "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.23.3", + "version": "4.23.4", "publish": "true" } } diff --git a/deps/npm/node_modules/package-json-from-dist/dist/commonjs/index.js b/deps/npm/node_modules/package-json-from-dist/dist/commonjs/index.js index 5cff210d855cb0..b966ac9fef535b 100644 --- a/deps/npm/node_modules/package-json-from-dist/dist/commonjs/index.js +++ b/deps/npm/node_modules/package-json-from-dist/dist/commonjs/index.js @@ -5,6 +5,8 @@ const node_fs_1 = require("node:fs"); const node_path_1 = require("node:path"); const node_url_1 = require("node:url"); const NM = `${node_path_1.sep}node_modules${node_path_1.sep}`; +const STORE = `.store${node_path_1.sep}`; +const PKG = `${node_path_1.sep}package${node_path_1.sep}`; const DIST = `${node_path_1.sep}dist${node_path_1.sep}`; /** * Find the package.json file, either from a TypeScript file somewhere not @@ -59,8 +61,16 @@ const findPackageJson = (from, pathFromSrc = '../package.json') => { // inside of node_modules. find the dist directly under package name. const nm = __dirname.substring(0, nms + NM.length); const pkgDir = __dirname.substring(nms + NM.length); + // affordance for yarn berry, which puts package contents in + // '.../node_modules/.store/${id}-${hash}/package/...' + if (pkgDir.startsWith(STORE)) { + const pkg = pkgDir.indexOf(PKG, STORE.length); + if (pkg) { + return (0, node_path_1.resolve)(nm, pkgDir.substring(0, pkg + PKG.length), 'package.json'); + } + } const pkgName = pkgDir.startsWith('@') ? - pkgDir.split(node_path_1.sep).slice(0, 2).join(node_path_1.sep) + pkgDir.split(node_path_1.sep, 2).join(node_path_1.sep) : String(pkgDir.split(node_path_1.sep)[0]); return (0, node_path_1.resolve)(nm, pkgName, 'package.json'); } diff --git a/deps/npm/node_modules/package-json-from-dist/dist/esm/index.js b/deps/npm/node_modules/package-json-from-dist/dist/esm/index.js index 0627645f9c35a4..426ad3c2d18597 100644 --- a/deps/npm/node_modules/package-json-from-dist/dist/esm/index.js +++ b/deps/npm/node_modules/package-json-from-dist/dist/esm/index.js @@ -2,6 +2,8 @@ import { readFileSync } from 'node:fs'; import { dirname, resolve, sep } from 'node:path'; import { fileURLToPath } from 'node:url'; const NM = `${sep}node_modules${sep}`; +const STORE = `.store${sep}`; +const PKG = `${sep}package${sep}`; const DIST = `${sep}dist${sep}`; /** * Find the package.json file, either from a TypeScript file somewhere not @@ -56,8 +58,16 @@ export const findPackageJson = (from, pathFromSrc = '../package.json') => { // inside of node_modules. find the dist directly under package name. const nm = __dirname.substring(0, nms + NM.length); const pkgDir = __dirname.substring(nms + NM.length); + // affordance for yarn berry, which puts package contents in + // '.../node_modules/.store/${id}-${hash}/package/...' + if (pkgDir.startsWith(STORE)) { + const pkg = pkgDir.indexOf(PKG, STORE.length); + if (pkg) { + return resolve(nm, pkgDir.substring(0, pkg + PKG.length), 'package.json'); + } + } const pkgName = pkgDir.startsWith('@') ? - pkgDir.split(sep).slice(0, 2).join(sep) + pkgDir.split(sep, 2).join(sep) : String(pkgDir.split(sep)[0]); return resolve(nm, pkgName, 'package.json'); } diff --git a/deps/npm/node_modules/package-json-from-dist/package.json b/deps/npm/node_modules/package-json-from-dist/package.json index 2d5526e87b7fa0..a2d03c3269d72d 100644 --- a/deps/npm/node_modules/package-json-from-dist/package.json +++ b/deps/npm/node_modules/package-json-from-dist/package.json @@ -1,6 +1,6 @@ { "name": "package-json-from-dist", - "version": "1.0.0", + "version": "1.0.1", "description": "Load the local package.json from either src or dist folder", "main": "./dist/commonjs/index.js", "exports": { @@ -28,7 +28,7 @@ "presnap": "npm run prepare", "test": "tap", "snap": "tap", - "format": "prettier --write . --loglevel warn --ignore-path ../../.prettierignore --cache", + "format": "prettier --write . --log-level warn", "typedoc": "typedoc" }, "author": "Isaac Z. Schlueter (https://izs.me)", diff --git a/deps/npm/node_modules/pacote/lib/dir.js b/deps/npm/node_modules/pacote/lib/dir.js index f3229b34e463ab..4ae97c216fe64f 100644 --- a/deps/npm/node_modules/pacote/lib/dir.js +++ b/deps/npm/node_modules/pacote/lib/dir.js @@ -39,6 +39,8 @@ class DirFetcher extends Fetcher { const stdio = this.opts.foregroundScripts ? 'inherit' : 'pipe' return runScript({ + // this || undefined is because runScript will be unhappy with the default null value + scriptShell: this.opts.scriptShell || undefined, pkg: mani, event: 'prepare', path: this.resolved, diff --git a/deps/npm/node_modules/pacote/lib/fetcher.js b/deps/npm/node_modules/pacote/lib/fetcher.js index cc2c2db70c697d..f2ac97619d3af1 100644 --- a/deps/npm/node_modules/pacote/lib/fetcher.js +++ b/deps/npm/node_modules/pacote/lib/fetcher.js @@ -188,7 +188,15 @@ class FetcherBase { // private // Note: cacache will raise a EINTEGRITY error if the integrity doesn't match #tarballFromCache () { - return cacache.get.stream.byDigest(this.cache, this.integrity, this.opts) + const startTime = Date.now() + const stream = cacache.get.stream.byDigest(this.cache, this.integrity, this.opts) + const elapsedTime = Date.now() - startTime + // cache is good, so log it as a hit in particular since there was no fetch logged + log.http( + 'cache', + `${this.spec} ${elapsedTime}ms (cache hit)` + ) + return stream } get [_.cacheFetches] () { diff --git a/deps/npm/node_modules/pacote/package.json b/deps/npm/node_modules/pacote/package.json index 0eb8261af96e0c..71c9aa1ce32572 100644 --- a/deps/npm/node_modules/pacote/package.json +++ b/deps/npm/node_modules/pacote/package.json @@ -1,6 +1,6 @@ { "name": "pacote", - "version": "19.0.0", + "version": "19.0.1", "description": "JavaScript package downloader", "author": "GitHub Inc.", "bin": { @@ -59,7 +59,7 @@ "npm-registry-fetch": "^18.0.0", "proc-log": "^5.0.0", "promise-retry": "^2.0.1", - "sigstore": "^2.2.0", + "sigstore": "^3.0.0", "ssri": "^12.0.0", "tar": "^6.1.11" }, diff --git a/deps/npm/node_modules/promise-call-limit/dist/commonjs/index.js b/deps/npm/node_modules/promise-call-limit/dist/commonjs/index.js index 6ce5cfcef9559e..b32a85bb11aa39 100644 --- a/deps/npm/node_modules/promise-call-limit/dist/commonjs/index.js +++ b/deps/npm/node_modules/promise-call-limit/dist/commonjs/index.js @@ -29,8 +29,8 @@ const os = __importStar(require("node:os")); // cpus() cpus() can return an empty list if /proc is not mounted, use 1 in // this case /* c8 ignore start */ -const defLimit = 'availableParallelism' in os - ? Math.max(1, os.availableParallelism() - 1) +const defLimit = 'availableParallelism' in os ? + Math.max(1, os.availableParallelism() - 1) : Math.max(1, os.cpus().length - 1); const callLimit = (queue, { limit = defLimit, rejectLate } = {}) => new Promise((res, rej) => { let active = 0; diff --git a/deps/npm/node_modules/promise-call-limit/dist/esm/index.js b/deps/npm/node_modules/promise-call-limit/dist/esm/index.js index 030099929b3483..fe709db7fc04cc 100644 --- a/deps/npm/node_modules/promise-call-limit/dist/esm/index.js +++ b/deps/npm/node_modules/promise-call-limit/dist/esm/index.js @@ -3,8 +3,8 @@ import * as os from 'node:os'; // cpus() cpus() can return an empty list if /proc is not mounted, use 1 in // this case /* c8 ignore start */ -const defLimit = 'availableParallelism' in os - ? Math.max(1, os.availableParallelism() - 1) +const defLimit = 'availableParallelism' in os ? + Math.max(1, os.availableParallelism() - 1) : Math.max(1, os.cpus().length - 1); export const callLimit = (queue, { limit = defLimit, rejectLate } = {}) => new Promise((res, rej) => { let active = 0; diff --git a/deps/npm/node_modules/promise-call-limit/package.json b/deps/npm/node_modules/promise-call-limit/package.json index a3aa548d6538ac..ab14595366e223 100644 --- a/deps/npm/node_modules/promise-call-limit/package.json +++ b/deps/npm/node_modules/promise-call-limit/package.json @@ -1,6 +1,6 @@ { "name": "promise-call-limit", - "version": "3.0.1", + "version": "3.0.2", "files": [ "dist" ], @@ -18,16 +18,17 @@ "test": "tap", "preversion": "npm test", "postversion": "npm publish", - "prepublishOnly": "git push origin --follow-tags" + "prepublishOnly": "git push origin --follow-tags", + "format": "prettier --write . --log-level warn --cache" }, "devDependencies": { - "prettier": "^3.2.1", - "tap": "^18.6.1", - "tshy": "^1.8.2", - "format": "prettier --write . --loglevel warn --ignore-path ../../.prettierignore --cache", - "typedoc": "typedoc" + "prettier": "^3.3.3", + "tap": "^21.0.1", + "tshy": "^3.0.2", + "typedoc": "^0.26.6" }, "prettier": { + "experimentalTernaries": true, "semi": false, "printWidth": 70, "tabWidth": 2, @@ -62,5 +63,6 @@ }, "main": "./dist/commonjs/index.js", "types": "./dist/commonjs/index.d.ts", - "type": "module" + "type": "module", + "module": "./dist/esm/index.js" } diff --git a/deps/npm/node_modules/sigstore/dist/config.js b/deps/npm/node_modules/sigstore/dist/config.js index b4f0eea74fa4b4..e8b2392f97f236 100644 --- a/deps/npm/node_modules/sigstore/dist/config.js +++ b/deps/npm/node_modules/sigstore/dist/config.js @@ -1,6 +1,9 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.createVerificationPolicy = exports.createKeyFinder = exports.createBundleBuilder = exports.DEFAULT_TIMEOUT = exports.DEFAULT_RETRY = void 0; +exports.DEFAULT_TIMEOUT = exports.DEFAULT_RETRY = void 0; +exports.createBundleBuilder = createBundleBuilder; +exports.createKeyFinder = createKeyFinder; +exports.createVerificationPolicy = createVerificationPolicy; /* Copyright 2023 The Sigstore Authors. @@ -30,10 +33,12 @@ function createBundleBuilder(bundleType, options) { case 'messageSignature': return new sign_1.MessageSignatureBundleBuilder(bundlerOptions); case 'dsseEnvelope': - return new sign_1.DSSEBundleBuilder(bundlerOptions); + return new sign_1.DSSEBundleBuilder({ + ...bundlerOptions, + certificateChain: options.legacyCompatibility, + }); } } -exports.createBundleBuilder = createBundleBuilder; // Translates the public KeySelector type into the KeyFinderFunc type needed by // the verifier. function createKeyFinder(keySelector) { @@ -51,7 +56,6 @@ function createKeyFinder(keySelector) { }; }; } -exports.createKeyFinder = createKeyFinder; function createVerificationPolicy(options) { const policy = {}; const san = options.certificateIdentityEmail || options.certificateIdentityURI; @@ -63,7 +67,6 @@ function createVerificationPolicy(options) { } return policy; } -exports.createVerificationPolicy = createVerificationPolicy; // Instantiate the FulcioSigner based on the supplied options. function initSigner(options) { return new sign_1.FulcioSigner({ @@ -92,6 +95,7 @@ function initWitnesses(options) { if (isRekorEnabled(options)) { witnesses.push(new sign_1.RekorWitness({ rekorBaseURL: options.rekorURL, + entryType: options.legacyCompatibility ? 'intoto' : 'dsse', fetchOnConflict: false, retry: options.retry ?? exports.DEFAULT_RETRY, timeout: options.timeout ?? exports.DEFAULT_TIMEOUT, diff --git a/deps/npm/node_modules/sigstore/dist/sigstore.js b/deps/npm/node_modules/sigstore/dist/sigstore.js index 79d3440670cd50..c45524bbe21c22 100644 --- a/deps/npm/node_modules/sigstore/dist/sigstore.js +++ b/deps/npm/node_modules/sigstore/dist/sigstore.js @@ -23,7 +23,10 @@ var __importStar = (this && this.__importStar) || function (mod) { return result; }; Object.defineProperty(exports, "__esModule", { value: true }); -exports.createVerifier = exports.verify = exports.attest = exports.sign = void 0; +exports.sign = sign; +exports.attest = attest; +exports.verify = verify; +exports.createVerifier = createVerifier; /* Copyright 2023 The Sigstore Authors. @@ -50,7 +53,6 @@ options = {}) { const bundle = await bundler.create({ data: payload }); return (0, bundle_1.bundleToJSON)(bundle); } -exports.sign = sign; async function attest(payload, payloadType, /* istanbul ignore next */ options = {}) { @@ -58,7 +60,6 @@ options = {}) { const bundle = await bundler.create({ data: payload, type: payloadType }); return (0, bundle_1.bundleToJSON)(bundle); } -exports.attest = attest; async function verify(bundle, dataOrOptions, options) { let data; if (Buffer.isBuffer(dataOrOptions)) { @@ -69,7 +70,6 @@ async function verify(bundle, dataOrOptions, options) { } return createVerifier(options).then((verifier) => verifier.verify(bundle, data)); } -exports.verify = verify; async function createVerifier( /* istanbul ignore next */ options = {}) { @@ -100,4 +100,3 @@ options = {}) { }, }; } -exports.createVerifier = createVerifier; diff --git a/deps/npm/node_modules/@sigstore/bundle/LICENSE b/deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/LICENSE similarity index 100% rename from deps/npm/node_modules/@sigstore/bundle/LICENSE rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/LICENSE diff --git a/deps/npm/node_modules/@sigstore/bundle/dist/build.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/dist/build.js similarity index 87% rename from deps/npm/node_modules/@sigstore/bundle/dist/build.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/dist/build.js index 65c71b100ad58f..ade736407554c6 100644 --- a/deps/npm/node_modules/@sigstore/bundle/dist/build.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/dist/build.js @@ -1,6 +1,7 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.toDSSEBundle = exports.toMessageSignatureBundle = void 0; +exports.toMessageSignatureBundle = toMessageSignatureBundle; +exports.toDSSEBundle = toDSSEBundle; /* Copyright 2023 The Sigstore Authors. @@ -21,9 +22,9 @@ const bundle_1 = require("./bundle"); // Message signature bundle - $case: 'messageSignature' function toMessageSignatureBundle(options) { return { - mediaType: options.singleCertificate - ? bundle_1.BUNDLE_V03_MEDIA_TYPE - : bundle_1.BUNDLE_V02_MEDIA_TYPE, + mediaType: options.certificateChain + ? bundle_1.BUNDLE_V02_MEDIA_TYPE + : bundle_1.BUNDLE_V03_MEDIA_TYPE, content: { $case: 'messageSignature', messageSignature: { @@ -37,13 +38,12 @@ function toMessageSignatureBundle(options) { verificationMaterial: toVerificationMaterial(options), }; } -exports.toMessageSignatureBundle = toMessageSignatureBundle; // DSSE envelope bundle - $case: 'dsseEnvelope' function toDSSEBundle(options) { return { - mediaType: options.singleCertificate - ? bundle_1.BUNDLE_V03_MEDIA_TYPE - : bundle_1.BUNDLE_V02_MEDIA_TYPE, + mediaType: options.certificateChain + ? bundle_1.BUNDLE_V02_MEDIA_TYPE + : bundle_1.BUNDLE_V03_MEDIA_TYPE, content: { $case: 'dsseEnvelope', dsseEnvelope: toEnvelope(options), @@ -51,7 +51,6 @@ function toDSSEBundle(options) { verificationMaterial: toVerificationMaterial(options), }; } -exports.toDSSEBundle = toDSSEBundle; function toEnvelope(options) { return { payloadType: options.artifactType, @@ -75,13 +74,7 @@ function toVerificationMaterial(options) { } function toKeyContent(options) { if (options.certificate) { - if (options.singleCertificate) { - return { - $case: 'certificate', - certificate: { rawBytes: options.certificate }, - }; - } - else { + if (options.certificateChain) { return { $case: 'x509CertificateChain', x509CertificateChain: { @@ -89,6 +82,12 @@ function toKeyContent(options) { }, }; } + else { + return { + $case: 'certificate', + certificate: { rawBytes: options.certificate }, + }; + } } else { return { diff --git a/deps/npm/node_modules/@sigstore/bundle/dist/bundle.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/dist/bundle.js similarity index 79% rename from deps/npm/node_modules/@sigstore/bundle/dist/bundle.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/dist/bundle.js index dbd35df2ca2bb3..eb67a0ddc17bbb 100644 --- a/deps/npm/node_modules/@sigstore/bundle/dist/bundle.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/dist/bundle.js @@ -1,6 +1,10 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.isBundleWithDsseEnvelope = exports.isBundleWithMessageSignature = exports.isBundleWithPublicKey = exports.isBundleWithCertificateChain = exports.BUNDLE_V03_MEDIA_TYPE = exports.BUNDLE_V03_LEGACY_MEDIA_TYPE = exports.BUNDLE_V02_MEDIA_TYPE = exports.BUNDLE_V01_MEDIA_TYPE = void 0; +exports.BUNDLE_V03_MEDIA_TYPE = exports.BUNDLE_V03_LEGACY_MEDIA_TYPE = exports.BUNDLE_V02_MEDIA_TYPE = exports.BUNDLE_V01_MEDIA_TYPE = void 0; +exports.isBundleWithCertificateChain = isBundleWithCertificateChain; +exports.isBundleWithPublicKey = isBundleWithPublicKey; +exports.isBundleWithMessageSignature = isBundleWithMessageSignature; +exports.isBundleWithDsseEnvelope = isBundleWithDsseEnvelope; exports.BUNDLE_V01_MEDIA_TYPE = 'application/vnd.dev.sigstore.bundle+json;version=0.1'; exports.BUNDLE_V02_MEDIA_TYPE = 'application/vnd.dev.sigstore.bundle+json;version=0.2'; exports.BUNDLE_V03_LEGACY_MEDIA_TYPE = 'application/vnd.dev.sigstore.bundle+json;version=0.3'; @@ -9,16 +13,12 @@ exports.BUNDLE_V03_MEDIA_TYPE = 'application/vnd.dev.sigstore.bundle.v0.3+json'; function isBundleWithCertificateChain(b) { return b.verificationMaterial.content.$case === 'x509CertificateChain'; } -exports.isBundleWithCertificateChain = isBundleWithCertificateChain; function isBundleWithPublicKey(b) { return b.verificationMaterial.content.$case === 'publicKey'; } -exports.isBundleWithPublicKey = isBundleWithPublicKey; function isBundleWithMessageSignature(b) { return b.content.$case === 'messageSignature'; } -exports.isBundleWithMessageSignature = isBundleWithMessageSignature; function isBundleWithDsseEnvelope(b) { return b.content.$case === 'dsseEnvelope'; } -exports.isBundleWithDsseEnvelope = isBundleWithDsseEnvelope; diff --git a/deps/npm/node_modules/@sigstore/bundle/dist/error.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/dist/error.js similarity index 100% rename from deps/npm/node_modules/@sigstore/bundle/dist/error.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/dist/error.js diff --git a/deps/npm/node_modules/@sigstore/bundle/dist/index.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/dist/index.js similarity index 100% rename from deps/npm/node_modules/@sigstore/bundle/dist/index.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/dist/index.js diff --git a/deps/npm/node_modules/@sigstore/bundle/dist/serialized.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/dist/serialized.js similarity index 100% rename from deps/npm/node_modules/@sigstore/bundle/dist/serialized.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/dist/serialized.js diff --git a/deps/npm/node_modules/@sigstore/bundle/dist/utility.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/dist/utility.js similarity index 100% rename from deps/npm/node_modules/@sigstore/bundle/dist/utility.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/dist/utility.js diff --git a/deps/npm/node_modules/@sigstore/bundle/dist/validate.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/dist/validate.js similarity index 98% rename from deps/npm/node_modules/@sigstore/bundle/dist/validate.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/dist/validate.js index 67079cd1f680a9..21b8b5ee293ba1 100644 --- a/deps/npm/node_modules/@sigstore/bundle/dist/validate.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/dist/validate.js @@ -1,6 +1,10 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.assertBundleLatest = exports.assertBundleV02 = exports.isBundleV01 = exports.assertBundleV01 = exports.assertBundle = void 0; +exports.assertBundle = assertBundle; +exports.assertBundleV01 = assertBundleV01; +exports.isBundleV01 = isBundleV01; +exports.assertBundleV02 = assertBundleV02; +exports.assertBundleLatest = assertBundleLatest; /* Copyright 2023 The Sigstore Authors. @@ -27,7 +31,6 @@ function assertBundle(b) { throw new error_1.ValidationError('invalid bundle', invalidValues); } } -exports.assertBundle = assertBundle; // Asserts that the given bundle conforms to the v0.1 bundle format. function assertBundleV01(b) { const invalidValues = []; @@ -37,7 +40,6 @@ function assertBundleV01(b) { throw new error_1.ValidationError('invalid v0.1 bundle', invalidValues); } } -exports.assertBundleV01 = assertBundleV01; // Type guard to determine if Bundle is a v0.1 bundle. function isBundleV01(b) { try { @@ -48,7 +50,6 @@ function isBundleV01(b) { return false; } } -exports.isBundleV01 = isBundleV01; // Asserts that the given bundle conforms to the v0.2 bundle format. function assertBundleV02(b) { const invalidValues = []; @@ -58,7 +59,6 @@ function assertBundleV02(b) { throw new error_1.ValidationError('invalid v0.2 bundle', invalidValues); } } -exports.assertBundleV02 = assertBundleV02; // Asserts that the given bundle conforms to the newest (0.3) bundle format. function assertBundleLatest(b) { const invalidValues = []; @@ -69,7 +69,6 @@ function assertBundleLatest(b) { throw new error_1.ValidationError('invalid bundle', invalidValues); } } -exports.assertBundleLatest = assertBundleLatest; function validateBundleBase(b) { const invalidValues = []; // Media type validation @@ -192,6 +191,7 @@ function validateInclusionProof(b) { // Necessary for V03 and later bundles function validateNoCertificateChain(b) { const invalidValues = []; + /* istanbul ignore next */ if (b.verificationMaterial?.content?.$case === 'x509CertificateChain') { invalidValues.push('verificationMaterial.content.$case'); } diff --git a/deps/npm/node_modules/@sigstore/bundle/package.json b/deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/package.json similarity index 92% rename from deps/npm/node_modules/@sigstore/bundle/package.json rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/package.json index dd853897226d2f..ee5d2b92b801a5 100644 --- a/deps/npm/node_modules/@sigstore/bundle/package.json +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/bundle/package.json @@ -1,6 +1,6 @@ { "name": "@sigstore/bundle", - "version": "2.3.2", + "version": "3.0.0", "description": "Sigstore bundle type", "main": "dist/index.js", "types": "dist/index.d.ts", @@ -30,6 +30,6 @@ "@sigstore/protobuf-specs": "^0.3.2" }, "engines": { - "node": "^16.14.0 || >=18.0.0" + "node": "^18.17.0 || >=20.5.0" } } diff --git a/deps/npm/node_modules/@sigstore/core/LICENSE b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/LICENSE similarity index 100% rename from deps/npm/node_modules/@sigstore/core/LICENSE rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/LICENSE diff --git a/deps/npm/node_modules/@sigstore/core/dist/asn1/error.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/asn1/error.js similarity index 100% rename from deps/npm/node_modules/@sigstore/core/dist/asn1/error.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/asn1/error.js diff --git a/deps/npm/node_modules/@sigstore/core/dist/asn1/index.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/asn1/index.js similarity index 100% rename from deps/npm/node_modules/@sigstore/core/dist/asn1/index.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/asn1/index.js diff --git a/deps/npm/node_modules/@sigstore/core/dist/asn1/length.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/asn1/length.js similarity index 97% rename from deps/npm/node_modules/@sigstore/core/dist/asn1/length.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/asn1/length.js index 36fdaf5b9777fd..cb7ebf09dbefa4 100644 --- a/deps/npm/node_modules/@sigstore/core/dist/asn1/length.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/asn1/length.js @@ -15,7 +15,8 @@ See the License for the specific language governing permissions and limitations under the License. */ Object.defineProperty(exports, "__esModule", { value: true }); -exports.encodeLength = exports.decodeLength = void 0; +exports.decodeLength = decodeLength; +exports.encodeLength = encodeLength; const error_1 = require("./error"); // Decodes the length of a DER-encoded ANS.1 element from the supplied stream. // https://learn.microsoft.com/en-us/windows/win32/seccertenroll/about-encoded-length-and-value-bytes @@ -44,7 +45,6 @@ function decodeLength(stream) { } return len; } -exports.decodeLength = decodeLength; // Translates the supplied value to a DER-encoded length. function encodeLength(len) { if (len < 128) { @@ -60,4 +60,3 @@ function encodeLength(len) { } return Buffer.from([0x80 | bytes.length, ...bytes]); } -exports.encodeLength = encodeLength; diff --git a/deps/npm/node_modules/@sigstore/core/dist/asn1/obj.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/asn1/obj.js similarity index 100% rename from deps/npm/node_modules/@sigstore/core/dist/asn1/obj.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/asn1/obj.js diff --git a/deps/npm/node_modules/@sigstore/core/dist/asn1/parse.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/asn1/parse.js similarity index 96% rename from deps/npm/node_modules/@sigstore/core/dist/asn1/parse.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/asn1/parse.js index 482c7239e83162..7fbb42632c60e8 100644 --- a/deps/npm/node_modules/@sigstore/core/dist/asn1/parse.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/asn1/parse.js @@ -1,6 +1,11 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.parseBitString = exports.parseBoolean = exports.parseOID = exports.parseTime = exports.parseStringASCII = exports.parseInteger = void 0; +exports.parseInteger = parseInteger; +exports.parseStringASCII = parseStringASCII; +exports.parseTime = parseTime; +exports.parseOID = parseOID; +exports.parseBoolean = parseBoolean; +exports.parseBitString = parseBitString; /* Copyright 2023 The Sigstore Authors. @@ -43,13 +48,11 @@ function parseInteger(buf) { } return n; } -exports.parseInteger = parseInteger; // Parse an ASCII string from the DER-encoded buffer // https://learn.microsoft.com/en-us/windows/win32/seccertenroll/about-basic-types#boolean function parseStringASCII(buf) { return buf.toString('ascii'); } -exports.parseStringASCII = parseStringASCII; // Parse a Date from the DER-encoded buffer // https://www.rfc-editor.org/rfc/rfc5280#section-4.1.2.5.1 function parseTime(buf, shortYear) { @@ -70,7 +73,6 @@ function parseTime(buf, shortYear) { // Translate to ISO8601 format and parse return new Date(`${m[1]}-${m[2]}-${m[3]}T${m[4]}:${m[5]}:${m[6]}Z`); } -exports.parseTime = parseTime; // Parse an OID from the DER-encoded buffer // https://learn.microsoft.com/en-us/windows/win32/seccertenroll/about-object-identifier function parseOID(buf) { @@ -95,13 +97,11 @@ function parseOID(buf) { } return oid; } -exports.parseOID = parseOID; // Parse a boolean from the DER-encoded buffer // https://learn.microsoft.com/en-us/windows/win32/seccertenroll/about-basic-types#boolean function parseBoolean(buf) { return buf[0] !== 0; } -exports.parseBoolean = parseBoolean; // Parse a bit string from the DER-encoded buffer // https://learn.microsoft.com/en-us/windows/win32/seccertenroll/about-bit-string function parseBitString(buf) { @@ -122,4 +122,3 @@ function parseBitString(buf) { } return bits; } -exports.parseBitString = parseBitString; diff --git a/deps/npm/node_modules/@sigstore/core/dist/asn1/tag.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/asn1/tag.js similarity index 100% rename from deps/npm/node_modules/@sigstore/core/dist/asn1/tag.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/asn1/tag.js diff --git a/deps/npm/node_modules/@sigstore/core/dist/crypto.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/crypto.js similarity index 83% rename from deps/npm/node_modules/@sigstore/core/dist/crypto.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/crypto.js index dbe65b165d3574..296b5ba43e86a0 100644 --- a/deps/npm/node_modules/@sigstore/core/dist/crypto.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/crypto.js @@ -3,7 +3,10 @@ var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); -exports.bufferEqual = exports.verify = exports.hash = exports.digest = exports.createPublicKey = void 0; +exports.createPublicKey = createPublicKey; +exports.digest = digest; +exports.verify = verify; +exports.bufferEqual = bufferEqual; /* Copyright 2023 The Sigstore Authors. @@ -20,7 +23,6 @@ See the License for the specific language governing permissions and limitations under the License. */ const crypto_1 = __importDefault(require("crypto")); -const SHA256_ALGORITHM = 'sha256'; function createPublicKey(key, type = 'spki') { if (typeof key === 'string') { return crypto_1.default.createPublicKey(key); @@ -29,7 +31,6 @@ function createPublicKey(key, type = 'spki') { return crypto_1.default.createPublicKey({ key, format: 'der', type: type }); } } -exports.createPublicKey = createPublicKey; function digest(algorithm, ...data) { const hash = crypto_1.default.createHash(algorithm); for (const d of data) { @@ -37,16 +38,6 @@ function digest(algorithm, ...data) { } return hash.digest(); } -exports.digest = digest; -// TODO: deprecate this in favor of digest() -function hash(...data) { - const hash = crypto_1.default.createHash(SHA256_ALGORITHM); - for (const d of data) { - hash.update(d); - } - return hash.digest(); -} -exports.hash = hash; function verify(data, key, signature, algorithm) { // The try/catch is to work around an issue in Node 14.x where verify throws // an error in some scenarios if the signature is invalid. @@ -58,7 +49,6 @@ function verify(data, key, signature, algorithm) { return false; } } -exports.verify = verify; function bufferEqual(a, b) { try { return crypto_1.default.timingSafeEqual(a, b); @@ -68,4 +58,3 @@ function bufferEqual(a, b) { return false; } } -exports.bufferEqual = bufferEqual; diff --git a/deps/npm/node_modules/@sigstore/core/dist/dsse.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/dsse.js similarity index 96% rename from deps/npm/node_modules/@sigstore/core/dist/dsse.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/dsse.js index a78783c919a256..ca7b63630e2ba9 100644 --- a/deps/npm/node_modules/@sigstore/core/dist/dsse.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/dsse.js @@ -1,6 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.preAuthEncoding = void 0; +exports.preAuthEncoding = preAuthEncoding; /* Copyright 2023 The Sigstore Authors. @@ -28,4 +28,3 @@ function preAuthEncoding(payloadType, payload) { ].join(' '); return Buffer.concat([Buffer.from(prefix, 'ascii'), payload]); } -exports.preAuthEncoding = preAuthEncoding; diff --git a/deps/npm/node_modules/@sigstore/core/dist/encoding.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/encoding.js similarity index 94% rename from deps/npm/node_modules/@sigstore/core/dist/encoding.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/encoding.js index b020ac4d6ecd42..7113af66db4c2d 100644 --- a/deps/npm/node_modules/@sigstore/core/dist/encoding.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/encoding.js @@ -1,6 +1,7 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.base64Decode = exports.base64Encode = void 0; +exports.base64Encode = base64Encode; +exports.base64Decode = base64Decode; /* Copyright 2023 The Sigstore Authors. @@ -21,8 +22,6 @@ const UTF8_ENCODING = 'utf-8'; function base64Encode(str) { return Buffer.from(str, UTF8_ENCODING).toString(BASE64_ENCODING); } -exports.base64Encode = base64Encode; function base64Decode(str) { return Buffer.from(str, BASE64_ENCODING).toString(UTF8_ENCODING); } -exports.base64Decode = base64Decode; diff --git a/deps/npm/node_modules/@sigstore/core/dist/index.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/index.js similarity index 100% rename from deps/npm/node_modules/@sigstore/core/dist/index.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/index.js diff --git a/deps/npm/node_modules/@sigstore/core/dist/json.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/json.js similarity index 98% rename from deps/npm/node_modules/@sigstore/core/dist/json.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/json.js index a50df7233c7c58..7808d033b98cc9 100644 --- a/deps/npm/node_modules/@sigstore/core/dist/json.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/json.js @@ -15,7 +15,7 @@ See the License for the specific language governing permissions and limitations under the License. */ Object.defineProperty(exports, "__esModule", { value: true }); -exports.canonicalize = void 0; +exports.canonicalize = canonicalize; // JSON canonicalization per https://github.com/cyberphone/json-canonicalization // eslint-disable-next-line @typescript-eslint/no-explicit-any function canonicalize(object) { @@ -58,4 +58,3 @@ function canonicalize(object) { } return buffer; } -exports.canonicalize = canonicalize; diff --git a/deps/npm/node_modules/@sigstore/core/dist/oid.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/oid.js similarity index 100% rename from deps/npm/node_modules/@sigstore/core/dist/oid.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/oid.js diff --git a/deps/npm/node_modules/@sigstore/core/dist/pem.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/pem.js similarity index 97% rename from deps/npm/node_modules/@sigstore/core/dist/pem.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/pem.js index f35bc3835bbd10..f1241d28d586ec 100644 --- a/deps/npm/node_modules/@sigstore/core/dist/pem.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/pem.js @@ -1,6 +1,7 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.fromDER = exports.toDER = void 0; +exports.toDER = toDER; +exports.fromDER = fromDER; /* Copyright 2023 The Sigstore Authors. @@ -28,7 +29,6 @@ function toDER(certificate) { }); return Buffer.from(der, 'base64'); } -exports.toDER = toDER; // Translates a DER-encoded buffer into a PEM-encoded string. Standard PEM // encoding dictates that each certificate should have a trailing newline after // the footer. @@ -41,4 +41,3 @@ function fromDER(certificate, type = 'CERTIFICATE') { .join('\n') .concat('\n'); } -exports.fromDER = fromDER; diff --git a/deps/npm/node_modules/@sigstore/core/dist/rfc3161/error.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/rfc3161/error.js similarity index 100% rename from deps/npm/node_modules/@sigstore/core/dist/rfc3161/error.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/rfc3161/error.js diff --git a/deps/npm/node_modules/@sigstore/core/dist/rfc3161/index.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/rfc3161/index.js similarity index 100% rename from deps/npm/node_modules/@sigstore/core/dist/rfc3161/index.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/rfc3161/index.js diff --git a/deps/npm/node_modules/@sigstore/core/dist/rfc3161/timestamp.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/rfc3161/timestamp.js similarity index 100% rename from deps/npm/node_modules/@sigstore/core/dist/rfc3161/timestamp.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/rfc3161/timestamp.js diff --git a/deps/npm/node_modules/@sigstore/core/dist/rfc3161/tstinfo.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/rfc3161/tstinfo.js similarity index 100% rename from deps/npm/node_modules/@sigstore/core/dist/rfc3161/tstinfo.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/rfc3161/tstinfo.js diff --git a/deps/npm/node_modules/@sigstore/core/dist/stream.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/stream.js similarity index 100% rename from deps/npm/node_modules/@sigstore/core/dist/stream.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/stream.js diff --git a/deps/npm/node_modules/@sigstore/core/dist/x509/cert.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/x509/cert.js similarity index 96% rename from deps/npm/node_modules/@sigstore/core/dist/x509/cert.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/x509/cert.js index 16c0c40d858d8a..72ea8e0738bc83 100644 --- a/deps/npm/node_modules/@sigstore/core/dist/x509/cert.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/x509/cert.js @@ -97,13 +97,15 @@ class X509Certificate { } get subjectAltName() { const ext = this.extSubjectAltName; - return ext?.uri || ext?.rfc822Name; + return ext?.uri || /* istanbul ignore next */ ext?.rfc822Name; } get extensions() { // The extension list is the first (and only) element of the extensions // context specific tag + /* istanbul ignore next */ const extSeq = this.extensionsObj?.subs[0]; - return extSeq?.subs || /* istanbul ignore next */ []; + /* istanbul ignore next */ + return extSeq?.subs || []; } get extKeyUsage() { const ext = this.findExtension(EXTENSION_OID_KEY_USAGE); @@ -135,8 +137,10 @@ class X509Certificate { const ca = this.extBasicConstraints?.isCA || false; // If the KeyUsage extension is present, keyCertSign must be set if (this.extKeyUsage) { - ca && this.extKeyUsage.keyCertSign; + return ca && this.extKeyUsage.keyCertSign; } + // TODO: test coverage for this case + /* istanbul ignore next */ return ca; } extension(oid) { diff --git a/deps/npm/node_modules/@sigstore/core/dist/x509/ext.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/x509/ext.js similarity index 100% rename from deps/npm/node_modules/@sigstore/core/dist/x509/ext.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/x509/ext.js diff --git a/deps/npm/node_modules/@sigstore/core/dist/x509/index.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/x509/index.js similarity index 100% rename from deps/npm/node_modules/@sigstore/core/dist/x509/index.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/x509/index.js diff --git a/deps/npm/node_modules/@sigstore/core/dist/x509/sct.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/x509/sct.js similarity index 100% rename from deps/npm/node_modules/@sigstore/core/dist/x509/sct.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/dist/x509/sct.js diff --git a/deps/npm/node_modules/@sigstore/core/package.json b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/package.json similarity index 92% rename from deps/npm/node_modules/@sigstore/core/package.json rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/core/package.json index 621ff1715bcd1c..af5dd281ac90e4 100644 --- a/deps/npm/node_modules/@sigstore/core/package.json +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/core/package.json @@ -1,6 +1,6 @@ { "name": "@sigstore/core", - "version": "1.1.0", + "version": "2.0.0", "description": "Base library for Sigstore", "main": "dist/index.js", "types": "dist/index.d.ts", @@ -26,6 +26,6 @@ "provenance": true }, "engines": { - "node": "^16.14.0 || >=18.0.0" + "node": "^18.17.0 || >=20.5.0" } } diff --git a/deps/npm/node_modules/@sigstore/sign/LICENSE b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/LICENSE similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/LICENSE rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/LICENSE diff --git a/deps/npm/node_modules/@sigstore/sign/dist/bundler/base.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/bundler/base.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/bundler/base.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/bundler/base.js diff --git a/deps/npm/node_modules/@sigstore/sign/dist/bundler/bundle.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/bundler/bundle.js similarity index 93% rename from deps/npm/node_modules/@sigstore/sign/dist/bundler/bundle.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/bundler/bundle.js index 7c2ca9164f0dfe..ed32286ad88efd 100644 --- a/deps/npm/node_modules/@sigstore/sign/dist/bundler/bundle.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/bundler/bundle.js @@ -23,7 +23,8 @@ var __importStar = (this && this.__importStar) || function (mod) { return result; }; Object.defineProperty(exports, "__esModule", { value: true }); -exports.toDSSEBundle = exports.toMessageSignatureBundle = void 0; +exports.toMessageSignatureBundle = toMessageSignatureBundle; +exports.toDSSEBundle = toDSSEBundle; /* Copyright 2023 The Sigstore Authors. @@ -44,7 +45,7 @@ const util_1 = require("../util"); // Helper functions for assembling the parts of a Sigstore bundle // Message signature bundle - $case: 'messageSignature' function toMessageSignatureBundle(artifact, signature) { - const digest = util_1.crypto.hash(artifact.data); + const digest = util_1.crypto.digest('sha256', artifact.data); return sigstore.toMessageSignatureBundle({ digest, signature: signature.signature, @@ -52,11 +53,11 @@ function toMessageSignatureBundle(artifact, signature) { ? util_1.pem.toDER(signature.key.certificate) : undefined, keyHint: signature.key.$case === 'publicKey' ? signature.key.hint : undefined, + certificateChain: true, }); } -exports.toMessageSignatureBundle = toMessageSignatureBundle; // DSSE envelope bundle - $case: 'dsseEnvelope' -function toDSSEBundle(artifact, signature, singleCertificate) { +function toDSSEBundle(artifact, signature, certificateChain) { return sigstore.toDSSEBundle({ artifact: artifact.data, artifactType: artifact.type, @@ -65,7 +66,6 @@ function toDSSEBundle(artifact, signature, singleCertificate) { ? util_1.pem.toDER(signature.key.certificate) : undefined, keyHint: signature.key.$case === 'publicKey' ? signature.key.hint : undefined, - singleCertificate, + certificateChain, }); } -exports.toDSSEBundle = toDSSEBundle; diff --git a/deps/npm/node_modules/@sigstore/sign/dist/bundler/dsse.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/bundler/dsse.js similarity index 93% rename from deps/npm/node_modules/@sigstore/sign/dist/bundler/dsse.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/bundler/dsse.js index 621700df93842a..86046ba8f3013b 100644 --- a/deps/npm/node_modules/@sigstore/sign/dist/bundler/dsse.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/bundler/dsse.js @@ -23,7 +23,7 @@ const bundle_1 = require("./bundle"); class DSSEBundleBuilder extends base_1.BaseBundleBuilder { constructor(options) { super(options); - this.singleCertificate = options.singleCertificate ?? false; + this.certificateChain = options.certificateChain ?? false; } // DSSE requires the artifact to be pre-encoded with the payload type // before the signature is generated. @@ -33,7 +33,7 @@ class DSSEBundleBuilder extends base_1.BaseBundleBuilder { } // Packages the artifact and signature into a DSSE bundle async package(artifact, signature) { - return (0, bundle_1.toDSSEBundle)(artifactDefaults(artifact), signature, this.singleCertificate); + return (0, bundle_1.toDSSEBundle)(artifactDefaults(artifact), signature, this.certificateChain); } } exports.DSSEBundleBuilder = DSSEBundleBuilder; diff --git a/deps/npm/node_modules/@sigstore/sign/dist/bundler/index.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/bundler/index.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/bundler/index.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/bundler/index.js diff --git a/deps/npm/node_modules/@sigstore/sign/dist/bundler/message.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/bundler/message.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/bundler/message.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/bundler/message.js diff --git a/deps/npm/node_modules/@sigstore/sign/dist/error.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/error.js similarity index 95% rename from deps/npm/node_modules/@sigstore/sign/dist/error.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/error.js index d57e4567fb89ee..d28f1913cc77e9 100644 --- a/deps/npm/node_modules/@sigstore/sign/dist/error.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/error.js @@ -15,7 +15,8 @@ See the License for the specific language governing permissions and limitations under the License. */ Object.defineProperty(exports, "__esModule", { value: true }); -exports.internalError = exports.InternalError = void 0; +exports.InternalError = void 0; +exports.internalError = internalError; const error_1 = require("./external/error"); class InternalError extends Error { constructor({ code, message, cause, }) { @@ -36,4 +37,3 @@ function internalError(err, code, message) { cause: err, }); } -exports.internalError = internalError; diff --git a/deps/npm/node_modules/@sigstore/sign/dist/external/error.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/external/error.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/external/error.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/external/error.js diff --git a/deps/npm/node_modules/@sigstore/sign/dist/external/fetch.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/external/fetch.js similarity index 95% rename from deps/npm/node_modules/@sigstore/sign/dist/external/fetch.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/external/fetch.js index b2d81bde7be16f..116090f3c641ef 100644 --- a/deps/npm/node_modules/@sigstore/sign/dist/external/fetch.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/external/fetch.js @@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); -exports.fetchWithRetry = void 0; +exports.fetchWithRetry = fetchWithRetry; /* Copyright 2023 The Sigstore Authors. @@ -58,14 +58,13 @@ async function fetchWithRetry(url, options) { } }, retryOpts(options.retry)); } -exports.fetchWithRetry = fetchWithRetry; // Translate a Response into an HTTPError instance. This will attempt to parse // the response body for a message, but will default to the statusText if none // is found. const errorFromResponse = async (response) => { let message = response.statusText; - const location = response.headers?.get(HTTP2_HEADER_LOCATION) || undefined; - const contentType = response.headers?.get(HTTP2_HEADER_CONTENT_TYPE); + const location = response.headers.get(HTTP2_HEADER_LOCATION) || undefined; + const contentType = response.headers.get(HTTP2_HEADER_CONTENT_TYPE); // If response type is JSON, try to parse the body for a message if (contentType?.includes('application/json')) { try { diff --git a/deps/npm/node_modules/@sigstore/sign/dist/external/fulcio.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/external/fulcio.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/external/fulcio.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/external/fulcio.js diff --git a/deps/npm/node_modules/@sigstore/sign/dist/external/rekor.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/external/rekor.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/external/rekor.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/external/rekor.js diff --git a/deps/npm/node_modules/@sigstore/sign/dist/external/tsa.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/external/tsa.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/external/tsa.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/external/tsa.js diff --git a/deps/npm/node_modules/@sigstore/sign/dist/identity/ci.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/identity/ci.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/identity/ci.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/identity/ci.js diff --git a/deps/npm/node_modules/@sigstore/sign/dist/identity/index.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/identity/index.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/identity/index.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/identity/index.js diff --git a/deps/npm/node_modules/@sigstore/sign/dist/identity/provider.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/identity/provider.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/identity/provider.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/identity/provider.js diff --git a/deps/npm/node_modules/@sigstore/sign/dist/index.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/index.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/index.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/index.js diff --git a/deps/npm/node_modules/@sigstore/sign/dist/signer/fulcio/ca.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/signer/fulcio/ca.js similarity index 96% rename from deps/npm/node_modules/@sigstore/sign/dist/signer/fulcio/ca.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/signer/fulcio/ca.js index 81b421eabadb2e..f01703cfab5645 100644 --- a/deps/npm/node_modules/@sigstore/sign/dist/signer/fulcio/ca.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/signer/fulcio/ca.js @@ -35,7 +35,6 @@ class CAClient { const cert = resp.signedCertificateEmbeddedSct ? resp.signedCertificateEmbeddedSct : resp.signedCertificateDetachedSct; - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion return cert.chain.certificates; } catch (err) { diff --git a/deps/npm/node_modules/@sigstore/sign/dist/signer/fulcio/ephemeral.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/signer/fulcio/ephemeral.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/signer/fulcio/ephemeral.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/signer/fulcio/ephemeral.js diff --git a/deps/npm/node_modules/@sigstore/sign/dist/signer/fulcio/index.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/signer/fulcio/index.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/signer/fulcio/index.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/signer/fulcio/index.js diff --git a/deps/npm/node_modules/@sigstore/sign/dist/signer/index.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/signer/index.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/signer/index.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/signer/index.js diff --git a/deps/npm/node_modules/@sigstore/sign/dist/signer/signer.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/signer/signer.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/signer/signer.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/signer/signer.js diff --git a/deps/npm/node_modules/@sigstore/sign/dist/types/fetch.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/types/fetch.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/types/fetch.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/types/fetch.js diff --git a/deps/npm/node_modules/@sigstore/sign/dist/util/index.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/util/index.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/util/index.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/util/index.js diff --git a/deps/npm/node_modules/@sigstore/sign/dist/util/oidc.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/util/oidc.js similarity index 96% rename from deps/npm/node_modules/@sigstore/sign/dist/util/oidc.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/util/oidc.js index 2f5947d7b6b878..37c5b168ee12e6 100644 --- a/deps/npm/node_modules/@sigstore/sign/dist/util/oidc.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/util/oidc.js @@ -1,6 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.extractJWTSubject = void 0; +exports.extractJWTSubject = extractJWTSubject; /* Copyright 2023 The Sigstore Authors. @@ -28,4 +28,3 @@ function extractJWTSubject(jwt) { return payload.sub; } } -exports.extractJWTSubject = extractJWTSubject; diff --git a/deps/npm/node_modules/@sigstore/sign/dist/util/ua.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/util/ua.js similarity index 95% rename from deps/npm/node_modules/@sigstore/sign/dist/util/ua.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/util/ua.js index c142330eb8338c..b15ff2070fb9fc 100644 --- a/deps/npm/node_modules/@sigstore/sign/dist/util/ua.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/util/ua.js @@ -23,7 +23,6 @@ const os_1 = __importDefault(require("os")); // Format User-Agent: / () // source: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent const getUserAgent = () => { - // eslint-disable-next-line @typescript-eslint/no-var-requires const packageVersion = require('../../package.json').version; const nodeVersion = process.version; const platformName = os_1.default.platform(); diff --git a/deps/npm/node_modules/@sigstore/sign/dist/witness/index.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/witness/index.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/witness/index.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/witness/index.js diff --git a/deps/npm/node_modules/@sigstore/sign/dist/witness/tlog/client.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/witness/tlog/client.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/witness/tlog/client.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/witness/tlog/client.js diff --git a/deps/npm/node_modules/@sigstore/sign/dist/witness/tlog/entry.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/witness/tlog/entry.js similarity index 87% rename from deps/npm/node_modules/@sigstore/sign/dist/witness/tlog/entry.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/witness/tlog/entry.js index f6c165380ba45d..69a3b477e54429 100644 --- a/deps/npm/node_modules/@sigstore/sign/dist/witness/tlog/entry.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/witness/tlog/entry.js @@ -1,6 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.toProposedEntry = void 0; +exports.toProposedEntry = toProposedEntry; /* Copyright 2023 The Sigstore Authors. @@ -18,21 +18,21 @@ limitations under the License. */ const bundle_1 = require("@sigstore/bundle"); const util_1 = require("../../util"); +const SHA256_ALGORITHM = 'sha256'; function toProposedEntry(content, publicKey, // TODO: Remove this parameter once have completely switched to 'dsse' entries -entryType = 'intoto') { +entryType = 'dsse') { switch (content.$case) { case 'dsseEnvelope': - // TODO: Remove this conditional once have completely switched to 'dsse' entries - if (entryType === 'dsse') { - return toProposedDSSEEntry(content.dsseEnvelope, publicKey); + // TODO: Remove this conditional once have completely ditched "intoto" entries + if (entryType === 'intoto') { + return toProposedIntotoEntry(content.dsseEnvelope, publicKey); } - return toProposedIntotoEntry(content.dsseEnvelope, publicKey); + return toProposedDSSEEntry(content.dsseEnvelope, publicKey); case 'messageSignature': return toProposedHashedRekordEntry(content.messageSignature, publicKey); } } -exports.toProposedEntry = toProposedEntry; // Returns a properly formatted Rekor "hashedrekord" entry for the given digest // and signature function toProposedHashedRekordEntry(messageSignature, publicKey) { @@ -45,7 +45,7 @@ function toProposedHashedRekordEntry(messageSignature, publicKey) { spec: { data: { hash: { - algorithm: 'sha256', + algorithm: SHA256_ALGORITHM, value: hexDigest, }, }, @@ -78,7 +78,9 @@ function toProposedDSSEEntry(envelope, publicKey) { // envelope and signature function toProposedIntotoEntry(envelope, publicKey) { // Calculate the value for the payloadHash field in the Rekor entry - const payloadHash = util_1.crypto.hash(envelope.payload).toString('hex'); + const payloadHash = util_1.crypto + .digest(SHA256_ALGORITHM, envelope.payload) + .toString('hex'); // Calculate the value for the hash field in the Rekor entry const envelopeHash = calculateDSSEHash(envelope, publicKey); // Collect values for re-creating the DSSE envelope. @@ -107,8 +109,8 @@ function toProposedIntotoEntry(envelope, publicKey) { spec: { content: { envelope: dsse, - hash: { algorithm: 'sha256', value: envelopeHash }, - payloadHash: { algorithm: 'sha256', value: payloadHash }, + hash: { algorithm: SHA256_ALGORITHM, value: envelopeHash }, + payloadHash: { algorithm: SHA256_ALGORITHM, value: payloadHash }, }, }, }; @@ -132,5 +134,7 @@ function calculateDSSEHash(envelope, publicKey) { if (envelope.signatures[0].keyid.length > 0) { dsse.signatures[0].keyid = envelope.signatures[0].keyid; } - return util_1.crypto.hash(util_1.json.canonicalize(dsse)).toString('hex'); + return util_1.crypto + .digest(SHA256_ALGORITHM, util_1.json.canonicalize(dsse)) + .toString('hex'); } diff --git a/deps/npm/node_modules/@sigstore/sign/dist/witness/tlog/index.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/witness/tlog/index.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/witness/tlog/index.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/witness/tlog/index.js diff --git a/deps/npm/node_modules/@sigstore/sign/dist/witness/tsa/client.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/witness/tsa/client.js similarity index 86% rename from deps/npm/node_modules/@sigstore/sign/dist/witness/tsa/client.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/witness/tsa/client.js index a334deb00b7756..754de3748dbb36 100644 --- a/deps/npm/node_modules/@sigstore/sign/dist/witness/tsa/client.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/witness/tsa/client.js @@ -19,6 +19,7 @@ limitations under the License. const error_1 = require("../../error"); const tsa_1 = require("../../external/tsa"); const util_1 = require("../../util"); +const SHA256_ALGORITHM = 'sha256'; class TSAClient { constructor(options) { this.tsa = new tsa_1.TimestampAuthority({ @@ -29,8 +30,10 @@ class TSAClient { } async createTimestamp(signature) { const request = { - artifactHash: util_1.crypto.hash(signature).toString('base64'), - hashAlgorithm: 'sha256', + artifactHash: util_1.crypto + .digest(SHA256_ALGORITHM, signature) + .toString('base64'), + hashAlgorithm: SHA256_ALGORITHM, }; try { return await this.tsa.createTimestamp(request); diff --git a/deps/npm/node_modules/@sigstore/sign/dist/witness/tsa/index.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/witness/tsa/index.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/witness/tsa/index.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/witness/tsa/index.js diff --git a/deps/npm/node_modules/@sigstore/sign/dist/witness/witness.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/witness/witness.js similarity index 100% rename from deps/npm/node_modules/@sigstore/sign/dist/witness/witness.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/dist/witness/witness.js diff --git a/deps/npm/node_modules/@sigstore/sign/package.json b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/package.json similarity index 78% rename from deps/npm/node_modules/@sigstore/sign/package.json rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/package.json index 4adb3d24c6fa68..fe05e8dc2d73ad 100644 --- a/deps/npm/node_modules/@sigstore/sign/package.json +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/sign/package.json @@ -1,6 +1,6 @@ { "name": "@sigstore/sign", - "version": "2.3.2", + "version": "3.0.0", "description": "Sigstore signing library", "main": "dist/index.js", "types": "dist/index.d.ts", @@ -27,20 +27,20 @@ }, "devDependencies": { "@sigstore/jest": "^0.0.0", - "@sigstore/mock": "^0.7.4", - "@sigstore/rekor-types": "^2.0.0", + "@sigstore/mock": "^0.8.0", + "@sigstore/rekor-types": "^3.0.0", "@types/make-fetch-happen": "^10.0.4", "@types/promise-retry": "^1.1.6" }, "dependencies": { - "@sigstore/bundle": "^2.3.2", - "@sigstore/core": "^1.0.0", + "@sigstore/bundle": "^3.0.0", + "@sigstore/core": "^2.0.0", "@sigstore/protobuf-specs": "^0.3.2", - "make-fetch-happen": "^13.0.1", - "proc-log": "^4.2.0", + "make-fetch-happen": "^14.0.1", + "proc-log": "^5.0.0", "promise-retry": "^2.0.1" }, "engines": { - "node": "^16.14.0 || >=18.0.0" + "node": "^18.17.0 || >=20.5.0" } } diff --git a/deps/npm/node_modules/@sigstore/verify/dist/bundle/dsse.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/bundle/dsse.js similarity index 93% rename from deps/npm/node_modules/@sigstore/verify/dist/bundle/dsse.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/bundle/dsse.js index 193f875fd1014e..1033fc422aba09 100644 --- a/deps/npm/node_modules/@sigstore/verify/dist/bundle/dsse.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/bundle/dsse.js @@ -22,7 +22,7 @@ class DSSESignatureContent { this.env = env; } compareDigest(digest) { - return core_1.crypto.bufferEqual(digest, core_1.crypto.hash(this.env.payload)); + return core_1.crypto.bufferEqual(digest, core_1.crypto.digest('sha256', this.env.payload)); } compareSignature(signature) { return core_1.crypto.bufferEqual(signature, this.signature); diff --git a/deps/npm/node_modules/@sigstore/verify/dist/bundle/index.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/bundle/index.js similarity index 97% rename from deps/npm/node_modules/@sigstore/verify/dist/bundle/index.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/bundle/index.js index 63f8d4c4998811..4287d8032b75f0 100644 --- a/deps/npm/node_modules/@sigstore/verify/dist/bundle/index.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/bundle/index.js @@ -1,6 +1,7 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.signatureContent = exports.toSignedEntity = void 0; +exports.toSignedEntity = toSignedEntity; +exports.signatureContent = signatureContent; const core_1 = require("@sigstore/core"); const dsse_1 = require("./dsse"); const message_1 = require("./message"); @@ -26,7 +27,6 @@ function toSignedEntity(bundle, artifact) { timestamps, }; } -exports.toSignedEntity = toSignedEntity; function signatureContent(bundle, artifact) { switch (bundle.content.$case) { case 'dsseEnvelope': @@ -35,7 +35,6 @@ function signatureContent(bundle, artifact) { return new message_1.MessageSignatureContent(bundle.content.messageSignature, artifact); } } -exports.signatureContent = signatureContent; function key(bundle) { switch (bundle.verificationMaterial.content.$case) { case 'publicKey': diff --git a/deps/npm/node_modules/@sigstore/verify/dist/bundle/message.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/bundle/message.js similarity index 100% rename from deps/npm/node_modules/@sigstore/verify/dist/bundle/message.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/bundle/message.js diff --git a/deps/npm/node_modules/@sigstore/verify/dist/error.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/error.js similarity index 100% rename from deps/npm/node_modules/@sigstore/verify/dist/error.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/error.js diff --git a/deps/npm/node_modules/@sigstore/verify/dist/index.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/index.js similarity index 100% rename from deps/npm/node_modules/@sigstore/verify/dist/index.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/index.js diff --git a/deps/npm/node_modules/@sigstore/verify/dist/key/certificate.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/key/certificate.js similarity index 99% rename from deps/npm/node_modules/@sigstore/verify/dist/key/certificate.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/key/certificate.js index c9140dd98d58a6..a916de0e51e712 100644 --- a/deps/npm/node_modules/@sigstore/verify/dist/key/certificate.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/key/certificate.js @@ -1,6 +1,7 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.CertificateChainVerifier = exports.verifyCertificateChain = void 0; +exports.CertificateChainVerifier = void 0; +exports.verifyCertificateChain = verifyCertificateChain; const error_1 = require("../error"); const trust_1 = require("../trust"); function verifyCertificateChain(leaf, certificateAuthorities) { @@ -32,7 +33,6 @@ function verifyCertificateChain(leaf, certificateAuthorities) { cause: error, }); } -exports.verifyCertificateChain = verifyCertificateChain; class CertificateChainVerifier { constructor(opts) { this.untrustedCert = opts.untrustedCert; diff --git a/deps/npm/node_modules/@sigstore/verify/dist/key/index.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/key/index.js similarity index 97% rename from deps/npm/node_modules/@sigstore/verify/dist/key/index.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/key/index.js index 682a306803a991..cc894aab95a5d5 100644 --- a/deps/npm/node_modules/@sigstore/verify/dist/key/index.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/key/index.js @@ -1,6 +1,7 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.verifyCertificate = exports.verifyPublicKey = void 0; +exports.verifyPublicKey = verifyPublicKey; +exports.verifyCertificate = verifyCertificate; /* Copyright 2023 The Sigstore Authors. @@ -34,7 +35,6 @@ function verifyPublicKey(hint, timestamps, trustMaterial) { }); return { key: key.publicKey }; } -exports.verifyPublicKey = verifyPublicKey; function verifyCertificate(leaf, timestamps, trustMaterial) { // Check that leaf certificate chains to a trusted CA const path = (0, certificate_1.verifyCertificateChain)(leaf, trustMaterial.certificateAuthorities); @@ -51,10 +51,10 @@ function verifyCertificate(leaf, timestamps, trustMaterial) { signer: getSigner(path[0]), }; } -exports.verifyCertificate = verifyCertificate; function getSigner(cert) { let issuer; const issuerExtension = cert.extension(OID_FULCIO_ISSUER_V2); + /* istanbul ignore next */ if (issuerExtension) { issuer = issuerExtension.valueObj.subs?.[0]?.value.toString('ascii'); } diff --git a/deps/npm/node_modules/@sigstore/verify/dist/key/sct.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/key/sct.js similarity index 97% rename from deps/npm/node_modules/@sigstore/verify/dist/key/sct.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/key/sct.js index aea412840e1039..8eca48738096ee 100644 --- a/deps/npm/node_modules/@sigstore/verify/dist/key/sct.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/key/sct.js @@ -1,6 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.verifySCTs = void 0; +exports.verifySCTs = verifySCTs; /* Copyright 2023 The Sigstore Authors. @@ -52,7 +52,7 @@ function verifySCTs(cert, issuer, ctlogs) { // https://www.rfc-editor.org/rfc/rfc6962#section-3.2 const preCert = new core_1.ByteStream(); // Calculate hash of the issuer's public key - const issuerId = core_1.crypto.hash(issuer.publicKey); + const issuerId = core_1.crypto.digest('sha256', issuer.publicKey); preCert.appendView(issuerId); // Re-encodes the certificate to DER after removing the SCT extension const tbs = clone.tbsCertificate.toDER(); @@ -76,4 +76,3 @@ function verifySCTs(cert, issuer, ctlogs) { return sct.logID; }); } -exports.verifySCTs = verifySCTs; diff --git a/deps/npm/node_modules/@sigstore/verify/dist/policy.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/policy.js similarity index 93% rename from deps/npm/node_modules/@sigstore/verify/dist/policy.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/policy.js index 731e5c83328475..f5960cf047b84b 100644 --- a/deps/npm/node_modules/@sigstore/verify/dist/policy.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/policy.js @@ -1,6 +1,7 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.verifyExtensions = exports.verifySubjectAlternativeName = void 0; +exports.verifySubjectAlternativeName = verifySubjectAlternativeName; +exports.verifyExtensions = verifyExtensions; const error_1 = require("./error"); function verifySubjectAlternativeName(policyIdentity, signerIdentity) { if (signerIdentity === undefined || !signerIdentity.match(policyIdentity)) { @@ -10,7 +11,6 @@ function verifySubjectAlternativeName(policyIdentity, signerIdentity) { }); } } -exports.verifySubjectAlternativeName = verifySubjectAlternativeName; function verifyExtensions(policyExtensions, signerExtensions = {}) { let key; for (key in policyExtensions) { @@ -22,4 +22,3 @@ function verifyExtensions(policyExtensions, signerExtensions = {}) { } } } -exports.verifyExtensions = verifyExtensions; diff --git a/deps/npm/node_modules/@sigstore/verify/dist/shared.types.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/shared.types.js similarity index 100% rename from deps/npm/node_modules/@sigstore/verify/dist/shared.types.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/shared.types.js diff --git a/deps/npm/node_modules/@sigstore/verify/dist/timestamp/checkpoint.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/timestamp/checkpoint.js similarity index 99% rename from deps/npm/node_modules/@sigstore/verify/dist/timestamp/checkpoint.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/timestamp/checkpoint.js index 04a87383f0fd17..46619b675f8863 100644 --- a/deps/npm/node_modules/@sigstore/verify/dist/timestamp/checkpoint.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/timestamp/checkpoint.js @@ -1,6 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.verifyCheckpoint = void 0; +exports.verifyCheckpoint = verifyCheckpoint; /* Copyright 2023 The Sigstore Authors. @@ -61,7 +61,6 @@ function verifyCheckpoint(entry, tlogs) { }); } } -exports.verifyCheckpoint = verifyCheckpoint; // Verifies the signatures in the SignedNote. For each signature, the // corresponding transparency log is looked up by the key hint and the // signature is verified against the public key in the transparency log. diff --git a/deps/npm/node_modules/@sigstore/verify/dist/timestamp/index.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/timestamp/index.js similarity index 96% rename from deps/npm/node_modules/@sigstore/verify/dist/timestamp/index.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/timestamp/index.js index 0da554f648d25e..56e948de19338d 100644 --- a/deps/npm/node_modules/@sigstore/verify/dist/timestamp/index.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/timestamp/index.js @@ -1,6 +1,7 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.verifyTLogTimestamp = exports.verifyTSATimestamp = void 0; +exports.verifyTSATimestamp = verifyTSATimestamp; +exports.verifyTLogTimestamp = verifyTLogTimestamp; const error_1 = require("../error"); const checkpoint_1 = require("./checkpoint"); const merkle_1 = require("./merkle"); @@ -14,7 +15,6 @@ function verifyTSATimestamp(timestamp, data, timestampAuthorities) { timestamp: timestamp.signingTime, }; } -exports.verifyTSATimestamp = verifyTSATimestamp; function verifyTLogTimestamp(entry, tlogAuthorities) { let inclusionVerified = false; if (isTLogEntryWithInclusionPromise(entry)) { @@ -38,7 +38,6 @@ function verifyTLogTimestamp(entry, tlogAuthorities) { timestamp: new Date(Number(entry.integratedTime) * 1000), }; } -exports.verifyTLogTimestamp = verifyTLogTimestamp; function isTLogEntryWithInclusionPromise(entry) { return entry.inclusionPromise !== undefined; } diff --git a/deps/npm/node_modules/@sigstore/verify/dist/timestamp/merkle.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/timestamp/merkle.js similarity index 95% rename from deps/npm/node_modules/@sigstore/verify/dist/timestamp/merkle.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/timestamp/merkle.js index 9895d01b7abc03..f57cae42002bd0 100644 --- a/deps/npm/node_modules/@sigstore/verify/dist/timestamp/merkle.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/timestamp/merkle.js @@ -1,6 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.verifyMerkleInclusion = void 0; +exports.verifyMerkleInclusion = verifyMerkleInclusion; /* Copyright 2023 The Sigstore Authors. @@ -53,7 +53,6 @@ function verifyMerkleInclusion(entry) { }); } } -exports.verifyMerkleInclusion = verifyMerkleInclusion; // Breaks down inclusion proof for a leaf at the specified index in a tree of // the specified size. The split point is where paths to the index leaf and // the (size - 1) leaf diverge. Returns lengths of the bottom and upper proof @@ -98,8 +97,8 @@ function bitLength(n) { // Hashing logic according to RFC6962. // https://datatracker.ietf.org/doc/html/rfc6962#section-2 function hashChildren(left, right) { - return core_1.crypto.hash(RFC6962_NODE_HASH_PREFIX, left, right); + return core_1.crypto.digest('sha256', RFC6962_NODE_HASH_PREFIX, left, right); } function hashLeaf(leaf) { - return core_1.crypto.hash(RFC6962_LEAF_HASH_PREFIX, leaf); + return core_1.crypto.digest('sha256', RFC6962_LEAF_HASH_PREFIX, leaf); } diff --git a/deps/npm/node_modules/@sigstore/verify/dist/timestamp/set.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/timestamp/set.js similarity index 98% rename from deps/npm/node_modules/@sigstore/verify/dist/timestamp/set.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/timestamp/set.js index a6357c06999cba..5d3f47bb88746a 100644 --- a/deps/npm/node_modules/@sigstore/verify/dist/timestamp/set.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/timestamp/set.js @@ -1,6 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.verifyTLogSET = void 0; +exports.verifyTLogSET = verifyTLogSET; /* Copyright 2023 The Sigstore Authors. @@ -46,7 +46,6 @@ function verifyTLogSET(entry, tlogs) { }); } } -exports.verifyTLogSET = verifyTLogSET; // Returns a properly formatted "VerificationPayload" for one of the // transaction log entires in the given bundle which can be used for SET // verification. diff --git a/deps/npm/node_modules/@sigstore/verify/dist/timestamp/tsa.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/timestamp/tsa.js similarity index 98% rename from deps/npm/node_modules/@sigstore/verify/dist/timestamp/tsa.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/timestamp/tsa.js index 7b095bc3a7f908..70388cd06c52d6 100644 --- a/deps/npm/node_modules/@sigstore/verify/dist/timestamp/tsa.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/timestamp/tsa.js @@ -1,6 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.verifyRFC3161Timestamp = void 0; +exports.verifyRFC3161Timestamp = verifyRFC3161Timestamp; const core_1 = require("@sigstore/core"); const error_1 = require("../error"); const certificate_1 = require("../key/certificate"); @@ -35,7 +35,6 @@ function verifyRFC3161Timestamp(timestamp, data, timestampAuthorities) { }); } } -exports.verifyRFC3161Timestamp = verifyRFC3161Timestamp; function verifyTimestampForCA(timestamp, data, ca) { const [leaf, ...cas] = ca.certChain; const signingKey = core_1.crypto.createPublicKey(leaf.publicKey); diff --git a/deps/npm/node_modules/@sigstore/verify/dist/tlog/dsse.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/tlog/dsse.js similarity index 98% rename from deps/npm/node_modules/@sigstore/verify/dist/tlog/dsse.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/tlog/dsse.js index bf430e61dde563..d71ed8c6e7ad9a 100644 --- a/deps/npm/node_modules/@sigstore/verify/dist/tlog/dsse.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/tlog/dsse.js @@ -1,6 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.verifyDSSETLogBody = void 0; +exports.verifyDSSETLogBody = verifyDSSETLogBody; /* Copyright 2023 The Sigstore Authors. @@ -29,7 +29,6 @@ function verifyDSSETLogBody(tlogEntry, content) { }); } } -exports.verifyDSSETLogBody = verifyDSSETLogBody; // Compare the given dsse v0.0.1 tlog entry to the given DSSE envelope. function verifyDSSE001TLogBody(tlogEntry, content) { // Ensure the bundle's DSSE only contains a single signature diff --git a/deps/npm/node_modules/@sigstore/verify/dist/tlog/hashedrekord.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/tlog/hashedrekord.js similarity index 97% rename from deps/npm/node_modules/@sigstore/verify/dist/tlog/hashedrekord.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/tlog/hashedrekord.js index d1758858f030d8..c4aa345b57ba7a 100644 --- a/deps/npm/node_modules/@sigstore/verify/dist/tlog/hashedrekord.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/tlog/hashedrekord.js @@ -1,6 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.verifyHashedRekordTLogBody = void 0; +exports.verifyHashedRekordTLogBody = verifyHashedRekordTLogBody; /* Copyright 2023 The Sigstore Authors. @@ -29,7 +29,6 @@ function verifyHashedRekordTLogBody(tlogEntry, content) { }); } } -exports.verifyHashedRekordTLogBody = verifyHashedRekordTLogBody; // Compare the given hashedrekord v0.0.1 tlog entry to the given message // signature function verifyHashedrekord001TLogBody(tlogEntry, content) { diff --git a/deps/npm/node_modules/@sigstore/verify/dist/tlog/index.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/tlog/index.js similarity index 98% rename from deps/npm/node_modules/@sigstore/verify/dist/tlog/index.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/tlog/index.js index adfc70ed51ad05..da235360c594a8 100644 --- a/deps/npm/node_modules/@sigstore/verify/dist/tlog/index.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/tlog/index.js @@ -1,6 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.verifyTLogBody = void 0; +exports.verifyTLogBody = verifyTLogBody; /* Copyright 2023 The Sigstore Authors. @@ -45,4 +45,3 @@ function verifyTLogBody(entry, sigContent) { }); } } -exports.verifyTLogBody = verifyTLogBody; diff --git a/deps/npm/node_modules/@sigstore/verify/dist/tlog/intoto.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/tlog/intoto.js similarity index 98% rename from deps/npm/node_modules/@sigstore/verify/dist/tlog/intoto.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/tlog/intoto.js index 74c7f50d763e1d..9096ae9418cc30 100644 --- a/deps/npm/node_modules/@sigstore/verify/dist/tlog/intoto.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/tlog/intoto.js @@ -1,6 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.verifyIntotoTLogBody = void 0; +exports.verifyIntotoTLogBody = verifyIntotoTLogBody; /* Copyright 2023 The Sigstore Authors. @@ -29,7 +29,6 @@ function verifyIntotoTLogBody(tlogEntry, content) { }); } } -exports.verifyIntotoTLogBody = verifyIntotoTLogBody; // Compare the given intoto v0.0.2 tlog entry to the given DSSE envelope. function verifyIntoto002TLogBody(tlogEntry, content) { // Ensure the bundle's DSSE contains a single signature diff --git a/deps/npm/node_modules/@sigstore/verify/dist/trust/filter.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/trust/filter.js similarity index 93% rename from deps/npm/node_modules/@sigstore/verify/dist/trust/filter.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/trust/filter.js index c09d055913c4c7..880a16cf1940ea 100644 --- a/deps/npm/node_modules/@sigstore/verify/dist/trust/filter.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/trust/filter.js @@ -1,12 +1,12 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.filterTLogAuthorities = exports.filterCertAuthorities = void 0; +exports.filterCertAuthorities = filterCertAuthorities; +exports.filterTLogAuthorities = filterTLogAuthorities; function filterCertAuthorities(certAuthorities, criteria) { return certAuthorities.filter((ca) => { return (ca.validFor.start <= criteria.start && ca.validFor.end >= criteria.end); }); } -exports.filterCertAuthorities = filterCertAuthorities; // Filter the list of tlog instances to only those which match the given log // ID and have public keys which are valid for the given integrated time. function filterTLogAuthorities(tlogAuthorities, criteria) { @@ -21,4 +21,3 @@ function filterTLogAuthorities(tlogAuthorities, criteria) { criteria.targetDate <= tlog.validFor.end); }); } -exports.filterTLogAuthorities = filterTLogAuthorities; diff --git a/deps/npm/node_modules/@sigstore/verify/dist/trust/index.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/trust/index.js similarity index 95% rename from deps/npm/node_modules/@sigstore/verify/dist/trust/index.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/trust/index.js index 954de558415902..bfab2eb4f9975a 100644 --- a/deps/npm/node_modules/@sigstore/verify/dist/trust/index.js +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/trust/index.js @@ -1,6 +1,7 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.toTrustMaterial = exports.filterTLogAuthorities = exports.filterCertAuthorities = void 0; +exports.filterTLogAuthorities = exports.filterCertAuthorities = void 0; +exports.toTrustMaterial = toTrustMaterial; /* Copyright 2023 The Sigstore Authors. @@ -34,7 +35,6 @@ function toTrustMaterial(root, keys) { publicKey: keyFinder, }; } -exports.toTrustMaterial = toTrustMaterial; function createTLogAuthority(tlogInstance) { const keyDetails = tlogInstance.publicKey.keyDetails; const keyType = keyDetails === protobuf_specs_1.PublicKeyDetails.PKCS1_RSA_PKCS1V5 || @@ -54,6 +54,7 @@ function createTLogAuthority(tlogInstance) { }; } function createCertAuthority(ca) { + /* istanbul ignore next */ return { certChain: ca.certChain.certificates.map((cert) => { return core_1.X509Certificate.parse(cert.rawBytes); @@ -76,6 +77,7 @@ function keyLocator(keys) { return { publicKey: core_1.crypto.createPublicKey(key.rawBytes), validFor: (date) => { + /* istanbul ignore next */ return ((key.validFor?.start || BEGINNING_OF_TIME) <= date && (key.validFor?.end || END_OF_TIME) >= date); }, diff --git a/deps/npm/node_modules/@sigstore/verify/dist/trust/trust.types.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/trust/trust.types.js similarity index 100% rename from deps/npm/node_modules/@sigstore/verify/dist/trust/trust.types.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/trust/trust.types.js diff --git a/deps/npm/node_modules/@sigstore/verify/dist/verifier.js b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/verifier.js similarity index 100% rename from deps/npm/node_modules/@sigstore/verify/dist/verifier.js rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/dist/verifier.js diff --git a/deps/npm/node_modules/@sigstore/verify/package.json b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/package.json similarity index 86% rename from deps/npm/node_modules/@sigstore/verify/package.json rename to deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/package.json index cd0c845a797e47..edf72b8bfd9680 100644 --- a/deps/npm/node_modules/@sigstore/verify/package.json +++ b/deps/npm/node_modules/sigstore/node_modules/@sigstore/verify/package.json @@ -1,6 +1,6 @@ { "name": "@sigstore/verify", - "version": "1.2.1", + "version": "2.0.0", "description": "Verification of Sigstore signatures", "main": "dist/index.js", "types": "dist/index.d.ts", @@ -27,10 +27,10 @@ }, "dependencies": { "@sigstore/protobuf-specs": "^0.3.2", - "@sigstore/bundle": "^2.3.2", - "@sigstore/core": "^1.1.0" + "@sigstore/bundle": "^3.0.0", + "@sigstore/core": "^2.0.0" }, "engines": { - "node": "^16.14.0 || >=18.0.0" + "node": "^18.17.0 || >=20.5.0" } } diff --git a/deps/npm/node_modules/sigstore/package.json b/deps/npm/node_modules/sigstore/package.json index fa8744bf304a3f..0f798a263657b4 100644 --- a/deps/npm/node_modules/sigstore/package.json +++ b/deps/npm/node_modules/sigstore/package.json @@ -1,6 +1,6 @@ { "name": "sigstore", - "version": "2.3.1", + "version": "3.0.0", "description": "code-signing for npm packages", "main": "dist/index.js", "types": "dist/index.d.ts", @@ -27,21 +27,21 @@ "provenance": true }, "devDependencies": { - "@sigstore/rekor-types": "^2.0.0", + "@sigstore/rekor-types": "^3.0.0", "@sigstore/jest": "^0.0.0", - "@sigstore/mock": "^0.7.4", - "@tufjs/repo-mock": "^2.0.1", + "@sigstore/mock": "^0.8.0", + "@tufjs/repo-mock": "^3.0.1", "@types/make-fetch-happen": "^10.0.4" }, "dependencies": { - "@sigstore/bundle": "^2.3.2", - "@sigstore/core": "^1.0.0", + "@sigstore/bundle": "^3.0.0", + "@sigstore/core": "^2.0.0", "@sigstore/protobuf-specs": "^0.3.2", - "@sigstore/sign": "^2.3.2", - "@sigstore/tuf": "^2.3.4", - "@sigstore/verify": "^1.2.1" + "@sigstore/sign": "^3.0.0", + "@sigstore/tuf": "^3.0.0", + "@sigstore/verify": "^2.0.0" }, "engines": { - "node": "^16.14.0 || >=18.0.0" + "node": "^18.17.0 || >=20.5.0" } } diff --git a/deps/npm/node_modules/spdx-license-ids/deprecated.json b/deps/npm/node_modules/spdx-license-ids/deprecated.json index 278531e40c613d..4f70a14c7469da 100644 --- a/deps/npm/node_modules/spdx-license-ids/deprecated.json +++ b/deps/npm/node_modules/spdx-license-ids/deprecated.json @@ -19,6 +19,7 @@ "LGPL-2.0", "LGPL-2.1", "LGPL-3.0", + "Net-SNMP", "Nunit", "StandardML-NJ", "bzip2-1.0.5", diff --git a/deps/npm/node_modules/spdx-license-ids/index.json b/deps/npm/node_modules/spdx-license-ids/index.json index c7686a710d61d1..f43d5016bd95ab 100644 --- a/deps/npm/node_modules/spdx-license-ids/index.json +++ b/deps/npm/node_modules/spdx-license-ids/index.json @@ -197,6 +197,8 @@ "DRL-1.0", "DRL-1.1", "DSDP", + "DocBook-Schema", + "DocBook-XML", "Dotseqn", "ECL-1.0", "ECL-2.0", @@ -260,6 +262,7 @@ "Glulxe", "Graphics-Gems", "Gutmann", + "HIDAPI", "HP-1986", "HP-1989", "HPND", @@ -270,6 +273,7 @@ "HPND-Kevlin-Henney", "HPND-MIT-disclaimer", "HPND-Markus-Kuhn", + "HPND-Netrek", "HPND-Pbmplus", "HPND-UC", "HPND-UC-export-US", @@ -403,7 +407,6 @@ "NTP", "NTP-0", "Naumen", - "Net-SNMP", "NetCDF", "Newsletr", "Nokia", @@ -485,6 +488,7 @@ "RSCPL", "Rdisc", "Ruby", + "Ruby-pty", "SAX-PD", "SAX-PD-2.0", "SCEA", @@ -541,6 +545,7 @@ "UMich-Merit", "UPL-1.0", "URT-RLE", + "Ubuntu-font-1.0", "Unicode-3.0", "Unicode-DFS-2015", "Unicode-DFS-2016", @@ -559,6 +564,7 @@ "Wsuipa", "X11", "X11-distribute-modifications-variant", + "X11-swapped", "XFree86-1.1", "XSkat", "Xdebug-1.03", diff --git a/deps/npm/node_modules/spdx-license-ids/package.json b/deps/npm/node_modules/spdx-license-ids/package.json index 5f5ed9554f2579..7ab34aab6b8b1d 100644 --- a/deps/npm/node_modules/spdx-license-ids/package.json +++ b/deps/npm/node_modules/spdx-license-ids/package.json @@ -1,6 +1,6 @@ { "name": "spdx-license-ids", - "version": "3.0.18", + "version": "3.0.20", "description": "A list of SPDX license identifiers", "repository": "jslicense/spdx-license-ids", "author": "Shinnosuke Watanabe (https://github.com/shinnn)", diff --git a/deps/npm/node_modules/tuf-js/dist/config.js b/deps/npm/node_modules/tuf-js/dist/config.js index 6845679942fec5..c66d76af86b98c 100644 --- a/deps/npm/node_modules/tuf-js/dist/config.js +++ b/deps/npm/node_modules/tuf-js/dist/config.js @@ -2,7 +2,7 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.defaultConfig = void 0; exports.defaultConfig = { - maxRootRotations: 32, + maxRootRotations: 256, maxDelegations: 32, rootMaxLength: 512000, //bytes timestampMaxLength: 16384, // bytes diff --git a/deps/npm/node_modules/tuf-js/dist/updater.js b/deps/npm/node_modules/tuf-js/dist/updater.js index 5317f7e14659ac..8d5eb4428f044a 100644 --- a/deps/npm/node_modules/tuf-js/dist/updater.js +++ b/deps/npm/node_modules/tuf-js/dist/updater.js @@ -144,7 +144,7 @@ class Updater { const rootVersion = this.trustedSet.root.signed.version; const lowerBound = rootVersion + 1; const upperBound = lowerBound + this.config.maxRootRotations; - for (let version = lowerBound; version <= upperBound; version++) { + for (let version = lowerBound; version < upperBound; version++) { const rootUrl = url.join(this.metadataBaseUrl, `${version}.root.json`); try { // Client workflow 5.3.3: download new root metadata file @@ -155,7 +155,13 @@ class Updater { this.persistMetadata(models_1.MetadataKind.Root, bytesData); } catch (error) { - break; + if (error instanceof error_1.DownloadHTTPError) { + // 404/403 means current root is newest available + if ([403, 404].includes(error.statusCode)) { + break; + } + } + throw error; } } } @@ -247,7 +253,8 @@ class Updater { const version = this.trustedSet.root.signed.consistentSnapshot ? metaInfo.version : undefined; - const metadataUrl = url.join(this.metadataBaseUrl, version ? `${version}.${role}.json` : `${role}.json`); + const encodedRole = encodeURIComponent(role); + const metadataUrl = url.join(this.metadataBaseUrl, version ? `${version}.${encodedRole}.json` : `${encodedRole}.json`); try { // Client workflow 5.6.1: download targets metadata file const bytesData = await this.fetcher.downloadBytes(metadataUrl, maxLength); @@ -280,7 +287,6 @@ class Updater { while (visitedRoleNames.size <= this.config.maxDelegations && delegationsToVisit.length > 0) { // Pop the role name from the top of the stack. - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion const { roleName, parentRoleName } = delegationsToVisit.pop(); // Skip any visited current role to prevent cycles. // Client workflow 5.6.7.1: skip already-visited roles @@ -330,13 +336,14 @@ class Updater { return path.join(this.targetDir, filePath); } persistMetadata(metaDataName, bytesData) { + const encodedName = encodeURIComponent(metaDataName); try { - const filePath = path.join(this.dir, `${metaDataName}.json`); + const filePath = path.join(this.dir, `${encodedName}.json`); log('WRITE %s', filePath); fs.writeFileSync(filePath, bytesData.toString('utf8')); } catch (error) { - throw new error_1.PersistError(`Failed to persist metadata ${metaDataName} error: ${error}`); + throw new error_1.PersistError(`Failed to persist metadata ${encodedName} error: ${error}`); } } } diff --git a/deps/npm/node_modules/tuf-js/dist/utils/url.js b/deps/npm/node_modules/tuf-js/dist/utils/url.js index ce67fe2c230535..359d1f3ef385b7 100644 --- a/deps/npm/node_modules/tuf-js/dist/utils/url.js +++ b/deps/npm/node_modules/tuf-js/dist/utils/url.js @@ -1,11 +1,10 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.join = void 0; +exports.join = join; const url_1 = require("url"); function join(base, path) { return new url_1.URL(ensureTrailingSlash(base) + removeLeadingSlash(path)).toString(); } -exports.join = join; function ensureTrailingSlash(path) { return path.endsWith('/') ? path : path + '/'; } diff --git a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/agent/lib/agents.js b/deps/npm/node_modules/tuf-js/node_modules/@npmcli/agent/lib/agents.js deleted file mode 100644 index c541b93001517e..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/agent/lib/agents.js +++ /dev/null @@ -1,206 +0,0 @@ -'use strict' - -const net = require('net') -const tls = require('tls') -const { once } = require('events') -const timers = require('timers/promises') -const { normalizeOptions, cacheOptions } = require('./options') -const { getProxy, getProxyAgent, proxyCache } = require('./proxy.js') -const Errors = require('./errors.js') -const { Agent: AgentBase } = require('agent-base') - -module.exports = class Agent extends AgentBase { - #options - #timeouts - #proxy - #noProxy - #ProxyAgent - - constructor (options = {}) { - const { timeouts, proxy, noProxy, ...normalizedOptions } = normalizeOptions(options) - - super(normalizedOptions) - - this.#options = normalizedOptions - this.#timeouts = timeouts - - if (proxy) { - this.#proxy = new URL(proxy) - this.#noProxy = noProxy - this.#ProxyAgent = getProxyAgent(proxy) - } - } - - get proxy () { - return this.#proxy ? { url: this.#proxy } : {} - } - - #getProxy (options) { - if (!this.#proxy) { - return - } - - const proxy = getProxy(`${options.protocol}//${options.host}:${options.port}`, { - proxy: this.#proxy, - noProxy: this.#noProxy, - }) - - if (!proxy) { - return - } - - const cacheKey = cacheOptions({ - ...options, - ...this.#options, - timeouts: this.#timeouts, - proxy, - }) - - if (proxyCache.has(cacheKey)) { - return proxyCache.get(cacheKey) - } - - let ProxyAgent = this.#ProxyAgent - if (Array.isArray(ProxyAgent)) { - ProxyAgent = this.isSecureEndpoint(options) ? ProxyAgent[1] : ProxyAgent[0] - } - - const proxyAgent = new ProxyAgent(proxy, { - ...this.#options, - socketOptions: { family: this.#options.family }, - }) - proxyCache.set(cacheKey, proxyAgent) - - return proxyAgent - } - - // takes an array of promises and races them against the connection timeout - // which will throw the necessary error if it is hit. This will return the - // result of the promise race. - async #timeoutConnection ({ promises, options, timeout }, ac = new AbortController()) { - if (timeout) { - const connectionTimeout = timers.setTimeout(timeout, null, { signal: ac.signal }) - .then(() => { - throw new Errors.ConnectionTimeoutError(`${options.host}:${options.port}`) - }).catch((err) => { - if (err.name === 'AbortError') { - return - } - throw err - }) - promises.push(connectionTimeout) - } - - let result - try { - result = await Promise.race(promises) - ac.abort() - } catch (err) { - ac.abort() - throw err - } - return result - } - - async connect (request, options) { - // if the connection does not have its own lookup function - // set, then use the one from our options - options.lookup ??= this.#options.lookup - - let socket - let timeout = this.#timeouts.connection - const isSecureEndpoint = this.isSecureEndpoint(options) - - const proxy = this.#getProxy(options) - if (proxy) { - // some of the proxies will wait for the socket to fully connect before - // returning so we have to await this while also racing it against the - // connection timeout. - const start = Date.now() - socket = await this.#timeoutConnection({ - options, - timeout, - promises: [proxy.connect(request, options)], - }) - // see how much time proxy.connect took and subtract it from - // the timeout - if (timeout) { - timeout = timeout - (Date.now() - start) - } - } else { - socket = (isSecureEndpoint ? tls : net).connect(options) - } - - socket.setKeepAlive(this.keepAlive, this.keepAliveMsecs) - socket.setNoDelay(this.keepAlive) - - const abortController = new AbortController() - const { signal } = abortController - - const connectPromise = socket[isSecureEndpoint ? 'secureConnecting' : 'connecting'] - ? once(socket, isSecureEndpoint ? 'secureConnect' : 'connect', { signal }) - : Promise.resolve() - - await this.#timeoutConnection({ - options, - timeout, - promises: [ - connectPromise, - once(socket, 'error', { signal }).then((err) => { - throw err[0] - }), - ], - }, abortController) - - if (this.#timeouts.idle) { - socket.setTimeout(this.#timeouts.idle, () => { - socket.destroy(new Errors.IdleTimeoutError(`${options.host}:${options.port}`)) - }) - } - - return socket - } - - addRequest (request, options) { - const proxy = this.#getProxy(options) - // it would be better to call proxy.addRequest here but this causes the - // http-proxy-agent to call its super.addRequest which causes the request - // to be added to the agent twice. since we only support 3 agents - // currently (see the required agents in proxy.js) we have manually - // checked that the only public methods we need to call are called in the - // next block. this could change in the future and presumably we would get - // failing tests until we have properly called the necessary methods on - // each of our proxy agents - if (proxy?.setRequestProps) { - proxy.setRequestProps(request, options) - } - - request.setHeader('connection', this.keepAlive ? 'keep-alive' : 'close') - - if (this.#timeouts.response) { - let responseTimeout - request.once('finish', () => { - setTimeout(() => { - request.destroy(new Errors.ResponseTimeoutError(request, this.#proxy)) - }, this.#timeouts.response) - }) - request.once('response', () => { - clearTimeout(responseTimeout) - }) - } - - if (this.#timeouts.transfer) { - let transferTimeout - request.once('response', (res) => { - setTimeout(() => { - res.destroy(new Errors.TransferTimeoutError(request, this.#proxy)) - }, this.#timeouts.transfer) - res.once('close', () => { - clearTimeout(transferTimeout) - }) - }) - } - - return super.addRequest(request, options) - } -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/agent/lib/dns.js b/deps/npm/node_modules/tuf-js/node_modules/@npmcli/agent/lib/dns.js deleted file mode 100644 index 3c6946c566d736..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/agent/lib/dns.js +++ /dev/null @@ -1,53 +0,0 @@ -'use strict' - -const { LRUCache } = require('lru-cache') -const dns = require('dns') - -// this is a factory so that each request can have its own opts (i.e. ttl) -// while still sharing the cache across all requests -const cache = new LRUCache({ max: 50 }) - -const getOptions = ({ - family = 0, - hints = dns.ADDRCONFIG, - all = false, - verbatim = undefined, - ttl = 5 * 60 * 1000, - lookup = dns.lookup, -}) => ({ - // hints and lookup are returned since both are top level properties to (net|tls).connect - hints, - lookup: (hostname, ...args) => { - const callback = args.pop() // callback is always last arg - const lookupOptions = args[0] ?? {} - - const options = { - family, - hints, - all, - verbatim, - ...(typeof lookupOptions === 'number' ? { family: lookupOptions } : lookupOptions), - } - - const key = JSON.stringify({ hostname, ...options }) - - if (cache.has(key)) { - const cached = cache.get(key) - return process.nextTick(callback, null, ...cached) - } - - lookup(hostname, options, (err, ...result) => { - if (err) { - return callback(err) - } - - cache.set(key, result, { ttl }) - return callback(null, ...result) - }) - }, -}) - -module.exports = { - cache, - getOptions, -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/agent/lib/errors.js b/deps/npm/node_modules/tuf-js/node_modules/@npmcli/agent/lib/errors.js deleted file mode 100644 index 70475aec8eb357..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/agent/lib/errors.js +++ /dev/null @@ -1,61 +0,0 @@ -'use strict' - -class InvalidProxyProtocolError extends Error { - constructor (url) { - super(`Invalid protocol \`${url.protocol}\` connecting to proxy \`${url.host}\``) - this.code = 'EINVALIDPROXY' - this.proxy = url - } -} - -class ConnectionTimeoutError extends Error { - constructor (host) { - super(`Timeout connecting to host \`${host}\``) - this.code = 'ECONNECTIONTIMEOUT' - this.host = host - } -} - -class IdleTimeoutError extends Error { - constructor (host) { - super(`Idle timeout reached for host \`${host}\``) - this.code = 'EIDLETIMEOUT' - this.host = host - } -} - -class ResponseTimeoutError extends Error { - constructor (request, proxy) { - let msg = 'Response timeout ' - if (proxy) { - msg += `from proxy \`${proxy.host}\` ` - } - msg += `connecting to host \`${request.host}\`` - super(msg) - this.code = 'ERESPONSETIMEOUT' - this.proxy = proxy - this.request = request - } -} - -class TransferTimeoutError extends Error { - constructor (request, proxy) { - let msg = 'Transfer timeout ' - if (proxy) { - msg += `from proxy \`${proxy.host}\` ` - } - msg += `for \`${request.host}\`` - super(msg) - this.code = 'ETRANSFERTIMEOUT' - this.proxy = proxy - this.request = request - } -} - -module.exports = { - InvalidProxyProtocolError, - ConnectionTimeoutError, - IdleTimeoutError, - ResponseTimeoutError, - TransferTimeoutError, -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/agent/lib/index.js b/deps/npm/node_modules/tuf-js/node_modules/@npmcli/agent/lib/index.js deleted file mode 100644 index b33d6eaef07a21..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/agent/lib/index.js +++ /dev/null @@ -1,56 +0,0 @@ -'use strict' - -const { LRUCache } = require('lru-cache') -const { normalizeOptions, cacheOptions } = require('./options') -const { getProxy, proxyCache } = require('./proxy.js') -const dns = require('./dns.js') -const Agent = require('./agents.js') - -const agentCache = new LRUCache({ max: 20 }) - -const getAgent = (url, { agent, proxy, noProxy, ...options } = {}) => { - // false has meaning so this can't be a simple truthiness check - if (agent != null) { - return agent - } - - url = new URL(url) - - const proxyForUrl = getProxy(url, { proxy, noProxy }) - const normalizedOptions = { - ...normalizeOptions(options), - proxy: proxyForUrl, - } - - const cacheKey = cacheOptions({ - ...normalizedOptions, - secureEndpoint: url.protocol === 'https:', - }) - - if (agentCache.has(cacheKey)) { - return agentCache.get(cacheKey) - } - - const newAgent = new Agent(normalizedOptions) - agentCache.set(cacheKey, newAgent) - - return newAgent -} - -module.exports = { - getAgent, - Agent, - // these are exported for backwards compatability - HttpAgent: Agent, - HttpsAgent: Agent, - cache: { - proxy: proxyCache, - agent: agentCache, - dns: dns.cache, - clear: () => { - proxyCache.clear() - agentCache.clear() - dns.cache.clear() - }, - }, -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/agent/lib/options.js b/deps/npm/node_modules/tuf-js/node_modules/@npmcli/agent/lib/options.js deleted file mode 100644 index 0bf53f725f0846..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/agent/lib/options.js +++ /dev/null @@ -1,86 +0,0 @@ -'use strict' - -const dns = require('./dns') - -const normalizeOptions = (opts) => { - const family = parseInt(opts.family ?? '0', 10) - const keepAlive = opts.keepAlive ?? true - - const normalized = { - // nodejs http agent options. these are all the defaults - // but kept here to increase the likelihood of cache hits - // https://nodejs.org/api/http.html#new-agentoptions - keepAliveMsecs: keepAlive ? 1000 : undefined, - maxSockets: opts.maxSockets ?? 15, - maxTotalSockets: Infinity, - maxFreeSockets: keepAlive ? 256 : undefined, - scheduling: 'fifo', - // then spread the rest of the options - ...opts, - // we already set these to their defaults that we want - family, - keepAlive, - // our custom timeout options - timeouts: { - // the standard timeout option is mapped to our idle timeout - // and then deleted below - idle: opts.timeout ?? 0, - connection: 0, - response: 0, - transfer: 0, - ...opts.timeouts, - }, - // get the dns options that go at the top level of socket connection - ...dns.getOptions({ family, ...opts.dns }), - } - - // remove timeout since we already used it to set our own idle timeout - delete normalized.timeout - - return normalized -} - -const createKey = (obj) => { - let key = '' - const sorted = Object.entries(obj).sort((a, b) => a[0] - b[0]) - for (let [k, v] of sorted) { - if (v == null) { - v = 'null' - } else if (v instanceof URL) { - v = v.toString() - } else if (typeof v === 'object') { - v = createKey(v) - } - key += `${k}:${v}:` - } - return key -} - -const cacheOptions = ({ secureEndpoint, ...options }) => createKey({ - secureEndpoint: !!secureEndpoint, - // socket connect options - family: options.family, - hints: options.hints, - localAddress: options.localAddress, - // tls specific connect options - strictSsl: secureEndpoint ? !!options.rejectUnauthorized : false, - ca: secureEndpoint ? options.ca : null, - cert: secureEndpoint ? options.cert : null, - key: secureEndpoint ? options.key : null, - // http agent options - keepAlive: options.keepAlive, - keepAliveMsecs: options.keepAliveMsecs, - maxSockets: options.maxSockets, - maxTotalSockets: options.maxTotalSockets, - maxFreeSockets: options.maxFreeSockets, - scheduling: options.scheduling, - // timeout options - timeouts: options.timeouts, - // proxy - proxy: options.proxy, -}) - -module.exports = { - normalizeOptions, - cacheOptions, -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/agent/lib/proxy.js b/deps/npm/node_modules/tuf-js/node_modules/@npmcli/agent/lib/proxy.js deleted file mode 100644 index 6272e929e57bcf..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/agent/lib/proxy.js +++ /dev/null @@ -1,88 +0,0 @@ -'use strict' - -const { HttpProxyAgent } = require('http-proxy-agent') -const { HttpsProxyAgent } = require('https-proxy-agent') -const { SocksProxyAgent } = require('socks-proxy-agent') -const { LRUCache } = require('lru-cache') -const { InvalidProxyProtocolError } = require('./errors.js') - -const PROXY_CACHE = new LRUCache({ max: 20 }) - -const SOCKS_PROTOCOLS = new Set(SocksProxyAgent.protocols) - -const PROXY_ENV_KEYS = new Set(['https_proxy', 'http_proxy', 'proxy', 'no_proxy']) - -const PROXY_ENV = Object.entries(process.env).reduce((acc, [key, value]) => { - key = key.toLowerCase() - if (PROXY_ENV_KEYS.has(key)) { - acc[key] = value - } - return acc -}, {}) - -const getProxyAgent = (url) => { - url = new URL(url) - - const protocol = url.protocol.slice(0, -1) - if (SOCKS_PROTOCOLS.has(protocol)) { - return SocksProxyAgent - } - if (protocol === 'https' || protocol === 'http') { - return [HttpProxyAgent, HttpsProxyAgent] - } - - throw new InvalidProxyProtocolError(url) -} - -const isNoProxy = (url, noProxy) => { - if (typeof noProxy === 'string') { - noProxy = noProxy.split(',').map((p) => p.trim()).filter(Boolean) - } - - if (!noProxy || !noProxy.length) { - return false - } - - const hostSegments = url.hostname.split('.').reverse() - - return noProxy.some((no) => { - const noSegments = no.split('.').filter(Boolean).reverse() - if (!noSegments.length) { - return false - } - - for (let i = 0; i < noSegments.length; i++) { - if (hostSegments[i] !== noSegments[i]) { - return false - } - } - - return true - }) -} - -const getProxy = (url, { proxy, noProxy }) => { - url = new URL(url) - - if (!proxy) { - proxy = url.protocol === 'https:' - ? PROXY_ENV.https_proxy - : PROXY_ENV.https_proxy || PROXY_ENV.http_proxy || PROXY_ENV.proxy - } - - if (!noProxy) { - noProxy = PROXY_ENV.no_proxy - } - - if (!proxy || isNoProxy(url, noProxy)) { - return null - } - - return new URL(proxy) -} - -module.exports = { - getProxyAgent, - getProxy, - proxyCache: PROXY_CACHE, -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/agent/package.json b/deps/npm/node_modules/tuf-js/node_modules/@npmcli/agent/package.json deleted file mode 100644 index ef5b4e3228cc46..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/agent/package.json +++ /dev/null @@ -1,60 +0,0 @@ -{ - "name": "@npmcli/agent", - "version": "2.2.2", - "description": "the http/https agent used by the npm cli", - "main": "lib/index.js", - "scripts": { - "gencerts": "bash scripts/create-cert.sh", - "test": "tap", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "postlint": "template-oss-check", - "template-oss-apply": "template-oss-apply --force", - "lintfix": "npm run lint -- --fix", - "snap": "tap", - "posttest": "npm run lint" - }, - "author": "GitHub Inc.", - "license": "ISC", - "bugs": { - "url": "https://github.com/npm/agent/issues" - }, - "homepage": "https://github.com/npm/agent#readme", - "files": [ - "bin/", - "lib/" - ], - "engines": { - "node": "^16.14.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.21.3", - "publish": "true" - }, - "dependencies": { - "agent-base": "^7.1.0", - "http-proxy-agent": "^7.0.0", - "https-proxy-agent": "^7.0.1", - "lru-cache": "^10.0.1", - "socks-proxy-agent": "^8.0.3" - }, - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.21.3", - "minipass-fetch": "^3.0.3", - "nock": "^13.2.7", - "semver": "^7.5.4", - "simple-socks": "^3.1.0", - "tap": "^16.3.0" - }, - "repository": { - "type": "git", - "url": "https://github.com/npm/agent.git" - }, - "tap": { - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - } -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/LICENSE.md b/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/LICENSE.md deleted file mode 100644 index 5fc208ff122e08..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/LICENSE.md +++ /dev/null @@ -1,20 +0,0 @@ - - -ISC License - -Copyright npm, Inc. - -Permission to use, copy, modify, and/or distribute this -software for any purpose with or without fee is hereby -granted, provided that the above copyright notice and this -permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND NPM DISCLAIMS ALL -WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO -EVENT SHALL NPM BE LIABLE FOR ANY SPECIAL, DIRECT, -INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER -TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE -USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/common/get-options.js b/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/common/get-options.js deleted file mode 100644 index cb5982f79077ac..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/common/get-options.js +++ /dev/null @@ -1,20 +0,0 @@ -// given an input that may or may not be an object, return an object that has -// a copy of every defined property listed in 'copy'. if the input is not an -// object, assign it to the property named by 'wrap' -const getOptions = (input, { copy, wrap }) => { - const result = {} - - if (input && typeof input === 'object') { - for (const prop of copy) { - if (input[prop] !== undefined) { - result[prop] = input[prop] - } - } - } else { - result[wrap] = input - } - - return result -} - -module.exports = getOptions diff --git a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/common/node.js b/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/common/node.js deleted file mode 100644 index 4d13bc037359d7..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/common/node.js +++ /dev/null @@ -1,9 +0,0 @@ -const semver = require('semver') - -const satisfies = (range) => { - return semver.satisfies(process.version, range, { includePrerelease: true }) -} - -module.exports = { - satisfies, -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/cp/LICENSE b/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/cp/LICENSE deleted file mode 100644 index 93546dfb7655bf..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/cp/LICENSE +++ /dev/null @@ -1,15 +0,0 @@ -(The MIT License) - -Copyright (c) 2011-2017 JP Richardson - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files -(the 'Software'), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, - merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE -WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS -OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/cp/errors.js b/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/cp/errors.js deleted file mode 100644 index 1cd1e05d0c533d..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/cp/errors.js +++ /dev/null @@ -1,129 +0,0 @@ -'use strict' -const { inspect } = require('util') - -// adapted from node's internal/errors -// https://github.com/nodejs/node/blob/c8a04049/lib/internal/errors.js - -// close copy of node's internal SystemError class. -class SystemError { - constructor (code, prefix, context) { - // XXX context.code is undefined in all constructors used in cp/polyfill - // that may be a bug copied from node, maybe the constructor should use - // `code` not `errno`? nodejs/node#41104 - let message = `${prefix}: ${context.syscall} returned ` + - `${context.code} (${context.message})` - - if (context.path !== undefined) { - message += ` ${context.path}` - } - if (context.dest !== undefined) { - message += ` => ${context.dest}` - } - - this.code = code - Object.defineProperties(this, { - name: { - value: 'SystemError', - enumerable: false, - writable: true, - configurable: true, - }, - message: { - value: message, - enumerable: false, - writable: true, - configurable: true, - }, - info: { - value: context, - enumerable: true, - configurable: true, - writable: false, - }, - errno: { - get () { - return context.errno - }, - set (value) { - context.errno = value - }, - enumerable: true, - configurable: true, - }, - syscall: { - get () { - return context.syscall - }, - set (value) { - context.syscall = value - }, - enumerable: true, - configurable: true, - }, - }) - - if (context.path !== undefined) { - Object.defineProperty(this, 'path', { - get () { - return context.path - }, - set (value) { - context.path = value - }, - enumerable: true, - configurable: true, - }) - } - - if (context.dest !== undefined) { - Object.defineProperty(this, 'dest', { - get () { - return context.dest - }, - set (value) { - context.dest = value - }, - enumerable: true, - configurable: true, - }) - } - } - - toString () { - return `${this.name} [${this.code}]: ${this.message}` - } - - [Symbol.for('nodejs.util.inspect.custom')] (_recurseTimes, ctx) { - return inspect(this, { - ...ctx, - getters: true, - customInspect: false, - }) - } -} - -function E (code, message) { - module.exports[code] = class NodeError extends SystemError { - constructor (ctx) { - super(code, message, ctx) - } - } -} - -E('ERR_FS_CP_DIR_TO_NON_DIR', 'Cannot overwrite directory with non-directory') -E('ERR_FS_CP_EEXIST', 'Target already exists') -E('ERR_FS_CP_EINVAL', 'Invalid src or dest') -E('ERR_FS_CP_FIFO_PIPE', 'Cannot copy a FIFO pipe') -E('ERR_FS_CP_NON_DIR_TO_DIR', 'Cannot overwrite non-directory with directory') -E('ERR_FS_CP_SOCKET', 'Cannot copy a socket file') -E('ERR_FS_CP_SYMLINK_TO_SUBDIRECTORY', 'Cannot overwrite symlink in subdirectory of self') -E('ERR_FS_CP_UNKNOWN', 'Cannot copy an unknown file type') -E('ERR_FS_EISDIR', 'Path is a directory') - -module.exports.ERR_INVALID_ARG_TYPE = class ERR_INVALID_ARG_TYPE extends Error { - constructor (name, expected, actual) { - super() - this.code = 'ERR_INVALID_ARG_TYPE' - this.message = `The ${name} argument must be ${expected}. Received ${typeof actual}` - } -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/cp/index.js b/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/cp/index.js deleted file mode 100644 index 972ce7aa12abef..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/cp/index.js +++ /dev/null @@ -1,22 +0,0 @@ -const fs = require('fs/promises') -const getOptions = require('../common/get-options.js') -const node = require('../common/node.js') -const polyfill = require('./polyfill.js') - -// node 16.7.0 added fs.cp -const useNative = node.satisfies('>=16.7.0') - -const cp = async (src, dest, opts) => { - const options = getOptions(opts, { - copy: ['dereference', 'errorOnExist', 'filter', 'force', 'preserveTimestamps', 'recursive'], - }) - - // the polyfill is tested separately from this module, no need to hack - // process.version to try to trigger it just for coverage - // istanbul ignore next - return useNative - ? fs.cp(src, dest, options) - : polyfill(src, dest, options) -} - -module.exports = cp diff --git a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/cp/polyfill.js b/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/cp/polyfill.js deleted file mode 100644 index 80eb10de971918..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/cp/polyfill.js +++ /dev/null @@ -1,428 +0,0 @@ -// this file is a modified version of the code in node 17.2.0 -// which is, in turn, a modified version of the fs-extra module on npm -// node core changes: -// - Use of the assert module has been replaced with core's error system. -// - All code related to the glob dependency has been removed. -// - Bring your own custom fs module is not currently supported. -// - Some basic code cleanup. -// changes here: -// - remove all callback related code -// - drop sync support -// - change assertions back to non-internal methods (see options.js) -// - throws ENOTDIR when rmdir gets an ENOENT for a path that exists in Windows -'use strict' - -const { - ERR_FS_CP_DIR_TO_NON_DIR, - ERR_FS_CP_EEXIST, - ERR_FS_CP_EINVAL, - ERR_FS_CP_FIFO_PIPE, - ERR_FS_CP_NON_DIR_TO_DIR, - ERR_FS_CP_SOCKET, - ERR_FS_CP_SYMLINK_TO_SUBDIRECTORY, - ERR_FS_CP_UNKNOWN, - ERR_FS_EISDIR, - ERR_INVALID_ARG_TYPE, -} = require('./errors.js') -const { - constants: { - errno: { - EEXIST, - EISDIR, - EINVAL, - ENOTDIR, - }, - }, -} = require('os') -const { - chmod, - copyFile, - lstat, - mkdir, - readdir, - readlink, - stat, - symlink, - unlink, - utimes, -} = require('fs/promises') -const { - dirname, - isAbsolute, - join, - parse, - resolve, - sep, - toNamespacedPath, -} = require('path') -const { fileURLToPath } = require('url') - -const defaultOptions = { - dereference: false, - errorOnExist: false, - filter: undefined, - force: true, - preserveTimestamps: false, - recursive: false, -} - -async function cp (src, dest, opts) { - if (opts != null && typeof opts !== 'object') { - throw new ERR_INVALID_ARG_TYPE('options', ['Object'], opts) - } - return cpFn( - toNamespacedPath(getValidatedPath(src)), - toNamespacedPath(getValidatedPath(dest)), - { ...defaultOptions, ...opts }) -} - -function getValidatedPath (fileURLOrPath) { - const path = fileURLOrPath != null && fileURLOrPath.href - && fileURLOrPath.origin - ? fileURLToPath(fileURLOrPath) - : fileURLOrPath - return path -} - -async function cpFn (src, dest, opts) { - // Warn about using preserveTimestamps on 32-bit node - // istanbul ignore next - if (opts.preserveTimestamps && process.arch === 'ia32') { - const warning = 'Using the preserveTimestamps option in 32-bit ' + - 'node is not recommended' - process.emitWarning(warning, 'TimestampPrecisionWarning') - } - const stats = await checkPaths(src, dest, opts) - const { srcStat, destStat } = stats - await checkParentPaths(src, srcStat, dest) - if (opts.filter) { - return handleFilter(checkParentDir, destStat, src, dest, opts) - } - return checkParentDir(destStat, src, dest, opts) -} - -async function checkPaths (src, dest, opts) { - const { 0: srcStat, 1: destStat } = await getStats(src, dest, opts) - if (destStat) { - if (areIdentical(srcStat, destStat)) { - throw new ERR_FS_CP_EINVAL({ - message: 'src and dest cannot be the same', - path: dest, - syscall: 'cp', - errno: EINVAL, - }) - } - if (srcStat.isDirectory() && !destStat.isDirectory()) { - throw new ERR_FS_CP_DIR_TO_NON_DIR({ - message: `cannot overwrite directory ${src} ` + - `with non-directory ${dest}`, - path: dest, - syscall: 'cp', - errno: EISDIR, - }) - } - if (!srcStat.isDirectory() && destStat.isDirectory()) { - throw new ERR_FS_CP_NON_DIR_TO_DIR({ - message: `cannot overwrite non-directory ${src} ` + - `with directory ${dest}`, - path: dest, - syscall: 'cp', - errno: ENOTDIR, - }) - } - } - - if (srcStat.isDirectory() && isSrcSubdir(src, dest)) { - throw new ERR_FS_CP_EINVAL({ - message: `cannot copy ${src} to a subdirectory of self ${dest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - }) - } - return { srcStat, destStat } -} - -function areIdentical (srcStat, destStat) { - return destStat.ino && destStat.dev && destStat.ino === srcStat.ino && - destStat.dev === srcStat.dev -} - -function getStats (src, dest, opts) { - const statFunc = opts.dereference ? - (file) => stat(file, { bigint: true }) : - (file) => lstat(file, { bigint: true }) - return Promise.all([ - statFunc(src), - statFunc(dest).catch((err) => { - // istanbul ignore next: unsure how to cover. - if (err.code === 'ENOENT') { - return null - } - // istanbul ignore next: unsure how to cover. - throw err - }), - ]) -} - -async function checkParentDir (destStat, src, dest, opts) { - const destParent = dirname(dest) - const dirExists = await pathExists(destParent) - if (dirExists) { - return getStatsForCopy(destStat, src, dest, opts) - } - await mkdir(destParent, { recursive: true }) - return getStatsForCopy(destStat, src, dest, opts) -} - -function pathExists (dest) { - return stat(dest).then( - () => true, - // istanbul ignore next: not sure when this would occur - (err) => (err.code === 'ENOENT' ? false : Promise.reject(err))) -} - -// Recursively check if dest parent is a subdirectory of src. -// It works for all file types including symlinks since it -// checks the src and dest inodes. It starts from the deepest -// parent and stops once it reaches the src parent or the root path. -async function checkParentPaths (src, srcStat, dest) { - const srcParent = resolve(dirname(src)) - const destParent = resolve(dirname(dest)) - if (destParent === srcParent || destParent === parse(destParent).root) { - return - } - let destStat - try { - destStat = await stat(destParent, { bigint: true }) - } catch (err) { - // istanbul ignore else: not sure when this would occur - if (err.code === 'ENOENT') { - return - } - // istanbul ignore next: not sure when this would occur - throw err - } - if (areIdentical(srcStat, destStat)) { - throw new ERR_FS_CP_EINVAL({ - message: `cannot copy ${src} to a subdirectory of self ${dest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - }) - } - return checkParentPaths(src, srcStat, destParent) -} - -const normalizePathToArray = (path) => - resolve(path).split(sep).filter(Boolean) - -// Return true if dest is a subdir of src, otherwise false. -// It only checks the path strings. -function isSrcSubdir (src, dest) { - const srcArr = normalizePathToArray(src) - const destArr = normalizePathToArray(dest) - return srcArr.every((cur, i) => destArr[i] === cur) -} - -async function handleFilter (onInclude, destStat, src, dest, opts, cb) { - const include = await opts.filter(src, dest) - if (include) { - return onInclude(destStat, src, dest, opts, cb) - } -} - -function startCopy (destStat, src, dest, opts) { - if (opts.filter) { - return handleFilter(getStatsForCopy, destStat, src, dest, opts) - } - return getStatsForCopy(destStat, src, dest, opts) -} - -async function getStatsForCopy (destStat, src, dest, opts) { - const statFn = opts.dereference ? stat : lstat - const srcStat = await statFn(src) - // istanbul ignore else: can't portably test FIFO - if (srcStat.isDirectory() && opts.recursive) { - return onDir(srcStat, destStat, src, dest, opts) - } else if (srcStat.isDirectory()) { - throw new ERR_FS_EISDIR({ - message: `${src} is a directory (not copied)`, - path: src, - syscall: 'cp', - errno: EINVAL, - }) - } else if (srcStat.isFile() || - srcStat.isCharacterDevice() || - srcStat.isBlockDevice()) { - return onFile(srcStat, destStat, src, dest, opts) - } else if (srcStat.isSymbolicLink()) { - return onLink(destStat, src, dest) - } else if (srcStat.isSocket()) { - throw new ERR_FS_CP_SOCKET({ - message: `cannot copy a socket file: ${dest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - }) - } else if (srcStat.isFIFO()) { - throw new ERR_FS_CP_FIFO_PIPE({ - message: `cannot copy a FIFO pipe: ${dest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - }) - } - // istanbul ignore next: should be unreachable - throw new ERR_FS_CP_UNKNOWN({ - message: `cannot copy an unknown file type: ${dest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - }) -} - -function onFile (srcStat, destStat, src, dest, opts) { - if (!destStat) { - return _copyFile(srcStat, src, dest, opts) - } - return mayCopyFile(srcStat, src, dest, opts) -} - -async function mayCopyFile (srcStat, src, dest, opts) { - if (opts.force) { - await unlink(dest) - return _copyFile(srcStat, src, dest, opts) - } else if (opts.errorOnExist) { - throw new ERR_FS_CP_EEXIST({ - message: `${dest} already exists`, - path: dest, - syscall: 'cp', - errno: EEXIST, - }) - } -} - -async function _copyFile (srcStat, src, dest, opts) { - await copyFile(src, dest) - if (opts.preserveTimestamps) { - return handleTimestampsAndMode(srcStat.mode, src, dest) - } - return setDestMode(dest, srcStat.mode) -} - -async function handleTimestampsAndMode (srcMode, src, dest) { - // Make sure the file is writable before setting the timestamp - // otherwise open fails with EPERM when invoked with 'r+' - // (through utimes call) - if (fileIsNotWritable(srcMode)) { - await makeFileWritable(dest, srcMode) - return setDestTimestampsAndMode(srcMode, src, dest) - } - return setDestTimestampsAndMode(srcMode, src, dest) -} - -function fileIsNotWritable (srcMode) { - return (srcMode & 0o200) === 0 -} - -function makeFileWritable (dest, srcMode) { - return setDestMode(dest, srcMode | 0o200) -} - -async function setDestTimestampsAndMode (srcMode, src, dest) { - await setDestTimestamps(src, dest) - return setDestMode(dest, srcMode) -} - -function setDestMode (dest, srcMode) { - return chmod(dest, srcMode) -} - -async function setDestTimestamps (src, dest) { - // The initial srcStat.atime cannot be trusted - // because it is modified by the read(2) system call - // (See https://nodejs.org/api/fs.html#fs_stat_time_values) - const updatedSrcStat = await stat(src) - return utimes(dest, updatedSrcStat.atime, updatedSrcStat.mtime) -} - -function onDir (srcStat, destStat, src, dest, opts) { - if (!destStat) { - return mkDirAndCopy(srcStat.mode, src, dest, opts) - } - return copyDir(src, dest, opts) -} - -async function mkDirAndCopy (srcMode, src, dest, opts) { - await mkdir(dest) - await copyDir(src, dest, opts) - return setDestMode(dest, srcMode) -} - -async function copyDir (src, dest, opts) { - const dir = await readdir(src) - for (let i = 0; i < dir.length; i++) { - const item = dir[i] - const srcItem = join(src, item) - const destItem = join(dest, item) - const { destStat } = await checkPaths(srcItem, destItem, opts) - await startCopy(destStat, srcItem, destItem, opts) - } -} - -async function onLink (destStat, src, dest) { - let resolvedSrc = await readlink(src) - if (!isAbsolute(resolvedSrc)) { - resolvedSrc = resolve(dirname(src), resolvedSrc) - } - if (!destStat) { - return symlink(resolvedSrc, dest) - } - let resolvedDest - try { - resolvedDest = await readlink(dest) - } catch (err) { - // Dest exists and is a regular file or directory, - // Windows may throw UNKNOWN error. If dest already exists, - // fs throws error anyway, so no need to guard against it here. - // istanbul ignore next: can only test on windows - if (err.code === 'EINVAL' || err.code === 'UNKNOWN') { - return symlink(resolvedSrc, dest) - } - // istanbul ignore next: should not be possible - throw err - } - if (!isAbsolute(resolvedDest)) { - resolvedDest = resolve(dirname(dest), resolvedDest) - } - if (isSrcSubdir(resolvedSrc, resolvedDest)) { - throw new ERR_FS_CP_EINVAL({ - message: `cannot copy ${resolvedSrc} to a subdirectory of self ` + - `${resolvedDest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - }) - } - // Do not copy if src is a subdir of dest since unlinking - // dest in this case would result in removing src contents - // and therefore a broken symlink would be created. - const srcStat = await stat(src) - if (srcStat.isDirectory() && isSrcSubdir(resolvedDest, resolvedSrc)) { - throw new ERR_FS_CP_SYMLINK_TO_SUBDIRECTORY({ - message: `cannot overwrite ${resolvedDest} with ${resolvedSrc}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - }) - } - return copyLink(resolvedSrc, dest) -} - -async function copyLink (resolvedSrc, dest) { - await unlink(dest) - return symlink(resolvedSrc, dest) -} - -module.exports = cp diff --git a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/index.js b/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/index.js deleted file mode 100644 index 81c746304cc428..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/index.js +++ /dev/null @@ -1,13 +0,0 @@ -'use strict' - -const cp = require('./cp/index.js') -const withTempDir = require('./with-temp-dir.js') -const readdirScoped = require('./readdir-scoped.js') -const moveFile = require('./move-file.js') - -module.exports = { - cp, - withTempDir, - readdirScoped, - moveFile, -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/move-file.js b/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/move-file.js deleted file mode 100644 index d56e06d384659a..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/move-file.js +++ /dev/null @@ -1,78 +0,0 @@ -const { dirname, join, resolve, relative, isAbsolute } = require('path') -const fs = require('fs/promises') - -const pathExists = async path => { - try { - await fs.access(path) - return true - } catch (er) { - return er.code !== 'ENOENT' - } -} - -const moveFile = async (source, destination, options = {}, root = true, symlinks = []) => { - if (!source || !destination) { - throw new TypeError('`source` and `destination` file required') - } - - options = { - overwrite: true, - ...options, - } - - if (!options.overwrite && await pathExists(destination)) { - throw new Error(`The destination file exists: ${destination}`) - } - - await fs.mkdir(dirname(destination), { recursive: true }) - - try { - await fs.rename(source, destination) - } catch (error) { - if (error.code === 'EXDEV' || error.code === 'EPERM') { - const sourceStat = await fs.lstat(source) - if (sourceStat.isDirectory()) { - const files = await fs.readdir(source) - await Promise.all(files.map((file) => - moveFile(join(source, file), join(destination, file), options, false, symlinks) - )) - } else if (sourceStat.isSymbolicLink()) { - symlinks.push({ source, destination }) - } else { - await fs.copyFile(source, destination) - } - } else { - throw error - } - } - - if (root) { - await Promise.all(symlinks.map(async ({ source: symSource, destination: symDestination }) => { - let target = await fs.readlink(symSource) - // junction symlinks in windows will be absolute paths, so we need to - // make sure they point to the symlink destination - if (isAbsolute(target)) { - target = resolve(symDestination, relative(symSource, target)) - } - // try to determine what the actual file is so we can create the correct - // type of symlink in windows - let targetStat = 'file' - try { - targetStat = await fs.stat(resolve(dirname(symSource), target)) - if (targetStat.isDirectory()) { - targetStat = 'junction' - } - } catch { - // targetStat remains 'file' - } - await fs.symlink( - target, - symDestination, - targetStat - ) - })) - await fs.rm(source, { recursive: true, force: true }) - } -} - -module.exports = moveFile diff --git a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/readdir-scoped.js b/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/readdir-scoped.js deleted file mode 100644 index cd601dfbe7486b..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/readdir-scoped.js +++ /dev/null @@ -1,20 +0,0 @@ -const { readdir } = require('fs/promises') -const { join } = require('path') - -const readdirScoped = async (dir) => { - const results = [] - - for (const item of await readdir(dir)) { - if (item.startsWith('@')) { - for (const scopedItem of await readdir(join(dir, item))) { - results.push(join(item, scopedItem)) - } - } else { - results.push(item) - } - } - - return results -} - -module.exports = readdirScoped diff --git a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/with-temp-dir.js b/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/with-temp-dir.js deleted file mode 100644 index 0738ac4f29e1be..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/lib/with-temp-dir.js +++ /dev/null @@ -1,39 +0,0 @@ -const { join, sep } = require('path') - -const getOptions = require('./common/get-options.js') -const { mkdir, mkdtemp, rm } = require('fs/promises') - -// create a temp directory, ensure its permissions match its parent, then call -// the supplied function passing it the path to the directory. clean up after -// the function finishes, whether it throws or not -const withTempDir = async (root, fn, opts) => { - const options = getOptions(opts, { - copy: ['tmpPrefix'], - }) - // create the directory - await mkdir(root, { recursive: true }) - - const target = await mkdtemp(join(`${root}${sep}`, options.tmpPrefix || '')) - let err - let result - - try { - result = await fn(target) - } catch (_err) { - err = _err - } - - try { - await rm(target, { force: true, recursive: true }) - } catch { - // ignore errors - } - - if (err) { - throw err - } - - return result -} - -module.exports = withTempDir diff --git a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/package.json b/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/package.json deleted file mode 100644 index 5261a11b78000e..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/@npmcli/fs/package.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "name": "@npmcli/fs", - "version": "3.1.1", - "description": "filesystem utilities for the npm cli", - "main": "lib/index.js", - "files": [ - "bin/", - "lib/" - ], - "scripts": { - "snap": "tap", - "test": "tap", - "npmclilint": "npmcli-lint", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "lintfix": "npm run lint -- --fix", - "posttest": "npm run lint", - "postsnap": "npm run lintfix --", - "postlint": "template-oss-check", - "template-oss-apply": "template-oss-apply --force" - }, - "repository": { - "type": "git", - "url": "git+https://github.com/npm/fs.git" - }, - "keywords": [ - "npm", - "oss" - ], - "author": "GitHub Inc.", - "license": "ISC", - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.22.0", - "tap": "^16.0.1" - }, - "dependencies": { - "semver": "^7.3.5" - }, - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.22.0" - }, - "tap": { - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - } -} diff --git a/deps/npm/node_modules/@tufjs/models/LICENSE b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/LICENSE similarity index 100% rename from deps/npm/node_modules/@tufjs/models/LICENSE rename to deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/LICENSE diff --git a/deps/npm/node_modules/@tufjs/models/dist/base.js b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/base.js similarity index 80% rename from deps/npm/node_modules/@tufjs/models/dist/base.js rename to deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/base.js index 259f6799c13a0d..85e45d8fc1151e 100644 --- a/deps/npm/node_modules/@tufjs/models/dist/base.js +++ b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/base.js @@ -3,7 +3,8 @@ var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); -exports.Signed = exports.isMetadataKind = exports.MetadataKind = void 0; +exports.Signed = exports.MetadataKind = void 0; +exports.isMetadataKind = isMetadataKind; const util_1 = __importDefault(require("util")); const error_1 = require("./error"); const utils_1 = require("./utils"); @@ -19,7 +20,6 @@ function isMetadataKind(value) { return (typeof value === 'string' && Object.values(MetadataKind).includes(value)); } -exports.isMetadataKind = isMetadataKind; /*** * A base class for the signed part of TUF metadata. * @@ -39,8 +39,8 @@ class Signed { if (specList[0] != SPECIFICATION_VERSION[0]) { throw new error_1.ValueError('Unsupported specVersion'); } - this.expires = options.expires || new Date().toISOString(); - this.version = options.version || 1; + this.expires = options.expires; + this.version = options.version; this.unrecognizedFields = options.unrecognizedFields || {}; } equals(other) { @@ -60,13 +60,22 @@ class Signed { } static commonFieldsFromJSON(data) { const { spec_version, expires, version, ...rest } = data; - if (utils_1.guard.isDefined(spec_version) && !(typeof spec_version === 'string')) { + if (!utils_1.guard.isDefined(spec_version)) { + throw new error_1.ValueError('spec_version is not defined'); + } + else if (typeof spec_version !== 'string') { throw new TypeError('spec_version must be a string'); } - if (utils_1.guard.isDefined(expires) && !(typeof expires === 'string')) { + if (!utils_1.guard.isDefined(expires)) { + throw new error_1.ValueError('expires is not defined'); + } + else if (!(typeof expires === 'string')) { throw new TypeError('expires must be a string'); } - if (utils_1.guard.isDefined(version) && !(typeof version === 'number')) { + if (!utils_1.guard.isDefined(version)) { + throw new error_1.ValueError('version is not defined'); + } + else if (!(typeof version === 'number')) { throw new TypeError('version must be a number'); } return { diff --git a/deps/npm/node_modules/@tufjs/models/dist/delegations.js b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/delegations.js similarity index 100% rename from deps/npm/node_modules/@tufjs/models/dist/delegations.js rename to deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/delegations.js diff --git a/deps/npm/node_modules/@tufjs/models/dist/error.js b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/error.js similarity index 100% rename from deps/npm/node_modules/@tufjs/models/dist/error.js rename to deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/error.js diff --git a/deps/npm/node_modules/@tufjs/models/dist/file.js b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/file.js similarity index 100% rename from deps/npm/node_modules/@tufjs/models/dist/file.js rename to deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/file.js diff --git a/deps/npm/node_modules/@tufjs/models/dist/index.js b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/index.js similarity index 100% rename from deps/npm/node_modules/@tufjs/models/dist/index.js rename to deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/index.js diff --git a/deps/npm/node_modules/@tufjs/models/dist/key.js b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/key.js similarity index 100% rename from deps/npm/node_modules/@tufjs/models/dist/key.js rename to deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/key.js diff --git a/deps/npm/node_modules/@tufjs/models/dist/metadata.js b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/metadata.js similarity index 91% rename from deps/npm/node_modules/@tufjs/models/dist/metadata.js rename to deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/metadata.js index 9668b6f14fa701..389d2504e0b53d 100644 --- a/deps/npm/node_modules/@tufjs/models/dist/metadata.js +++ b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/metadata.js @@ -125,6 +125,9 @@ class Metadata { if (type !== signed._type) { throw new error_1.ValueError(`expected '${type}', got ${signed['_type']}`); } + if (!utils_1.guard.isObjectArray(signatures)) { + throw new TypeError('signatures is not an array'); + } let signedObj; switch (type) { case base_1.MetadataKind.Root: @@ -142,17 +145,16 @@ class Metadata { default: throw new TypeError('invalid metadata type'); } - const sigMap = signaturesFromJSON(signatures); + const sigMap = {}; + // Ensure that each signature is unique + signatures.forEach((sigData) => { + const sig = signature_1.Signature.fromJSON(sigData); + if (sigMap[sig.keyID]) { + throw new error_1.ValueError(`multiple signatures found for keyid: ${sig.keyID}`); + } + sigMap[sig.keyID] = sig; + }); return new Metadata(signedObj, sigMap, rest); } } exports.Metadata = Metadata; -function signaturesFromJSON(data) { - if (!utils_1.guard.isObjectArray(data)) { - throw new TypeError('signatures is not an array'); - } - return data.reduce((acc, sigData) => { - const signature = signature_1.Signature.fromJSON(sigData); - return { ...acc, [signature.keyID]: signature }; - }, {}); -} diff --git a/deps/npm/node_modules/@tufjs/models/dist/role.js b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/role.js similarity index 100% rename from deps/npm/node_modules/@tufjs/models/dist/role.js rename to deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/role.js diff --git a/deps/npm/node_modules/@tufjs/models/dist/root.js b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/root.js similarity index 100% rename from deps/npm/node_modules/@tufjs/models/dist/root.js rename to deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/root.js diff --git a/deps/npm/node_modules/@tufjs/models/dist/signature.js b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/signature.js similarity index 100% rename from deps/npm/node_modules/@tufjs/models/dist/signature.js rename to deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/signature.js diff --git a/deps/npm/node_modules/@tufjs/models/dist/snapshot.js b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/snapshot.js similarity index 100% rename from deps/npm/node_modules/@tufjs/models/dist/snapshot.js rename to deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/snapshot.js diff --git a/deps/npm/node_modules/@tufjs/models/dist/targets.js b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/targets.js similarity index 100% rename from deps/npm/node_modules/@tufjs/models/dist/targets.js rename to deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/targets.js diff --git a/deps/npm/node_modules/@tufjs/models/dist/timestamp.js b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/timestamp.js similarity index 100% rename from deps/npm/node_modules/@tufjs/models/dist/timestamp.js rename to deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/timestamp.js diff --git a/deps/npm/node_modules/@tufjs/models/dist/utils/guard.js b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/utils/guard.js similarity index 88% rename from deps/npm/node_modules/@tufjs/models/dist/utils/guard.js rename to deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/utils/guard.js index efe558852303ce..911e8475986bbc 100644 --- a/deps/npm/node_modules/@tufjs/models/dist/utils/guard.js +++ b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/utils/guard.js @@ -1,33 +1,32 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.isObjectRecord = exports.isStringRecord = exports.isObjectArray = exports.isStringArray = exports.isObject = exports.isDefined = void 0; +exports.isDefined = isDefined; +exports.isObject = isObject; +exports.isStringArray = isStringArray; +exports.isObjectArray = isObjectArray; +exports.isStringRecord = isStringRecord; +exports.isObjectRecord = isObjectRecord; function isDefined(val) { return val !== undefined; } -exports.isDefined = isDefined; function isObject(value) { return typeof value === 'object' && value !== null; } -exports.isObject = isObject; function isStringArray(value) { return Array.isArray(value) && value.every((v) => typeof v === 'string'); } -exports.isStringArray = isStringArray; function isObjectArray(value) { return Array.isArray(value) && value.every(isObject); } -exports.isObjectArray = isObjectArray; function isStringRecord(value) { return (typeof value === 'object' && value !== null && Object.keys(value).every((k) => typeof k === 'string') && Object.values(value).every((v) => typeof v === 'string')); } -exports.isStringRecord = isStringRecord; function isObjectRecord(value) { return (typeof value === 'object' && value !== null && Object.keys(value).every((k) => typeof k === 'string') && Object.values(value).every((v) => typeof v === 'object' && v !== null)); } -exports.isObjectRecord = isObjectRecord; diff --git a/deps/npm/node_modules/@tufjs/models/dist/utils/index.js b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/utils/index.js similarity index 100% rename from deps/npm/node_modules/@tufjs/models/dist/utils/index.js rename to deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/utils/index.js diff --git a/deps/npm/node_modules/@tufjs/models/dist/utils/key.js b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/utils/key.js similarity index 99% rename from deps/npm/node_modules/@tufjs/models/dist/utils/key.js rename to deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/utils/key.js index 1f795ba1a2733f..3c3ec07f1425a7 100644 --- a/deps/npm/node_modules/@tufjs/models/dist/utils/key.js +++ b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/utils/key.js @@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); -exports.getPublicKey = void 0; +exports.getPublicKey = getPublicKey; const crypto_1 = __importDefault(require("crypto")); const error_1 = require("../error"); const oid_1 = require("./oid"); @@ -28,7 +28,6 @@ function getPublicKey(keyInfo) { throw new error_1.UnsupportedAlgorithmError(`Unsupported key type: ${keyInfo.keyType}`); } } -exports.getPublicKey = getPublicKey; function getRSAPublicKey(keyInfo) { // Only support PEM-encoded RSA keys if (!keyInfo.keyVal.startsWith(PEM_HEADER)) { diff --git a/deps/npm/node_modules/@tufjs/models/dist/utils/oid.js b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/utils/oid.js similarity index 96% rename from deps/npm/node_modules/@tufjs/models/dist/utils/oid.js rename to deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/utils/oid.js index e1bb7af5e54fbf..00b29c3030d1ec 100644 --- a/deps/npm/node_modules/@tufjs/models/dist/utils/oid.js +++ b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/utils/oid.js @@ -1,6 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.encodeOIDString = void 0; +exports.encodeOIDString = encodeOIDString; const ANS1_TAG_OID = 0x06; function encodeOIDString(oid) { const parts = oid.split('.'); @@ -14,7 +14,6 @@ function encodeOIDString(oid) { const der = Buffer.from([first, ...rest]); return Buffer.from([ANS1_TAG_OID, der.length, ...der]); } -exports.encodeOIDString = encodeOIDString; function encodeVariableLengthInteger(value) { const bytes = []; let mask = 0x00; diff --git a/deps/npm/node_modules/@tufjs/models/dist/utils/types.js b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/utils/types.js similarity index 100% rename from deps/npm/node_modules/@tufjs/models/dist/utils/types.js rename to deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/utils/types.js diff --git a/deps/npm/node_modules/@tufjs/models/dist/utils/verify.js b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/utils/verify.js similarity index 100% rename from deps/npm/node_modules/@tufjs/models/dist/utils/verify.js rename to deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/dist/utils/verify.js diff --git a/deps/npm/node_modules/@tufjs/models/package.json b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/package.json similarity index 90% rename from deps/npm/node_modules/@tufjs/models/package.json rename to deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/package.json index be581591a0f3a3..8e5132ddf1079c 100644 --- a/deps/npm/node_modules/@tufjs/models/package.json +++ b/deps/npm/node_modules/tuf-js/node_modules/@tufjs/models/package.json @@ -1,6 +1,6 @@ { "name": "@tufjs/models", - "version": "2.0.1", + "version": "3.0.1", "description": "TUF metadata models", "main": "dist/index.js", "types": "dist/index.d.ts", @@ -29,9 +29,9 @@ "homepage": "https://github.com/theupdateframework/tuf-js/tree/main/packages/models#readme", "dependencies": { "@tufjs/canonical-json": "2.0.0", - "minimatch": "^9.0.4" + "minimatch": "^9.0.5" }, "engines": { - "node": "^16.14.0 || >=18.0.0" + "node": "^18.17.0 || >=20.5.0" } } diff --git a/deps/npm/node_modules/tuf-js/node_modules/cacache/LICENSE.md b/deps/npm/node_modules/tuf-js/node_modules/cacache/LICENSE.md deleted file mode 100644 index 8d28acf866d932..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/cacache/LICENSE.md +++ /dev/null @@ -1,16 +0,0 @@ -ISC License - -Copyright (c) npm, Inc. - -Permission to use, copy, modify, and/or distribute this software for -any purpose with or without fee is hereby granted, provided that the -above copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE COPYRIGHT HOLDER DISCLAIMS -ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE -COPYRIGHT HOLDER BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR -CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE -USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/content/path.js b/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/content/path.js deleted file mode 100644 index ad5a76a4f73f26..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/content/path.js +++ /dev/null @@ -1,29 +0,0 @@ -'use strict' - -const contentVer = require('../../package.json')['cache-version'].content -const hashToSegments = require('../util/hash-to-segments') -const path = require('path') -const ssri = require('ssri') - -// Current format of content file path: -// -// sha512-BaSE64Hex= -> -// ~/.my-cache/content-v2/sha512/ba/da/55deadbeefc0ffee -// -module.exports = contentPath - -function contentPath (cache, integrity) { - const sri = ssri.parse(integrity, { single: true }) - // contentPath is the *strongest* algo given - return path.join( - contentDir(cache), - sri.algorithm, - ...hashToSegments(sri.hexDigest()) - ) -} - -module.exports.contentDir = contentDir - -function contentDir (cache) { - return path.join(cache, `content-v${contentVer}`) -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/content/read.js b/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/content/read.js deleted file mode 100644 index 5f6192c3cec566..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/content/read.js +++ /dev/null @@ -1,165 +0,0 @@ -'use strict' - -const fs = require('fs/promises') -const fsm = require('fs-minipass') -const ssri = require('ssri') -const contentPath = require('./path') -const Pipeline = require('minipass-pipeline') - -module.exports = read - -const MAX_SINGLE_READ_SIZE = 64 * 1024 * 1024 -async function read (cache, integrity, opts = {}) { - const { size } = opts - const { stat, cpath, sri } = await withContentSri(cache, integrity, async (cpath, sri) => { - // get size - const stat = size ? { size } : await fs.stat(cpath) - return { stat, cpath, sri } - }) - - if (stat.size > MAX_SINGLE_READ_SIZE) { - return readPipeline(cpath, stat.size, sri, new Pipeline()).concat() - } - - const data = await fs.readFile(cpath, { encoding: null }) - - if (stat.size !== data.length) { - throw sizeError(stat.size, data.length) - } - - if (!ssri.checkData(data, sri)) { - throw integrityError(sri, cpath) - } - - return data -} - -const readPipeline = (cpath, size, sri, stream) => { - stream.push( - new fsm.ReadStream(cpath, { - size, - readSize: MAX_SINGLE_READ_SIZE, - }), - ssri.integrityStream({ - integrity: sri, - size, - }) - ) - return stream -} - -module.exports.stream = readStream -module.exports.readStream = readStream - -function readStream (cache, integrity, opts = {}) { - const { size } = opts - const stream = new Pipeline() - // Set all this up to run on the stream and then just return the stream - Promise.resolve().then(async () => { - const { stat, cpath, sri } = await withContentSri(cache, integrity, async (cpath, sri) => { - // get size - const stat = size ? { size } : await fs.stat(cpath) - return { stat, cpath, sri } - }) - - return readPipeline(cpath, stat.size, sri, stream) - }).catch(err => stream.emit('error', err)) - - return stream -} - -module.exports.copy = copy - -function copy (cache, integrity, dest) { - return withContentSri(cache, integrity, (cpath) => { - return fs.copyFile(cpath, dest) - }) -} - -module.exports.hasContent = hasContent - -async function hasContent (cache, integrity) { - if (!integrity) { - return false - } - - try { - return await withContentSri(cache, integrity, async (cpath, sri) => { - const stat = await fs.stat(cpath) - return { size: stat.size, sri, stat } - }) - } catch (err) { - if (err.code === 'ENOENT') { - return false - } - - if (err.code === 'EPERM') { - /* istanbul ignore else */ - if (process.platform !== 'win32') { - throw err - } else { - return false - } - } - } -} - -async function withContentSri (cache, integrity, fn) { - const sri = ssri.parse(integrity) - // If `integrity` has multiple entries, pick the first digest - // with available local data. - const algo = sri.pickAlgorithm() - const digests = sri[algo] - - if (digests.length <= 1) { - const cpath = contentPath(cache, digests[0]) - return fn(cpath, digests[0]) - } else { - // Can't use race here because a generic error can happen before - // a ENOENT error, and can happen before a valid result - const results = await Promise.all(digests.map(async (meta) => { - try { - return await withContentSri(cache, meta, fn) - } catch (err) { - if (err.code === 'ENOENT') { - return Object.assign( - new Error('No matching content found for ' + sri.toString()), - { code: 'ENOENT' } - ) - } - return err - } - })) - // Return the first non error if it is found - const result = results.find((r) => !(r instanceof Error)) - if (result) { - return result - } - - // Throw the No matching content found error - const enoentError = results.find((r) => r.code === 'ENOENT') - if (enoentError) { - throw enoentError - } - - // Throw generic error - throw results.find((r) => r instanceof Error) - } -} - -function sizeError (expected, found) { - /* eslint-disable-next-line max-len */ - const err = new Error(`Bad data size: expected inserted data to be ${expected} bytes, but got ${found} instead`) - err.expected = expected - err.found = found - err.code = 'EBADSIZE' - return err -} - -function integrityError (sri, path) { - const err = new Error(`Integrity verification failed for ${sri} (${path})`) - err.code = 'EINTEGRITY' - err.sri = sri - err.path = path - return err -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/content/rm.js b/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/content/rm.js deleted file mode 100644 index ce58d679e4cb25..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/content/rm.js +++ /dev/null @@ -1,18 +0,0 @@ -'use strict' - -const fs = require('fs/promises') -const contentPath = require('./path') -const { hasContent } = require('./read') - -module.exports = rm - -async function rm (cache, integrity) { - const content = await hasContent(cache, integrity) - // ~pretty~ sure we can't end up with a content lacking sri, but be safe - if (content && content.sri) { - await fs.rm(contentPath(cache, content.sri), { recursive: true, force: true }) - return true - } else { - return false - } -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/content/write.js b/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/content/write.js deleted file mode 100644 index e7187abca8788a..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/content/write.js +++ /dev/null @@ -1,206 +0,0 @@ -'use strict' - -const events = require('events') - -const contentPath = require('./path') -const fs = require('fs/promises') -const { moveFile } = require('@npmcli/fs') -const { Minipass } = require('minipass') -const Pipeline = require('minipass-pipeline') -const Flush = require('minipass-flush') -const path = require('path') -const ssri = require('ssri') -const uniqueFilename = require('unique-filename') -const fsm = require('fs-minipass') - -module.exports = write - -// Cache of move operations in process so we don't duplicate -const moveOperations = new Map() - -async function write (cache, data, opts = {}) { - const { algorithms, size, integrity } = opts - - if (typeof size === 'number' && data.length !== size) { - throw sizeError(size, data.length) - } - - const sri = ssri.fromData(data, algorithms ? { algorithms } : {}) - if (integrity && !ssri.checkData(data, integrity, opts)) { - throw checksumError(integrity, sri) - } - - for (const algo in sri) { - const tmp = await makeTmp(cache, opts) - const hash = sri[algo].toString() - try { - await fs.writeFile(tmp.target, data, { flag: 'wx' }) - await moveToDestination(tmp, cache, hash, opts) - } finally { - if (!tmp.moved) { - await fs.rm(tmp.target, { recursive: true, force: true }) - } - } - } - return { integrity: sri, size: data.length } -} - -module.exports.stream = writeStream - -// writes proxied to the 'inputStream' that is passed to the Promise -// 'end' is deferred until content is handled. -class CacacheWriteStream extends Flush { - constructor (cache, opts) { - super() - this.opts = opts - this.cache = cache - this.inputStream = new Minipass() - this.inputStream.on('error', er => this.emit('error', er)) - this.inputStream.on('drain', () => this.emit('drain')) - this.handleContentP = null - } - - write (chunk, encoding, cb) { - if (!this.handleContentP) { - this.handleContentP = handleContent( - this.inputStream, - this.cache, - this.opts - ) - this.handleContentP.catch(error => this.emit('error', error)) - } - return this.inputStream.write(chunk, encoding, cb) - } - - flush (cb) { - this.inputStream.end(() => { - if (!this.handleContentP) { - const e = new Error('Cache input stream was empty') - e.code = 'ENODATA' - // empty streams are probably emitting end right away. - // defer this one tick by rejecting a promise on it. - return Promise.reject(e).catch(cb) - } - // eslint-disable-next-line promise/catch-or-return - this.handleContentP.then( - (res) => { - res.integrity && this.emit('integrity', res.integrity) - // eslint-disable-next-line promise/always-return - res.size !== null && this.emit('size', res.size) - cb() - }, - (er) => cb(er) - ) - }) - } -} - -function writeStream (cache, opts = {}) { - return new CacacheWriteStream(cache, opts) -} - -async function handleContent (inputStream, cache, opts) { - const tmp = await makeTmp(cache, opts) - try { - const res = await pipeToTmp(inputStream, cache, tmp.target, opts) - await moveToDestination( - tmp, - cache, - res.integrity, - opts - ) - return res - } finally { - if (!tmp.moved) { - await fs.rm(tmp.target, { recursive: true, force: true }) - } - } -} - -async function pipeToTmp (inputStream, cache, tmpTarget, opts) { - const outStream = new fsm.WriteStream(tmpTarget, { - flags: 'wx', - }) - - if (opts.integrityEmitter) { - // we need to create these all simultaneously since they can fire in any order - const [integrity, size] = await Promise.all([ - events.once(opts.integrityEmitter, 'integrity').then(res => res[0]), - events.once(opts.integrityEmitter, 'size').then(res => res[0]), - new Pipeline(inputStream, outStream).promise(), - ]) - return { integrity, size } - } - - let integrity - let size - const hashStream = ssri.integrityStream({ - integrity: opts.integrity, - algorithms: opts.algorithms, - size: opts.size, - }) - hashStream.on('integrity', i => { - integrity = i - }) - hashStream.on('size', s => { - size = s - }) - - const pipeline = new Pipeline(inputStream, hashStream, outStream) - await pipeline.promise() - return { integrity, size } -} - -async function makeTmp (cache, opts) { - const tmpTarget = uniqueFilename(path.join(cache, 'tmp'), opts.tmpPrefix) - await fs.mkdir(path.dirname(tmpTarget), { recursive: true }) - return { - target: tmpTarget, - moved: false, - } -} - -async function moveToDestination (tmp, cache, sri) { - const destination = contentPath(cache, sri) - const destDir = path.dirname(destination) - if (moveOperations.has(destination)) { - return moveOperations.get(destination) - } - moveOperations.set( - destination, - fs.mkdir(destDir, { recursive: true }) - .then(async () => { - await moveFile(tmp.target, destination, { overwrite: false }) - tmp.moved = true - return tmp.moved - }) - .catch(err => { - if (!err.message.startsWith('The destination file exists')) { - throw Object.assign(err, { code: 'EEXIST' }) - } - }).finally(() => { - moveOperations.delete(destination) - }) - - ) - return moveOperations.get(destination) -} - -function sizeError (expected, found) { - /* eslint-disable-next-line max-len */ - const err = new Error(`Bad data size: expected inserted data to be ${expected} bytes, but got ${found} instead`) - err.expected = expected - err.found = found - err.code = 'EBADSIZE' - return err -} - -function checksumError (expected, found) { - const err = new Error(`Integrity check failed: - Wanted: ${expected} - Found: ${found}`) - err.code = 'EINTEGRITY' - err.expected = expected - err.found = found - return err -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/entry-index.js b/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/entry-index.js deleted file mode 100644 index 89c28f2f257d48..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/entry-index.js +++ /dev/null @@ -1,336 +0,0 @@ -'use strict' - -const crypto = require('crypto') -const { - appendFile, - mkdir, - readFile, - readdir, - rm, - writeFile, -} = require('fs/promises') -const { Minipass } = require('minipass') -const path = require('path') -const ssri = require('ssri') -const uniqueFilename = require('unique-filename') - -const contentPath = require('./content/path') -const hashToSegments = require('./util/hash-to-segments') -const indexV = require('../package.json')['cache-version'].index -const { moveFile } = require('@npmcli/fs') - -const pMap = require('p-map') -const lsStreamConcurrency = 5 - -module.exports.NotFoundError = class NotFoundError extends Error { - constructor (cache, key) { - super(`No cache entry for ${key} found in ${cache}`) - this.code = 'ENOENT' - this.cache = cache - this.key = key - } -} - -module.exports.compact = compact - -async function compact (cache, key, matchFn, opts = {}) { - const bucket = bucketPath(cache, key) - const entries = await bucketEntries(bucket) - const newEntries = [] - // we loop backwards because the bottom-most result is the newest - // since we add new entries with appendFile - for (let i = entries.length - 1; i >= 0; --i) { - const entry = entries[i] - // a null integrity could mean either a delete was appended - // or the user has simply stored an index that does not map - // to any content. we determine if the user wants to keep the - // null integrity based on the validateEntry function passed in options. - // if the integrity is null and no validateEntry is provided, we break - // as we consider the null integrity to be a deletion of everything - // that came before it. - if (entry.integrity === null && !opts.validateEntry) { - break - } - - // if this entry is valid, and it is either the first entry or - // the newEntries array doesn't already include an entry that - // matches this one based on the provided matchFn, then we add - // it to the beginning of our list - if ((!opts.validateEntry || opts.validateEntry(entry) === true) && - (newEntries.length === 0 || - !newEntries.find((oldEntry) => matchFn(oldEntry, entry)))) { - newEntries.unshift(entry) - } - } - - const newIndex = '\n' + newEntries.map((entry) => { - const stringified = JSON.stringify(entry) - const hash = hashEntry(stringified) - return `${hash}\t${stringified}` - }).join('\n') - - const setup = async () => { - const target = uniqueFilename(path.join(cache, 'tmp'), opts.tmpPrefix) - await mkdir(path.dirname(target), { recursive: true }) - return { - target, - moved: false, - } - } - - const teardown = async (tmp) => { - if (!tmp.moved) { - return rm(tmp.target, { recursive: true, force: true }) - } - } - - const write = async (tmp) => { - await writeFile(tmp.target, newIndex, { flag: 'wx' }) - await mkdir(path.dirname(bucket), { recursive: true }) - // we use @npmcli/move-file directly here because we - // want to overwrite the existing file - await moveFile(tmp.target, bucket) - tmp.moved = true - } - - // write the file atomically - const tmp = await setup() - try { - await write(tmp) - } finally { - await teardown(tmp) - } - - // we reverse the list we generated such that the newest - // entries come first in order to make looping through them easier - // the true passed to formatEntry tells it to keep null - // integrity values, if they made it this far it's because - // validateEntry returned true, and as such we should return it - return newEntries.reverse().map((entry) => formatEntry(cache, entry, true)) -} - -module.exports.insert = insert - -async function insert (cache, key, integrity, opts = {}) { - const { metadata, size, time } = opts - const bucket = bucketPath(cache, key) - const entry = { - key, - integrity: integrity && ssri.stringify(integrity), - time: time || Date.now(), - size, - metadata, - } - try { - await mkdir(path.dirname(bucket), { recursive: true }) - const stringified = JSON.stringify(entry) - // NOTE - Cleverness ahoy! - // - // This works because it's tremendously unlikely for an entry to corrupt - // another while still preserving the string length of the JSON in - // question. So, we just slap the length in there and verify it on read. - // - // Thanks to @isaacs for the whiteboarding session that ended up with - // this. - await appendFile(bucket, `\n${hashEntry(stringified)}\t${stringified}`) - } catch (err) { - if (err.code === 'ENOENT') { - return undefined - } - - throw err - } - return formatEntry(cache, entry) -} - -module.exports.find = find - -async function find (cache, key) { - const bucket = bucketPath(cache, key) - try { - const entries = await bucketEntries(bucket) - return entries.reduce((latest, next) => { - if (next && next.key === key) { - return formatEntry(cache, next) - } else { - return latest - } - }, null) - } catch (err) { - if (err.code === 'ENOENT') { - return null - } else { - throw err - } - } -} - -module.exports.delete = del - -function del (cache, key, opts = {}) { - if (!opts.removeFully) { - return insert(cache, key, null, opts) - } - - const bucket = bucketPath(cache, key) - return rm(bucket, { recursive: true, force: true }) -} - -module.exports.lsStream = lsStream - -function lsStream (cache) { - const indexDir = bucketDir(cache) - const stream = new Minipass({ objectMode: true }) - - // Set all this up to run on the stream and then just return the stream - Promise.resolve().then(async () => { - const buckets = await readdirOrEmpty(indexDir) - await pMap(buckets, async (bucket) => { - const bucketPath = path.join(indexDir, bucket) - const subbuckets = await readdirOrEmpty(bucketPath) - await pMap(subbuckets, async (subbucket) => { - const subbucketPath = path.join(bucketPath, subbucket) - - // "/cachename//./*" - const subbucketEntries = await readdirOrEmpty(subbucketPath) - await pMap(subbucketEntries, async (entry) => { - const entryPath = path.join(subbucketPath, entry) - try { - const entries = await bucketEntries(entryPath) - // using a Map here prevents duplicate keys from showing up - // twice, I guess? - const reduced = entries.reduce((acc, entry) => { - acc.set(entry.key, entry) - return acc - }, new Map()) - // reduced is a map of key => entry - for (const entry of reduced.values()) { - const formatted = formatEntry(cache, entry) - if (formatted) { - stream.write(formatted) - } - } - } catch (err) { - if (err.code === 'ENOENT') { - return undefined - } - throw err - } - }, - { concurrency: lsStreamConcurrency }) - }, - { concurrency: lsStreamConcurrency }) - }, - { concurrency: lsStreamConcurrency }) - stream.end() - return stream - }).catch(err => stream.emit('error', err)) - - return stream -} - -module.exports.ls = ls - -async function ls (cache) { - const entries = await lsStream(cache).collect() - return entries.reduce((acc, xs) => { - acc[xs.key] = xs - return acc - }, {}) -} - -module.exports.bucketEntries = bucketEntries - -async function bucketEntries (bucket, filter) { - const data = await readFile(bucket, 'utf8') - return _bucketEntries(data, filter) -} - -function _bucketEntries (data) { - const entries = [] - data.split('\n').forEach((entry) => { - if (!entry) { - return - } - - const pieces = entry.split('\t') - if (!pieces[1] || hashEntry(pieces[1]) !== pieces[0]) { - // Hash is no good! Corruption or malice? Doesn't matter! - // EJECT EJECT - return - } - let obj - try { - obj = JSON.parse(pieces[1]) - } catch (_) { - // eslint-ignore-next-line no-empty-block - } - // coverage disabled here, no need to test with an entry that parses to something falsey - // istanbul ignore else - if (obj) { - entries.push(obj) - } - }) - return entries -} - -module.exports.bucketDir = bucketDir - -function bucketDir (cache) { - return path.join(cache, `index-v${indexV}`) -} - -module.exports.bucketPath = bucketPath - -function bucketPath (cache, key) { - const hashed = hashKey(key) - return path.join.apply( - path, - [bucketDir(cache)].concat(hashToSegments(hashed)) - ) -} - -module.exports.hashKey = hashKey - -function hashKey (key) { - return hash(key, 'sha256') -} - -module.exports.hashEntry = hashEntry - -function hashEntry (str) { - return hash(str, 'sha1') -} - -function hash (str, digest) { - return crypto - .createHash(digest) - .update(str) - .digest('hex') -} - -function formatEntry (cache, entry, keepAll) { - // Treat null digests as deletions. They'll shadow any previous entries. - if (!entry.integrity && !keepAll) { - return null - } - - return { - key: entry.key, - integrity: entry.integrity, - path: entry.integrity ? contentPath(cache, entry.integrity) : undefined, - size: entry.size, - time: entry.time, - metadata: entry.metadata, - } -} - -function readdirOrEmpty (dir) { - return readdir(dir).catch((err) => { - if (err.code === 'ENOENT' || err.code === 'ENOTDIR') { - return [] - } - - throw err - }) -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/get.js b/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/get.js deleted file mode 100644 index 80ec206c7ecaaa..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/get.js +++ /dev/null @@ -1,170 +0,0 @@ -'use strict' - -const Collect = require('minipass-collect') -const { Minipass } = require('minipass') -const Pipeline = require('minipass-pipeline') - -const index = require('./entry-index') -const memo = require('./memoization') -const read = require('./content/read') - -async function getData (cache, key, opts = {}) { - const { integrity, memoize, size } = opts - const memoized = memo.get(cache, key, opts) - if (memoized && memoize !== false) { - return { - metadata: memoized.entry.metadata, - data: memoized.data, - integrity: memoized.entry.integrity, - size: memoized.entry.size, - } - } - - const entry = await index.find(cache, key, opts) - if (!entry) { - throw new index.NotFoundError(cache, key) - } - const data = await read(cache, entry.integrity, { integrity, size }) - if (memoize) { - memo.put(cache, entry, data, opts) - } - - return { - data, - metadata: entry.metadata, - size: entry.size, - integrity: entry.integrity, - } -} -module.exports = getData - -async function getDataByDigest (cache, key, opts = {}) { - const { integrity, memoize, size } = opts - const memoized = memo.get.byDigest(cache, key, opts) - if (memoized && memoize !== false) { - return memoized - } - - const res = await read(cache, key, { integrity, size }) - if (memoize) { - memo.put.byDigest(cache, key, res, opts) - } - return res -} -module.exports.byDigest = getDataByDigest - -const getMemoizedStream = (memoized) => { - const stream = new Minipass() - stream.on('newListener', function (ev, cb) { - ev === 'metadata' && cb(memoized.entry.metadata) - ev === 'integrity' && cb(memoized.entry.integrity) - ev === 'size' && cb(memoized.entry.size) - }) - stream.end(memoized.data) - return stream -} - -function getStream (cache, key, opts = {}) { - const { memoize, size } = opts - const memoized = memo.get(cache, key, opts) - if (memoized && memoize !== false) { - return getMemoizedStream(memoized) - } - - const stream = new Pipeline() - // Set all this up to run on the stream and then just return the stream - Promise.resolve().then(async () => { - const entry = await index.find(cache, key) - if (!entry) { - throw new index.NotFoundError(cache, key) - } - - stream.emit('metadata', entry.metadata) - stream.emit('integrity', entry.integrity) - stream.emit('size', entry.size) - stream.on('newListener', function (ev, cb) { - ev === 'metadata' && cb(entry.metadata) - ev === 'integrity' && cb(entry.integrity) - ev === 'size' && cb(entry.size) - }) - - const src = read.readStream( - cache, - entry.integrity, - { ...opts, size: typeof size !== 'number' ? entry.size : size } - ) - - if (memoize) { - const memoStream = new Collect.PassThrough() - memoStream.on('collect', data => memo.put(cache, entry, data, opts)) - stream.unshift(memoStream) - } - stream.unshift(src) - return stream - }).catch((err) => stream.emit('error', err)) - - return stream -} - -module.exports.stream = getStream - -function getStreamDigest (cache, integrity, opts = {}) { - const { memoize } = opts - const memoized = memo.get.byDigest(cache, integrity, opts) - if (memoized && memoize !== false) { - const stream = new Minipass() - stream.end(memoized) - return stream - } else { - const stream = read.readStream(cache, integrity, opts) - if (!memoize) { - return stream - } - - const memoStream = new Collect.PassThrough() - memoStream.on('collect', data => memo.put.byDigest( - cache, - integrity, - data, - opts - )) - return new Pipeline(stream, memoStream) - } -} - -module.exports.stream.byDigest = getStreamDigest - -function info (cache, key, opts = {}) { - const { memoize } = opts - const memoized = memo.get(cache, key, opts) - if (memoized && memoize !== false) { - return Promise.resolve(memoized.entry) - } else { - return index.find(cache, key) - } -} -module.exports.info = info - -async function copy (cache, key, dest, opts = {}) { - const entry = await index.find(cache, key, opts) - if (!entry) { - throw new index.NotFoundError(cache, key) - } - await read.copy(cache, entry.integrity, dest, opts) - return { - metadata: entry.metadata, - size: entry.size, - integrity: entry.integrity, - } -} - -module.exports.copy = copy - -async function copyByDigest (cache, key, dest, opts = {}) { - await read.copy(cache, key, dest, opts) - return key -} - -module.exports.copy.byDigest = copyByDigest - -module.exports.hasContent = read.hasContent diff --git a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/index.js b/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/index.js deleted file mode 100644 index c9b0da5f3a271b..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/index.js +++ /dev/null @@ -1,42 +0,0 @@ -'use strict' - -const get = require('./get.js') -const put = require('./put.js') -const rm = require('./rm.js') -const verify = require('./verify.js') -const { clearMemoized } = require('./memoization.js') -const tmp = require('./util/tmp.js') -const index = require('./entry-index.js') - -module.exports.index = {} -module.exports.index.compact = index.compact -module.exports.index.insert = index.insert - -module.exports.ls = index.ls -module.exports.ls.stream = index.lsStream - -module.exports.get = get -module.exports.get.byDigest = get.byDigest -module.exports.get.stream = get.stream -module.exports.get.stream.byDigest = get.stream.byDigest -module.exports.get.copy = get.copy -module.exports.get.copy.byDigest = get.copy.byDigest -module.exports.get.info = get.info -module.exports.get.hasContent = get.hasContent - -module.exports.put = put -module.exports.put.stream = put.stream - -module.exports.rm = rm.entry -module.exports.rm.all = rm.all -module.exports.rm.entry = module.exports.rm -module.exports.rm.content = rm.content - -module.exports.clearMemoized = clearMemoized - -module.exports.tmp = {} -module.exports.tmp.mkdir = tmp.mkdir -module.exports.tmp.withTmp = tmp.withTmp - -module.exports.verify = verify -module.exports.verify.lastRun = verify.lastRun diff --git a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/memoization.js b/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/memoization.js deleted file mode 100644 index 2ecc60912e4563..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/memoization.js +++ /dev/null @@ -1,72 +0,0 @@ -'use strict' - -const { LRUCache } = require('lru-cache') - -const MEMOIZED = new LRUCache({ - max: 500, - maxSize: 50 * 1024 * 1024, // 50MB - ttl: 3 * 60 * 1000, // 3 minutes - sizeCalculation: (entry, key) => key.startsWith('key:') ? entry.data.length : entry.length, -}) - -module.exports.clearMemoized = clearMemoized - -function clearMemoized () { - const old = {} - MEMOIZED.forEach((v, k) => { - old[k] = v - }) - MEMOIZED.clear() - return old -} - -module.exports.put = put - -function put (cache, entry, data, opts) { - pickMem(opts).set(`key:${cache}:${entry.key}`, { entry, data }) - putDigest(cache, entry.integrity, data, opts) -} - -module.exports.put.byDigest = putDigest - -function putDigest (cache, integrity, data, opts) { - pickMem(opts).set(`digest:${cache}:${integrity}`, data) -} - -module.exports.get = get - -function get (cache, key, opts) { - return pickMem(opts).get(`key:${cache}:${key}`) -} - -module.exports.get.byDigest = getDigest - -function getDigest (cache, integrity, opts) { - return pickMem(opts).get(`digest:${cache}:${integrity}`) -} - -class ObjProxy { - constructor (obj) { - this.obj = obj - } - - get (key) { - return this.obj[key] - } - - set (key, val) { - this.obj[key] = val - } -} - -function pickMem (opts) { - if (!opts || !opts.memoize) { - return MEMOIZED - } else if (opts.memoize.get && opts.memoize.set) { - return opts.memoize - } else if (typeof opts.memoize === 'object') { - return new ObjProxy(opts.memoize) - } else { - return MEMOIZED - } -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/put.js b/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/put.js deleted file mode 100644 index 9fc932d5f6dec5..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/put.js +++ /dev/null @@ -1,80 +0,0 @@ -'use strict' - -const index = require('./entry-index') -const memo = require('./memoization') -const write = require('./content/write') -const Flush = require('minipass-flush') -const { PassThrough } = require('minipass-collect') -const Pipeline = require('minipass-pipeline') - -const putOpts = (opts) => ({ - algorithms: ['sha512'], - ...opts, -}) - -module.exports = putData - -async function putData (cache, key, data, opts = {}) { - const { memoize } = opts - opts = putOpts(opts) - const res = await write(cache, data, opts) - const entry = await index.insert(cache, key, res.integrity, { ...opts, size: res.size }) - if (memoize) { - memo.put(cache, entry, data, opts) - } - - return res.integrity -} - -module.exports.stream = putStream - -function putStream (cache, key, opts = {}) { - const { memoize } = opts - opts = putOpts(opts) - let integrity - let size - let error - - let memoData - const pipeline = new Pipeline() - // first item in the pipeline is the memoizer, because we need - // that to end first and get the collected data. - if (memoize) { - const memoizer = new PassThrough().on('collect', data => { - memoData = data - }) - pipeline.push(memoizer) - } - - // contentStream is a write-only, not a passthrough - // no data comes out of it. - const contentStream = write.stream(cache, opts) - .on('integrity', (int) => { - integrity = int - }) - .on('size', (s) => { - size = s - }) - .on('error', (err) => { - error = err - }) - - pipeline.push(contentStream) - - // last but not least, we write the index and emit hash and size, - // and memoize if we're doing that - pipeline.push(new Flush({ - async flush () { - if (!error) { - const entry = await index.insert(cache, key, integrity, { ...opts, size }) - if (memoize && memoData) { - memo.put(cache, entry, memoData, opts) - } - pipeline.emit('integrity', integrity) - pipeline.emit('size', size) - } - }, - })) - - return pipeline -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/rm.js b/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/rm.js deleted file mode 100644 index a94760c7cf2430..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/rm.js +++ /dev/null @@ -1,31 +0,0 @@ -'use strict' - -const { rm } = require('fs/promises') -const glob = require('./util/glob.js') -const index = require('./entry-index') -const memo = require('./memoization') -const path = require('path') -const rmContent = require('./content/rm') - -module.exports = entry -module.exports.entry = entry - -function entry (cache, key, opts) { - memo.clearMemoized() - return index.delete(cache, key, opts) -} - -module.exports.content = content - -function content (cache, integrity) { - memo.clearMemoized() - return rmContent(cache, integrity) -} - -module.exports.all = all - -async function all (cache) { - memo.clearMemoized() - const paths = await glob(path.join(cache, '*(content-*|index-*)'), { silent: true, nosort: true }) - return Promise.all(paths.map((p) => rm(p, { recursive: true, force: true }))) -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/util/glob.js b/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/util/glob.js deleted file mode 100644 index 8500c1c16a429f..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/util/glob.js +++ /dev/null @@ -1,7 +0,0 @@ -'use strict' - -const { glob } = require('glob') -const path = require('path') - -const globify = (pattern) => pattern.split(path.win32.sep).join(path.posix.sep) -module.exports = (path, options) => glob(globify(path), options) diff --git a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/util/hash-to-segments.js b/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/util/hash-to-segments.js deleted file mode 100644 index 445599b5038088..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/util/hash-to-segments.js +++ /dev/null @@ -1,7 +0,0 @@ -'use strict' - -module.exports = hashToSegments - -function hashToSegments (hash) { - return [hash.slice(0, 2), hash.slice(2, 4), hash.slice(4)] -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/util/tmp.js b/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/util/tmp.js deleted file mode 100644 index 0bf5302136ebeb..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/util/tmp.js +++ /dev/null @@ -1,26 +0,0 @@ -'use strict' - -const { withTempDir } = require('@npmcli/fs') -const fs = require('fs/promises') -const path = require('path') - -module.exports.mkdir = mktmpdir - -async function mktmpdir (cache, opts = {}) { - const { tmpPrefix } = opts - const tmpDir = path.join(cache, 'tmp') - await fs.mkdir(tmpDir, { recursive: true, owner: 'inherit' }) - // do not use path.join(), it drops the trailing / if tmpPrefix is unset - const target = `${tmpDir}${path.sep}${tmpPrefix || ''}` - return fs.mkdtemp(target, { owner: 'inherit' }) -} - -module.exports.withTmp = withTmp - -function withTmp (cache, opts, cb) { - if (!cb) { - cb = opts - opts = {} - } - return withTempDir(path.join(cache, 'tmp'), cb, opts) -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/verify.js b/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/verify.js deleted file mode 100644 index d7423da1295b68..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/cacache/lib/verify.js +++ /dev/null @@ -1,257 +0,0 @@ -'use strict' - -const { - mkdir, - readFile, - rm, - stat, - truncate, - writeFile, -} = require('fs/promises') -const pMap = require('p-map') -const contentPath = require('./content/path') -const fsm = require('fs-minipass') -const glob = require('./util/glob.js') -const index = require('./entry-index') -const path = require('path') -const ssri = require('ssri') - -const hasOwnProperty = (obj, key) => - Object.prototype.hasOwnProperty.call(obj, key) - -const verifyOpts = (opts) => ({ - concurrency: 20, - log: { silly () {} }, - ...opts, -}) - -module.exports = verify - -async function verify (cache, opts) { - opts = verifyOpts(opts) - opts.log.silly('verify', 'verifying cache at', cache) - - const steps = [ - markStartTime, - fixPerms, - garbageCollect, - rebuildIndex, - cleanTmp, - writeVerifile, - markEndTime, - ] - - const stats = {} - for (const step of steps) { - const label = step.name - const start = new Date() - const s = await step(cache, opts) - if (s) { - Object.keys(s).forEach((k) => { - stats[k] = s[k] - }) - } - const end = new Date() - if (!stats.runTime) { - stats.runTime = {} - } - stats.runTime[label] = end - start - } - stats.runTime.total = stats.endTime - stats.startTime - opts.log.silly( - 'verify', - 'verification finished for', - cache, - 'in', - `${stats.runTime.total}ms` - ) - return stats -} - -async function markStartTime () { - return { startTime: new Date() } -} - -async function markEndTime () { - return { endTime: new Date() } -} - -async function fixPerms (cache, opts) { - opts.log.silly('verify', 'fixing cache permissions') - await mkdir(cache, { recursive: true }) - return null -} - -// Implements a naive mark-and-sweep tracing garbage collector. -// -// The algorithm is basically as follows: -// 1. Read (and filter) all index entries ("pointers") -// 2. Mark each integrity value as "live" -// 3. Read entire filesystem tree in `content-vX/` dir -// 4. If content is live, verify its checksum and delete it if it fails -// 5. If content is not marked as live, rm it. -// -async function garbageCollect (cache, opts) { - opts.log.silly('verify', 'garbage collecting content') - const indexStream = index.lsStream(cache) - const liveContent = new Set() - indexStream.on('data', (entry) => { - if (opts.filter && !opts.filter(entry)) { - return - } - - // integrity is stringified, re-parse it so we can get each hash - const integrity = ssri.parse(entry.integrity) - for (const algo in integrity) { - liveContent.add(integrity[algo].toString()) - } - }) - await new Promise((resolve, reject) => { - indexStream.on('end', resolve).on('error', reject) - }) - const contentDir = contentPath.contentDir(cache) - const files = await glob(path.join(contentDir, '**'), { - follow: false, - nodir: true, - nosort: true, - }) - const stats = { - verifiedContent: 0, - reclaimedCount: 0, - reclaimedSize: 0, - badContentCount: 0, - keptSize: 0, - } - await pMap( - files, - async (f) => { - const split = f.split(/[/\\]/) - const digest = split.slice(split.length - 3).join('') - const algo = split[split.length - 4] - const integrity = ssri.fromHex(digest, algo) - if (liveContent.has(integrity.toString())) { - const info = await verifyContent(f, integrity) - if (!info.valid) { - stats.reclaimedCount++ - stats.badContentCount++ - stats.reclaimedSize += info.size - } else { - stats.verifiedContent++ - stats.keptSize += info.size - } - } else { - // No entries refer to this content. We can delete. - stats.reclaimedCount++ - const s = await stat(f) - await rm(f, { recursive: true, force: true }) - stats.reclaimedSize += s.size - } - return stats - }, - { concurrency: opts.concurrency } - ) - return stats -} - -async function verifyContent (filepath, sri) { - const contentInfo = {} - try { - const { size } = await stat(filepath) - contentInfo.size = size - contentInfo.valid = true - await ssri.checkStream(new fsm.ReadStream(filepath), sri) - } catch (err) { - if (err.code === 'ENOENT') { - return { size: 0, valid: false } - } - if (err.code !== 'EINTEGRITY') { - throw err - } - - await rm(filepath, { recursive: true, force: true }) - contentInfo.valid = false - } - return contentInfo -} - -async function rebuildIndex (cache, opts) { - opts.log.silly('verify', 'rebuilding index') - const entries = await index.ls(cache) - const stats = { - missingContent: 0, - rejectedEntries: 0, - totalEntries: 0, - } - const buckets = {} - for (const k in entries) { - /* istanbul ignore else */ - if (hasOwnProperty(entries, k)) { - const hashed = index.hashKey(k) - const entry = entries[k] - const excluded = opts.filter && !opts.filter(entry) - excluded && stats.rejectedEntries++ - if (buckets[hashed] && !excluded) { - buckets[hashed].push(entry) - } else if (buckets[hashed] && excluded) { - // skip - } else if (excluded) { - buckets[hashed] = [] - buckets[hashed]._path = index.bucketPath(cache, k) - } else { - buckets[hashed] = [entry] - buckets[hashed]._path = index.bucketPath(cache, k) - } - } - } - await pMap( - Object.keys(buckets), - (key) => { - return rebuildBucket(cache, buckets[key], stats, opts) - }, - { concurrency: opts.concurrency } - ) - return stats -} - -async function rebuildBucket (cache, bucket, stats) { - await truncate(bucket._path) - // This needs to be serialized because cacache explicitly - // lets very racy bucket conflicts clobber each other. - for (const entry of bucket) { - const content = contentPath(cache, entry.integrity) - try { - await stat(content) - await index.insert(cache, entry.key, entry.integrity, { - metadata: entry.metadata, - size: entry.size, - time: entry.time, - }) - stats.totalEntries++ - } catch (err) { - if (err.code === 'ENOENT') { - stats.rejectedEntries++ - stats.missingContent++ - } else { - throw err - } - } - } -} - -function cleanTmp (cache, opts) { - opts.log.silly('verify', 'cleaning tmp directory') - return rm(path.join(cache, 'tmp'), { recursive: true, force: true }) -} - -async function writeVerifile (cache, opts) { - const verifile = path.join(cache, '_lastverified') - opts.log.silly('verify', 'writing verifile to ' + verifile) - return writeFile(verifile, `${Date.now()}`) -} - -module.exports.lastRun = lastRun - -async function lastRun (cache) { - const data = await readFile(path.join(cache, '_lastverified'), { encoding: 'utf8' }) - return new Date(+data) -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/cacache/package.json b/deps/npm/node_modules/tuf-js/node_modules/cacache/package.json deleted file mode 100644 index 6e6219158ed759..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/cacache/package.json +++ /dev/null @@ -1,82 +0,0 @@ -{ - "name": "cacache", - "version": "18.0.4", - "cache-version": { - "content": "2", - "index": "5" - }, - "description": "Fast, fault-tolerant, cross-platform, disk-based, data-agnostic, content-addressable cache.", - "main": "lib/index.js", - "files": [ - "bin/", - "lib/" - ], - "scripts": { - "test": "tap", - "snap": "tap", - "coverage": "tap", - "test-docker": "docker run -it --rm --name pacotest -v \"$PWD\":/tmp -w /tmp node:latest npm test", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "npmclilint": "npmcli-lint", - "lintfix": "npm run lint -- --fix", - "postsnap": "npm run lintfix --", - "postlint": "template-oss-check", - "posttest": "npm run lint", - "template-oss-apply": "template-oss-apply --force" - }, - "repository": { - "type": "git", - "url": "git+https://github.com/npm/cacache.git" - }, - "keywords": [ - "cache", - "caching", - "content-addressable", - "sri", - "sri hash", - "subresource integrity", - "cache", - "storage", - "store", - "file store", - "filesystem", - "disk cache", - "disk storage" - ], - "license": "ISC", - "dependencies": { - "@npmcli/fs": "^3.1.0", - "fs-minipass": "^3.0.0", - "glob": "^10.2.2", - "lru-cache": "^10.0.1", - "minipass": "^7.0.3", - "minipass-collect": "^2.0.1", - "minipass-flush": "^1.0.5", - "minipass-pipeline": "^1.2.4", - "p-map": "^4.0.0", - "ssri": "^10.0.0", - "tar": "^6.1.11", - "unique-filename": "^3.0.0" - }, - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.22.0", - "tap": "^16.0.0" - }, - "engines": { - "node": "^16.14.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "windowsCI": false, - "version": "4.22.0", - "publish": "true" - }, - "author": "GitHub Inc.", - "tap": { - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - } -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/LICENSE b/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/LICENSE deleted file mode 100644 index 1808eb2844231c..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/LICENSE +++ /dev/null @@ -1,16 +0,0 @@ -ISC License - -Copyright 2017-2022 (c) npm, Inc. - -Permission to use, copy, modify, and/or distribute this software for -any purpose with or without fee is hereby granted, provided that the -above copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE COPYRIGHT HOLDER DISCLAIMS -ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE -COPYRIGHT HOLDER BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR -CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE -USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/cache/entry.js b/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/cache/entry.js deleted file mode 100644 index bfcfacbcc95e18..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/cache/entry.js +++ /dev/null @@ -1,471 +0,0 @@ -const { Request, Response } = require('minipass-fetch') -const { Minipass } = require('minipass') -const MinipassFlush = require('minipass-flush') -const cacache = require('cacache') -const url = require('url') - -const CachingMinipassPipeline = require('../pipeline.js') -const CachePolicy = require('./policy.js') -const cacheKey = require('./key.js') -const remote = require('../remote.js') - -const hasOwnProperty = (obj, prop) => Object.prototype.hasOwnProperty.call(obj, prop) - -// allow list for request headers that will be written to the cache index -// note: we will also store any request headers -// that are named in a response's vary header -const KEEP_REQUEST_HEADERS = [ - 'accept-charset', - 'accept-encoding', - 'accept-language', - 'accept', - 'cache-control', -] - -// allow list for response headers that will be written to the cache index -// note: we must not store the real response's age header, or when we load -// a cache policy based on the metadata it will think the cached response -// is always stale -const KEEP_RESPONSE_HEADERS = [ - 'cache-control', - 'content-encoding', - 'content-language', - 'content-type', - 'date', - 'etag', - 'expires', - 'last-modified', - 'link', - 'location', - 'pragma', - 'vary', -] - -// return an object containing all metadata to be written to the index -const getMetadata = (request, response, options) => { - const metadata = { - time: Date.now(), - url: request.url, - reqHeaders: {}, - resHeaders: {}, - - // options on which we must match the request and vary the response - options: { - compress: options.compress != null ? options.compress : request.compress, - }, - } - - // only save the status if it's not a 200 or 304 - if (response.status !== 200 && response.status !== 304) { - metadata.status = response.status - } - - for (const name of KEEP_REQUEST_HEADERS) { - if (request.headers.has(name)) { - metadata.reqHeaders[name] = request.headers.get(name) - } - } - - // if the request's host header differs from the host in the url - // we need to keep it, otherwise it's just noise and we ignore it - const host = request.headers.get('host') - const parsedUrl = new url.URL(request.url) - if (host && parsedUrl.host !== host) { - metadata.reqHeaders.host = host - } - - // if the response has a vary header, make sure - // we store the relevant request headers too - if (response.headers.has('vary')) { - const vary = response.headers.get('vary') - // a vary of "*" means every header causes a different response. - // in that scenario, we do not include any additional headers - // as the freshness check will always fail anyway and we don't - // want to bloat the cache indexes - if (vary !== '*') { - // copy any other request headers that will vary the response - const varyHeaders = vary.trim().toLowerCase().split(/\s*,\s*/) - for (const name of varyHeaders) { - if (request.headers.has(name)) { - metadata.reqHeaders[name] = request.headers.get(name) - } - } - } - } - - for (const name of KEEP_RESPONSE_HEADERS) { - if (response.headers.has(name)) { - metadata.resHeaders[name] = response.headers.get(name) - } - } - - for (const name of options.cacheAdditionalHeaders) { - if (response.headers.has(name)) { - metadata.resHeaders[name] = response.headers.get(name) - } - } - - return metadata -} - -// symbols used to hide objects that may be lazily evaluated in a getter -const _request = Symbol('request') -const _response = Symbol('response') -const _policy = Symbol('policy') - -class CacheEntry { - constructor ({ entry, request, response, options }) { - if (entry) { - this.key = entry.key - this.entry = entry - // previous versions of this module didn't write an explicit timestamp in - // the metadata, so fall back to the entry's timestamp. we can't use the - // entry timestamp to determine staleness because cacache will update it - // when it verifies its data - this.entry.metadata.time = this.entry.metadata.time || this.entry.time - } else { - this.key = cacheKey(request) - } - - this.options = options - - // these properties are behind getters that lazily evaluate - this[_request] = request - this[_response] = response - this[_policy] = null - } - - // returns a CacheEntry instance that satisfies the given request - // or undefined if no existing entry satisfies - static async find (request, options) { - try { - // compacts the index and returns an array of unique entries - var matches = await cacache.index.compact(options.cachePath, cacheKey(request), (A, B) => { - const entryA = new CacheEntry({ entry: A, options }) - const entryB = new CacheEntry({ entry: B, options }) - return entryA.policy.satisfies(entryB.request) - }, { - validateEntry: (entry) => { - // clean out entries with a buggy content-encoding value - if (entry.metadata && - entry.metadata.resHeaders && - entry.metadata.resHeaders['content-encoding'] === null) { - return false - } - - // if an integrity is null, it needs to have a status specified - if (entry.integrity === null) { - return !!(entry.metadata && entry.metadata.status) - } - - return true - }, - }) - } catch (err) { - // if the compact request fails, ignore the error and return - return - } - - // a cache mode of 'reload' means to behave as though we have no cache - // on the way to the network. return undefined to allow cacheFetch to - // create a brand new request no matter what. - if (options.cache === 'reload') { - return - } - - // find the specific entry that satisfies the request - let match - for (const entry of matches) { - const _entry = new CacheEntry({ - entry, - options, - }) - - if (_entry.policy.satisfies(request)) { - match = _entry - break - } - } - - return match - } - - // if the user made a PUT/POST/PATCH then we invalidate our - // cache for the same url by deleting the index entirely - static async invalidate (request, options) { - const key = cacheKey(request) - try { - await cacache.rm.entry(options.cachePath, key, { removeFully: true }) - } catch (err) { - // ignore errors - } - } - - get request () { - if (!this[_request]) { - this[_request] = new Request(this.entry.metadata.url, { - method: 'GET', - headers: this.entry.metadata.reqHeaders, - ...this.entry.metadata.options, - }) - } - - return this[_request] - } - - get response () { - if (!this[_response]) { - this[_response] = new Response(null, { - url: this.entry.metadata.url, - counter: this.options.counter, - status: this.entry.metadata.status || 200, - headers: { - ...this.entry.metadata.resHeaders, - 'content-length': this.entry.size, - }, - }) - } - - return this[_response] - } - - get policy () { - if (!this[_policy]) { - this[_policy] = new CachePolicy({ - entry: this.entry, - request: this.request, - response: this.response, - options: this.options, - }) - } - - return this[_policy] - } - - // wraps the response in a pipeline that stores the data - // in the cache while the user consumes it - async store (status) { - // if we got a status other than 200, 301, or 308, - // or the CachePolicy forbid storage, append the - // cache status header and return it untouched - if ( - this.request.method !== 'GET' || - ![200, 301, 308].includes(this.response.status) || - !this.policy.storable() - ) { - this.response.headers.set('x-local-cache-status', 'skip') - return this.response - } - - const size = this.response.headers.get('content-length') - const cacheOpts = { - algorithms: this.options.algorithms, - metadata: getMetadata(this.request, this.response, this.options), - size, - integrity: this.options.integrity, - integrityEmitter: this.response.body.hasIntegrityEmitter && this.response.body, - } - - let body = null - // we only set a body if the status is a 200, redirects are - // stored as metadata only - if (this.response.status === 200) { - let cacheWriteResolve, cacheWriteReject - const cacheWritePromise = new Promise((resolve, reject) => { - cacheWriteResolve = resolve - cacheWriteReject = reject - }).catch((err) => { - body.emit('error', err) - }) - - body = new CachingMinipassPipeline({ events: ['integrity', 'size'] }, new MinipassFlush({ - flush () { - return cacheWritePromise - }, - })) - // this is always true since if we aren't reusing the one from the remote fetch, we - // are using the one from cacache - body.hasIntegrityEmitter = true - - const onResume = () => { - const tee = new Minipass() - const cacheStream = cacache.put.stream(this.options.cachePath, this.key, cacheOpts) - // re-emit the integrity and size events on our new response body so they can be reused - cacheStream.on('integrity', i => body.emit('integrity', i)) - cacheStream.on('size', s => body.emit('size', s)) - // stick a flag on here so downstream users will know if they can expect integrity events - tee.pipe(cacheStream) - // TODO if the cache write fails, log a warning but return the response anyway - // eslint-disable-next-line promise/catch-or-return - cacheStream.promise().then(cacheWriteResolve, cacheWriteReject) - body.unshift(tee) - body.unshift(this.response.body) - } - - body.once('resume', onResume) - body.once('end', () => body.removeListener('resume', onResume)) - } else { - await cacache.index.insert(this.options.cachePath, this.key, null, cacheOpts) - } - - // note: we do not set the x-local-cache-hash header because we do not know - // the hash value until after the write to the cache completes, which doesn't - // happen until after the response has been sent and it's too late to write - // the header anyway - this.response.headers.set('x-local-cache', encodeURIComponent(this.options.cachePath)) - this.response.headers.set('x-local-cache-key', encodeURIComponent(this.key)) - this.response.headers.set('x-local-cache-mode', 'stream') - this.response.headers.set('x-local-cache-status', status) - this.response.headers.set('x-local-cache-time', new Date().toISOString()) - const newResponse = new Response(body, { - url: this.response.url, - status: this.response.status, - headers: this.response.headers, - counter: this.options.counter, - }) - return newResponse - } - - // use the cached data to create a response and return it - async respond (method, options, status) { - let response - if (method === 'HEAD' || [301, 308].includes(this.response.status)) { - // if the request is a HEAD, or the response is a redirect, - // then the metadata in the entry already includes everything - // we need to build a response - response = this.response - } else { - // we're responding with a full cached response, so create a body - // that reads from cacache and attach it to a new Response - const body = new Minipass() - const headers = { ...this.policy.responseHeaders() } - - const onResume = () => { - const cacheStream = cacache.get.stream.byDigest( - this.options.cachePath, this.entry.integrity, { memoize: this.options.memoize } - ) - cacheStream.on('error', async (err) => { - cacheStream.pause() - if (err.code === 'EINTEGRITY') { - await cacache.rm.content( - this.options.cachePath, this.entry.integrity, { memoize: this.options.memoize } - ) - } - if (err.code === 'ENOENT' || err.code === 'EINTEGRITY') { - await CacheEntry.invalidate(this.request, this.options) - } - body.emit('error', err) - cacheStream.resume() - }) - // emit the integrity and size events based on our metadata so we're consistent - body.emit('integrity', this.entry.integrity) - body.emit('size', Number(headers['content-length'])) - cacheStream.pipe(body) - } - - body.once('resume', onResume) - body.once('end', () => body.removeListener('resume', onResume)) - response = new Response(body, { - url: this.entry.metadata.url, - counter: options.counter, - status: 200, - headers, - }) - } - - response.headers.set('x-local-cache', encodeURIComponent(this.options.cachePath)) - response.headers.set('x-local-cache-hash', encodeURIComponent(this.entry.integrity)) - response.headers.set('x-local-cache-key', encodeURIComponent(this.key)) - response.headers.set('x-local-cache-mode', 'stream') - response.headers.set('x-local-cache-status', status) - response.headers.set('x-local-cache-time', new Date(this.entry.metadata.time).toUTCString()) - return response - } - - // use the provided request along with this cache entry to - // revalidate the stored response. returns a response, either - // from the cache or from the update - async revalidate (request, options) { - const revalidateRequest = new Request(request, { - headers: this.policy.revalidationHeaders(request), - }) - - try { - // NOTE: be sure to remove the headers property from the - // user supplied options, since we have already defined - // them on the new request object. if they're still in the - // options then those will overwrite the ones from the policy - var response = await remote(revalidateRequest, { - ...options, - headers: undefined, - }) - } catch (err) { - // if the network fetch fails, return the stale - // cached response unless it has a cache-control - // of 'must-revalidate' - if (!this.policy.mustRevalidate) { - return this.respond(request.method, options, 'stale') - } - - throw err - } - - if (this.policy.revalidated(revalidateRequest, response)) { - // we got a 304, write a new index to the cache and respond from cache - const metadata = getMetadata(request, response, options) - // 304 responses do not include headers that are specific to the response data - // since they do not include a body, so we copy values for headers that were - // in the old cache entry to the new one, if the new metadata does not already - // include that header - for (const name of KEEP_RESPONSE_HEADERS) { - if ( - !hasOwnProperty(metadata.resHeaders, name) && - hasOwnProperty(this.entry.metadata.resHeaders, name) - ) { - metadata.resHeaders[name] = this.entry.metadata.resHeaders[name] - } - } - - for (const name of options.cacheAdditionalHeaders) { - const inMeta = hasOwnProperty(metadata.resHeaders, name) - const inEntry = hasOwnProperty(this.entry.metadata.resHeaders, name) - const inPolicy = hasOwnProperty(this.policy.response.headers, name) - - // if the header is in the existing entry, but it is not in the metadata - // then we need to write it to the metadata as this will refresh the on-disk cache - if (!inMeta && inEntry) { - metadata.resHeaders[name] = this.entry.metadata.resHeaders[name] - } - // if the header is in the metadata, but not in the policy, then we need to set - // it in the policy so that it's included in the immediate response. future - // responses will load a new cache entry, so we don't need to change that - if (!inPolicy && inMeta) { - this.policy.response.headers[name] = metadata.resHeaders[name] - } - } - - try { - await cacache.index.insert(options.cachePath, this.key, this.entry.integrity, { - size: this.entry.size, - metadata, - }) - } catch (err) { - // if updating the cache index fails, we ignore it and - // respond anyway - } - return this.respond(request.method, options, 'revalidated') - } - - // if we got a modified response, create a new entry based on it - const newEntry = new CacheEntry({ - request, - response, - options, - }) - - // respond with the new entry while writing it to the cache - return newEntry.store('updated') - } -} - -module.exports = CacheEntry diff --git a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/cache/errors.js b/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/cache/errors.js deleted file mode 100644 index 67a66573bebe66..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/cache/errors.js +++ /dev/null @@ -1,11 +0,0 @@ -class NotCachedError extends Error { - constructor (url) { - /* eslint-disable-next-line max-len */ - super(`request to ${url} failed: cache mode is 'only-if-cached' but no cached response is available.`) - this.code = 'ENOTCACHED' - } -} - -module.exports = { - NotCachedError, -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/cache/index.js b/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/cache/index.js deleted file mode 100644 index 0de49d23fb9336..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/cache/index.js +++ /dev/null @@ -1,49 +0,0 @@ -const { NotCachedError } = require('./errors.js') -const CacheEntry = require('./entry.js') -const remote = require('../remote.js') - -// do whatever is necessary to get a Response and return it -const cacheFetch = async (request, options) => { - // try to find a cached entry that satisfies this request - const entry = await CacheEntry.find(request, options) - if (!entry) { - // no cached result, if the cache mode is 'only-if-cached' that's a failure - if (options.cache === 'only-if-cached') { - throw new NotCachedError(request.url) - } - - // otherwise, we make a request, store it and return it - const response = await remote(request, options) - const newEntry = new CacheEntry({ request, response, options }) - return newEntry.store('miss') - } - - // we have a cached response that satisfies this request, however if the cache - // mode is 'no-cache' then we send the revalidation request no matter what - if (options.cache === 'no-cache') { - return entry.revalidate(request, options) - } - - // if the cached entry is not stale, or if the cache mode is 'force-cache' or - // 'only-if-cached' we can respond with the cached entry. set the status - // based on the result of needsRevalidation and respond - const _needsRevalidation = entry.policy.needsRevalidation(request) - if (options.cache === 'force-cache' || - options.cache === 'only-if-cached' || - !_needsRevalidation) { - return entry.respond(request.method, options, _needsRevalidation ? 'stale' : 'hit') - } - - // if we got here, the cache entry is stale so revalidate it - return entry.revalidate(request, options) -} - -cacheFetch.invalidate = async (request, options) => { - if (!options.cachePath) { - return - } - - return CacheEntry.invalidate(request, options) -} - -module.exports = cacheFetch diff --git a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/cache/key.js b/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/cache/key.js deleted file mode 100644 index f7684d562b7fae..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/cache/key.js +++ /dev/null @@ -1,17 +0,0 @@ -const { URL, format } = require('url') - -// options passed to url.format() when generating a key -const formatOptions = { - auth: false, - fragment: false, - search: true, - unicode: false, -} - -// returns a string to be used as the cache key for the Request -const cacheKey = (request) => { - const parsed = new URL(request.url) - return `make-fetch-happen:request-cache:${format(parsed, formatOptions)}` -} - -module.exports = cacheKey diff --git a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/cache/policy.js b/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/cache/policy.js deleted file mode 100644 index ada3c8600dae92..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/cache/policy.js +++ /dev/null @@ -1,161 +0,0 @@ -const CacheSemantics = require('http-cache-semantics') -const Negotiator = require('negotiator') -const ssri = require('ssri') - -// options passed to http-cache-semantics constructor -const policyOptions = { - shared: false, - ignoreCargoCult: true, -} - -// a fake empty response, used when only testing the -// request for storability -const emptyResponse = { status: 200, headers: {} } - -// returns a plain object representation of the Request -const requestObject = (request) => { - const _obj = { - method: request.method, - url: request.url, - headers: {}, - compress: request.compress, - } - - request.headers.forEach((value, key) => { - _obj.headers[key] = value - }) - - return _obj -} - -// returns a plain object representation of the Response -const responseObject = (response) => { - const _obj = { - status: response.status, - headers: {}, - } - - response.headers.forEach((value, key) => { - _obj.headers[key] = value - }) - - return _obj -} - -class CachePolicy { - constructor ({ entry, request, response, options }) { - this.entry = entry - this.request = requestObject(request) - this.response = responseObject(response) - this.options = options - this.policy = new CacheSemantics(this.request, this.response, policyOptions) - - if (this.entry) { - // if we have an entry, copy the timestamp to the _responseTime - // this is necessary because the CacheSemantics constructor forces - // the value to Date.now() which means a policy created from a - // cache entry is likely to always identify itself as stale - this.policy._responseTime = this.entry.metadata.time - } - } - - // static method to quickly determine if a request alone is storable - static storable (request, options) { - // no cachePath means no caching - if (!options.cachePath) { - return false - } - - // user explicitly asked not to cache - if (options.cache === 'no-store') { - return false - } - - // we only cache GET and HEAD requests - if (!['GET', 'HEAD'].includes(request.method)) { - return false - } - - // otherwise, let http-cache-semantics make the decision - // based on the request's headers - const policy = new CacheSemantics(requestObject(request), emptyResponse, policyOptions) - return policy.storable() - } - - // returns true if the policy satisfies the request - satisfies (request) { - const _req = requestObject(request) - if (this.request.headers.host !== _req.headers.host) { - return false - } - - if (this.request.compress !== _req.compress) { - return false - } - - const negotiatorA = new Negotiator(this.request) - const negotiatorB = new Negotiator(_req) - - if (JSON.stringify(negotiatorA.mediaTypes()) !== JSON.stringify(negotiatorB.mediaTypes())) { - return false - } - - if (JSON.stringify(negotiatorA.languages()) !== JSON.stringify(negotiatorB.languages())) { - return false - } - - if (JSON.stringify(negotiatorA.encodings()) !== JSON.stringify(negotiatorB.encodings())) { - return false - } - - if (this.options.integrity) { - return ssri.parse(this.options.integrity).match(this.entry.integrity) - } - - return true - } - - // returns true if the request and response allow caching - storable () { - return this.policy.storable() - } - - // NOTE: this is a hack to avoid parsing the cache-control - // header ourselves, it returns true if the response's - // cache-control contains must-revalidate - get mustRevalidate () { - return !!this.policy._rescc['must-revalidate'] - } - - // returns true if the cached response requires revalidation - // for the given request - needsRevalidation (request) { - const _req = requestObject(request) - // force method to GET because we only cache GETs - // but can serve a HEAD from a cached GET - _req.method = 'GET' - return !this.policy.satisfiesWithoutRevalidation(_req) - } - - responseHeaders () { - return this.policy.responseHeaders() - } - - // returns a new object containing the appropriate headers - // to send a revalidation request - revalidationHeaders (request) { - const _req = requestObject(request) - return this.policy.revalidationHeaders(_req) - } - - // returns true if the request/response was revalidated - // successfully. returns false if a new response was received - revalidated (request, response) { - const _req = requestObject(request) - const _res = responseObject(response) - const policy = this.policy.revalidatedPolicy(_req, _res) - return !policy.modified - } -} - -module.exports = CachePolicy diff --git a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/fetch.js b/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/fetch.js deleted file mode 100644 index 233ba67e165502..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/fetch.js +++ /dev/null @@ -1,118 +0,0 @@ -'use strict' - -const { FetchError, Request, isRedirect } = require('minipass-fetch') -const url = require('url') - -const CachePolicy = require('./cache/policy.js') -const cache = require('./cache/index.js') -const remote = require('./remote.js') - -// given a Request, a Response and user options -// return true if the response is a redirect that -// can be followed. we throw errors that will result -// in the fetch being rejected if the redirect is -// possible but invalid for some reason -const canFollowRedirect = (request, response, options) => { - if (!isRedirect(response.status)) { - return false - } - - if (options.redirect === 'manual') { - return false - } - - if (options.redirect === 'error') { - throw new FetchError(`redirect mode is set to error: ${request.url}`, - 'no-redirect', { code: 'ENOREDIRECT' }) - } - - if (!response.headers.has('location')) { - throw new FetchError(`redirect location header missing for: ${request.url}`, - 'no-location', { code: 'EINVALIDREDIRECT' }) - } - - if (request.counter >= request.follow) { - throw new FetchError(`maximum redirect reached at: ${request.url}`, - 'max-redirect', { code: 'EMAXREDIRECT' }) - } - - return true -} - -// given a Request, a Response, and the user's options return an object -// with a new Request and a new options object that will be used for -// following the redirect -const getRedirect = (request, response, options) => { - const _opts = { ...options } - const location = response.headers.get('location') - const redirectUrl = new url.URL(location, /^https?:/.test(location) ? undefined : request.url) - // Comment below is used under the following license: - /** - * @license - * Copyright (c) 2010-2012 Mikeal Rogers - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language - * governing permissions and limitations under the License. - */ - - // Remove authorization if changing hostnames (but not if just - // changing ports or protocols). This matches the behavior of request: - // https://github.com/request/request/blob/b12a6245/lib/redirect.js#L134-L138 - if (new url.URL(request.url).hostname !== redirectUrl.hostname) { - request.headers.delete('authorization') - request.headers.delete('cookie') - } - - // for POST request with 301/302 response, or any request with 303 response, - // use GET when following redirect - if ( - response.status === 303 || - (request.method === 'POST' && [301, 302].includes(response.status)) - ) { - _opts.method = 'GET' - _opts.body = null - request.headers.delete('content-length') - } - - _opts.headers = {} - request.headers.forEach((value, key) => { - _opts.headers[key] = value - }) - - _opts.counter = ++request.counter - const redirectReq = new Request(url.format(redirectUrl), _opts) - return { - request: redirectReq, - options: _opts, - } -} - -const fetch = async (request, options) => { - const response = CachePolicy.storable(request, options) - ? await cache(request, options) - : await remote(request, options) - - // if the request wasn't a GET or HEAD, and the response - // status is between 200 and 399 inclusive, invalidate the - // request url - if (!['GET', 'HEAD'].includes(request.method) && - response.status >= 200 && - response.status <= 399) { - await cache.invalidate(request, options) - } - - if (!canFollowRedirect(request, response, options)) { - return response - } - - const redirect = getRedirect(request, response, options) - return fetch(redirect.request, redirect.options) -} - -module.exports = fetch diff --git a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/index.js b/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/index.js deleted file mode 100644 index 2f12e8e1b61131..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/index.js +++ /dev/null @@ -1,41 +0,0 @@ -const { FetchError, Headers, Request, Response } = require('minipass-fetch') - -const configureOptions = require('./options.js') -const fetch = require('./fetch.js') - -const makeFetchHappen = (url, opts) => { - const options = configureOptions(opts) - - const request = new Request(url, options) - return fetch(request, options) -} - -makeFetchHappen.defaults = (defaultUrl, defaultOptions = {}, wrappedFetch = makeFetchHappen) => { - if (typeof defaultUrl === 'object') { - defaultOptions = defaultUrl - defaultUrl = null - } - - const defaultedFetch = (url, options = {}) => { - const finalUrl = url || defaultUrl - const finalOptions = { - ...defaultOptions, - ...options, - headers: { - ...defaultOptions.headers, - ...options.headers, - }, - } - return wrappedFetch(finalUrl, finalOptions) - } - - defaultedFetch.defaults = (defaultUrl1, defaultOptions1 = {}) => - makeFetchHappen.defaults(defaultUrl1, defaultOptions1, defaultedFetch) - return defaultedFetch -} - -module.exports = makeFetchHappen -module.exports.FetchError = FetchError -module.exports.Headers = Headers -module.exports.Request = Request -module.exports.Response = Response diff --git a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/options.js b/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/options.js deleted file mode 100644 index f77511279f831d..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/options.js +++ /dev/null @@ -1,54 +0,0 @@ -const dns = require('dns') - -const conditionalHeaders = [ - 'if-modified-since', - 'if-none-match', - 'if-unmodified-since', - 'if-match', - 'if-range', -] - -const configureOptions = (opts) => { - const { strictSSL, ...options } = { ...opts } - options.method = options.method ? options.method.toUpperCase() : 'GET' - options.rejectUnauthorized = strictSSL !== false - - if (!options.retry) { - options.retry = { retries: 0 } - } else if (typeof options.retry === 'string') { - const retries = parseInt(options.retry, 10) - if (isFinite(retries)) { - options.retry = { retries } - } else { - options.retry = { retries: 0 } - } - } else if (typeof options.retry === 'number') { - options.retry = { retries: options.retry } - } else { - options.retry = { retries: 0, ...options.retry } - } - - options.dns = { ttl: 5 * 60 * 1000, lookup: dns.lookup, ...options.dns } - - options.cache = options.cache || 'default' - if (options.cache === 'default') { - const hasConditionalHeader = Object.keys(options.headers || {}).some((name) => { - return conditionalHeaders.includes(name.toLowerCase()) - }) - if (hasConditionalHeader) { - options.cache = 'no-store' - } - } - - options.cacheAdditionalHeaders = options.cacheAdditionalHeaders || [] - - // cacheManager is deprecated, but if it's set and - // cachePath is not we should copy it to the new field - if (options.cacheManager && !options.cachePath) { - options.cachePath = options.cacheManager - } - - return options -} - -module.exports = configureOptions diff --git a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/pipeline.js b/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/pipeline.js deleted file mode 100644 index b1d221b2d0ce31..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/pipeline.js +++ /dev/null @@ -1,41 +0,0 @@ -'use strict' - -const MinipassPipeline = require('minipass-pipeline') - -class CachingMinipassPipeline extends MinipassPipeline { - #events = [] - #data = new Map() - - constructor (opts, ...streams) { - // CRITICAL: do NOT pass the streams to the call to super(), this will start - // the flow of data and potentially cause the events we need to catch to emit - // before we've finished our own setup. instead we call super() with no args, - // finish our setup, and then push the streams into ourselves to start the - // data flow - super() - this.#events = opts.events - - /* istanbul ignore next - coverage disabled because this is pointless to test here */ - if (streams.length) { - this.push(...streams) - } - } - - on (event, handler) { - if (this.#events.includes(event) && this.#data.has(event)) { - return handler(...this.#data.get(event)) - } - - return super.on(event, handler) - } - - emit (event, ...data) { - if (this.#events.includes(event)) { - this.#data.set(event, data) - } - - return super.emit(event, ...data) - } -} - -module.exports = CachingMinipassPipeline diff --git a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/remote.js b/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/remote.js deleted file mode 100644 index 8554564074de6e..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/lib/remote.js +++ /dev/null @@ -1,131 +0,0 @@ -const { Minipass } = require('minipass') -const fetch = require('minipass-fetch') -const promiseRetry = require('promise-retry') -const ssri = require('ssri') -const { log } = require('proc-log') - -const CachingMinipassPipeline = require('./pipeline.js') -const { getAgent } = require('@npmcli/agent') -const pkg = require('../package.json') - -const USER_AGENT = `${pkg.name}/${pkg.version} (+https://npm.im/${pkg.name})` - -const RETRY_ERRORS = [ - 'ECONNRESET', // remote socket closed on us - 'ECONNREFUSED', // remote host refused to open connection - 'EADDRINUSE', // failed to bind to a local port (proxy?) - 'ETIMEDOUT', // someone in the transaction is WAY TOO SLOW - // from @npmcli/agent - 'ECONNECTIONTIMEOUT', - 'EIDLETIMEOUT', - 'ERESPONSETIMEOUT', - 'ETRANSFERTIMEOUT', - // Known codes we do NOT retry on: - // ENOTFOUND (getaddrinfo failure. Either bad hostname, or offline) - // EINVALIDPROXY // invalid protocol from @npmcli/agent - // EINVALIDRESPONSE // invalid status code from @npmcli/agent -] - -const RETRY_TYPES = [ - 'request-timeout', -] - -// make a request directly to the remote source, -// retrying certain classes of errors as well as -// following redirects (through the cache if necessary) -// and verifying response integrity -const remoteFetch = (request, options) => { - const agent = getAgent(request.url, options) - if (!request.headers.has('connection')) { - request.headers.set('connection', agent ? 'keep-alive' : 'close') - } - - if (!request.headers.has('user-agent')) { - request.headers.set('user-agent', USER_AGENT) - } - - // keep our own options since we're overriding the agent - // and the redirect mode - const _opts = { - ...options, - agent, - redirect: 'manual', - } - - return promiseRetry(async (retryHandler, attemptNum) => { - const req = new fetch.Request(request, _opts) - try { - let res = await fetch(req, _opts) - if (_opts.integrity && res.status === 200) { - // we got a 200 response and the user has specified an expected - // integrity value, so wrap the response in an ssri stream to verify it - const integrityStream = ssri.integrityStream({ - algorithms: _opts.algorithms, - integrity: _opts.integrity, - size: _opts.size, - }) - const pipeline = new CachingMinipassPipeline({ - events: ['integrity', 'size'], - }, res.body, integrityStream) - // we also propagate the integrity and size events out to the pipeline so we can use - // this new response body as an integrityEmitter for cacache - integrityStream.on('integrity', i => pipeline.emit('integrity', i)) - integrityStream.on('size', s => pipeline.emit('size', s)) - res = new fetch.Response(pipeline, res) - // set an explicit flag so we know if our response body will emit integrity and size - res.body.hasIntegrityEmitter = true - } - - res.headers.set('x-fetch-attempts', attemptNum) - - // do not retry POST requests, or requests with a streaming body - // do retry requests with a 408, 420, 429 or 500+ status in the response - const isStream = Minipass.isStream(req.body) - const isRetriable = req.method !== 'POST' && - !isStream && - ([408, 420, 429].includes(res.status) || res.status >= 500) - - if (isRetriable) { - if (typeof options.onRetry === 'function') { - options.onRetry(res) - } - - /* eslint-disable-next-line max-len */ - log.http('fetch', `${req.method} ${req.url} attempt ${attemptNum} failed with ${res.status}`) - return retryHandler(res) - } - - return res - } catch (err) { - const code = (err.code === 'EPROMISERETRY') - ? err.retried.code - : err.code - - // err.retried will be the thing that was thrown from above - // if it's a response, we just got a bad status code and we - // can re-throw to allow the retry - const isRetryError = err.retried instanceof fetch.Response || - (RETRY_ERRORS.includes(code) && RETRY_TYPES.includes(err.type)) - - if (req.method === 'POST' || isRetryError) { - throw err - } - - if (typeof options.onRetry === 'function') { - options.onRetry(err) - } - - log.http('fetch', `${req.method} ${req.url} attempt ${attemptNum} failed with ${err.code}`) - return retryHandler(err) - } - }, options.retry).catch((err) => { - // don't reject for http errors, just return them - if (err.status >= 400 && err.type !== 'system') { - return err - } - - throw err - }) -} - -module.exports = remoteFetch diff --git a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/package.json b/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/package.json deleted file mode 100644 index 7adb4d1e7f9719..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/make-fetch-happen/package.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "name": "make-fetch-happen", - "version": "13.0.1", - "description": "Opinionated, caching, retrying fetch client", - "main": "lib/index.js", - "files": [ - "bin/", - "lib/" - ], - "scripts": { - "test": "tap", - "posttest": "npm run lint", - "eslint": "eslint", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "lintfix": "npm run lint -- --fix", - "postlint": "template-oss-check", - "snap": "tap", - "template-oss-apply": "template-oss-apply --force" - }, - "repository": { - "type": "git", - "url": "https://github.com/npm/make-fetch-happen.git" - }, - "keywords": [ - "http", - "request", - "fetch", - "mean girls", - "caching", - "cache", - "subresource integrity" - ], - "author": "GitHub Inc.", - "license": "ISC", - "dependencies": { - "@npmcli/agent": "^2.0.0", - "cacache": "^18.0.0", - "http-cache-semantics": "^4.1.1", - "is-lambda": "^1.0.1", - "minipass": "^7.0.2", - "minipass-fetch": "^3.0.0", - "minipass-flush": "^1.0.5", - "minipass-pipeline": "^1.2.4", - "negotiator": "^0.6.3", - "proc-log": "^4.2.0", - "promise-retry": "^2.0.1", - "ssri": "^10.0.0" - }, - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.21.4", - "nock": "^13.2.4", - "safe-buffer": "^5.2.1", - "standard-version": "^9.3.2", - "tap": "^16.0.0" - }, - "engines": { - "node": "^16.14.0 || >=18.0.0" - }, - "tap": { - "color": 1, - "files": "test/*.js", - "check-coverage": true, - "timeout": 60, - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.21.4", - "publish": "true" - } -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/LICENSE b/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/LICENSE deleted file mode 100644 index 3c3410cdc12ee3..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/LICENSE +++ /dev/null @@ -1,28 +0,0 @@ -The MIT License (MIT) - -Copyright (c) Isaac Z. Schlueter and Contributors -Copyright (c) 2016 David Frank - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - ---- - -Note: This is a derivative work based on "node-fetch" by David Frank, -modified and distributed under the terms of the MIT license above. -https://github.com/bitinn/node-fetch diff --git a/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/lib/abort-error.js b/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/lib/abort-error.js deleted file mode 100644 index b18f643269e375..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/lib/abort-error.js +++ /dev/null @@ -1,17 +0,0 @@ -'use strict' -class AbortError extends Error { - constructor (message) { - super(message) - this.code = 'FETCH_ABORTED' - this.type = 'aborted' - Error.captureStackTrace(this, this.constructor) - } - - get name () { - return 'AbortError' - } - - // don't allow name to be overridden, but don't throw either - set name (s) {} -} -module.exports = AbortError diff --git a/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/lib/blob.js b/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/lib/blob.js deleted file mode 100644 index 121b1730102e72..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/lib/blob.js +++ /dev/null @@ -1,97 +0,0 @@ -'use strict' -const { Minipass } = require('minipass') -const TYPE = Symbol('type') -const BUFFER = Symbol('buffer') - -class Blob { - constructor (blobParts, options) { - this[TYPE] = '' - - const buffers = [] - let size = 0 - - if (blobParts) { - const a = blobParts - const length = Number(a.length) - for (let i = 0; i < length; i++) { - const element = a[i] - const buffer = element instanceof Buffer ? element - : ArrayBuffer.isView(element) - ? Buffer.from(element.buffer, element.byteOffset, element.byteLength) - : element instanceof ArrayBuffer ? Buffer.from(element) - : element instanceof Blob ? element[BUFFER] - : typeof element === 'string' ? Buffer.from(element) - : Buffer.from(String(element)) - size += buffer.length - buffers.push(buffer) - } - } - - this[BUFFER] = Buffer.concat(buffers, size) - - const type = options && options.type !== undefined - && String(options.type).toLowerCase() - if (type && !/[^\u0020-\u007E]/.test(type)) { - this[TYPE] = type - } - } - - get size () { - return this[BUFFER].length - } - - get type () { - return this[TYPE] - } - - text () { - return Promise.resolve(this[BUFFER].toString()) - } - - arrayBuffer () { - const buf = this[BUFFER] - const off = buf.byteOffset - const len = buf.byteLength - const ab = buf.buffer.slice(off, off + len) - return Promise.resolve(ab) - } - - stream () { - return new Minipass().end(this[BUFFER]) - } - - slice (start, end, type) { - const size = this.size - const relativeStart = start === undefined ? 0 - : start < 0 ? Math.max(size + start, 0) - : Math.min(start, size) - const relativeEnd = end === undefined ? size - : end < 0 ? Math.max(size + end, 0) - : Math.min(end, size) - const span = Math.max(relativeEnd - relativeStart, 0) - - const buffer = this[BUFFER] - const slicedBuffer = buffer.slice( - relativeStart, - relativeStart + span - ) - const blob = new Blob([], { type }) - blob[BUFFER] = slicedBuffer - return blob - } - - get [Symbol.toStringTag] () { - return 'Blob' - } - - static get BUFFER () { - return BUFFER - } -} - -Object.defineProperties(Blob.prototype, { - size: { enumerable: true }, - type: { enumerable: true }, -}) - -module.exports = Blob diff --git a/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/lib/body.js b/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/lib/body.js deleted file mode 100644 index 62286bd1de0d91..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/lib/body.js +++ /dev/null @@ -1,350 +0,0 @@ -'use strict' -const { Minipass } = require('minipass') -const MinipassSized = require('minipass-sized') - -const Blob = require('./blob.js') -const { BUFFER } = Blob -const FetchError = require('./fetch-error.js') - -// optional dependency on 'encoding' -let convert -try { - convert = require('encoding').convert -} catch (e) { - // defer error until textConverted is called -} - -const INTERNALS = Symbol('Body internals') -const CONSUME_BODY = Symbol('consumeBody') - -class Body { - constructor (bodyArg, options = {}) { - const { size = 0, timeout = 0 } = options - const body = bodyArg === undefined || bodyArg === null ? null - : isURLSearchParams(bodyArg) ? Buffer.from(bodyArg.toString()) - : isBlob(bodyArg) ? bodyArg - : Buffer.isBuffer(bodyArg) ? bodyArg - : Object.prototype.toString.call(bodyArg) === '[object ArrayBuffer]' - ? Buffer.from(bodyArg) - : ArrayBuffer.isView(bodyArg) - ? Buffer.from(bodyArg.buffer, bodyArg.byteOffset, bodyArg.byteLength) - : Minipass.isStream(bodyArg) ? bodyArg - : Buffer.from(String(bodyArg)) - - this[INTERNALS] = { - body, - disturbed: false, - error: null, - } - - this.size = size - this.timeout = timeout - - if (Minipass.isStream(body)) { - body.on('error', er => { - const error = er.name === 'AbortError' ? er - : new FetchError(`Invalid response while trying to fetch ${ - this.url}: ${er.message}`, 'system', er) - this[INTERNALS].error = error - }) - } - } - - get body () { - return this[INTERNALS].body - } - - get bodyUsed () { - return this[INTERNALS].disturbed - } - - arrayBuffer () { - return this[CONSUME_BODY]().then(buf => - buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength)) - } - - blob () { - const ct = this.headers && this.headers.get('content-type') || '' - return this[CONSUME_BODY]().then(buf => Object.assign( - new Blob([], { type: ct.toLowerCase() }), - { [BUFFER]: buf } - )) - } - - async json () { - const buf = await this[CONSUME_BODY]() - try { - return JSON.parse(buf.toString()) - } catch (er) { - throw new FetchError( - `invalid json response body at ${this.url} reason: ${er.message}`, - 'invalid-json' - ) - } - } - - text () { - return this[CONSUME_BODY]().then(buf => buf.toString()) - } - - buffer () { - return this[CONSUME_BODY]() - } - - textConverted () { - return this[CONSUME_BODY]().then(buf => convertBody(buf, this.headers)) - } - - [CONSUME_BODY] () { - if (this[INTERNALS].disturbed) { - return Promise.reject(new TypeError(`body used already for: ${ - this.url}`)) - } - - this[INTERNALS].disturbed = true - - if (this[INTERNALS].error) { - return Promise.reject(this[INTERNALS].error) - } - - // body is null - if (this.body === null) { - return Promise.resolve(Buffer.alloc(0)) - } - - if (Buffer.isBuffer(this.body)) { - return Promise.resolve(this.body) - } - - const upstream = isBlob(this.body) ? this.body.stream() : this.body - - /* istanbul ignore if: should never happen */ - if (!Minipass.isStream(upstream)) { - return Promise.resolve(Buffer.alloc(0)) - } - - const stream = this.size && upstream instanceof MinipassSized ? upstream - : !this.size && upstream instanceof Minipass && - !(upstream instanceof MinipassSized) ? upstream - : this.size ? new MinipassSized({ size: this.size }) - : new Minipass() - - // allow timeout on slow response body, but only if the stream is still writable. this - // makes the timeout center on the socket stream from lib/index.js rather than the - // intermediary minipass stream we create to receive the data - const resTimeout = this.timeout && stream.writable ? setTimeout(() => { - stream.emit('error', new FetchError( - `Response timeout while trying to fetch ${ - this.url} (over ${this.timeout}ms)`, 'body-timeout')) - }, this.timeout) : null - - // do not keep the process open just for this timeout, even - // though we expect it'll get cleared eventually. - if (resTimeout && resTimeout.unref) { - resTimeout.unref() - } - - // do the pipe in the promise, because the pipe() can send too much - // data through right away and upset the MP Sized object - return new Promise((resolve) => { - // if the stream is some other kind of stream, then pipe through a MP - // so we can collect it more easily. - if (stream !== upstream) { - upstream.on('error', er => stream.emit('error', er)) - upstream.pipe(stream) - } - resolve() - }).then(() => stream.concat()).then(buf => { - clearTimeout(resTimeout) - return buf - }).catch(er => { - clearTimeout(resTimeout) - // request was aborted, reject with this Error - if (er.name === 'AbortError' || er.name === 'FetchError') { - throw er - } else if (er.name === 'RangeError') { - throw new FetchError(`Could not create Buffer from response body for ${ - this.url}: ${er.message}`, 'system', er) - } else { - // other errors, such as incorrect content-encoding or content-length - throw new FetchError(`Invalid response body while trying to fetch ${ - this.url}: ${er.message}`, 'system', er) - } - }) - } - - static clone (instance) { - if (instance.bodyUsed) { - throw new Error('cannot clone body after it is used') - } - - const body = instance.body - - // check that body is a stream and not form-data object - // NB: can't clone the form-data object without having it as a dependency - if (Minipass.isStream(body) && typeof body.getBoundary !== 'function') { - // create a dedicated tee stream so that we don't lose data - // potentially sitting in the body stream's buffer by writing it - // immediately to p1 and not having it for p2. - const tee = new Minipass() - const p1 = new Minipass() - const p2 = new Minipass() - tee.on('error', er => { - p1.emit('error', er) - p2.emit('error', er) - }) - body.on('error', er => tee.emit('error', er)) - tee.pipe(p1) - tee.pipe(p2) - body.pipe(tee) - // set instance body to one fork, return the other - instance[INTERNALS].body = p1 - return p2 - } else { - return instance.body - } - } - - static extractContentType (body) { - return body === null || body === undefined ? null - : typeof body === 'string' ? 'text/plain;charset=UTF-8' - : isURLSearchParams(body) - ? 'application/x-www-form-urlencoded;charset=UTF-8' - : isBlob(body) ? body.type || null - : Buffer.isBuffer(body) ? null - : Object.prototype.toString.call(body) === '[object ArrayBuffer]' ? null - : ArrayBuffer.isView(body) ? null - : typeof body.getBoundary === 'function' - ? `multipart/form-data;boundary=${body.getBoundary()}` - : Minipass.isStream(body) ? null - : 'text/plain;charset=UTF-8' - } - - static getTotalBytes (instance) { - const { body } = instance - return (body === null || body === undefined) ? 0 - : isBlob(body) ? body.size - : Buffer.isBuffer(body) ? body.length - : body && typeof body.getLengthSync === 'function' && ( - // detect form data input from form-data module - body._lengthRetrievers && - /* istanbul ignore next */ body._lengthRetrievers.length === 0 || // 1.x - body.hasKnownLength && body.hasKnownLength()) // 2.x - ? body.getLengthSync() - : null - } - - static writeToStream (dest, instance) { - const { body } = instance - - if (body === null || body === undefined) { - dest.end() - } else if (Buffer.isBuffer(body) || typeof body === 'string') { - dest.end(body) - } else { - // body is stream or blob - const stream = isBlob(body) ? body.stream() : body - stream.on('error', er => dest.emit('error', er)).pipe(dest) - } - - return dest - } -} - -Object.defineProperties(Body.prototype, { - body: { enumerable: true }, - bodyUsed: { enumerable: true }, - arrayBuffer: { enumerable: true }, - blob: { enumerable: true }, - json: { enumerable: true }, - text: { enumerable: true }, -}) - -const isURLSearchParams = obj => - // Duck-typing as a necessary condition. - (typeof obj !== 'object' || - typeof obj.append !== 'function' || - typeof obj.delete !== 'function' || - typeof obj.get !== 'function' || - typeof obj.getAll !== 'function' || - typeof obj.has !== 'function' || - typeof obj.set !== 'function') ? false - // Brand-checking and more duck-typing as optional condition. - : obj.constructor.name === 'URLSearchParams' || - Object.prototype.toString.call(obj) === '[object URLSearchParams]' || - typeof obj.sort === 'function' - -const isBlob = obj => - typeof obj === 'object' && - typeof obj.arrayBuffer === 'function' && - typeof obj.type === 'string' && - typeof obj.stream === 'function' && - typeof obj.constructor === 'function' && - typeof obj.constructor.name === 'string' && - /^(Blob|File)$/.test(obj.constructor.name) && - /^(Blob|File)$/.test(obj[Symbol.toStringTag]) - -const convertBody = (buffer, headers) => { - /* istanbul ignore if */ - if (typeof convert !== 'function') { - throw new Error('The package `encoding` must be installed to use the textConverted() function') - } - - const ct = headers && headers.get('content-type') - let charset = 'utf-8' - let res - - // header - if (ct) { - res = /charset=([^;]*)/i.exec(ct) - } - - // no charset in content type, peek at response body for at most 1024 bytes - const str = buffer.slice(0, 1024).toString() - - // html5 - if (!res && str) { - res = / this.expect - ? 'max-size' : type - this.message = message - Error.captureStackTrace(this, this.constructor) - } - - get name () { - return 'FetchError' - } - - // don't allow name to be overwritten - set name (n) {} - - get [Symbol.toStringTag] () { - return 'FetchError' - } -} -module.exports = FetchError diff --git a/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/lib/headers.js b/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/lib/headers.js deleted file mode 100644 index dd6e854d5ba399..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/lib/headers.js +++ /dev/null @@ -1,267 +0,0 @@ -'use strict' -const invalidTokenRegex = /[^^_`a-zA-Z\-0-9!#$%&'*+.|~]/ -const invalidHeaderCharRegex = /[^\t\x20-\x7e\x80-\xff]/ - -const validateName = name => { - name = `${name}` - if (invalidTokenRegex.test(name) || name === '') { - throw new TypeError(`${name} is not a legal HTTP header name`) - } -} - -const validateValue = value => { - value = `${value}` - if (invalidHeaderCharRegex.test(value)) { - throw new TypeError(`${value} is not a legal HTTP header value`) - } -} - -const find = (map, name) => { - name = name.toLowerCase() - for (const key in map) { - if (key.toLowerCase() === name) { - return key - } - } - return undefined -} - -const MAP = Symbol('map') -class Headers { - constructor (init = undefined) { - this[MAP] = Object.create(null) - if (init instanceof Headers) { - const rawHeaders = init.raw() - const headerNames = Object.keys(rawHeaders) - for (const headerName of headerNames) { - for (const value of rawHeaders[headerName]) { - this.append(headerName, value) - } - } - return - } - - // no-op - if (init === undefined || init === null) { - return - } - - if (typeof init === 'object') { - const method = init[Symbol.iterator] - if (method !== null && method !== undefined) { - if (typeof method !== 'function') { - throw new TypeError('Header pairs must be iterable') - } - - // sequence> - // Note: per spec we have to first exhaust the lists then process them - const pairs = [] - for (const pair of init) { - if (typeof pair !== 'object' || - typeof pair[Symbol.iterator] !== 'function') { - throw new TypeError('Each header pair must be iterable') - } - const arrPair = Array.from(pair) - if (arrPair.length !== 2) { - throw new TypeError('Each header pair must be a name/value tuple') - } - pairs.push(arrPair) - } - - for (const pair of pairs) { - this.append(pair[0], pair[1]) - } - } else { - // record - for (const key of Object.keys(init)) { - this.append(key, init[key]) - } - } - } else { - throw new TypeError('Provided initializer must be an object') - } - } - - get (name) { - name = `${name}` - validateName(name) - const key = find(this[MAP], name) - if (key === undefined) { - return null - } - - return this[MAP][key].join(', ') - } - - forEach (callback, thisArg = undefined) { - let pairs = getHeaders(this) - for (let i = 0; i < pairs.length; i++) { - const [name, value] = pairs[i] - callback.call(thisArg, value, name, this) - // refresh in case the callback added more headers - pairs = getHeaders(this) - } - } - - set (name, value) { - name = `${name}` - value = `${value}` - validateName(name) - validateValue(value) - const key = find(this[MAP], name) - this[MAP][key !== undefined ? key : name] = [value] - } - - append (name, value) { - name = `${name}` - value = `${value}` - validateName(name) - validateValue(value) - const key = find(this[MAP], name) - if (key !== undefined) { - this[MAP][key].push(value) - } else { - this[MAP][name] = [value] - } - } - - has (name) { - name = `${name}` - validateName(name) - return find(this[MAP], name) !== undefined - } - - delete (name) { - name = `${name}` - validateName(name) - const key = find(this[MAP], name) - if (key !== undefined) { - delete this[MAP][key] - } - } - - raw () { - return this[MAP] - } - - keys () { - return new HeadersIterator(this, 'key') - } - - values () { - return new HeadersIterator(this, 'value') - } - - [Symbol.iterator] () { - return new HeadersIterator(this, 'key+value') - } - - entries () { - return new HeadersIterator(this, 'key+value') - } - - get [Symbol.toStringTag] () { - return 'Headers' - } - - static exportNodeCompatibleHeaders (headers) { - const obj = Object.assign(Object.create(null), headers[MAP]) - - // http.request() only supports string as Host header. This hack makes - // specifying custom Host header possible. - const hostHeaderKey = find(headers[MAP], 'Host') - if (hostHeaderKey !== undefined) { - obj[hostHeaderKey] = obj[hostHeaderKey][0] - } - - return obj - } - - static createHeadersLenient (obj) { - const headers = new Headers() - for (const name of Object.keys(obj)) { - if (invalidTokenRegex.test(name)) { - continue - } - - if (Array.isArray(obj[name])) { - for (const val of obj[name]) { - if (invalidHeaderCharRegex.test(val)) { - continue - } - - if (headers[MAP][name] === undefined) { - headers[MAP][name] = [val] - } else { - headers[MAP][name].push(val) - } - } - } else if (!invalidHeaderCharRegex.test(obj[name])) { - headers[MAP][name] = [obj[name]] - } - } - return headers - } -} - -Object.defineProperties(Headers.prototype, { - get: { enumerable: true }, - forEach: { enumerable: true }, - set: { enumerable: true }, - append: { enumerable: true }, - has: { enumerable: true }, - delete: { enumerable: true }, - keys: { enumerable: true }, - values: { enumerable: true }, - entries: { enumerable: true }, -}) - -const getHeaders = (headers, kind = 'key+value') => - Object.keys(headers[MAP]).sort().map( - kind === 'key' ? k => k.toLowerCase() - : kind === 'value' ? k => headers[MAP][k].join(', ') - : k => [k.toLowerCase(), headers[MAP][k].join(', ')] - ) - -const INTERNAL = Symbol('internal') - -class HeadersIterator { - constructor (target, kind) { - this[INTERNAL] = { - target, - kind, - index: 0, - } - } - - get [Symbol.toStringTag] () { - return 'HeadersIterator' - } - - next () { - /* istanbul ignore if: should be impossible */ - if (!this || Object.getPrototypeOf(this) !== HeadersIterator.prototype) { - throw new TypeError('Value of `this` is not a HeadersIterator') - } - - const { target, kind, index } = this[INTERNAL] - const values = getHeaders(target, kind) - const len = values.length - if (index >= len) { - return { - value: undefined, - done: true, - } - } - - this[INTERNAL].index++ - - return { value: values[index], done: false } - } -} - -// manually extend because 'extends' requires a ctor -Object.setPrototypeOf(HeadersIterator.prototype, - Object.getPrototypeOf(Object.getPrototypeOf([][Symbol.iterator]()))) - -module.exports = Headers diff --git a/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/lib/index.js b/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/lib/index.js deleted file mode 100644 index da402161670e65..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/lib/index.js +++ /dev/null @@ -1,377 +0,0 @@ -'use strict' -const { URL } = require('url') -const http = require('http') -const https = require('https') -const zlib = require('minizlib') -const { Minipass } = require('minipass') - -const Body = require('./body.js') -const { writeToStream, getTotalBytes } = Body -const Response = require('./response.js') -const Headers = require('./headers.js') -const { createHeadersLenient } = Headers -const Request = require('./request.js') -const { getNodeRequestOptions } = Request -const FetchError = require('./fetch-error.js') -const AbortError = require('./abort-error.js') - -// XXX this should really be split up and unit-ized for easier testing -// and better DRY implementation of data/http request aborting -const fetch = async (url, opts) => { - if (/^data:/.test(url)) { - const request = new Request(url, opts) - // delay 1 promise tick so that the consumer can abort right away - return Promise.resolve().then(() => new Promise((resolve, reject) => { - let type, data - try { - const { pathname, search } = new URL(url) - const split = pathname.split(',') - if (split.length < 2) { - throw new Error('invalid data: URI') - } - const mime = split.shift() - const base64 = /;base64$/.test(mime) - type = base64 ? mime.slice(0, -1 * ';base64'.length) : mime - const rawData = decodeURIComponent(split.join(',') + search) - data = base64 ? Buffer.from(rawData, 'base64') : Buffer.from(rawData) - } catch (er) { - return reject(new FetchError(`[${request.method}] ${ - request.url} invalid URL, ${er.message}`, 'system', er)) - } - - const { signal } = request - if (signal && signal.aborted) { - return reject(new AbortError('The user aborted a request.')) - } - - const headers = { 'Content-Length': data.length } - if (type) { - headers['Content-Type'] = type - } - return resolve(new Response(data, { headers })) - })) - } - - return new Promise((resolve, reject) => { - // build request object - const request = new Request(url, opts) - let options - try { - options = getNodeRequestOptions(request) - } catch (er) { - return reject(er) - } - - const send = (options.protocol === 'https:' ? https : http).request - const { signal } = request - let response = null - const abort = () => { - const error = new AbortError('The user aborted a request.') - reject(error) - if (Minipass.isStream(request.body) && - typeof request.body.destroy === 'function') { - request.body.destroy(error) - } - if (response && response.body) { - response.body.emit('error', error) - } - } - - if (signal && signal.aborted) { - return abort() - } - - const abortAndFinalize = () => { - abort() - finalize() - } - - const finalize = () => { - req.abort() - if (signal) { - signal.removeEventListener('abort', abortAndFinalize) - } - clearTimeout(reqTimeout) - } - - // send request - const req = send(options) - - if (signal) { - signal.addEventListener('abort', abortAndFinalize) - } - - let reqTimeout = null - if (request.timeout) { - req.once('socket', () => { - reqTimeout = setTimeout(() => { - reject(new FetchError(`network timeout at: ${ - request.url}`, 'request-timeout')) - finalize() - }, request.timeout) - }) - } - - req.on('error', er => { - // if a 'response' event is emitted before the 'error' event, then by the - // time this handler is run it's too late to reject the Promise for the - // response. instead, we forward the error event to the response stream - // so that the error will surface to the user when they try to consume - // the body. this is done as a side effect of aborting the request except - // for in windows, where we must forward the event manually, otherwise - // there is no longer a ref'd socket attached to the request and the - // stream never ends so the event loop runs out of work and the process - // exits without warning. - // coverage skipped here due to the difficulty in testing - // istanbul ignore next - if (req.res) { - req.res.emit('error', er) - } - reject(new FetchError(`request to ${request.url} failed, reason: ${ - er.message}`, 'system', er)) - finalize() - }) - - req.on('response', res => { - clearTimeout(reqTimeout) - - const headers = createHeadersLenient(res.headers) - - // HTTP fetch step 5 - if (fetch.isRedirect(res.statusCode)) { - // HTTP fetch step 5.2 - const location = headers.get('Location') - - // HTTP fetch step 5.3 - let locationURL = null - try { - locationURL = location === null ? null : new URL(location, request.url).toString() - } catch { - // error here can only be invalid URL in Location: header - // do not throw when options.redirect == manual - // let the user extract the errorneous redirect URL - if (request.redirect !== 'manual') { - /* eslint-disable-next-line max-len */ - reject(new FetchError(`uri requested responds with an invalid redirect URL: ${location}`, 'invalid-redirect')) - finalize() - return - } - } - - // HTTP fetch step 5.5 - if (request.redirect === 'error') { - reject(new FetchError('uri requested responds with a redirect, ' + - `redirect mode is set to error: ${request.url}`, 'no-redirect')) - finalize() - return - } else if (request.redirect === 'manual') { - // node-fetch-specific step: make manual redirect a bit easier to - // use by setting the Location header value to the resolved URL. - if (locationURL !== null) { - // handle corrupted header - try { - headers.set('Location', locationURL) - } catch (err) { - /* istanbul ignore next: nodejs server prevent invalid - response headers, we can't test this through normal - request */ - reject(err) - } - } - } else if (request.redirect === 'follow' && locationURL !== null) { - // HTTP-redirect fetch step 5 - if (request.counter >= request.follow) { - reject(new FetchError(`maximum redirect reached at: ${ - request.url}`, 'max-redirect')) - finalize() - return - } - - // HTTP-redirect fetch step 9 - if (res.statusCode !== 303 && - request.body && - getTotalBytes(request) === null) { - reject(new FetchError( - 'Cannot follow redirect with body being a readable stream', - 'unsupported-redirect' - )) - finalize() - return - } - - // Update host due to redirection - request.headers.set('host', (new URL(locationURL)).host) - - // HTTP-redirect fetch step 6 (counter increment) - // Create a new Request object. - const requestOpts = { - headers: new Headers(request.headers), - follow: request.follow, - counter: request.counter + 1, - agent: request.agent, - compress: request.compress, - method: request.method, - body: request.body, - signal: request.signal, - timeout: request.timeout, - } - - // if the redirect is to a new hostname, strip the authorization and cookie headers - const parsedOriginal = new URL(request.url) - const parsedRedirect = new URL(locationURL) - if (parsedOriginal.hostname !== parsedRedirect.hostname) { - requestOpts.headers.delete('authorization') - requestOpts.headers.delete('cookie') - } - - // HTTP-redirect fetch step 11 - if (res.statusCode === 303 || ( - (res.statusCode === 301 || res.statusCode === 302) && - request.method === 'POST' - )) { - requestOpts.method = 'GET' - requestOpts.body = undefined - requestOpts.headers.delete('content-length') - } - - // HTTP-redirect fetch step 15 - resolve(fetch(new Request(locationURL, requestOpts))) - finalize() - return - } - } // end if(isRedirect) - - // prepare response - res.once('end', () => - signal && signal.removeEventListener('abort', abortAndFinalize)) - - const body = new Minipass() - // if an error occurs, either on the response stream itself, on one of the - // decoder streams, or a response length timeout from the Body class, we - // forward the error through to our internal body stream. If we see an - // error event on that, we call finalize to abort the request and ensure - // we don't leave a socket believing a request is in flight. - // this is difficult to test, so lacks specific coverage. - body.on('error', finalize) - // exceedingly rare that the stream would have an error, - // but just in case we proxy it to the stream in use. - res.on('error', /* istanbul ignore next */ er => body.emit('error', er)) - res.on('data', (chunk) => body.write(chunk)) - res.on('end', () => body.end()) - - const responseOptions = { - url: request.url, - status: res.statusCode, - statusText: res.statusMessage, - headers: headers, - size: request.size, - timeout: request.timeout, - counter: request.counter, - trailer: new Promise(resolveTrailer => - res.on('end', () => resolveTrailer(createHeadersLenient(res.trailers)))), - } - - // HTTP-network fetch step 12.1.1.3 - const codings = headers.get('Content-Encoding') - - // HTTP-network fetch step 12.1.1.4: handle content codings - - // in following scenarios we ignore compression support - // 1. compression support is disabled - // 2. HEAD request - // 3. no Content-Encoding header - // 4. no content response (204) - // 5. content not modified response (304) - if (!request.compress || - request.method === 'HEAD' || - codings === null || - res.statusCode === 204 || - res.statusCode === 304) { - response = new Response(body, responseOptions) - resolve(response) - return - } - - // Be less strict when decoding compressed responses, since sometimes - // servers send slightly invalid responses that are still accepted - // by common browsers. - // Always using Z_SYNC_FLUSH is what cURL does. - const zlibOptions = { - flush: zlib.constants.Z_SYNC_FLUSH, - finishFlush: zlib.constants.Z_SYNC_FLUSH, - } - - // for gzip - if (codings === 'gzip' || codings === 'x-gzip') { - const unzip = new zlib.Gunzip(zlibOptions) - response = new Response( - // exceedingly rare that the stream would have an error, - // but just in case we proxy it to the stream in use. - body.on('error', /* istanbul ignore next */ er => unzip.emit('error', er)).pipe(unzip), - responseOptions - ) - resolve(response) - return - } - - // for deflate - if (codings === 'deflate' || codings === 'x-deflate') { - // handle the infamous raw deflate response from old servers - // a hack for old IIS and Apache servers - const raw = res.pipe(new Minipass()) - raw.once('data', chunk => { - // see http://stackoverflow.com/questions/37519828 - const decoder = (chunk[0] & 0x0F) === 0x08 - ? new zlib.Inflate() - : new zlib.InflateRaw() - // exceedingly rare that the stream would have an error, - // but just in case we proxy it to the stream in use. - body.on('error', /* istanbul ignore next */ er => decoder.emit('error', er)).pipe(decoder) - response = new Response(decoder, responseOptions) - resolve(response) - }) - return - } - - // for br - if (codings === 'br') { - // ignoring coverage so tests don't have to fake support (or lack of) for brotli - // istanbul ignore next - try { - var decoder = new zlib.BrotliDecompress() - } catch (err) { - reject(err) - finalize() - return - } - // exceedingly rare that the stream would have an error, - // but just in case we proxy it to the stream in use. - body.on('error', /* istanbul ignore next */ er => decoder.emit('error', er)).pipe(decoder) - response = new Response(decoder, responseOptions) - resolve(response) - return - } - - // otherwise, use response as-is - response = new Response(body, responseOptions) - resolve(response) - }) - - writeToStream(req, request) - }) -} - -module.exports = fetch - -fetch.isRedirect = code => - code === 301 || - code === 302 || - code === 303 || - code === 307 || - code === 308 - -fetch.Headers = Headers -fetch.Request = Request -fetch.Response = Response -fetch.FetchError = FetchError -fetch.AbortError = AbortError diff --git a/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/lib/request.js b/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/lib/request.js deleted file mode 100644 index 054439e6699107..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/lib/request.js +++ /dev/null @@ -1,282 +0,0 @@ -'use strict' -const { URL } = require('url') -const { Minipass } = require('minipass') -const Headers = require('./headers.js') -const { exportNodeCompatibleHeaders } = Headers -const Body = require('./body.js') -const { clone, extractContentType, getTotalBytes } = Body - -const version = require('../package.json').version -const defaultUserAgent = - `minipass-fetch/${version} (+https://github.com/isaacs/minipass-fetch)` - -const INTERNALS = Symbol('Request internals') - -const isRequest = input => - typeof input === 'object' && typeof input[INTERNALS] === 'object' - -const isAbortSignal = signal => { - const proto = ( - signal - && typeof signal === 'object' - && Object.getPrototypeOf(signal) - ) - return !!(proto && proto.constructor.name === 'AbortSignal') -} - -class Request extends Body { - constructor (input, init = {}) { - const parsedURL = isRequest(input) ? new URL(input.url) - : input && input.href ? new URL(input.href) - : new URL(`${input}`) - - if (isRequest(input)) { - init = { ...input[INTERNALS], ...init } - } else if (!input || typeof input === 'string') { - input = {} - } - - const method = (init.method || input.method || 'GET').toUpperCase() - const isGETHEAD = method === 'GET' || method === 'HEAD' - - if ((init.body !== null && init.body !== undefined || - isRequest(input) && input.body !== null) && isGETHEAD) { - throw new TypeError('Request with GET/HEAD method cannot have body') - } - - const inputBody = init.body !== null && init.body !== undefined ? init.body - : isRequest(input) && input.body !== null ? clone(input) - : null - - super(inputBody, { - timeout: init.timeout || input.timeout || 0, - size: init.size || input.size || 0, - }) - - const headers = new Headers(init.headers || input.headers || {}) - - if (inputBody !== null && inputBody !== undefined && - !headers.has('Content-Type')) { - const contentType = extractContentType(inputBody) - if (contentType) { - headers.append('Content-Type', contentType) - } - } - - const signal = 'signal' in init ? init.signal - : null - - if (signal !== null && signal !== undefined && !isAbortSignal(signal)) { - throw new TypeError('Expected signal must be an instanceof AbortSignal') - } - - // TLS specific options that are handled by node - const { - ca, - cert, - ciphers, - clientCertEngine, - crl, - dhparam, - ecdhCurve, - family, - honorCipherOrder, - key, - passphrase, - pfx, - rejectUnauthorized = process.env.NODE_TLS_REJECT_UNAUTHORIZED !== '0', - secureOptions, - secureProtocol, - servername, - sessionIdContext, - } = init - - this[INTERNALS] = { - method, - redirect: init.redirect || input.redirect || 'follow', - headers, - parsedURL, - signal, - ca, - cert, - ciphers, - clientCertEngine, - crl, - dhparam, - ecdhCurve, - family, - honorCipherOrder, - key, - passphrase, - pfx, - rejectUnauthorized, - secureOptions, - secureProtocol, - servername, - sessionIdContext, - } - - // node-fetch-only options - this.follow = init.follow !== undefined ? init.follow - : input.follow !== undefined ? input.follow - : 20 - this.compress = init.compress !== undefined ? init.compress - : input.compress !== undefined ? input.compress - : true - this.counter = init.counter || input.counter || 0 - this.agent = init.agent || input.agent - } - - get method () { - return this[INTERNALS].method - } - - get url () { - return this[INTERNALS].parsedURL.toString() - } - - get headers () { - return this[INTERNALS].headers - } - - get redirect () { - return this[INTERNALS].redirect - } - - get signal () { - return this[INTERNALS].signal - } - - clone () { - return new Request(this) - } - - get [Symbol.toStringTag] () { - return 'Request' - } - - static getNodeRequestOptions (request) { - const parsedURL = request[INTERNALS].parsedURL - const headers = new Headers(request[INTERNALS].headers) - - // fetch step 1.3 - if (!headers.has('Accept')) { - headers.set('Accept', '*/*') - } - - // Basic fetch - if (!/^https?:$/.test(parsedURL.protocol)) { - throw new TypeError('Only HTTP(S) protocols are supported') - } - - if (request.signal && - Minipass.isStream(request.body) && - typeof request.body.destroy !== 'function') { - throw new Error( - 'Cancellation of streamed requests with AbortSignal is not supported') - } - - // HTTP-network-or-cache fetch steps 2.4-2.7 - const contentLengthValue = - (request.body === null || request.body === undefined) && - /^(POST|PUT)$/i.test(request.method) ? '0' - : request.body !== null && request.body !== undefined - ? getTotalBytes(request) - : null - - if (contentLengthValue) { - headers.set('Content-Length', contentLengthValue + '') - } - - // HTTP-network-or-cache fetch step 2.11 - if (!headers.has('User-Agent')) { - headers.set('User-Agent', defaultUserAgent) - } - - // HTTP-network-or-cache fetch step 2.15 - if (request.compress && !headers.has('Accept-Encoding')) { - headers.set('Accept-Encoding', 'gzip,deflate') - } - - const agent = typeof request.agent === 'function' - ? request.agent(parsedURL) - : request.agent - - if (!headers.has('Connection') && !agent) { - headers.set('Connection', 'close') - } - - // TLS specific options that are handled by node - const { - ca, - cert, - ciphers, - clientCertEngine, - crl, - dhparam, - ecdhCurve, - family, - honorCipherOrder, - key, - passphrase, - pfx, - rejectUnauthorized, - secureOptions, - secureProtocol, - servername, - sessionIdContext, - } = request[INTERNALS] - - // HTTP-network fetch step 4.2 - // chunked encoding is handled by Node.js - - // we cannot spread parsedURL directly, so we have to read each property one-by-one - // and map them to the equivalent https?.request() method options - const urlProps = { - auth: parsedURL.username || parsedURL.password - ? `${parsedURL.username}:${parsedURL.password}` - : '', - host: parsedURL.host, - hostname: parsedURL.hostname, - path: `${parsedURL.pathname}${parsedURL.search}`, - port: parsedURL.port, - protocol: parsedURL.protocol, - } - - return { - ...urlProps, - method: request.method, - headers: exportNodeCompatibleHeaders(headers), - agent, - ca, - cert, - ciphers, - clientCertEngine, - crl, - dhparam, - ecdhCurve, - family, - honorCipherOrder, - key, - passphrase, - pfx, - rejectUnauthorized, - secureOptions, - secureProtocol, - servername, - sessionIdContext, - timeout: request.timeout, - } - } -} - -module.exports = Request - -Object.defineProperties(Request.prototype, { - method: { enumerable: true }, - url: { enumerable: true }, - headers: { enumerable: true }, - redirect: { enumerable: true }, - clone: { enumerable: true }, - signal: { enumerable: true }, -}) diff --git a/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/lib/response.js b/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/lib/response.js deleted file mode 100644 index 54cb52db3594a7..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/lib/response.js +++ /dev/null @@ -1,90 +0,0 @@ -'use strict' -const http = require('http') -const { STATUS_CODES } = http - -const Headers = require('./headers.js') -const Body = require('./body.js') -const { clone, extractContentType } = Body - -const INTERNALS = Symbol('Response internals') - -class Response extends Body { - constructor (body = null, opts = {}) { - super(body, opts) - - const status = opts.status || 200 - const headers = new Headers(opts.headers) - - if (body !== null && body !== undefined && !headers.has('Content-Type')) { - const contentType = extractContentType(body) - if (contentType) { - headers.append('Content-Type', contentType) - } - } - - this[INTERNALS] = { - url: opts.url, - status, - statusText: opts.statusText || STATUS_CODES[status], - headers, - counter: opts.counter, - trailer: Promise.resolve(opts.trailer || new Headers()), - } - } - - get trailer () { - return this[INTERNALS].trailer - } - - get url () { - return this[INTERNALS].url || '' - } - - get status () { - return this[INTERNALS].status - } - - get ok () { - return this[INTERNALS].status >= 200 && this[INTERNALS].status < 300 - } - - get redirected () { - return this[INTERNALS].counter > 0 - } - - get statusText () { - return this[INTERNALS].statusText - } - - get headers () { - return this[INTERNALS].headers - } - - clone () { - return new Response(clone(this), { - url: this.url, - status: this.status, - statusText: this.statusText, - headers: this.headers, - ok: this.ok, - redirected: this.redirected, - trailer: this.trailer, - }) - } - - get [Symbol.toStringTag] () { - return 'Response' - } -} - -module.exports = Response - -Object.defineProperties(Response.prototype, { - url: { enumerable: true }, - status: { enumerable: true }, - ok: { enumerable: true }, - redirected: { enumerable: true }, - statusText: { enumerable: true }, - headers: { enumerable: true }, - clone: { enumerable: true }, -}) diff --git a/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/package.json b/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/package.json deleted file mode 100644 index d491a7fba126d0..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/minipass-fetch/package.json +++ /dev/null @@ -1,69 +0,0 @@ -{ - "name": "minipass-fetch", - "version": "3.0.5", - "description": "An implementation of window.fetch in Node.js using Minipass streams", - "license": "MIT", - "main": "lib/index.js", - "scripts": { - "test:tls-fixtures": "./test/fixtures/tls/setup.sh", - "test": "tap", - "snap": "tap", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "postlint": "template-oss-check", - "lintfix": "npm run lint -- --fix", - "posttest": "npm run lint", - "template-oss-apply": "template-oss-apply --force" - }, - "tap": { - "coverage-map": "map.js", - "check-coverage": true, - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - }, - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.22.0", - "@ungap/url-search-params": "^0.2.2", - "abort-controller": "^3.0.0", - "abortcontroller-polyfill": "~1.7.3", - "encoding": "^0.1.13", - "form-data": "^4.0.0", - "nock": "^13.2.4", - "parted": "^0.1.1", - "string-to-arraybuffer": "^1.0.2", - "tap": "^16.0.0" - }, - "dependencies": { - "minipass": "^7.0.3", - "minipass-sized": "^1.0.3", - "minizlib": "^2.1.2" - }, - "optionalDependencies": { - "encoding": "^0.1.13" - }, - "repository": { - "type": "git", - "url": "git+https://github.com/npm/minipass-fetch.git" - }, - "keywords": [ - "fetch", - "minipass", - "node-fetch", - "window.fetch" - ], - "files": [ - "bin/", - "lib/" - ], - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - }, - "author": "GitHub Inc.", - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.22.0", - "publish": "true" - } -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/proc-log/LICENSE b/deps/npm/node_modules/tuf-js/node_modules/proc-log/LICENSE deleted file mode 100644 index 83837797202b70..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/proc-log/LICENSE +++ /dev/null @@ -1,15 +0,0 @@ -The ISC License - -Copyright (c) GitHub, Inc. - -Permission to use, copy, modify, and/or distribute this software for any -purpose with or without fee is hereby granted, provided that the above -copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR -IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/tuf-js/node_modules/proc-log/lib/index.js b/deps/npm/node_modules/tuf-js/node_modules/proc-log/lib/index.js deleted file mode 100644 index 86d90861078dab..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/proc-log/lib/index.js +++ /dev/null @@ -1,153 +0,0 @@ -const META = Symbol('proc-log.meta') -module.exports = { - META: META, - output: { - LEVELS: [ - 'standard', - 'error', - 'buffer', - 'flush', - ], - KEYS: { - standard: 'standard', - error: 'error', - buffer: 'buffer', - flush: 'flush', - }, - standard: function (...args) { - return process.emit('output', 'standard', ...args) - }, - error: function (...args) { - return process.emit('output', 'error', ...args) - }, - buffer: function (...args) { - return process.emit('output', 'buffer', ...args) - }, - flush: function (...args) { - return process.emit('output', 'flush', ...args) - }, - }, - log: { - LEVELS: [ - 'notice', - 'error', - 'warn', - 'info', - 'verbose', - 'http', - 'silly', - 'timing', - 'pause', - 'resume', - ], - KEYS: { - notice: 'notice', - error: 'error', - warn: 'warn', - info: 'info', - verbose: 'verbose', - http: 'http', - silly: 'silly', - timing: 'timing', - pause: 'pause', - resume: 'resume', - }, - error: function (...args) { - return process.emit('log', 'error', ...args) - }, - notice: function (...args) { - return process.emit('log', 'notice', ...args) - }, - warn: function (...args) { - return process.emit('log', 'warn', ...args) - }, - info: function (...args) { - return process.emit('log', 'info', ...args) - }, - verbose: function (...args) { - return process.emit('log', 'verbose', ...args) - }, - http: function (...args) { - return process.emit('log', 'http', ...args) - }, - silly: function (...args) { - return process.emit('log', 'silly', ...args) - }, - timing: function (...args) { - return process.emit('log', 'timing', ...args) - }, - pause: function () { - return process.emit('log', 'pause') - }, - resume: function () { - return process.emit('log', 'resume') - }, - }, - time: { - LEVELS: [ - 'start', - 'end', - ], - KEYS: { - start: 'start', - end: 'end', - }, - start: function (name, fn) { - process.emit('time', 'start', name) - function end () { - return process.emit('time', 'end', name) - } - if (typeof fn === 'function') { - const res = fn() - if (res && res.finally) { - return res.finally(end) - } - end() - return res - } - return end - }, - end: function (name) { - return process.emit('time', 'end', name) - }, - }, - input: { - LEVELS: [ - 'start', - 'end', - 'read', - ], - KEYS: { - start: 'start', - end: 'end', - read: 'read', - }, - start: function (fn) { - process.emit('input', 'start') - function end () { - return process.emit('input', 'end') - } - if (typeof fn === 'function') { - const res = fn() - if (res && res.finally) { - return res.finally(end) - } - end() - return res - } - return end - }, - end: function () { - return process.emit('input', 'end') - }, - read: function (...args) { - let resolve, reject - const promise = new Promise((_resolve, _reject) => { - resolve = _resolve - reject = _reject - }) - process.emit('input', 'read', resolve, reject, ...args) - return promise - }, - }, -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/proc-log/package.json b/deps/npm/node_modules/tuf-js/node_modules/proc-log/package.json deleted file mode 100644 index 4ab89102ecc9b5..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/proc-log/package.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "name": "proc-log", - "version": "4.2.0", - "files": [ - "bin/", - "lib/" - ], - "main": "lib/index.js", - "description": "just emit 'log' events on the process object", - "repository": { - "type": "git", - "url": "https://github.com/npm/proc-log.git" - }, - "author": "GitHub Inc.", - "license": "ISC", - "scripts": { - "test": "tap", - "snap": "tap", - "posttest": "npm run lint", - "postsnap": "eslint index.js test/*.js --fix", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "postlint": "template-oss-check", - "lintfix": "npm run lint -- --fix", - "template-oss-apply": "template-oss-apply --force" - }, - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.21.3", - "tap": "^16.0.1" - }, - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.21.3", - "publish": true - }, - "tap": { - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - } -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/ssri/LICENSE.md b/deps/npm/node_modules/tuf-js/node_modules/ssri/LICENSE.md deleted file mode 100644 index e335388869f50f..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/ssri/LICENSE.md +++ /dev/null @@ -1,16 +0,0 @@ -ISC License - -Copyright 2021 (c) npm, Inc. - -Permission to use, copy, modify, and/or distribute this software for -any purpose with or without fee is hereby granted, provided that the -above copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE COPYRIGHT HOLDER DISCLAIMS -ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE -COPYRIGHT HOLDER BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR -CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE -USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/tuf-js/node_modules/ssri/lib/index.js b/deps/npm/node_modules/tuf-js/node_modules/ssri/lib/index.js deleted file mode 100644 index 7d749ed480fb98..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/ssri/lib/index.js +++ /dev/null @@ -1,580 +0,0 @@ -'use strict' - -const crypto = require('crypto') -const { Minipass } = require('minipass') - -const SPEC_ALGORITHMS = ['sha512', 'sha384', 'sha256'] -const DEFAULT_ALGORITHMS = ['sha512'] - -// TODO: this should really be a hardcoded list of algorithms we support, -// rather than [a-z0-9]. -const BASE64_REGEX = /^[a-z0-9+/]+(?:=?=?)$/i -const SRI_REGEX = /^([a-z0-9]+)-([^?]+)([?\S*]*)$/ -const STRICT_SRI_REGEX = /^([a-z0-9]+)-([A-Za-z0-9+/=]{44,88})(\?[\x21-\x7E]*)?$/ -const VCHAR_REGEX = /^[\x21-\x7E]+$/ - -const getOptString = options => options?.length ? `?${options.join('?')}` : '' - -class IntegrityStream extends Minipass { - #emittedIntegrity - #emittedSize - #emittedVerified - - constructor (opts) { - super() - this.size = 0 - this.opts = opts - - // may be overridden later, but set now for class consistency - this.#getOptions() - - // options used for calculating stream. can't be changed. - if (opts?.algorithms) { - this.algorithms = [...opts.algorithms] - } else { - this.algorithms = [...DEFAULT_ALGORITHMS] - } - if (this.algorithm !== null && !this.algorithms.includes(this.algorithm)) { - this.algorithms.push(this.algorithm) - } - - this.hashes = this.algorithms.map(crypto.createHash) - } - - #getOptions () { - // For verification - this.sri = this.opts?.integrity ? parse(this.opts?.integrity, this.opts) : null - this.expectedSize = this.opts?.size - - if (!this.sri) { - this.algorithm = null - } else if (this.sri.isHash) { - this.goodSri = true - this.algorithm = this.sri.algorithm - } else { - this.goodSri = !this.sri.isEmpty() - this.algorithm = this.sri.pickAlgorithm(this.opts) - } - - this.digests = this.goodSri ? this.sri[this.algorithm] : null - this.optString = getOptString(this.opts?.options) - } - - on (ev, handler) { - if (ev === 'size' && this.#emittedSize) { - return handler(this.#emittedSize) - } - - if (ev === 'integrity' && this.#emittedIntegrity) { - return handler(this.#emittedIntegrity) - } - - if (ev === 'verified' && this.#emittedVerified) { - return handler(this.#emittedVerified) - } - - return super.on(ev, handler) - } - - emit (ev, data) { - if (ev === 'end') { - this.#onEnd() - } - return super.emit(ev, data) - } - - write (data) { - this.size += data.length - this.hashes.forEach(h => h.update(data)) - return super.write(data) - } - - #onEnd () { - if (!this.goodSri) { - this.#getOptions() - } - const newSri = parse(this.hashes.map((h, i) => { - return `${this.algorithms[i]}-${h.digest('base64')}${this.optString}` - }).join(' '), this.opts) - // Integrity verification mode - const match = this.goodSri && newSri.match(this.sri, this.opts) - if (typeof this.expectedSize === 'number' && this.size !== this.expectedSize) { - /* eslint-disable-next-line max-len */ - const err = new Error(`stream size mismatch when checking ${this.sri}.\n Wanted: ${this.expectedSize}\n Found: ${this.size}`) - err.code = 'EBADSIZE' - err.found = this.size - err.expected = this.expectedSize - err.sri = this.sri - this.emit('error', err) - } else if (this.sri && !match) { - /* eslint-disable-next-line max-len */ - const err = new Error(`${this.sri} integrity checksum failed when using ${this.algorithm}: wanted ${this.digests} but got ${newSri}. (${this.size} bytes)`) - err.code = 'EINTEGRITY' - err.found = newSri - err.expected = this.digests - err.algorithm = this.algorithm - err.sri = this.sri - this.emit('error', err) - } else { - this.#emittedSize = this.size - this.emit('size', this.size) - this.#emittedIntegrity = newSri - this.emit('integrity', newSri) - if (match) { - this.#emittedVerified = match - this.emit('verified', match) - } - } - } -} - -class Hash { - get isHash () { - return true - } - - constructor (hash, opts) { - const strict = opts?.strict - this.source = hash.trim() - - // set default values so that we make V8 happy to - // always see a familiar object template. - this.digest = '' - this.algorithm = '' - this.options = [] - - // 3.1. Integrity metadata (called "Hash" by ssri) - // https://w3c.github.io/webappsec-subresource-integrity/#integrity-metadata-description - const match = this.source.match( - strict - ? STRICT_SRI_REGEX - : SRI_REGEX - ) - if (!match) { - return - } - if (strict && !SPEC_ALGORITHMS.includes(match[1])) { - return - } - this.algorithm = match[1] - this.digest = match[2] - - const rawOpts = match[3] - if (rawOpts) { - this.options = rawOpts.slice(1).split('?') - } - } - - hexDigest () { - return this.digest && Buffer.from(this.digest, 'base64').toString('hex') - } - - toJSON () { - return this.toString() - } - - match (integrity, opts) { - const other = parse(integrity, opts) - if (!other) { - return false - } - if (other.isIntegrity) { - const algo = other.pickAlgorithm(opts, [this.algorithm]) - - if (!algo) { - return false - } - - const foundHash = other[algo].find(hash => hash.digest === this.digest) - - if (foundHash) { - return foundHash - } - - return false - } - return other.digest === this.digest ? other : false - } - - toString (opts) { - if (opts?.strict) { - // Strict mode enforces the standard as close to the foot of the - // letter as it can. - if (!( - // The spec has very restricted productions for algorithms. - // https://www.w3.org/TR/CSP2/#source-list-syntax - SPEC_ALGORITHMS.includes(this.algorithm) && - // Usually, if someone insists on using a "different" base64, we - // leave it as-is, since there's multiple standards, and the - // specified is not a URL-safe variant. - // https://www.w3.org/TR/CSP2/#base64_value - this.digest.match(BASE64_REGEX) && - // Option syntax is strictly visual chars. - // https://w3c.github.io/webappsec-subresource-integrity/#grammardef-option-expression - // https://tools.ietf.org/html/rfc5234#appendix-B.1 - this.options.every(opt => opt.match(VCHAR_REGEX)) - )) { - return '' - } - } - return `${this.algorithm}-${this.digest}${getOptString(this.options)}` - } -} - -function integrityHashToString (toString, sep, opts, hashes) { - const toStringIsNotEmpty = toString !== '' - - let shouldAddFirstSep = false - let complement = '' - - const lastIndex = hashes.length - 1 - - for (let i = 0; i < lastIndex; i++) { - const hashString = Hash.prototype.toString.call(hashes[i], opts) - - if (hashString) { - shouldAddFirstSep = true - - complement += hashString - complement += sep - } - } - - const finalHashString = Hash.prototype.toString.call(hashes[lastIndex], opts) - - if (finalHashString) { - shouldAddFirstSep = true - complement += finalHashString - } - - if (toStringIsNotEmpty && shouldAddFirstSep) { - return toString + sep + complement - } - - return toString + complement -} - -class Integrity { - get isIntegrity () { - return true - } - - toJSON () { - return this.toString() - } - - isEmpty () { - return Object.keys(this).length === 0 - } - - toString (opts) { - let sep = opts?.sep || ' ' - let toString = '' - - if (opts?.strict) { - // Entries must be separated by whitespace, according to spec. - sep = sep.replace(/\S+/g, ' ') - - for (const hash of SPEC_ALGORITHMS) { - if (this[hash]) { - toString = integrityHashToString(toString, sep, opts, this[hash]) - } - } - } else { - for (const hash of Object.keys(this)) { - toString = integrityHashToString(toString, sep, opts, this[hash]) - } - } - - return toString - } - - concat (integrity, opts) { - const other = typeof integrity === 'string' - ? integrity - : stringify(integrity, opts) - return parse(`${this.toString(opts)} ${other}`, opts) - } - - hexDigest () { - return parse(this, { single: true }).hexDigest() - } - - // add additional hashes to an integrity value, but prevent - // *changing* an existing integrity hash. - merge (integrity, opts) { - const other = parse(integrity, opts) - for (const algo in other) { - if (this[algo]) { - if (!this[algo].find(hash => - other[algo].find(otherhash => - hash.digest === otherhash.digest))) { - throw new Error('hashes do not match, cannot update integrity') - } - } else { - this[algo] = other[algo] - } - } - } - - match (integrity, opts) { - const other = parse(integrity, opts) - if (!other) { - return false - } - const algo = other.pickAlgorithm(opts, Object.keys(this)) - return ( - !!algo && - this[algo] && - other[algo] && - this[algo].find(hash => - other[algo].find(otherhash => - hash.digest === otherhash.digest - ) - ) - ) || false - } - - // Pick the highest priority algorithm present, optionally also limited to a - // set of hashes found in another integrity. When limiting it may return - // nothing. - pickAlgorithm (opts, hashes) { - const pickAlgorithm = opts?.pickAlgorithm || getPrioritizedHash - const keys = Object.keys(this).filter(k => { - if (hashes?.length) { - return hashes.includes(k) - } - return true - }) - if (keys.length) { - return keys.reduce((acc, algo) => pickAlgorithm(acc, algo) || acc) - } - // no intersection between this and hashes, - return null - } -} - -module.exports.parse = parse -function parse (sri, opts) { - if (!sri) { - return null - } - if (typeof sri === 'string') { - return _parse(sri, opts) - } else if (sri.algorithm && sri.digest) { - const fullSri = new Integrity() - fullSri[sri.algorithm] = [sri] - return _parse(stringify(fullSri, opts), opts) - } else { - return _parse(stringify(sri, opts), opts) - } -} - -function _parse (integrity, opts) { - // 3.4.3. Parse metadata - // https://w3c.github.io/webappsec-subresource-integrity/#parse-metadata - if (opts?.single) { - return new Hash(integrity, opts) - } - const hashes = integrity.trim().split(/\s+/).reduce((acc, string) => { - const hash = new Hash(string, opts) - if (hash.algorithm && hash.digest) { - const algo = hash.algorithm - if (!acc[algo]) { - acc[algo] = [] - } - acc[algo].push(hash) - } - return acc - }, new Integrity()) - return hashes.isEmpty() ? null : hashes -} - -module.exports.stringify = stringify -function stringify (obj, opts) { - if (obj.algorithm && obj.digest) { - return Hash.prototype.toString.call(obj, opts) - } else if (typeof obj === 'string') { - return stringify(parse(obj, opts), opts) - } else { - return Integrity.prototype.toString.call(obj, opts) - } -} - -module.exports.fromHex = fromHex -function fromHex (hexDigest, algorithm, opts) { - const optString = getOptString(opts?.options) - return parse( - `${algorithm}-${ - Buffer.from(hexDigest, 'hex').toString('base64') - }${optString}`, opts - ) -} - -module.exports.fromData = fromData -function fromData (data, opts) { - const algorithms = opts?.algorithms || [...DEFAULT_ALGORITHMS] - const optString = getOptString(opts?.options) - return algorithms.reduce((acc, algo) => { - const digest = crypto.createHash(algo).update(data).digest('base64') - const hash = new Hash( - `${algo}-${digest}${optString}`, - opts - ) - /* istanbul ignore else - it would be VERY strange if the string we - * just calculated with an algo did not have an algo or digest. - */ - if (hash.algorithm && hash.digest) { - const hashAlgo = hash.algorithm - if (!acc[hashAlgo]) { - acc[hashAlgo] = [] - } - acc[hashAlgo].push(hash) - } - return acc - }, new Integrity()) -} - -module.exports.fromStream = fromStream -function fromStream (stream, opts) { - const istream = integrityStream(opts) - return new Promise((resolve, reject) => { - stream.pipe(istream) - stream.on('error', reject) - istream.on('error', reject) - let sri - istream.on('integrity', s => { - sri = s - }) - istream.on('end', () => resolve(sri)) - istream.resume() - }) -} - -module.exports.checkData = checkData -function checkData (data, sri, opts) { - sri = parse(sri, opts) - if (!sri || !Object.keys(sri).length) { - if (opts?.error) { - throw Object.assign( - new Error('No valid integrity hashes to check against'), { - code: 'EINTEGRITY', - } - ) - } else { - return false - } - } - const algorithm = sri.pickAlgorithm(opts) - const digest = crypto.createHash(algorithm).update(data).digest('base64') - const newSri = parse({ algorithm, digest }) - const match = newSri.match(sri, opts) - opts = opts || {} - if (match || !(opts.error)) { - return match - } else if (typeof opts.size === 'number' && (data.length !== opts.size)) { - /* eslint-disable-next-line max-len */ - const err = new Error(`data size mismatch when checking ${sri}.\n Wanted: ${opts.size}\n Found: ${data.length}`) - err.code = 'EBADSIZE' - err.found = data.length - err.expected = opts.size - err.sri = sri - throw err - } else { - /* eslint-disable-next-line max-len */ - const err = new Error(`Integrity checksum failed when using ${algorithm}: Wanted ${sri}, but got ${newSri}. (${data.length} bytes)`) - err.code = 'EINTEGRITY' - err.found = newSri - err.expected = sri - err.algorithm = algorithm - err.sri = sri - throw err - } -} - -module.exports.checkStream = checkStream -function checkStream (stream, sri, opts) { - opts = opts || Object.create(null) - opts.integrity = sri - sri = parse(sri, opts) - if (!sri || !Object.keys(sri).length) { - return Promise.reject(Object.assign( - new Error('No valid integrity hashes to check against'), { - code: 'EINTEGRITY', - } - )) - } - const checker = integrityStream(opts) - return new Promise((resolve, reject) => { - stream.pipe(checker) - stream.on('error', reject) - checker.on('error', reject) - let verified - checker.on('verified', s => { - verified = s - }) - checker.on('end', () => resolve(verified)) - checker.resume() - }) -} - -module.exports.integrityStream = integrityStream -function integrityStream (opts = Object.create(null)) { - return new IntegrityStream(opts) -} - -module.exports.create = createIntegrity -function createIntegrity (opts) { - const algorithms = opts?.algorithms || [...DEFAULT_ALGORITHMS] - const optString = getOptString(opts?.options) - - const hashes = algorithms.map(crypto.createHash) - - return { - update: function (chunk, enc) { - hashes.forEach(h => h.update(chunk, enc)) - return this - }, - digest: function () { - const integrity = algorithms.reduce((acc, algo) => { - const digest = hashes.shift().digest('base64') - const hash = new Hash( - `${algo}-${digest}${optString}`, - opts - ) - /* istanbul ignore else - it would be VERY strange if the hash we - * just calculated with an algo did not have an algo or digest. - */ - if (hash.algorithm && hash.digest) { - const hashAlgo = hash.algorithm - if (!acc[hashAlgo]) { - acc[hashAlgo] = [] - } - acc[hashAlgo].push(hash) - } - return acc - }, new Integrity()) - - return integrity - }, - } -} - -const NODE_HASHES = crypto.getHashes() - -// This is a Best Effort™ at a reasonable priority for hash algos -const DEFAULT_PRIORITY = [ - 'md5', 'whirlpool', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512', - // TODO - it's unclear _which_ of these Node will actually use as its name - // for the algorithm, so we guesswork it based on the OpenSSL names. - 'sha3', - 'sha3-256', 'sha3-384', 'sha3-512', - 'sha3_256', 'sha3_384', 'sha3_512', -].filter(algo => NODE_HASHES.includes(algo)) - -function getPrioritizedHash (algo1, algo2) { - /* eslint-disable-next-line max-len */ - return DEFAULT_PRIORITY.indexOf(algo1.toLowerCase()) >= DEFAULT_PRIORITY.indexOf(algo2.toLowerCase()) - ? algo1 - : algo2 -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/ssri/package.json b/deps/npm/node_modules/tuf-js/node_modules/ssri/package.json deleted file mode 100644 index 28395414e4643c..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/ssri/package.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "name": "ssri", - "version": "10.0.6", - "description": "Standard Subresource Integrity library -- parses, serializes, generates, and verifies integrity metadata according to the SRI spec.", - "main": "lib/index.js", - "files": [ - "bin/", - "lib/" - ], - "scripts": { - "prerelease": "npm t", - "postrelease": "npm publish", - "posttest": "npm run lint", - "test": "tap", - "coverage": "tap", - "lint": "eslint \"**/*.{js,cjs,ts,mjs,jsx,tsx}\"", - "postlint": "template-oss-check", - "template-oss-apply": "template-oss-apply --force", - "lintfix": "npm run lint -- --fix", - "snap": "tap" - }, - "tap": { - "check-coverage": true, - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - }, - "repository": { - "type": "git", - "url": "git+https://github.com/npm/ssri.git" - }, - "keywords": [ - "w3c", - "web", - "security", - "integrity", - "checksum", - "hashing", - "subresource integrity", - "sri", - "sri hash", - "sri string", - "sri generator", - "html" - ], - "author": "GitHub Inc.", - "license": "ISC", - "dependencies": { - "minipass": "^7.0.3" - }, - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.22.0", - "tap": "^16.0.1" - }, - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.22.0", - "publish": "true" - } -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/unique-filename/LICENSE b/deps/npm/node_modules/tuf-js/node_modules/unique-filename/LICENSE deleted file mode 100644 index 69619c125ea7ef..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/unique-filename/LICENSE +++ /dev/null @@ -1,5 +0,0 @@ -Copyright npm, Inc - -Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/tuf-js/node_modules/unique-filename/lib/index.js b/deps/npm/node_modules/tuf-js/node_modules/unique-filename/lib/index.js deleted file mode 100644 index d067d2e709809a..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/unique-filename/lib/index.js +++ /dev/null @@ -1,7 +0,0 @@ -var path = require('path') - -var uniqueSlug = require('unique-slug') - -module.exports = function (filepath, prefix, uniq) { - return path.join(filepath, (prefix ? prefix + '-' : '') + uniqueSlug(uniq)) -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/unique-filename/package.json b/deps/npm/node_modules/tuf-js/node_modules/unique-filename/package.json deleted file mode 100644 index b2fbf0666489a6..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/unique-filename/package.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "name": "unique-filename", - "version": "3.0.0", - "description": "Generate a unique filename for use in temporary directories or caches.", - "main": "lib/index.js", - "scripts": { - "test": "tap", - "lint": "eslint \"**/*.js\"", - "postlint": "template-oss-check", - "template-oss-apply": "template-oss-apply --force", - "lintfix": "npm run lint -- --fix", - "snap": "tap", - "posttest": "npm run lint" - }, - "repository": { - "type": "git", - "url": "https://github.com/npm/unique-filename.git" - }, - "keywords": [], - "author": "GitHub Inc.", - "license": "ISC", - "bugs": { - "url": "https://github.com/iarna/unique-filename/issues" - }, - "homepage": "https://github.com/iarna/unique-filename", - "devDependencies": { - "@npmcli/eslint-config": "^4.0.0", - "@npmcli/template-oss": "4.5.1", - "tap": "^16.3.0" - }, - "dependencies": { - "unique-slug": "^4.0.0" - }, - "files": [ - "bin/", - "lib/" - ], - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.5.1" - }, - "tap": { - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - } -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/unique-slug/LICENSE b/deps/npm/node_modules/tuf-js/node_modules/unique-slug/LICENSE deleted file mode 100644 index 7953647e7760b8..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/unique-slug/LICENSE +++ /dev/null @@ -1,15 +0,0 @@ -The ISC License - -Copyright npm, Inc - -Permission to use, copy, modify, and/or distribute this software for any -purpose with or without fee is hereby granted, provided that the above -copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR -IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/npm/node_modules/tuf-js/node_modules/unique-slug/lib/index.js b/deps/npm/node_modules/tuf-js/node_modules/unique-slug/lib/index.js deleted file mode 100644 index 1bac84d95d7307..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/unique-slug/lib/index.js +++ /dev/null @@ -1,11 +0,0 @@ -'use strict' -var MurmurHash3 = require('imurmurhash') - -module.exports = function (uniq) { - if (uniq) { - var hash = new MurmurHash3(uniq) - return ('00000000' + hash.result().toString(16)).slice(-8) - } else { - return (Math.random().toString(16) + '0000000').slice(2, 10) - } -} diff --git a/deps/npm/node_modules/tuf-js/node_modules/unique-slug/package.json b/deps/npm/node_modules/tuf-js/node_modules/unique-slug/package.json deleted file mode 100644 index 33732cdbb42859..00000000000000 --- a/deps/npm/node_modules/tuf-js/node_modules/unique-slug/package.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "name": "unique-slug", - "version": "4.0.0", - "description": "Generate a unique character string suitible for use in files and URLs.", - "main": "lib/index.js", - "scripts": { - "test": "tap", - "lint": "eslint \"**/*.js\"", - "postlint": "template-oss-check", - "template-oss-apply": "template-oss-apply --force", - "lintfix": "npm run lint -- --fix", - "snap": "tap", - "posttest": "npm run lint" - }, - "keywords": [], - "author": "GitHub Inc.", - "license": "ISC", - "devDependencies": { - "@npmcli/eslint-config": "^3.1.0", - "@npmcli/template-oss": "4.5.1", - "tap": "^16.3.0" - }, - "repository": { - "type": "git", - "url": "https://github.com/npm/unique-slug.git" - }, - "dependencies": { - "imurmurhash": "^0.1.4" - }, - "files": [ - "bin/", - "lib/" - ], - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - }, - "templateOSS": { - "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "4.5.1" - }, - "tap": { - "nyc-arg": [ - "--exclude", - "tap-snapshots/**" - ] - } -} diff --git a/deps/npm/node_modules/tuf-js/package.json b/deps/npm/node_modules/tuf-js/package.json index 9280719230d9ab..e79a3d45f3f06a 100644 --- a/deps/npm/node_modules/tuf-js/package.json +++ b/deps/npm/node_modules/tuf-js/package.json @@ -1,12 +1,12 @@ { "name": "tuf-js", - "version": "2.2.1", + "version": "3.0.1", "description": "JavaScript implementation of The Update Framework (TUF)", "main": "dist/index.js", "types": "dist/index.d.ts", "scripts": { "build": "tsc --build", - "clean": "rm -rf dist", + "clean": "rm -rf dist && rm tsconfig.tsbuildinfo", "test": "jest" }, "repository": { @@ -28,16 +28,16 @@ }, "homepage": "https://github.com/theupdateframework/tuf-js/tree/main/packages/client#readme", "devDependencies": { - "@tufjs/repo-mock": "2.0.1", + "@tufjs/repo-mock": "3.0.1", "@types/debug": "^4.1.12", "@types/make-fetch-happen": "^10.0.4" }, "dependencies": { - "@tufjs/models": "2.0.1", - "debug": "^4.3.4", - "make-fetch-happen": "^13.0.1" + "@tufjs/models": "3.0.1", + "debug": "^4.3.6", + "make-fetch-happen": "^14.0.1" }, "engines": { - "node": "^16.14.0 || >=18.0.0" + "node": "^18.17.0 || >=20.5.0" } } diff --git a/deps/npm/node_modules/wrap-ansi/node_modules/ansi-regex/index.js b/deps/npm/node_modules/wrap-ansi/node_modules/ansi-regex/index.js index 130a0929b8ce8c..ddfdba39a783a4 100644 --- a/deps/npm/node_modules/wrap-ansi/node_modules/ansi-regex/index.js +++ b/deps/npm/node_modules/wrap-ansi/node_modules/ansi-regex/index.js @@ -1,7 +1,9 @@ export default function ansiRegex({onlyFirst = false} = {}) { + // Valid string terminator sequences are BEL, ESC\, and 0x9c + const ST = '(?:\\u0007|\\u001B\\u005C|\\u009C)'; const pattern = [ - '[\\u001B\\u009B][[\\]()#;?]*(?:(?:(?:(?:;[-a-zA-Z\\d\\/#&.:=?%@~_]+)*|[a-zA-Z\\d]+(?:;[-a-zA-Z\\d\\/#&.:=?%@~_]*)*)?\\u0007)', - '(?:(?:\\d{1,4}(?:;\\d{0,4})*)?[\\dA-PR-TZcf-ntqry=><~]))' + `[\\u001B\\u009B][[\\]()#;?]*(?:(?:(?:(?:;[-a-zA-Z\\d\\/#&.:=?%@~_]+)*|[a-zA-Z\\d]+(?:;[-a-zA-Z\\d\\/#&.:=?%@~_]*)*)?${ST})`, + '(?:(?:\\d{1,4}(?:;\\d{0,4})*)?[\\dA-PR-TZcf-nq-uy=><~]))', ].join('|'); return new RegExp(pattern, onlyFirst ? undefined : 'g'); diff --git a/deps/npm/node_modules/wrap-ansi/node_modules/ansi-regex/package.json b/deps/npm/node_modules/wrap-ansi/node_modules/ansi-regex/package.json index 7bbb563bf2a70a..49f3f61021512b 100644 --- a/deps/npm/node_modules/wrap-ansi/node_modules/ansi-regex/package.json +++ b/deps/npm/node_modules/wrap-ansi/node_modules/ansi-regex/package.json @@ -1,6 +1,6 @@ { "name": "ansi-regex", - "version": "6.0.1", + "version": "6.1.0", "description": "Regular expression for matching ANSI escape codes", "license": "MIT", "repository": "chalk/ansi-regex", @@ -12,6 +12,8 @@ }, "type": "module", "exports": "./index.js", + "types": "./index.d.ts", + "sideEffects": false, "engines": { "node": ">=12" }, @@ -51,8 +53,9 @@ "pattern" ], "devDependencies": { + "ansi-escapes": "^5.0.0", "ava": "^3.15.0", - "tsd": "^0.14.0", - "xo": "^0.38.2" + "tsd": "^0.21.0", + "xo": "^0.54.2" } } diff --git a/deps/npm/package.json b/deps/npm/package.json index c92578506b30ac..8d01af4a7ce8d0 100644 --- a/deps/npm/package.json +++ b/deps/npm/package.json @@ -1,5 +1,5 @@ { - "version": "10.9.0", + "version": "10.9.2", "name": "npm", "description": "a package manager for JavaScript", "workspaces": [ @@ -55,25 +55,25 @@ "@npmcli/arborist": "^8.0.0", "@npmcli/config": "^9.0.0", "@npmcli/fs": "^4.0.0", - "@npmcli/map-workspaces": "^4.0.1", - "@npmcli/package-json": "^6.0.1", - "@npmcli/promise-spawn": "^8.0.1", + "@npmcli/map-workspaces": "^4.0.2", + "@npmcli/package-json": "^6.1.0", + "@npmcli/promise-spawn": "^8.0.2", "@npmcli/redact": "^3.0.0", "@npmcli/run-script": "^9.0.1", - "@sigstore/tuf": "^2.3.4", + "@sigstore/tuf": "^3.0.0", "abbrev": "^3.0.0", "archy": "~1.0.0", "cacache": "^19.0.1", "chalk": "^5.3.0", - "ci-info": "^4.0.0", + "ci-info": "^4.1.0", "cli-columns": "^4.0.0", "fastest-levenshtein": "^1.0.16", "fs-minipass": "^3.0.3", "glob": "^10.4.5", "graceful-fs": "^4.2.11", - "hosted-git-info": "^8.0.0", + "hosted-git-info": "^8.0.2", "ini": "^5.0.0", - "init-package-json": "^7.0.1", + "init-package-json": "^7.0.2", "is-cidr": "^5.1.0", "json-parse-even-better-errors": "^4.0.0", "libnpmaccess": "^9.0.0", @@ -83,27 +83,27 @@ "libnpmhook": "^11.0.0", "libnpmorg": "^7.0.0", "libnpmpack": "^8.0.0", - "libnpmpublish": "^10.0.0", + "libnpmpublish": "^10.0.1", "libnpmsearch": "^8.0.0", "libnpmteam": "^7.0.0", "libnpmversion": "^7.0.0", - "make-fetch-happen": "^14.0.1", + "make-fetch-happen": "^14.0.3", "minimatch": "^9.0.5", "minipass": "^7.1.1", "minipass-pipeline": "^1.2.4", "ms": "^2.1.2", - "node-gyp": "^10.2.0", + "node-gyp": "^11.0.0", "nopt": "^8.0.0", "normalize-package-data": "^7.0.0", "npm-audit-report": "^6.0.0", - "npm-install-checks": "^7.1.0", + "npm-install-checks": "^7.1.1", "npm-package-arg": "^12.0.0", "npm-pick-manifest": "^10.0.0", "npm-profile": "^11.0.1", - "npm-registry-fetch": "^18.0.1", + "npm-registry-fetch": "^18.0.2", "npm-user-validate": "^3.0.0", "p-map": "^4.0.0", - "pacote": "^19.0.0", + "pacote": "^19.0.1", "parse-conflict-json": "^4.0.0", "proc-log": "^5.0.0", "qrcode-terminal": "^0.12.0", diff --git a/deps/openssl/openssl.gyp b/deps/openssl/openssl.gyp index f6b157f8d60813..ea3a2dc09ef29b 100644 --- a/deps/openssl/openssl.gyp +++ b/deps/openssl/openssl.gyp @@ -5,19 +5,13 @@ 'nasm_version%': '0.0', 'openssl-cli': '<(PRODUCT_DIR)/<(EXECUTABLE_PREFIX)openssl-cli<(EXECUTABLE_SUFFIX)', 'conditions': [ - ['OS == "win"', { - 'obj_dir_abs': '<(PRODUCT_DIR_ABS)/obj', - }], ['GENERATOR == "ninja"', { - 'obj_dir_abs': '<(PRODUCT_DIR_ABS)/obj', - 'modules_dir': '<(PRODUCT_DIR_ABS)/obj/lib/openssl-modules', + 'modules_dir': '<(PRODUCT_DIR_ABS_CSTR)/obj/lib/openssl-modules', }, { - 'obj_dir_abs%': '<(PRODUCT_DIR_ABS)/obj.target', - 'modules_dir': '<(PRODUCT_DIR_ABS)/obj.target/deps/openssl/lib/openssl-modules', + 'modules_dir': '<(PRODUCT_DIR_ABS_CSTR)/obj.target/deps/openssl/lib/openssl-modules', }], ['OS=="mac"', { - 'obj_dir_abs%': '<(PRODUCT_DIR_ABS)/obj.target', - 'modules_dir': '<(PRODUCT_DIR_ABS)/obj.target/deps/openssl/lib/openssl-modules', + 'modules_dir': '<(PRODUCT_DIR_ABS_CSTR)/obj.target/deps/openssl/lib/openssl-modules', }], ], }, diff --git a/deps/simdjson/simdjson.cpp b/deps/simdjson/simdjson.cpp index b5dd7243853217..d79fc703a815ca 100644 --- a/deps/simdjson/simdjson.cpp +++ b/deps/simdjson/simdjson.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2024-08-01 09:31:50 -0400. Do not edit! */ +/* auto-generated on 2024-08-26 09:37:03 -0400. Do not edit! */ /* including simdjson.cpp: */ /* begin file simdjson.cpp */ #define SIMDJSON_SRC_SIMDJSON_CPP @@ -332,6 +332,8 @@ double from_chars(const char *first, const char* end) noexcept; #define SIMDJSON_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0) #if SIMDJSON_REGULAR_VISUAL_STUDIO + // We could use [[deprecated]] but it requires C++14 + #define simdjson_deprecated __declspec(deprecated) #define simdjson_really_inline __forceinline #define simdjson_never_inline __declspec(noinline) @@ -370,6 +372,8 @@ double from_chars(const char *first, const char* end) noexcept; #define SIMDJSON_POP_DISABLE_UNUSED_WARNINGS #else // SIMDJSON_REGULAR_VISUAL_STUDIO + // We could use [[deprecated]] but it requires C++14 + #define simdjson_deprecated __attribute__((deprecated)) #define simdjson_really_inline inline __attribute__((always_inline)) #define simdjson_never_inline inline __attribute__((noinline)) diff --git a/deps/simdjson/simdjson.h b/deps/simdjson/simdjson.h index ddb6f2e4e0a6ed..f21cd9381eef59 100644 --- a/deps/simdjson/simdjson.h +++ b/deps/simdjson/simdjson.h @@ -1,4 +1,4 @@ -/* auto-generated on 2024-08-01 09:31:50 -0400. Do not edit! */ +/* auto-generated on 2024-08-26 09:37:03 -0400. Do not edit! */ /* including simdjson.h: */ /* begin file simdjson.h */ #ifndef SIMDJSON_H @@ -352,6 +352,8 @@ double from_chars(const char *first, const char* end) noexcept; #define SIMDJSON_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0) #if SIMDJSON_REGULAR_VISUAL_STUDIO + // We could use [[deprecated]] but it requires C++14 + #define simdjson_deprecated __declspec(deprecated) #define simdjson_really_inline __forceinline #define simdjson_never_inline __declspec(noinline) @@ -390,6 +392,8 @@ double from_chars(const char *first, const char* end) noexcept; #define SIMDJSON_POP_DISABLE_UNUSED_WARNINGS #else // SIMDJSON_REGULAR_VISUAL_STUDIO + // We could use [[deprecated]] but it requires C++14 + #define simdjson_deprecated __attribute__((deprecated)) #define simdjson_really_inline inline __attribute__((always_inline)) #define simdjson_never_inline inline __attribute__((noinline)) @@ -2366,7 +2370,7 @@ namespace std { #define SIMDJSON_SIMDJSON_VERSION_H /** The version of simdjson being used (major.minor.revision) */ -#define SIMDJSON_VERSION "3.10.0" +#define SIMDJSON_VERSION "3.10.1" namespace simdjson { enum { @@ -2381,7 +2385,7 @@ enum { /** * The revision (major.minor.REVISION) of simdjson being used. */ - SIMDJSON_VERSION_REVISION = 0 + SIMDJSON_VERSION_REVISION = 1 }; } // namespace simdjson @@ -34014,7 +34018,7 @@ class parser { private: /** @private [for benchmarking access] The implementation to use */ - std::unique_ptr implementation{}; + std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; @@ -34557,7 +34561,7 @@ class document { " You may also add support for custom types, see our documentation."); } /** @overload template simdjson_result get() & noexcept */ - template simdjson_inline simdjson_result get() && noexcept { + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept { // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -34580,7 +34584,7 @@ class document { */ template simdjson_inline error_code get(T &out) & noexcept; /** @overload template error_code get(T &out) & noexcept */ - template simdjson_inline error_code get(T &out) && noexcept; + template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS /** @@ -34593,7 +34597,10 @@ class document { * @returns An instance of type T */ template - explicit simdjson_inline operator T() noexcept(false); + explicit simdjson_inline operator T() & noexcept(false); + template + explicit simdjson_deprecated simdjson_inline operator T() && noexcept(false); + /** * Cast this JSON value to an array. * @@ -35159,7 +35166,7 @@ struct simdjson_result : public arm64::implementation simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & noexcept; - template simdjson_inline simdjson_result get() && noexcept; + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept; template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; @@ -35599,7 +35606,7 @@ class document_stream { friend class document; friend class json_iterator; friend struct simdjson_result; - friend struct internal::simdjson_result_base; + friend struct simdjson::internal::simdjson_result_base; }; // document_stream } // namespace ondemand @@ -36561,16 +36568,14 @@ simdjson_inline simdjson_result &simdjson_resul /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ @@ -36725,24 +36730,26 @@ template<> simdjson_inline simdjson_result document::get() & noexcept { template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } template simdjson_inline error_code document::get(T &out) & noexcept { return get().get(out); } -template simdjson_inline error_code document::get(T &out) && noexcept { +template simdjson_deprecated simdjson_inline error_code document::get(T &out) && noexcept { return std::forward(*this).get().get(out); } #if SIMDJSON_EXCEPTIONS template -simdjson_inline document::operator T() noexcept(false) { return get(); } +simdjson_deprecated simdjson_inline document::operator T() && noexcept(false) { return get(); } +template +simdjson_inline document::operator T() & noexcept(false) { return get(); } simdjson_inline document::operator array() & noexcept(false) { return get_array(); } simdjson_inline document::operator object() & noexcept(false) { return get_object(); } simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } @@ -37029,7 +37036,7 @@ simdjson_inline simdjson_result simdjson_result::g return first.get(); } template -simdjson_inline simdjson_result simdjson_result::get() && noexcept { +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first).get(); } @@ -37045,7 +37052,7 @@ simdjson_inline error_code simdjson_result::get(T &ou } template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first); } @@ -40014,8 +40021,6 @@ simdjson_inline simdjson_result::simdjson_resul /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ @@ -40561,9 +40566,9 @@ simdjson_inline simdjson_result simdjson_result implementation{}; + std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; @@ -45023,7 +45028,7 @@ class document { " You may also add support for custom types, see our documentation."); } /** @overload template simdjson_result get() & noexcept */ - template simdjson_inline simdjson_result get() && noexcept { + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept { // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -45046,7 +45051,7 @@ class document { */ template simdjson_inline error_code get(T &out) & noexcept; /** @overload template error_code get(T &out) & noexcept */ - template simdjson_inline error_code get(T &out) && noexcept; + template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS /** @@ -45059,7 +45064,10 @@ class document { * @returns An instance of type T */ template - explicit simdjson_inline operator T() noexcept(false); + explicit simdjson_inline operator T() & noexcept(false); + template + explicit simdjson_deprecated simdjson_inline operator T() && noexcept(false); + /** * Cast this JSON value to an array. * @@ -45625,7 +45633,7 @@ struct simdjson_result : public fallback::implemen simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & noexcept; - template simdjson_inline simdjson_result get() && noexcept; + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept; template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; @@ -46065,7 +46073,7 @@ class document_stream { friend class document; friend class json_iterator; friend struct simdjson_result; - friend struct internal::simdjson_result_base; + friend struct simdjson::internal::simdjson_result_base; }; // document_stream } // namespace ondemand @@ -47027,16 +47035,14 @@ simdjson_inline simdjson_result &simdjson_re /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ @@ -47191,24 +47197,26 @@ template<> simdjson_inline simdjson_result document::get() & noexcept { template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } template simdjson_inline error_code document::get(T &out) & noexcept { return get().get(out); } -template simdjson_inline error_code document::get(T &out) && noexcept { +template simdjson_deprecated simdjson_inline error_code document::get(T &out) && noexcept { return std::forward(*this).get().get(out); } #if SIMDJSON_EXCEPTIONS template -simdjson_inline document::operator T() noexcept(false) { return get(); } +simdjson_deprecated simdjson_inline document::operator T() && noexcept(false) { return get(); } +template +simdjson_inline document::operator T() & noexcept(false) { return get(); } simdjson_inline document::operator array() & noexcept(false) { return get_array(); } simdjson_inline document::operator object() & noexcept(false) { return get_object(); } simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } @@ -47495,7 +47503,7 @@ simdjson_inline simdjson_result simdjson_result return first.get(); } template -simdjson_inline simdjson_result simdjson_result::get() && noexcept { +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first).get(); } @@ -47511,7 +47519,7 @@ simdjson_inline error_code simdjson_result::get(T } template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first); } @@ -50480,8 +50488,6 @@ simdjson_inline simdjson_result::simdjson_re /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ @@ -51027,9 +51033,9 @@ simdjson_inline simdjson_result simdjson_result implementation{}; + std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; @@ -55981,7 +55987,7 @@ class document { " You may also add support for custom types, see our documentation."); } /** @overload template simdjson_result get() & noexcept */ - template simdjson_inline simdjson_result get() && noexcept { + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept { // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -56004,7 +56010,7 @@ class document { */ template simdjson_inline error_code get(T &out) & noexcept; /** @overload template error_code get(T &out) & noexcept */ - template simdjson_inline error_code get(T &out) && noexcept; + template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS /** @@ -56017,7 +56023,10 @@ class document { * @returns An instance of type T */ template - explicit simdjson_inline operator T() noexcept(false); + explicit simdjson_inline operator T() & noexcept(false); + template + explicit simdjson_deprecated simdjson_inline operator T() && noexcept(false); + /** * Cast this JSON value to an array. * @@ -56583,7 +56592,7 @@ struct simdjson_result : public haswell::implementa simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & noexcept; - template simdjson_inline simdjson_result get() && noexcept; + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept; template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; @@ -57023,7 +57032,7 @@ class document_stream { friend class document; friend class json_iterator; friend struct simdjson_result; - friend struct internal::simdjson_result_base; + friend struct simdjson::internal::simdjson_result_base; }; // document_stream } // namespace ondemand @@ -57985,16 +57994,14 @@ simdjson_inline simdjson_result &simdjson_res /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ @@ -58149,24 +58156,26 @@ template<> simdjson_inline simdjson_result document::get() & noexcept { template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } template simdjson_inline error_code document::get(T &out) & noexcept { return get().get(out); } -template simdjson_inline error_code document::get(T &out) && noexcept { +template simdjson_deprecated simdjson_inline error_code document::get(T &out) && noexcept { return std::forward(*this).get().get(out); } #if SIMDJSON_EXCEPTIONS template -simdjson_inline document::operator T() noexcept(false) { return get(); } +simdjson_deprecated simdjson_inline document::operator T() && noexcept(false) { return get(); } +template +simdjson_inline document::operator T() & noexcept(false) { return get(); } simdjson_inline document::operator array() & noexcept(false) { return get_array(); } simdjson_inline document::operator object() & noexcept(false) { return get_object(); } simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } @@ -58453,7 +58462,7 @@ simdjson_inline simdjson_result simdjson_result: return first.get(); } template -simdjson_inline simdjson_result simdjson_result::get() && noexcept { +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first).get(); } @@ -58469,7 +58478,7 @@ simdjson_inline error_code simdjson_result::get(T & } template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first); } @@ -61438,8 +61447,6 @@ simdjson_inline simdjson_result::simdjson_res /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ @@ -61985,9 +61992,9 @@ simdjson_inline simdjson_result simdjson_result implementation{}; + std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; @@ -66938,7 +66945,7 @@ class document { " You may also add support for custom types, see our documentation."); } /** @overload template simdjson_result get() & noexcept */ - template simdjson_inline simdjson_result get() && noexcept { + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept { // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -66961,7 +66968,7 @@ class document { */ template simdjson_inline error_code get(T &out) & noexcept; /** @overload template error_code get(T &out) & noexcept */ - template simdjson_inline error_code get(T &out) && noexcept; + template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS /** @@ -66974,7 +66981,10 @@ class document { * @returns An instance of type T */ template - explicit simdjson_inline operator T() noexcept(false); + explicit simdjson_inline operator T() & noexcept(false); + template + explicit simdjson_deprecated simdjson_inline operator T() && noexcept(false); + /** * Cast this JSON value to an array. * @@ -67540,7 +67550,7 @@ struct simdjson_result : public icelake::implementa simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & noexcept; - template simdjson_inline simdjson_result get() && noexcept; + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept; template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; @@ -67980,7 +67990,7 @@ class document_stream { friend class document; friend class json_iterator; friend struct simdjson_result; - friend struct internal::simdjson_result_base; + friend struct simdjson::internal::simdjson_result_base; }; // document_stream } // namespace ondemand @@ -68942,16 +68952,14 @@ simdjson_inline simdjson_result &simdjson_res /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ @@ -69106,24 +69114,26 @@ template<> simdjson_inline simdjson_result document::get() & noexcept { template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } template simdjson_inline error_code document::get(T &out) & noexcept { return get().get(out); } -template simdjson_inline error_code document::get(T &out) && noexcept { +template simdjson_deprecated simdjson_inline error_code document::get(T &out) && noexcept { return std::forward(*this).get().get(out); } #if SIMDJSON_EXCEPTIONS template -simdjson_inline document::operator T() noexcept(false) { return get(); } +simdjson_deprecated simdjson_inline document::operator T() && noexcept(false) { return get(); } +template +simdjson_inline document::operator T() & noexcept(false) { return get(); } simdjson_inline document::operator array() & noexcept(false) { return get_array(); } simdjson_inline document::operator object() & noexcept(false) { return get_object(); } simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } @@ -69410,7 +69420,7 @@ simdjson_inline simdjson_result simdjson_result: return first.get(); } template -simdjson_inline simdjson_result simdjson_result::get() && noexcept { +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first).get(); } @@ -69426,7 +69436,7 @@ simdjson_inline error_code simdjson_result::get(T & } template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first); } @@ -72395,8 +72405,6 @@ simdjson_inline simdjson_result::simdjson_res /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ @@ -72942,9 +72950,9 @@ simdjson_inline simdjson_result simdjson_result implementation{}; + std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; @@ -78010,7 +78018,7 @@ class document { " You may also add support for custom types, see our documentation."); } /** @overload template simdjson_result get() & noexcept */ - template simdjson_inline simdjson_result get() && noexcept { + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept { // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -78033,7 +78041,7 @@ class document { */ template simdjson_inline error_code get(T &out) & noexcept; /** @overload template error_code get(T &out) & noexcept */ - template simdjson_inline error_code get(T &out) && noexcept; + template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS /** @@ -78046,7 +78054,10 @@ class document { * @returns An instance of type T */ template - explicit simdjson_inline operator T() noexcept(false); + explicit simdjson_inline operator T() & noexcept(false); + template + explicit simdjson_deprecated simdjson_inline operator T() && noexcept(false); + /** * Cast this JSON value to an array. * @@ -78612,7 +78623,7 @@ struct simdjson_result : public ppc64::implementation simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & noexcept; - template simdjson_inline simdjson_result get() && noexcept; + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept; template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; @@ -79052,7 +79063,7 @@ class document_stream { friend class document; friend class json_iterator; friend struct simdjson_result; - friend struct internal::simdjson_result_base; + friend struct simdjson::internal::simdjson_result_base; }; // document_stream } // namespace ondemand @@ -80014,16 +80025,14 @@ simdjson_inline simdjson_result &simdjson_resul /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ @@ -80178,24 +80187,26 @@ template<> simdjson_inline simdjson_result document::get() & noexcept { template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } template simdjson_inline error_code document::get(T &out) & noexcept { return get().get(out); } -template simdjson_inline error_code document::get(T &out) && noexcept { +template simdjson_deprecated simdjson_inline error_code document::get(T &out) && noexcept { return std::forward(*this).get().get(out); } #if SIMDJSON_EXCEPTIONS template -simdjson_inline document::operator T() noexcept(false) { return get(); } +simdjson_deprecated simdjson_inline document::operator T() && noexcept(false) { return get(); } +template +simdjson_inline document::operator T() & noexcept(false) { return get(); } simdjson_inline document::operator array() & noexcept(false) { return get_array(); } simdjson_inline document::operator object() & noexcept(false) { return get_object(); } simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } @@ -80482,7 +80493,7 @@ simdjson_inline simdjson_result simdjson_result::g return first.get(); } template -simdjson_inline simdjson_result simdjson_result::get() && noexcept { +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first).get(); } @@ -80498,7 +80509,7 @@ simdjson_inline error_code simdjson_result::get(T &ou } template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first); } @@ -83467,8 +83478,6 @@ simdjson_inline simdjson_result::simdjson_resul /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ @@ -84014,9 +84023,9 @@ simdjson_inline simdjson_result simdjson_result implementation{}; + std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; @@ -89405,7 +89414,7 @@ class document { " You may also add support for custom types, see our documentation."); } /** @overload template simdjson_result get() & noexcept */ - template simdjson_inline simdjson_result get() && noexcept { + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept { // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -89428,7 +89437,7 @@ class document { */ template simdjson_inline error_code get(T &out) & noexcept; /** @overload template error_code get(T &out) & noexcept */ - template simdjson_inline error_code get(T &out) && noexcept; + template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS /** @@ -89441,7 +89450,10 @@ class document { * @returns An instance of type T */ template - explicit simdjson_inline operator T() noexcept(false); + explicit simdjson_inline operator T() & noexcept(false); + template + explicit simdjson_deprecated simdjson_inline operator T() && noexcept(false); + /** * Cast this JSON value to an array. * @@ -90007,7 +90019,7 @@ struct simdjson_result : public westmere::implemen simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & noexcept; - template simdjson_inline simdjson_result get() && noexcept; + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept; template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; @@ -90447,7 +90459,7 @@ class document_stream { friend class document; friend class json_iterator; friend struct simdjson_result; - friend struct internal::simdjson_result_base; + friend struct simdjson::internal::simdjson_result_base; }; // document_stream } // namespace ondemand @@ -91409,16 +91421,14 @@ simdjson_inline simdjson_result &simdjson_re /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ @@ -91573,24 +91583,26 @@ template<> simdjson_inline simdjson_result document::get() & noexcept { template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } template simdjson_inline error_code document::get(T &out) & noexcept { return get().get(out); } -template simdjson_inline error_code document::get(T &out) && noexcept { +template simdjson_deprecated simdjson_inline error_code document::get(T &out) && noexcept { return std::forward(*this).get().get(out); } #if SIMDJSON_EXCEPTIONS template -simdjson_inline document::operator T() noexcept(false) { return get(); } +simdjson_deprecated simdjson_inline document::operator T() && noexcept(false) { return get(); } +template +simdjson_inline document::operator T() & noexcept(false) { return get(); } simdjson_inline document::operator array() & noexcept(false) { return get_array(); } simdjson_inline document::operator object() & noexcept(false) { return get_object(); } simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } @@ -91877,7 +91889,7 @@ simdjson_inline simdjson_result simdjson_result return first.get(); } template -simdjson_inline simdjson_result simdjson_result::get() && noexcept { +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first).get(); } @@ -91893,7 +91905,7 @@ simdjson_inline error_code simdjson_result::get(T } template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first); } @@ -94862,8 +94874,6 @@ simdjson_inline simdjson_result::simdjson_re /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ @@ -95409,9 +95419,9 @@ simdjson_inline simdjson_result simdjson_result implementation{}; + std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; @@ -100271,7 +100281,7 @@ class document { " You may also add support for custom types, see our documentation."); } /** @overload template simdjson_result get() & noexcept */ - template simdjson_inline simdjson_result get() && noexcept { + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept { // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -100294,7 +100304,7 @@ class document { */ template simdjson_inline error_code get(T &out) & noexcept; /** @overload template error_code get(T &out) & noexcept */ - template simdjson_inline error_code get(T &out) && noexcept; + template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS /** @@ -100307,7 +100317,10 @@ class document { * @returns An instance of type T */ template - explicit simdjson_inline operator T() noexcept(false); + explicit simdjson_inline operator T() & noexcept(false); + template + explicit simdjson_deprecated simdjson_inline operator T() && noexcept(false); + /** * Cast this JSON value to an array. * @@ -100873,7 +100886,7 @@ struct simdjson_result : public lsx::implementation_sim simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & noexcept; - template simdjson_inline simdjson_result get() && noexcept; + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept; template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; @@ -101313,7 +101326,7 @@ class document_stream { friend class document; friend class json_iterator; friend struct simdjson_result; - friend struct internal::simdjson_result_base; + friend struct simdjson::internal::simdjson_result_base; }; // document_stream } // namespace ondemand @@ -102275,16 +102288,14 @@ simdjson_inline simdjson_result &simdjson_result< /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ @@ -102439,24 +102450,26 @@ template<> simdjson_inline simdjson_result document::get() & noexcept { template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } template simdjson_inline error_code document::get(T &out) & noexcept { return get().get(out); } -template simdjson_inline error_code document::get(T &out) && noexcept { +template simdjson_deprecated simdjson_inline error_code document::get(T &out) && noexcept { return std::forward(*this).get().get(out); } #if SIMDJSON_EXCEPTIONS template -simdjson_inline document::operator T() noexcept(false) { return get(); } +simdjson_deprecated simdjson_inline document::operator T() && noexcept(false) { return get(); } +template +simdjson_inline document::operator T() & noexcept(false) { return get(); } simdjson_inline document::operator array() & noexcept(false) { return get_array(); } simdjson_inline document::operator object() & noexcept(false) { return get_object(); } simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } @@ -102743,7 +102756,7 @@ simdjson_inline simdjson_result simdjson_result::get return first.get(); } template -simdjson_inline simdjson_result simdjson_result::get() && noexcept { +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first).get(); } @@ -102759,7 +102772,7 @@ simdjson_inline error_code simdjson_result::get(T &out) } template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first); } @@ -105728,8 +105741,6 @@ simdjson_inline simdjson_result::simdjson_result( /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ @@ -106275,9 +106286,9 @@ simdjson_inline simdjson_result simdjson_result implementation{}; + std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; @@ -111150,7 +111161,7 @@ class document { " You may also add support for custom types, see our documentation."); } /** @overload template simdjson_result get() & noexcept */ - template simdjson_inline simdjson_result get() && noexcept { + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept { // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -111173,7 +111184,7 @@ class document { */ template simdjson_inline error_code get(T &out) & noexcept; /** @overload template error_code get(T &out) & noexcept */ - template simdjson_inline error_code get(T &out) && noexcept; + template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS /** @@ -111186,7 +111197,10 @@ class document { * @returns An instance of type T */ template - explicit simdjson_inline operator T() noexcept(false); + explicit simdjson_inline operator T() & noexcept(false); + template + explicit simdjson_deprecated simdjson_inline operator T() && noexcept(false); + /** * Cast this JSON value to an array. * @@ -111752,7 +111766,7 @@ struct simdjson_result : public lasx::implementation_s simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & noexcept; - template simdjson_inline simdjson_result get() && noexcept; + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept; template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; @@ -112192,7 +112206,7 @@ class document_stream { friend class document; friend class json_iterator; friend struct simdjson_result; - friend struct internal::simdjson_result_base; + friend struct simdjson::internal::simdjson_result_base; }; // document_stream } // namespace ondemand @@ -113154,16 +113168,14 @@ simdjson_inline simdjson_result &simdjson_result /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ @@ -113318,24 +113330,26 @@ template<> simdjson_inline simdjson_result document::get() & noexcept { template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } template simdjson_inline error_code document::get(T &out) & noexcept { return get().get(out); } -template simdjson_inline error_code document::get(T &out) && noexcept { +template simdjson_deprecated simdjson_inline error_code document::get(T &out) && noexcept { return std::forward(*this).get().get(out); } #if SIMDJSON_EXCEPTIONS template -simdjson_inline document::operator T() noexcept(false) { return get(); } +simdjson_deprecated simdjson_inline document::operator T() && noexcept(false) { return get(); } +template +simdjson_inline document::operator T() & noexcept(false) { return get(); } simdjson_inline document::operator array() & noexcept(false) { return get_array(); } simdjson_inline document::operator object() & noexcept(false) { return get_object(); } simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } @@ -113622,7 +113636,7 @@ simdjson_inline simdjson_result simdjson_result::ge return first.get(); } template -simdjson_inline simdjson_result simdjson_result::get() && noexcept { +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first).get(); } @@ -113638,7 +113652,7 @@ simdjson_inline error_code simdjson_result::get(T &out } template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first); } @@ -116607,8 +116621,6 @@ simdjson_inline simdjson_result::simdjson_result /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ @@ -117154,9 +117166,9 @@ simdjson_inline simdjson_result simdjson_result + +#endif // SIMDUTF_LSX_INTRINSICS_H +/* end file src/simdutf/lsx/intrinsics.h */ +/* begin file src/simdutf/lsx/bitmanipulation.h */ +#ifndef SIMDUTF_LSX_BITMANIPULATION_H +#define SIMDUTF_LSX_BITMANIPULATION_H #include namespace simdutf { -namespace fallback { -namespace {} // unnamed namespace -} // namespace fallback -} // namespace simdutf +namespace lsx { +namespace { -#endif // SIMDUTF_FALLBACK_BITMANIPULATION_H -/* end file src/simdutf/fallback/bitmanipulation.h */ +simdutf_really_inline int count_ones(uint64_t input_num) { + return __lsx_vpickve2gr_w(__lsx_vpcnt_d(__lsx_vreplgr2vr_d(input_num)), 0); +} -/* begin file src/simdutf/fallback/end.h */ -/* end file src/simdutf/fallback/end.h */ +#if SIMDUTF_NEED_TRAILING_ZEROES +simdutf_really_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); +} +#endif -#endif // SIMDUTF_IMPLEMENTATION_FALLBACK -#endif // SIMDUTF_FALLBACK_H -/* end file src/simdutf/fallback.h */ +} // unnamed namespace +} // namespace lsx +} // namespace simdutf -/* begin file src/scalar/utf8.h */ -#ifndef SIMDUTF_UTF8_H -#define SIMDUTF_UTF8_H +#endif // SIMDUTF_LSX_BITMANIPULATION_H +/* end file src/simdutf/lsx/bitmanipulation.h */ +/* begin file src/simdutf/lsx/simd.h */ +#ifndef SIMDUTF_LSX_SIMD_H +#define SIMDUTF_LSX_SIMD_H + +#include namespace simdutf { -namespace scalar { +namespace lsx { namespace { -namespace utf8 { -#if SIMDUTF_IMPLEMENTATION_FALLBACK || SIMDUTF_IMPLEMENTATION_RVV -// only used by the fallback kernel. -// credit: based on code from Google Fuchsia (Apache Licensed) -inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept { - const uint8_t *data = reinterpret_cast(buf); - uint64_t pos = 0; - uint32_t code_point = 0; - while (pos < len) { - // check of the next 16 bytes are ascii. - uint64_t next_pos = pos + 16; - if (next_pos <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - std::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - pos = next_pos; - continue; - } - } - unsigned char byte = data[pos]; +namespace simd { - while (byte < 0b10000000) { - if (++pos == len) { - return true; - } - byte = data[pos]; - } +template struct simd8; - if ((byte & 0b11100000) == 0b11000000) { - next_pos = pos + 2; - if (next_pos > len) { - return false; - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return false; - } - // range check - code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); - if ((code_point < 0x80) || (0x7ff < code_point)) { - return false; - } - } else if ((byte & 0b11110000) == 0b11100000) { - next_pos = pos + 3; - if (next_pos > len) { - return false; - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return false; - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return false; - } - // range check - code_point = (byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); - if ((code_point < 0x800) || (0xffff < code_point) || - (0xd7ff < code_point && code_point < 0xe000)) { - return false; - } - } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000 - next_pos = pos + 4; - if (next_pos > len) { - return false; - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return false; - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return false; - } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { - return false; - } - // range check - code_point = - (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); - if (code_point <= 0xffff || 0x10ffff < code_point) { - return false; - } - } else { - // we may have a continuation - return false; - } - pos = next_pos; - } - return true; -} -#endif +// +// Base class of simd8 and simd8, both of which use __m128i +// internally. +// +template > struct base_u8 { + __m128i value; + static const int SIZE = sizeof(value); -inline simdutf_warn_unused result validate_with_errors(const char *buf, - size_t len) noexcept { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - uint32_t code_point = 0; - while (pos < len) { - // check of the next 16 bytes are ascii. - size_t next_pos = pos + 16; - if (next_pos <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - std::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - pos = next_pos; - continue; - } - } - unsigned char byte = data[pos]; + // Conversion from/to SIMD register + simdutf_really_inline base_u8(const __m128i _value) : value(_value) {} + simdutf_really_inline operator const __m128i &() const { return this->value; } + simdutf_really_inline operator __m128i &() { return this->value; } + simdutf_really_inline T first() const { + return __lsx_vpickve2gr_bu(this->value, 0); + } + simdutf_really_inline T last() const { + return __lsx_vpickve2gr_bu(this->value, 15); + } - while (byte < 0b10000000) { - if (++pos == len) { - return result(error_code::SUCCESS, len); - } - byte = data[pos]; - } + // Bit operations + simdutf_really_inline simd8 operator|(const simd8 other) const { + return __lsx_vor_v(this->value, other); + } + simdutf_really_inline simd8 operator&(const simd8 other) const { + return __lsx_vand_v(this->value, other); + } + simdutf_really_inline simd8 operator^(const simd8 other) const { + return __lsx_vxor_v(this->value, other); + } + simdutf_really_inline simd8 bit_andnot(const simd8 other) const { + return __lsx_vandn_v(this->value, other); + } + simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdutf_really_inline simd8 &operator|=(const simd8 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast | other; + return *this_cast; + } + simdutf_really_inline simd8 &operator&=(const simd8 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast & other; + return *this_cast; + } + simdutf_really_inline simd8 &operator^=(const simd8 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } - if ((byte & 0b11100000) == 0b11000000) { - next_pos = pos + 2; - if (next_pos > len) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); - if ((code_point < 0x80) || (0x7ff < code_point)) { - return result(error_code::OVERLONG, pos); - } - } else if ((byte & 0b11110000) == 0b11100000) { - next_pos = pos + 3; - if (next_pos > len) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - code_point = (byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); - if ((code_point < 0x800) || (0xffff < code_point)) { - return result(error_code::OVERLONG, pos); - } - if (0xd7ff < code_point && code_point < 0xe000) { - return result(error_code::SURROGATE, pos); - } - } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000 - next_pos = pos + 4; - if (next_pos > len) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - code_point = - (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); - if (code_point <= 0xffff) { - return result(error_code::OVERLONG, pos); - } - if (0x10ffff < code_point) { - return result(error_code::TOO_LARGE, pos); - } - } else { - // we either have too many continuation bytes or an invalid leading byte - if ((byte & 0b11000000) == 0b10000000) { - return result(error_code::TOO_LONG, pos); - } else { - return result(error_code::HEADER_BITS, pos); - } - } - pos = next_pos; + friend simdutf_really_inline Mask operator==(const simd8 lhs, + const simd8 rhs) { + return __lsx_vseq_b(lhs, rhs); } - return result(error_code::SUCCESS, len); -} -// Finds the previous leading byte starting backward from buf and validates with -// errors from there Used to pinpoint the location of an error when an invalid -// chunk is detected We assume that the stream starts with a leading byte, and -// to check that it is the case, we ask that you pass a pointer to the start of -// the stream (start). -inline simdutf_warn_unused result rewind_and_validate_with_errors( - const char *start, const char *buf, size_t len) noexcept { - // First check that we start with a leading byte - if ((*start & 0b11000000) == 0b10000000) { - return result(error_code::TOO_LONG, 0); + template + simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(this->value, N), + __lsx_vbsrl_v(prev_chunk.value, 16 - N)); } - size_t extra_len{0}; - // A leading byte cannot be further than 4 bytes away - for (int i = 0; i < 5; i++) { - unsigned char byte = *buf; - if ((byte & 0b11000000) != 0b10000000) { - break; - } else { - buf--; - extra_len++; - } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base_u8 { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + static simdutf_really_inline simd8 splat(bool _value) { + return __lsx_vreplgr2vr_b(uint8_t(-(!!_value))); } - result res = validate_with_errors(buf, len + extra_len); - res.count -= extra_len; - return res; -} + simdutf_really_inline simd8(const __m128i _value) : base_u8(_value) {} + // False constructor + simdutf_really_inline simd8() : simd8(__lsx_vldi(0)) {} + // Splat constructor + simdutf_really_inline simd8(bool _value) : simd8(splat(_value)) {} + simdutf_really_inline void store(uint8_t dst[16]) const { + return __lsx_vst(this->value, dst, 0); + } -inline size_t count_code_points(const char *buf, size_t len) { - const int8_t *p = reinterpret_cast(buf); - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - // -65 is 0b10111111, anything larger in two-complement's should start a new - // code point. - if (p[i] > -65) { - counter++; - } + simdutf_really_inline uint32_t to_bitmask() const { + return __lsx_vpickve2gr_wu(__lsx_vmsknz_b(*this), 0); } - return counter; -} -inline size_t utf16_length_from_utf8(const char *buf, size_t len) { - const int8_t *p = reinterpret_cast(buf); - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - if (p[i] > -65) { - counter++; - } - if (uint8_t(p[i]) >= 240) { - counter++; - } + simdutf_really_inline bool any() const { + return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0) != 0; } - return counter; -} + simdutf_really_inline bool none() const { + return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0) == 0; + } + simdutf_really_inline bool all() const { + return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0) == 0xFFFF; + } +}; -simdutf_warn_unused inline size_t trim_partial_utf8(const char *input, - size_t length) { - if (length < 3) { - switch (length) { - case 2: - if (uint8_t(input[length - 1]) >= 0xc0) { - return length - 1; - } // 2-, 3- and 4-byte characters with only 1 byte left - if (uint8_t(input[length - 2]) >= 0xe0) { - return length - 2; - } // 3- and 4-byte characters with only 2 bytes left - return length; - case 1: - if (uint8_t(input[length - 1]) >= 0xc0) { - return length - 1; - } // 2-, 3- and 4-byte characters with only 1 byte left - return length; - case 0: - return length; - } +// Unsigned bytes +template <> struct simd8 : base_u8 { + static simdutf_really_inline simd8 splat(uint8_t _value) { + return __lsx_vreplgr2vr_b(_value); } - if (uint8_t(input[length - 1]) >= 0xc0) { - return length - 1; - } // 2-, 3- and 4-byte characters with only 1 byte left - if (uint8_t(input[length - 2]) >= 0xe0) { - return length - 2; - } // 3- and 4-byte characters with only 1 byte left - if (uint8_t(input[length - 3]) >= 0xf0) { - return length - 3; - } // 4-byte characters with only 3 bytes left - return length; -} + static simdutf_really_inline simd8 zero() { return __lsx_vldi(0); } + static simdutf_really_inline simd8 load(const uint8_t *values) { + return __lsx_vld(values, 0); + } + simdutf_really_inline simd8(const __m128i _value) + : base_u8(_value) {} + // Zero constructor + simdutf_really_inline simd8() : simd8(zero()) {} + // Array constructor + simdutf_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Splat constructor + simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Member-by-member initialization -} // namespace utf8 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf + simdutf_really_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8((__m128i)v16u8{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15}) {} -#endif -/* end file src/scalar/utf8.h */ -/* begin file src/scalar/utf16.h */ -#ifndef SIMDUTF_UTF16_H -#define SIMDUTF_UTF16_H + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdutf_really_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } -namespace simdutf { -namespace scalar { -namespace { -namespace utf16 { + // Store to array + simdutf_really_inline void store(uint8_t dst[16]) const { + return __lsx_vst(this->value, dst, 0); + } -inline simdutf_warn_unused uint16_t swap_bytes(const uint16_t word) { - return uint16_t((word >> 8) | (word << 8)); -} + // Saturated math + simdutf_really_inline simd8 + saturating_add(const simd8 other) const { + return __lsx_vsadd_bu(this->value, other); + } + simdutf_really_inline simd8 + saturating_sub(const simd8 other) const { + return __lsx_vssub_bu(this->value, other); + } -template -inline simdutf_warn_unused bool validate(const char16_t *buf, - size_t len) noexcept { - const uint16_t *data = reinterpret_cast(buf); - uint64_t pos = 0; - while (pos < len) { - uint16_t word = - !match_system(big_endian) ? swap_bytes(data[pos]) : data[pos]; - if ((word & 0xF800) == 0xD800) { - if (pos + 1 >= len) { - return false; - } - uint16_t diff = uint16_t(word - 0xD800); - if (diff > 0x3FF) { - return false; - } - uint16_t next_word = - !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if (diff2 > 0x3FF) { - return false; - } - pos += 2; - } else { - pos++; - } + // Addition/subtraction are the same for signed and unsigned + simdutf_really_inline simd8 + operator+(const simd8 other) const { + return __lsx_vadd_b(this->value, other); } - return true; -} - -template -inline simdutf_warn_unused result validate_with_errors(const char16_t *buf, - size_t len) noexcept { - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - while (pos < len) { - uint16_t word = - !match_system(big_endian) ? swap_bytes(data[pos]) : data[pos]; - if ((word & 0xF800) == 0xD800) { - if (pos + 1 >= len) { - return result(error_code::SURROGATE, pos); - } - uint16_t diff = uint16_t(word - 0xD800); - if (diff > 0x3FF) { - return result(error_code::SURROGATE, pos); - } - uint16_t next_word = - !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if (diff2 > 0x3FF) { - return result(error_code::SURROGATE, pos); - } - pos += 2; - } else { - pos++; - } + simdutf_really_inline simd8 + operator-(const simd8 other) const { + return __lsx_vsub_b(this->value, other); } - return result(error_code::SUCCESS, pos); -} - -template -inline size_t count_code_points(const char16_t *buf, size_t len) { - // We are not BOM aware. - const uint16_t *p = reinterpret_cast(buf); - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i]; - counter += ((word & 0xFC00) != 0xDC00); + simdutf_really_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *this; } - return counter; -} - -template -inline size_t utf8_length_from_utf16(const char16_t *buf, size_t len) { - // We are not BOM aware. - const uint16_t *p = reinterpret_cast(buf); - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i]; - counter++; // ASCII - counter += static_cast( - word > - 0x7F); // non-ASCII is at least 2 bytes, surrogates are 2*2 == 4 bytes - counter += static_cast((word > 0x7FF && word <= 0xD7FF) || - (word >= 0xE000)); // three-byte + simdutf_really_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *this; } - return counter; -} -template -inline size_t utf32_length_from_utf16(const char16_t *buf, size_t len) { - // We are not BOM aware. - const uint16_t *p = reinterpret_cast(buf); - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i]; - counter += ((word & 0xFC00) != 0xDC00); + // Order-specific operations + simdutf_really_inline simd8 + max_val(const simd8 other) const { + return __lsx_vmax_bu(*this, other); + } + simdutf_really_inline simd8 + min_val(const simd8 other) const { + return __lsx_vmin_bu(*this, other); + } + simdutf_really_inline simd8 + operator<=(const simd8 other) const { + return __lsx_vsle_bu(*this, other); + } + simdutf_really_inline simd8 + operator>=(const simd8 other) const { + return __lsx_vsle_bu(other, *this); + } + simdutf_really_inline simd8 + operator<(const simd8 other) const { + return __lsx_vslt_bu(*this, other); + } + simdutf_really_inline simd8 + operator>(const simd8 other) const { + return __lsx_vslt_bu(other, *this); + } + // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true + // = nonzero. For ARM, returns all 1's. + simdutf_really_inline simd8 + gt_bits(const simd8 other) const { + return simd8(*this > other); + } + // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true + // = nonzero. For ARM, returns all 1's. + simdutf_really_inline simd8 + lt_bits(const simd8 other) const { + return simd8(*this < other); } - return counter; -} -inline size_t latin1_length_from_utf16(size_t len) { return len; } + // Bit-specific operations + simdutf_really_inline simd8 any_bits_set(simd8 bits) const { + return __lsx_vslt_bu(__lsx_vldi(0), __lsx_vand_v(this->value, bits)); + } + simdutf_really_inline bool is_ascii() const { + return __lsx_vpickve2gr_hu(__lsx_vmskgez_b(this->value), 0) == 0xFFFF; + } -simdutf_really_inline void change_endianness_utf16(const char16_t *in, - size_t size, char16_t *out) { - const uint16_t *input = reinterpret_cast(in); - uint16_t *output = reinterpret_cast(out); - for (size_t i = 0; i < size; i++) { - *output++ = uint16_t(input[i] >> 8 | input[i] << 8); + simdutf_really_inline bool any_bits_set_anywhere() const { + return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(this->value), 0) > 0; + } + simdutf_really_inline bool any_bits_set_anywhere(simd8 bits) const { + return (*this & bits).any_bits_set_anywhere(); + } + template simdutf_really_inline simd8 shr() const { + return __lsx_vsrli_b(this->value, N); + } + template simdutf_really_inline simd8 shl() const { + return __lsx_vslli_b(this->value, N); } -} -template -simdutf_warn_unused inline size_t trim_partial_utf16(const char16_t *input, - size_t length) { - if (length <= 1) { - return length; + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); } - uint16_t last_word = uint16_t(input[length - 1]); - last_word = !match_system(big_endian) ? swap_bytes(last_word) : last_word; - length -= ((last_word & 0xFC00) == 0xD800); - return length; -} -} // namespace utf16 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf + template + simdutf_really_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } -#endif -/* end file src/scalar/utf16.h */ -/* begin file src/scalar/utf32.h */ -#ifndef SIMDUTF_UTF32_H -#define SIMDUTF_UTF32_H + template + simdutf_really_inline simd8 + apply_lookup_16_to(const simd8 original) const { + __m128i original_tmp = __lsx_vand_v(original, __lsx_vldi(0x1f)); + return __lsx_vshuf_b(__lsx_vldi(0), *this, simd8(original_tmp)); + } +}; -namespace simdutf { -namespace scalar { -namespace { -namespace utf32 { +// Signed bytes +template <> struct simd8 { + __m128i value; -inline simdutf_warn_unused bool validate(const char32_t *buf, - size_t len) noexcept { - const uint32_t *data = reinterpret_cast(buf); - uint64_t pos = 0; - for (; pos < len; pos++) { - uint32_t word = data[pos]; - if (word > 0x10FFFF || (word >= 0xD800 && word <= 0xDFFF)) { - return false; - } + static simdutf_really_inline simd8 splat(int8_t _value) { + return __lsx_vreplgr2vr_b(_value); + } + static simdutf_really_inline simd8 zero() { return __lsx_vldi(0); } + static simdutf_really_inline simd8 load(const int8_t values[16]) { + return __lsx_vld(values, 0); } - return true; -} -inline simdutf_warn_unused result validate_with_errors(const char32_t *buf, - size_t len) noexcept { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - for (; pos < len; pos++) { - uint32_t word = data[pos]; - if (word > 0x10FFFF) { - return result(error_code::TOO_LARGE, pos); - } - if (word >= 0xD800 && word <= 0xDFFF) { - return result(error_code::SURROGATE, pos); + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *p) const { + __m128i zero = __lsx_vldi(0); + if (match_system(big_endian)) { + __lsx_vst(__lsx_vilvl_b(zero, (__m128i)this->value), + reinterpret_cast(p), 0); + __lsx_vst(__lsx_vilvh_b(zero, (__m128i)this->value), + reinterpret_cast(p + 8), 0); + } else { + __lsx_vst(__lsx_vilvl_b((__m128i)this->value, zero), + reinterpret_cast(p), 0); + __lsx_vst(__lsx_vilvh_b((__m128i)this->value, zero), + reinterpret_cast(p + 8), 0); } } - return result(error_code::SUCCESS, pos); -} -inline size_t utf8_length_from_utf32(const char32_t *buf, size_t len) { - // We are not BOM aware. - const uint32_t *p = reinterpret_cast(buf); - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - // credit: @ttsugriy for the vectorizable approach - counter++; // ASCII - counter += static_cast(p[i] > 0x7F); // two-byte - counter += static_cast(p[i] > 0x7FF); // three-byte - counter += static_cast(p[i] > 0xFFFF); // four-bytes + simdutf_really_inline void store_ascii_as_utf32(char32_t *p) const { + __m128i zero = __lsx_vldi(0); + __m128i in16low = __lsx_vilvl_b(zero, (__m128i)this->value); + __m128i in16high = __lsx_vilvh_b(zero, (__m128i)this->value); + __m128i in32_0 = __lsx_vilvl_h(zero, in16low); + __m128i in32_1 = __lsx_vilvh_h(zero, in16low); + __m128i in32_2 = __lsx_vilvl_h(zero, in16high); + __m128i in32_3 = __lsx_vilvh_h(zero, in16high); + __lsx_vst(in32_0, reinterpret_cast(p), 0); + __lsx_vst(in32_1, reinterpret_cast(p + 4), 0); + __lsx_vst(in32_2, reinterpret_cast(p + 8), 0); + __lsx_vst(in32_3, reinterpret_cast(p + 12), 0); } - return counter; -} -inline size_t utf16_length_from_utf32(const char32_t *buf, size_t len) { - // We are not BOM aware. - const uint32_t *p = reinterpret_cast(buf); - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - counter++; // non-surrogate word - counter += static_cast(p[i] > 0xFFFF); // surrogate pair - } - return counter; -} + // In places where the table can be reused, which is most uses in simdutf, it + // is worth it to do 4 table lookups, as there is no direct zero extension + // from u8 to u32. + simdutf_really_inline void store_ascii_as_utf32_tbl(char32_t *p) const { + const simd8 tb1{0, 255, 255, 255, 1, 255, 255, 255, + 2, 255, 255, 255, 3, 255, 255, 255}; + const simd8 tb2{4, 255, 255, 255, 5, 255, 255, 255, + 6, 255, 255, 255, 7, 255, 255, 255}; + const simd8 tb3{8, 255, 255, 255, 9, 255, 255, 255, + 10, 255, 255, 255, 11, 255, 255, 255}; + const simd8 tb4{12, 255, 255, 255, 13, 255, 255, 255, + 14, 255, 255, 255, 15, 255, 255, 255}; -inline size_t latin1_length_from_utf32(size_t len) { - // We are not BOM aware. - return len; // a utf32 codepoint will always represent 1 latin1 character -} + // encourage store pairing and interleaving + const auto shuf1 = this->apply_lookup_16_to(tb1); + const auto shuf2 = this->apply_lookup_16_to(tb2); + shuf1.store(reinterpret_cast(p)); + shuf2.store(reinterpret_cast(p + 4)); -inline simdutf_warn_unused uint32_t swap_bytes(const uint32_t word) { - return ((word >> 24) & 0xff) | // move byte 3 to byte 0 - ((word << 8) & 0xff0000) | // move byte 1 to byte 2 - ((word >> 8) & 0xff00) | // move byte 2 to byte 1 - ((word << 24) & 0xff000000); // byte 0 to byte 3 -} + const auto shuf3 = this->apply_lookup_16_to(tb3); + const auto shuf4 = this->apply_lookup_16_to(tb4); + shuf3.store(reinterpret_cast(p + 8)); + shuf4.store(reinterpret_cast(p + 12)); + } + // Conversion from/to SIMD register + simdutf_really_inline simd8(const __m128i _value) : value(_value) {} + simdutf_really_inline operator const __m128i &() const { return this->value; } -} // namespace utf32 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf + simdutf_really_inline operator const __m128i() const { return this->value; } -#endif -/* end file src/scalar/utf32.h */ -/* begin file src/scalar/base64.h */ -#ifndef SIMDUTF_BASE64_H -#define SIMDUTF_BASE64_H + simdutf_really_inline operator __m128i &() { return this->value; } -#include -#include -#include -#include + // Zero constructor + simdutf_really_inline simd8() : simd8(zero()) {} + // Splat constructor + simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const int8_t *values) : simd8(load(values)) {} + // Member-by-member initialization -namespace simdutf { -namespace scalar { -namespace { -namespace base64 { + simdutf_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) + : simd8((__m128i)v16i8{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15}) {} -// This function is not expected to be fast. Do not use in long loops. -template bool is_ascii_white_space(char_type c) { - return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; -} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdutf_really_inline static simd8 + repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } -template bool is_eight_byte(char_type c) { - if (sizeof(char_type) == 1) { - return true; + // Store to array + simdutf_really_inline void store(int8_t dst[16]) const { + return __lsx_vst(value, dst, 0); } - return uint8_t(c) == c; -} -// Returns true upon success. The destination buffer must be large enough. -// This functions assumes that the padding (=) has been removed. -template -full_result -base64_tail_decode(char *dst, const char_type *src, size_t length, - size_t padded_characters, // number of padding characters - // '=', typically 0, 1, 2. - base64_options options, - last_chunk_handling_options last_chunk_options) { - // This looks like 5 branches, but we expect the compiler to resolve this to a - // single branch: - const uint8_t *to_base64 = (options & base64_url) - ? tables::base64::to_base64_url_value - : tables::base64::to_base64_value; - const uint32_t *d0 = (options & base64_url) - ? tables::base64::base64_url::d0 - : tables::base64::base64_default::d0; - const uint32_t *d1 = (options & base64_url) - ? tables::base64::base64_url::d1 - : tables::base64::base64_default::d1; - const uint32_t *d2 = (options & base64_url) - ? tables::base64::base64_url::d2 - : tables::base64::base64_default::d2; - const uint32_t *d3 = (options & base64_url) - ? tables::base64::base64_url::d3 - : tables::base64::base64_default::d3; + simdutf_really_inline operator simd8() const { + return ((__m128i)this->value); + } - const char_type *srcend = src + length; - const char_type *srcinit = src; - const char *dstinit = dst; + simdutf_really_inline simd8 + operator|(const simd8 other) const { + return __lsx_vor_v((__m128i)value, (__m128i)other.value); + } + simdutf_really_inline simd8 + operator&(const simd8 other) const { + return __lsx_vand_v((__m128i)value, (__m128i)other.value); + } + simdutf_really_inline simd8 + operator^(const simd8 other) const { + return __lsx_vxor_v((__m128i)value, (__m128i)other.value); + } + simdutf_really_inline simd8 + bit_andnot(const simd8 other) const { + return __lsx_vandn_v((__m128i)other.value, (__m128i)value); + } - uint32_t x; - size_t idx; - uint8_t buffer[4]; - while (true) { - while (src + 4 <= srcend && is_eight_byte(src[0]) && - is_eight_byte(src[1]) && is_eight_byte(src[2]) && - is_eight_byte(src[3]) && - (x = d0[uint8_t(src[0])] | d1[uint8_t(src[1])] | - d2[uint8_t(src[2])] | d3[uint8_t(src[3])]) < 0x01FFFFFF) { - if (match_system(endianness::BIG)) { - x = scalar::utf32::swap_bytes(x); - } - std::memcpy(dst, &x, 3); // optimization opportunity: copy 4 bytes - dst += 3; - src += 4; - } - idx = 0; - // we need at least four characters. - while (idx < 4 && src < srcend) { - char_type c = *src; - uint8_t code = to_base64[uint8_t(c)]; - buffer[idx] = uint8_t(code); - if (is_eight_byte(c) && code <= 63) { - idx++; - } else if (code > 64 || !scalar::base64::is_eight_byte(c)) { - return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } else { - // We have a space or a newline. We ignore it. - } - src++; - } - if (idx != 4) { - if (last_chunk_options == last_chunk_handling_options::strict && - (idx != 1) && ((idx + padded_characters) & 3) != 0) { - // The partial chunk was at src - idx - return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } else if (last_chunk_options == - last_chunk_handling_options::stop_before_partial && - (idx != 1) && ((idx + padded_characters) & 3) != 0) { - // Rewind src to before partial chunk - src -= idx; - return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; - } else { - if (idx == 2) { - uint32_t triple = - (uint32_t(buffer[0]) << 3 * 6) + (uint32_t(buffer[1]) << 2 * 6); - if ((last_chunk_options == last_chunk_handling_options::strict) && - (triple & 0xffff)) { - return {BASE64_EXTRA_BITS, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - if (match_system(endianness::BIG)) { - triple <<= 8; - std::memcpy(dst, &triple, 1); - } else { - triple = scalar::utf32::swap_bytes(triple); - triple >>= 8; - std::memcpy(dst, &triple, 1); - } - dst += 1; - } else if (idx == 3) { - uint32_t triple = (uint32_t(buffer[0]) << 3 * 6) + - (uint32_t(buffer[1]) << 2 * 6) + - (uint32_t(buffer[2]) << 1 * 6); - if ((last_chunk_options == last_chunk_handling_options::strict) && - (triple & 0xff)) { - return {BASE64_EXTRA_BITS, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - if (match_system(endianness::BIG)) { - triple <<= 8; - std::memcpy(dst, &triple, 2); - } else { - triple = scalar::utf32::swap_bytes(triple); - triple >>= 8; - std::memcpy(dst, &triple, 2); - } - dst += 2; - } else if (idx == 1) { - return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; - } - } + // Math + simdutf_really_inline simd8 + operator+(const simd8 other) const { + return __lsx_vadd_b((__m128i)value, (__m128i)other.value); + } + simdutf_really_inline simd8 + operator-(const simd8 other) const { + return __lsx_vsub_b((__m128i)value, (__m128i)other.value); + } + simdutf_really_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *this; + } + simdutf_really_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *this; + } - uint32_t triple = - (uint32_t(buffer[0]) << 3 * 6) + (uint32_t(buffer[1]) << 2 * 6) + - (uint32_t(buffer[2]) << 1 * 6) + (uint32_t(buffer[3]) << 0 * 6); - if (match_system(endianness::BIG)) { - triple <<= 8; - std::memcpy(dst, &triple, 3); - } else { - triple = scalar::utf32::swap_bytes(triple); - triple >>= 8; - std::memcpy(dst, &triple, 3); - } - dst += 3; + simdutf_really_inline bool is_ascii() const { + return (__lsx_vpickve2gr_hu(__lsx_vmskgez_b((__m128i)this->value), 0) == + 0xffff); } -} -// like base64_tail_decode, but it will not write past the end of the output -// buffer. The outlen paramter is modified to reflect the number of bytes -// written. This functions assumes that the padding (=) has been removed. -template -result base64_tail_decode_safe( - char *dst, size_t &outlen, const char_type *&srcr, size_t length, - size_t padded_characters, // number of padding characters '=', typically 0, - // 1, 2. - base64_options options, last_chunk_handling_options last_chunk_options) { - const char_type *src = srcr; - if (length == 0) { - outlen = 0; - return {SUCCESS, 0}; + // Order-sensitive comparisons + simdutf_really_inline simd8 max_val(const simd8 other) const { + return __lsx_vmax_b((__m128i)value, (__m128i)other.value); + } + simdutf_really_inline simd8 min_val(const simd8 other) const { + return __lsx_vmin_b((__m128i)value, (__m128i)other.value); + } + simdutf_really_inline simd8 operator>(const simd8 other) const { + return __lsx_vslt_b((__m128i)other.value, (__m128i)value); + } + simdutf_really_inline simd8 operator<(const simd8 other) const { + return __lsx_vslt_b((__m128i)value, (__m128i)other.value); + } + simdutf_really_inline simd8 + operator==(const simd8 other) const { + return __lsx_vseq_b((__m128i)value, (__m128i)other.value); } - // This looks like 5 branches, but we expect the compiler to resolve this to a - // single branch: - const uint8_t *to_base64 = (options & base64_url) - ? tables::base64::to_base64_url_value - : tables::base64::to_base64_value; - const uint32_t *d0 = (options & base64_url) - ? tables::base64::base64_url::d0 - : tables::base64::base64_default::d0; - const uint32_t *d1 = (options & base64_url) - ? tables::base64::base64_url::d1 - : tables::base64::base64_default::d1; - const uint32_t *d2 = (options & base64_url) - ? tables::base64::base64_url::d2 - : tables::base64::base64_default::d2; - const uint32_t *d3 = (options & base64_url) - ? tables::base64::base64_url::d3 - : tables::base64::base64_default::d3; - - const char_type *srcend = src + length; - const char_type *srcinit = src; - const char *dstinit = dst; - const char *dstend = dst + outlen; - - uint32_t x; - size_t idx; - uint8_t buffer[4]; - while (true) { - while (src + 4 <= srcend && is_eight_byte(src[0]) && - is_eight_byte(src[1]) && is_eight_byte(src[2]) && - is_eight_byte(src[3]) && - (x = d0[uint8_t(src[0])] | d1[uint8_t(src[1])] | - d2[uint8_t(src[2])] | d3[uint8_t(src[3])]) < 0x01FFFFFF) { - if (dstend - dst < 3) { - outlen = size_t(dst - dstinit); - srcr = src; - return {OUTPUT_BUFFER_TOO_SMALL, size_t(src - srcinit)}; - } - if (match_system(endianness::BIG)) { - x = scalar::utf32::swap_bytes(x); - } - std::memcpy(dst, &x, 3); // optimization opportunity: copy 4 bytes - dst += 3; - src += 4; - } - idx = 0; - const char_type *srccur = src; - // We need at least four characters. - while (idx < 4 && src < srcend) { - char_type c = *src; - uint8_t code = to_base64[uint8_t(c)]; - - buffer[idx] = uint8_t(code); - if (is_eight_byte(c) && code <= 63) { - idx++; - } else if (code > 64 || !scalar::base64::is_eight_byte(c)) { - outlen = size_t(dst - dstinit); - srcr = src; - return {INVALID_BASE64_CHARACTER, size_t(src - srcinit)}; - } else { - // We have a space or a newline. We ignore it. - } - src++; - } - if (idx != 4) { - if (last_chunk_options == last_chunk_handling_options::strict && - ((idx + padded_characters) & 3) != 0) { - outlen = size_t(dst - dstinit); - srcr = src; - return {BASE64_INPUT_REMAINDER, size_t(src - srcinit)}; - } else if (last_chunk_options == - last_chunk_handling_options::stop_before_partial && - ((idx + padded_characters) & 3) != 0) { - // Rewind src to before partial chunk - srcr = srccur; - outlen = size_t(dst - dstinit); - return {SUCCESS, size_t(dst - dstinit)}; - } else { // loose mode - if (idx == 0) { - // No data left; return success - outlen = size_t(dst - dstinit); - srcr = src; - return {SUCCESS, size_t(dst - dstinit)}; - } else if (idx == 1) { - // Error: Incomplete chunk of length 1 is invalid in loose mode - outlen = size_t(dst - dstinit); - srcr = src; - return {BASE64_INPUT_REMAINDER, size_t(src - srcinit)}; - } else if (idx == 2 || idx == 3) { - // Check if there's enough space in the destination buffer - size_t required_space = (idx == 2) ? 1 : 2; - if (size_t(dstend - dst) < required_space) { - outlen = size_t(dst - dstinit); - srcr = src; - return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit)}; - } - uint32_t triple = 0; - if (idx == 2) { - triple = (uint32_t(buffer[0]) << 18) + (uint32_t(buffer[1]) << 12); - if ((last_chunk_options == last_chunk_handling_options::strict) && - (triple & 0xffff)) { - srcr = src; - return {BASE64_EXTRA_BITS, size_t(src - srcinit)}; - } - // Extract the first byte - triple >>= 16; - dst[0] = static_cast(triple & 0xFF); - dst += 1; - } else if (idx == 3) { - triple = (uint32_t(buffer[0]) << 18) + (uint32_t(buffer[1]) << 12) + - (uint32_t(buffer[2]) << 6); - if ((last_chunk_options == last_chunk_handling_options::strict) && - (triple & 0xff)) { - srcr = src; - return {BASE64_EXTRA_BITS, size_t(src - srcinit)}; - } - // Extract the first two bytes - triple >>= 8; - dst[0] = static_cast((triple >> 8) & 0xFF); - dst[1] = static_cast(triple & 0xFF); - dst += 2; - } - outlen = size_t(dst - dstinit); - srcr = src; - return {SUCCESS, size_t(dst - dstinit)}; - } - } - } - if (dstend - dst < 3) { - outlen = size_t(dst - dstinit); - srcr = src; - return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit)}; - } - uint32_t triple = (uint32_t(buffer[0]) << 18) + - (uint32_t(buffer[1]) << 12) + (uint32_t(buffer[2]) << 6) + - (uint32_t(buffer[3])); - if (match_system(endianness::BIG)) { - triple <<= 8; - std::memcpy(dst, &triple, 3); - } else { - triple = scalar::utf32::swap_bytes(triple); - triple >>= 8; - std::memcpy(dst, &triple, 3); - } - dst += 3; + template + simdutf_really_inline simd8 + prev(const simd8 prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(this->value, N), + __lsx_vbsrl_v(prev_chunk.value, 16 - N)); } -} -// Returns the number of bytes written. The destination buffer must be large -// enough. It will add padding (=) if needed. -size_t tail_encode_base64(char *dst, const char *src, size_t srclen, - base64_options options) { - // By default, we use padding if we are not using the URL variant. - // This is check with ((options & base64_url) == 0) which returns true if we - // are not using the URL variant. However, we also allow 'inversion' of the - // convention with the base64_reverse_padding option. If the - // base64_reverse_padding option is set, we use padding if we are using the - // URL variant, and we omit it if we are not using the URL variant. This is - // checked with - // ((options & base64_reverse_padding) == base64_reverse_padding). - bool use_padding = - ((options & base64_url) == 0) ^ - ((options & base64_reverse_padding) == base64_reverse_padding); - // This looks like 3 branches, but we expect the compiler to resolve this to - // a single branch: - const char *e0 = (options & base64_url) ? tables::base64::base64_url::e0 - : tables::base64::base64_default::e0; - const char *e1 = (options & base64_url) ? tables::base64::base64_url::e1 - : tables::base64::base64_default::e1; - const char *e2 = (options & base64_url) ? tables::base64::base64_url::e2 - : tables::base64::base64_default::e2; - char *out = dst; - size_t i = 0; - uint8_t t1, t2, t3; - for (; i + 2 < srclen; i += 3) { - t1 = uint8_t(src[i]); - t2 = uint8_t(src[i + 1]); - t3 = uint8_t(src[i + 2]); - *out++ = e0[t1]; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; - *out++ = e2[t3]; + // Perform a lookup assuming no value is larger than 16 + template + simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); } - switch (srclen - i) { - case 0: - break; - case 1: - t1 = uint8_t(src[i]); - *out++ = e0[t1]; - *out++ = e1[(t1 & 0x03) << 4]; - if (use_padding) { - *out++ = '='; - *out++ = '='; - } - break; - default: /* case 2 */ - t1 = uint8_t(src[i]); - t2 = uint8_t(src[i + 1]); - *out++ = e0[t1]; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = e2[(t2 & 0x0F) << 2]; - if (use_padding) { - *out++ = '='; - } + template + simdutf_really_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); } - return (size_t)(out - dst); -} -template -simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char_type *input, size_t length) noexcept { - // We follow https://infra.spec.whatwg.org/#forgiving-base64-decode - size_t padding = 0; - if (length > 0) { - if (input[length - 1] == '=') { - padding++; - if (length > 1 && input[length - 2] == '=') { - padding++; - } - } - } - size_t actual_length = length - padding; - if (actual_length % 4 <= 1) { - return actual_length / 4 * 3; + template + simdutf_really_inline simd8 + apply_lookup_16_to(const simd8 original) const { + __m128i original_tmp = __lsx_vand_v(original, __lsx_vldi(0x1f)); + return __lsx_vshuf_b(__lsx_vldi(0), (__m128i)this->value, + simd8(original_tmp)); } - // if we have a valid input, then the remainder must be 2 or 3 adding one or - // two extra bytes. - return actual_length / 4 * 3 + (actual_length % 4) - 1; -} +}; -simdutf_warn_unused size_t -base64_length_from_binary(size_t length, base64_options options) noexcept { - // By default, we use padding if we are not using the URL variant. - // This is check with ((options & base64_url) == 0) which returns true if we - // are not using the URL variant. However, we also allow 'inversion' of the - // convention with the base64_reverse_padding option. If the - // base64_reverse_padding option is set, we use padding if we are using the - // URL variant, and we omit it if we are not using the URL variant. This is - // checked with - // ((options & base64_reverse_padding) == base64_reverse_padding). - bool use_padding = - ((options & base64_url) == 0) ^ - ((options & base64_reverse_padding) == base64_reverse_padding); - if (!use_padding) { - return length / 3 * 4 + ((length % 3) ? (length % 3) + 1 : 0); +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert( + NUM_CHUNKS == 4, + "LoongArch kernel should use four registers per 64-byte block."); + simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8 other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdutf_really_inline simd8x64(const T *ptr) + : chunks{simd8::load(ptr), + simd8::load(ptr + sizeof(simd8) / sizeof(T)), + simd8::load(ptr + 2 * sizeof(simd8) / sizeof(T)), + simd8::load(ptr + 3 * sizeof(simd8) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); + this->chunks[2].store(ptr + sizeof(simd8) * 2 / sizeof(T)); + this->chunks[3].store(ptr + sizeof(simd8) * 3 / sizeof(T)); } - return (length + 2) / 3 * - 4; // We use padding to make the length a multiple of 4. -} -} // namespace base64 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf + simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { + this->chunks[0] |= other.chunks[0]; + this->chunks[1] |= other.chunks[1]; + this->chunks[2] |= other.chunks[2]; + this->chunks[3] |= other.chunks[3]; + return *this; + } -#endif -/* end file src/scalar/base64.h */ -/* begin file src/scalar/latin1_to_utf8/latin1_to_utf8.h */ -#ifndef SIMDUTF_LATIN1_TO_UTF8_H -#define SIMDUTF_LATIN1_TO_UTF8_H + simdutf_really_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } -namespace simdutf { -namespace scalar { -namespace { -namespace latin1_to_utf8 { + simdutf_really_inline bool is_ascii() const { return reduce_or().is_ascii(); } -inline size_t convert(const char *buf, size_t len, char *utf8_output) { - const unsigned char *data = reinterpret_cast(buf); - size_t pos = 0; - size_t utf8_pos = 0; - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | - v2}; // We are only interested in these bits: 1000 1000 1000 - // 1000, so it makes sense to concatenate everything - if ((v & 0x8080808080808080) == - 0) { // if NONE of these are set, e.g. all of them are zero, then - // everything is ASCII - size_t final_pos = pos + 16; - while (pos < final_pos) { - utf8_output[utf8_pos++] = char(buf[pos]); - pos++; - } - continue; - } - } + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 0); + this->chunks[1].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 1); + this->chunks[2].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 2); + this->chunks[3].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 3); + } - unsigned char byte = data[pos]; - if ((byte & 0x80) == 0) { // if ASCII - // will generate one UTF-8 bytes - utf8_output[utf8_pos++] = char(byte); - pos++; - } else { - // will generate two UTF-8 bytes - utf8_output[utf8_pos++] = char((byte >> 6) | 0b11000000); - utf8_output[utf8_pos++] = char((byte & 0b111111) | 0b10000000); - pos++; - } + simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { + this->chunks[0].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 0); + this->chunks[1].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 1); + this->chunks[2].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 2); + this->chunks[3].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 3); } - return utf8_pos; -} -inline size_t convert_safe(const char *buf, size_t len, char *utf8_output, - size_t utf8_len) { - const unsigned char *data = reinterpret_cast(buf); - size_t pos = 0; - size_t skip_pos = 0; - size_t utf8_pos = 0; - while (pos < len && utf8_pos < utf8_len) { - // try to convert the next block of 16 ASCII bytes - if (pos >= skip_pos && pos + 16 <= len && - utf8_pos + 16 <= utf8_len) { // if it is safe to read 16 more bytes, - // check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | - v2}; // We are only interested in these bits: 1000 1000 1000 - // 1000, so it makes sense to concatenate everything - if ((v & 0x8080808080808080) == - 0) { // if NONE of these are set, e.g. all of them are zero, then - // everything is ASCII - ::memcpy(utf8_output + utf8_pos, buf + pos, 16); - utf8_pos += 16; - pos += 16; - } else { - // At least one of the next 16 bytes are not ASCII, we will process them - // one by one - skip_pos = pos + 16; - } - } else { - const auto byte = data[pos]; - if ((byte & 0x80) == 0) { // if ASCII - // will generate one UTF-8 bytes - utf8_output[utf8_pos++] = char(byte); - pos++; - } else if (utf8_pos + 2 <= utf8_len) { - // will generate two UTF-8 bytes - utf8_output[utf8_pos++] = char((byte >> 6) | 0b11000000); - utf8_output[utf8_pos++] = char((byte & 0b111111) | 0b10000000); - pos++; - } else { - break; - } - } + simdutf_really_inline uint64_t to_bitmask() const { + __m128i mask = __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[3]), 6); + mask = __lsx_vor_v(mask, __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[2]), 4)); + mask = __lsx_vor_v(mask, __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[1]), 2)); + mask = __lsx_vor_v(mask, __lsx_vmsknz_b(this->chunks[0])); + return __lsx_vpickve2gr_du(mask, 0); } - return utf8_pos; -} -} // namespace latin1_to_utf8 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf + simdutf_really_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } -#endif -/* end file src/scalar/latin1_to_utf8/latin1_to_utf8.h */ + simdutf_really_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } -namespace simdutf { -bool implementation::supported_by_runtime_system() const { - uint32_t required_instruction_sets = this->required_instruction_sets(); - uint32_t supported_instruction_sets = - internal::detect_supported_architectures(); - return ((supported_instruction_sets & required_instruction_sets) == - required_instruction_sets); -} + simdutf_really_inline uint64_t in_range(const T low, const T high) const { + const simd8 mask_low = simd8::splat(low); + const simd8 mask_high = simd8::splat(high); -simdutf_warn_unused encoding_type implementation::autodetect_encoding( - const char *input, size_t length) const noexcept { - // If there is a BOM, then we trust it. - auto bom_encoding = simdutf::BOM::check_bom(input, length); - if (bom_encoding != encoding_type::unspecified) { - return bom_encoding; + return simd8x64( + (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), + (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low), + (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low), + (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low)) + .to_bitmask(); } - // UTF8 is common, it includes ASCII, and is commonly represented - // without a BOM, so if it fits, go with that. Note that it is still - // possible to get it wrong, we are only 'guessing'. If some has UTF-16 - // data without a BOM, it could pass as UTF-8. - // - // An interesting twist might be to check for UTF-16 ASCII first (every - // other byte is zero). - if (validate_utf8(input, length)) { - return encoding_type::UTF8; + simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { + const simd8 mask_low = simd8::splat(low); + const simd8 mask_high = simd8::splat(high); + return simd8x64( + (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low), + (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low), + (this->chunks[2] > mask_high) | (this->chunks[2] < mask_low), + (this->chunks[3] > mask_high) | (this->chunks[3] < mask_low)) + .to_bitmask(); } - // The next most common encoding that might appear without BOM is probably - // UTF-16LE, so try that next. - if ((length % 2) == 0) { - // important: we need to divide by two - if (validate_utf16le(reinterpret_cast(input), - length / 2)) { - return encoding_type::UTF16_LE; - } + simdutf_really_inline uint64_t lt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask, + this->chunks[2] < mask, this->chunks[3] < mask) + .to_bitmask(); } - if ((length % 4) == 0) { - if (validate_utf32(reinterpret_cast(input), length / 4)) { - return encoding_type::UTF32_LE; - } + simdutf_really_inline uint64_t gt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask, + this->chunks[2] > mask, this->chunks[3] > mask) + .to_bitmask(); } - return encoding_type::unspecified; -} + simdutf_really_inline uint64_t gteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] >= mask, this->chunks[1] >= mask, + this->chunks[2] >= mask, this->chunks[3] >= mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(simd8(this->chunks[0].value) >= mask, + simd8(this->chunks[1].value) >= mask, + simd8(this->chunks[2].value) >= mask, + simd8(this->chunks[3].value) >= mask) + .to_bitmask(); + } +}; // struct simd8x64 +/* begin file src/simdutf/lsx/simd16-inl.h */ +template struct simd16; -namespace internal { -// When there is a single implementation, we should not pay a price -// for dispatching to the best implementation. We should just use the -// one we have. This is a compile-time check. -#define SIMDUTF_SINGLE_IMPLEMENTATION \ - (SIMDUTF_IMPLEMENTATION_ICELAKE + SIMDUTF_IMPLEMENTATION_HASWELL + \ - SIMDUTF_IMPLEMENTATION_WESTMERE + SIMDUTF_IMPLEMENTATION_ARM64 + \ - SIMDUTF_IMPLEMENTATION_PPC64 + SIMDUTF_IMPLEMENTATION_FALLBACK == \ - 1) - -// Static array of known implementations. We are hoping these get baked into the -// executable without requiring a static initializer. - -#if SIMDUTF_IMPLEMENTATION_ICELAKE -static const icelake::implementation *get_icelake_singleton() { - static const icelake::implementation icelake_singleton{}; - return &icelake_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_HASWELL -static const haswell::implementation *get_haswell_singleton() { - static const haswell::implementation haswell_singleton{}; - return &haswell_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_WESTMERE -static const westmere::implementation *get_westmere_singleton() { - static const westmere::implementation westmere_singleton{}; - return &westmere_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_ARM64 -static const arm64::implementation *get_arm64_singleton() { - static const arm64::implementation arm64_singleton{}; - return &arm64_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_PPC64 -static const ppc64::implementation *get_ppc64_singleton() { - static const ppc64::implementation ppc64_singleton{}; - return &ppc64_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_RVV -static const rvv::implementation *get_rvv_singleton() { - static const rvv::implementation rvv_singleton{}; - return &rvv_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_FALLBACK -static const fallback::implementation *get_fallback_singleton() { - static const fallback::implementation fallback_singleton{}; - return &fallback_singleton; -} -#endif - -#if SIMDUTF_SINGLE_IMPLEMENTATION -static const implementation *get_single_implementation() { - return - #if SIMDUTF_IMPLEMENTATION_ICELAKE - get_icelake_singleton(); - #endif - #if SIMDUTF_IMPLEMENTATION_HASWELL - get_haswell_singleton(); - #endif - #if SIMDUTF_IMPLEMENTATION_WESTMERE - get_westmere_singleton(); - #endif - #if SIMDUTF_IMPLEMENTATION_ARM64 - get_arm64_singleton(); - #endif - #if SIMDUTF_IMPLEMENTATION_PPC64 - get_ppc64_singleton(); - #endif - #if SIMDUTF_IMPLEMENTATION_FALLBACK - get_fallback_singleton(); - #endif -} -#endif +template > struct base_u16 { + __m128i value; + static const int SIZE = sizeof(value); -/** - * @private Detects best supported implementation on first use, and sets it - */ -class detect_best_supported_implementation_on_first_use final - : public implementation { -public: - std::string name() const noexcept final { return set_best()->name(); } - std::string description() const noexcept final { - return set_best()->description(); + // Conversion from/to SIMD register + simdutf_really_inline base_u16() = default; + simdutf_really_inline base_u16(const __m128i _value) : value(_value) {} + // Bit operations + simdutf_really_inline simd16 operator|(const simd16 other) const { + return __lsx_vor_v(this->value, other.value); } - uint32_t required_instruction_sets() const noexcept final { - return set_best()->required_instruction_sets(); + simdutf_really_inline simd16 operator&(const simd16 other) const { + return __lsx_vand_v(this->value, other.value); } - - simdutf_warn_unused int - detect_encodings(const char *input, size_t length) const noexcept override { - return set_best()->detect_encodings(input, length); + simdutf_really_inline simd16 operator^(const simd16 other) const { + return __lsx_vxor_v(this->value, other.value); } - - simdutf_warn_unused bool - validate_utf8(const char *buf, size_t len) const noexcept final override { - return set_best()->validate_utf8(buf, len); + simdutf_really_inline simd16 bit_andnot(const simd16 other) const { + return __lsx_vandn_v(this->value, other.value); } - - simdutf_warn_unused result validate_utf8_with_errors( - const char *buf, size_t len) const noexcept final override { - return set_best()->validate_utf8_with_errors(buf, len); + simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFu; } + simdutf_really_inline simd16 &operator|=(const simd16 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast | other; + return *this_cast; } - - simdutf_warn_unused bool - validate_ascii(const char *buf, size_t len) const noexcept final override { - return set_best()->validate_ascii(buf, len); + simdutf_really_inline simd16 &operator&=(const simd16 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast & other; + return *this_cast; } - - simdutf_warn_unused result validate_ascii_with_errors( - const char *buf, size_t len) const noexcept final override { - return set_best()->validate_ascii_with_errors(buf, len); + simdutf_really_inline simd16 &operator^=(const simd16 other) { + auto this_cast = static_cast *>(this); + *this_cast = *this_cast ^ other; + return *this_cast; } - simdutf_warn_unused bool - validate_utf16le(const char16_t *buf, - size_t len) const noexcept final override { - return set_best()->validate_utf16le(buf, len); + friend simdutf_really_inline Mask operator==(const simd16 lhs, + const simd16 rhs) { + return __lsx_vseq_h(lhs.value, rhs.value); } - simdutf_warn_unused bool - validate_utf16be(const char16_t *buf, - size_t len) const noexcept final override { - return set_best()->validate_utf16be(buf, len); + template + simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(*this, N * 2), + __lsx_vbsrl_v(prev_chunk, 16 - N * 2)); } +}; - simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept final override { - return set_best()->validate_utf16le_with_errors(buf, len); - } +template > +struct base16 : base_u16 { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept final override { - return set_best()->validate_utf16be_with_errors(buf, len); - } + simdutf_really_inline base16() : base_u16() {} + simdutf_really_inline base16(const __m128i _value) : base_u16(_value) {} + template + simdutf_really_inline base16(const Pointer *ptr) + : base16(__lsx_vld(ptr, 0)) {} - simdutf_warn_unused bool - validate_utf32(const char32_t *buf, - size_t len) const noexcept final override { - return set_best()->validate_utf32(buf, len); - } + static const int SIZE = sizeof(base_u16::value); - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept final override { - return set_best()->validate_utf32_with_errors(buf, len); + template + simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(*this, N * 2), + __lsx_vbsrl_v(prev_chunk, 16 - N * 2)); } +}; - simdutf_warn_unused size_t - convert_latin1_to_utf8(const char *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_latin1_to_utf8(buf, len, utf8_output); +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd16 : base16 { + static simdutf_really_inline simd16 splat(bool _value) { + return __lsx_vreplgr2vr_h(uint16_t(-(!!_value))); } - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_latin1_to_utf16le(buf, len, utf16_output); - } + simdutf_really_inline simd16() : base16() {} + simdutf_really_inline simd16(const __m128i _value) : base16(_value) {} + // Splat constructor + simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} +}; - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_latin1_to_utf16be(buf, len, utf16_output); +template struct base16_numeric : base16 { + static simdutf_really_inline simd16 splat(T _value) { + return __lsx_vreplgr2vr_h(_value); } - - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, - char32_t *latin1_output) const noexcept final override { - return set_best()->convert_latin1_to_utf32(buf, len, latin1_output); + static simdutf_really_inline simd16 zero() { return __lsx_vldi(0); } + static simdutf_really_inline simd16 load(const T values[8]) { + return __lsx_vld(reinterpret_cast(values), 0); } - simdutf_warn_unused size_t - convert_utf8_to_latin1(const char *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf8_to_latin1(buf, len, latin1_output); - } + simdutf_really_inline base16_numeric() : base16() {} + simdutf_really_inline base16_numeric(const __m128i _value) + : base16(_value) {} - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf8_to_latin1_with_errors(buf, len, - latin1_output); + // Store to array + simdutf_really_inline void store(T dst[8]) const { + return __lsx_vst(this->value, dst, 0); } - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_valid_utf8_to_latin1(buf, len, latin1_output); - } + // Override to distinguish from bool version + simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFu; } - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf8_to_utf16le(buf, len, utf16_output); + // Addition/subtraction are the same for signed and unsigned + simdutf_really_inline simd16 operator+(const simd16 other) const { + return __lsx_vadd_b(*this, other); } - - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf8_to_utf16be(buf, len, utf16_output); + simdutf_really_inline simd16 operator-(const simd16 other) const { + return __lsx_vsub_b(*this, other); } - - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf8_to_utf16le_with_errors(buf, len, - utf16_output); + simdutf_really_inline simd16 &operator+=(const simd16 other) { + *this = *this + other; + return *static_cast *>(this); } - - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf8_to_utf16be_with_errors(buf, len, - utf16_output); + simdutf_really_inline simd16 &operator-=(const simd16 other) { + *this = *this - other; + return *static_cast *>(this); } +}; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_valid_utf8_to_utf16le(buf, len, utf16_output); - } +// Signed code unitstemplate<> +template <> struct simd16 : base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} + simdutf_really_inline simd16(const __m128i _value) + : base16_numeric(_value) {} + simdutf_really_inline simd16(simd16 other) + : base16_numeric(other.value) {} - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_valid_utf8_to_utf16be(buf, len, utf16_output); - } + // Splat constructor + simdutf_really_inline simd16(int16_t _value) : simd16(splat(_value)) {} + // Array constructor + simdutf_really_inline simd16(const int16_t *values) : simd16(load(values)) {} + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} + simdutf_really_inline operator simd16() const; - simdutf_warn_unused size_t - convert_utf8_to_utf32(const char *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_utf8_to_utf32(buf, len, utf32_output); + // Order-sensitive comparisons + simdutf_really_inline simd16 + max_val(const simd16 other) const { + return __lsx_vmax_h(this->value, other.value); } - - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_utf8_to_utf32_with_errors(buf, len, - utf32_output); + simdutf_really_inline simd16 + min_val(const simd16 other) const { + return __lsx_vmin_h(this->value, other.value); } - - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_valid_utf8_to_utf32(buf, len, utf32_output); + simdutf_really_inline simd16 + operator>(const simd16 other) const { + return __lsx_vsle_h(other.value, this->value); } - - simdutf_warn_unused size_t - convert_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf16le_to_latin1(buf, len, latin1_output); + simdutf_really_inline simd16 + operator<(const simd16 other) const { + return __lsx_vslt_h(this->value, other.value); } +}; - simdutf_warn_unused size_t - convert_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf16be_to_latin1(buf, len, latin1_output); - } +// Unsigned code unitstemplate<> +template <> struct simd16 : base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} + simdutf_really_inline simd16(const __m128i _value) + : base16_numeric((__m128i)_value) {} + simdutf_really_inline simd16(simd16 other) + : base16_numeric(other.value) {} - simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf16le_to_latin1_with_errors(buf, len, - latin1_output); - } + // Splat constructor + simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} + // Array constructor + simdutf_really_inline simd16(const uint16_t *values) : simd16(load(values)) {} + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf16be_to_latin1_with_errors(buf, len, - latin1_output); + // Saturated math + simdutf_really_inline simd16 + saturating_add(const simd16 other) const { + return __lsx_vsadd_hu(this->value, other.value); } - - simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( - const char16_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_valid_utf16le_to_latin1(buf, len, latin1_output); + simdutf_really_inline simd16 + saturating_sub(const simd16 other) const { + return __lsx_vssub_hu(this->value, other.value); } - simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( - const char16_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_valid_utf16be_to_latin1(buf, len, latin1_output); + // Order-specific operations + simdutf_really_inline simd16 + max_val(const simd16 other) const { + return __lsx_vmax_hu(this->value, other.value); } - - simdutf_warn_unused size_t - convert_utf16le_to_utf8(const char16_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_utf16le_to_utf8(buf, len, utf8_output); + simdutf_really_inline simd16 + min_val(const simd16 other) const { + return __lsx_vmin_hu(this->value, other.value); } - - simdutf_warn_unused size_t - convert_utf16be_to_utf8(const char16_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_utf16be_to_utf8(buf, len, utf8_output); + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd16 + gt_bits(const simd16 other) const { + return this->saturating_sub(other); } - - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_utf16le_to_utf8_with_errors(buf, len, - utf8_output); + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd16 + lt_bits(const simd16 other) const { + return other.saturating_sub(*this); } - - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_utf16be_to_utf8_with_errors(buf, len, - utf8_output); + simdutf_really_inline simd16 + operator<=(const simd16 other) const { + return __lsx_vsle_hu(this->value, other.value); } - - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_valid_utf16le_to_utf8(buf, len, utf8_output); + simdutf_really_inline simd16 + operator>=(const simd16 other) const { + return __lsx_vsle_hu(other.value, this->value); } - - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_valid_utf16be_to_utf8(buf, len, utf8_output); + simdutf_really_inline simd16 + operator>(const simd16 other) const { + return __lsx_vslt_hu(other.value, this->value); } - - simdutf_warn_unused size_t - convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf32_to_latin1(buf, len, latin1_output); + simdutf_really_inline simd16 + operator<(const simd16 other) const { + return __lsx_vslt_hu(this->value, other.value); } - simdutf_warn_unused result convert_utf32_to_latin1_with_errors( - const char32_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf32_to_latin1_with_errors(buf, len, - latin1_output); + // Bit-specific operations + simdutf_really_inline simd16 bits_not_set() const { + return *this == uint16_t(0); } - - simdutf_warn_unused size_t convert_valid_utf32_to_latin1( - const char32_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf32_to_latin1(buf, len, latin1_output); + template simdutf_really_inline simd16 shr() const { + return simd16(__lsx_vsrli_h(this->value, N)); } - - simdutf_warn_unused size_t - convert_utf32_to_utf8(const char32_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_utf32_to_utf8(buf, len, utf8_output); + template simdutf_really_inline simd16 shl() const { + return simd16(__lsx_vslli_h(this->value, N)); } - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_utf32_to_utf8_with_errors(buf, len, utf8_output); + // logical operations + simdutf_really_inline simd16 + operator|(const simd16 other) const { + return __lsx_vor_v(this->value, other.value); } - - simdutf_warn_unused size_t - convert_valid_utf32_to_utf8(const char32_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_valid_utf32_to_utf8(buf, len, utf8_output); + simdutf_really_inline simd16 + operator&(const simd16 other) const { + return __lsx_vand_v(this->value, other.value); } - - simdutf_warn_unused size_t convert_utf32_to_utf16le( - const char32_t *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf32_to_utf16le(buf, len, utf16_output); + simdutf_really_inline simd16 + operator^(const simd16 other) const { + return __lsx_vxor_v(this->value, other.value); } - simdutf_warn_unused size_t convert_utf32_to_utf16be( - const char32_t *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf32_to_utf16be(buf, len, utf16_output); + // Pack with the unsigned saturation of two uint16_t code units into single + // uint8_t vector + static simdutf_really_inline simd8 pack(const simd16 &v0, + const simd16 &v1) { + return __lsx_vssrlni_bu_h(v1.value, v0.value, 0); } - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf32_to_utf16le_with_errors(buf, len, - utf16_output); + // Change the endianness + simdutf_really_inline simd16 swap_bytes() const { + return __lsx_vshuf4i_b(this->value, 0b10110001); } +}; - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf32_to_utf16be_with_errors(buf, len, - utf16_output); - } +simdutf_really_inline simd16::operator simd16() const { + return this->value; +} - simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( - const char32_t *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_valid_utf32_to_utf16le(buf, len, utf16_output); +template struct simd16x32 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); + static_assert( + NUM_CHUNKS == 4, + "LOONGARCH kernel should use four registers per 64-byte block."); + simd16 chunks[NUM_CHUNKS]; + + simd16x32(const simd16x32 &o) = delete; // no copy allowed + simd16x32 & + operator=(const simd16 other) = delete; // no assignment allowed + simd16x32() = delete; // no default constructor allowed + + simdutf_really_inline + simd16x32(const simd16 chunk0, const simd16 chunk1, + const simd16 chunk2, const simd16 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdutf_really_inline simd16x32(const T *ptr) + : chunks{simd16::load(ptr), + simd16::load(ptr + sizeof(simd16) / sizeof(T)), + simd16::load(ptr + 2 * sizeof(simd16) / sizeof(T)), + simd16::load(ptr + 3 * sizeof(simd16) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); + this->chunks[2].store(ptr + sizeof(simd16) * 2 / sizeof(T)); + this->chunks[3].store(ptr + sizeof(simd16) * 3 / sizeof(T)); } - simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( - const char32_t *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_valid_utf32_to_utf16be(buf, len, utf16_output); + simdutf_really_inline simd16 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); } - simdutf_warn_unused size_t convert_utf16le_to_utf32( - const char16_t *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_utf16le_to_utf32(buf, len, utf32_output); + simdutf_really_inline bool is_ascii() const { return reduce_or().is_ascii(); } + + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16) * 0); + this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16) * 1); + this->chunks[2].store_ascii_as_utf16(ptr + sizeof(simd16) * 2); + this->chunks[3].store_ascii_as_utf16(ptr + sizeof(simd16) * 3); } - simdutf_warn_unused size_t convert_utf16be_to_utf32( - const char16_t *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_utf16be_to_utf32(buf, len, utf32_output); + simdutf_really_inline uint64_t to_bitmask() const { + __m128i mask = __lsx_vbsll_v(__lsx_vmsknz_b((this->chunks[3]).value), 6); + mask = __lsx_vor_v( + mask, __lsx_vbsll_v(__lsx_vmsknz_b((this->chunks[2]).value), 4)); + mask = __lsx_vor_v( + mask, __lsx_vbsll_v(__lsx_vmsknz_b((this->chunks[1]).value), 2)); + mask = __lsx_vor_v(mask, __lsx_vmsknz_b((this->chunks[0]).value)); + return __lsx_vpickve2gr_du(mask, 0); } - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_utf16le_to_utf32_with_errors(buf, len, - utf32_output); - } - - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_utf16be_to_utf32_with_errors(buf, len, - utf32_output); - } - - simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( - const char16_t *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_valid_utf16le_to_utf32(buf, len, utf32_output); - } - - simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( - const char16_t *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_valid_utf16be_to_utf32(buf, len, utf32_output); - } - - void change_endianness_utf16(const char16_t *buf, size_t len, - char16_t *output) const noexcept final override { - set_best()->change_endianness_utf16(buf, len, output); - } - - simdutf_warn_unused size_t - count_utf16le(const char16_t *buf, size_t len) const noexcept final override { - return set_best()->count_utf16le(buf, len); - } - - simdutf_warn_unused size_t - count_utf16be(const char16_t *buf, size_t len) const noexcept final override { - return set_best()->count_utf16be(buf, len); - } - - simdutf_warn_unused size_t - count_utf8(const char *buf, size_t len) const noexcept final override { - return set_best()->count_utf8(buf, len); - } - - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *buf, size_t len) const noexcept override { - return set_best()->latin1_length_from_utf8(buf, len); - } - - simdutf_warn_unused size_t - latin1_length_from_utf16(size_t len) const noexcept override { - return set_best()->latin1_length_from_utf16(len); - } - - simdutf_warn_unused size_t - latin1_length_from_utf32(size_t len) const noexcept override { - return set_best()->latin1_length_from_utf32(len); - } - - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *buf, size_t len) const noexcept override { - return set_best()->utf8_length_from_latin1(buf, len); - } - - simdutf_warn_unused size_t utf8_length_from_utf16le( - const char16_t *buf, size_t len) const noexcept override { - return set_best()->utf8_length_from_utf16le(buf, len); - } - - simdutf_warn_unused size_t utf8_length_from_utf16be( - const char16_t *buf, size_t len) const noexcept override { - return set_best()->utf8_length_from_utf16be(buf, len); - } - - simdutf_warn_unused size_t - utf16_length_from_latin1(size_t len) const noexcept override { - return set_best()->utf16_length_from_latin1(len); - } - - simdutf_warn_unused size_t - utf32_length_from_latin1(size_t len) const noexcept override { - return set_best()->utf32_length_from_latin1(len); - } - - simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *buf, size_t len) const noexcept override { - return set_best()->utf32_length_from_utf16le(buf, len); - } - - simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *buf, size_t len) const noexcept override { - return set_best()->utf32_length_from_utf16be(buf, len); - } - - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *buf, size_t len) const noexcept override { - return set_best()->utf16_length_from_utf8(buf, len); + simdutf_really_inline void swap_bytes() { + this->chunks[0] = this->chunks[0].swap_bytes(); + this->chunks[1] = this->chunks[1].swap_bytes(); + this->chunks[2] = this->chunks[2].swap_bytes(); + this->chunks[3] = this->chunks[3].swap_bytes(); } - simdutf_warn_unused size_t utf8_length_from_utf32( - const char32_t *buf, size_t len) const noexcept override { - return set_best()->utf8_length_from_utf32(buf, len); + simdutf_really_inline uint64_t eq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); } - simdutf_warn_unused size_t utf16_length_from_utf32( - const char32_t *buf, size_t len) const noexcept override { - return set_best()->utf16_length_from_utf32(buf, len); + simdutf_really_inline uint64_t lteq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); } - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *buf, size_t len) const noexcept override { - return set_best()->utf32_length_from_utf8(buf, len); - } + simdutf_really_inline uint64_t in_range(const T low, const T high) const { + const simd16 mask_low = simd16::splat(low); + const simd16 mask_high = simd16::splat(high); - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept override { - return set_best()->maximal_binary_length_from_base64(input, length); + return simd16x32( + (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), + (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low), + (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low), + (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low)) + .to_bitmask(); } - - simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_handling_options = - last_chunk_handling_options::loose) const noexcept override { - return set_best()->base64_to_binary(input, length, output, options, - last_chunk_handling_options); + simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { + const simd16 mask_low = simd16::splat(low); + const simd16 mask_high = simd16::splat(high); + return simd16x32( + (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low), + (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low), + (this->chunks[2] > mask_high) | (this->chunks[2] < mask_low), + (this->chunks[3] > mask_high) | (this->chunks[3] < mask_low)) + .to_bitmask(); } - - simdutf_warn_unused full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_handling_options = - last_chunk_handling_options::loose) const noexcept override { - return set_best()->base64_to_binary_details(input, length, output, options, - last_chunk_handling_options); + simdutf_really_inline uint64_t lt(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] < mask, this->chunks[1] < mask, + this->chunks[2] < mask, this->chunks[3] < mask) + .to_bitmask(); } - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept override { - return set_best()->maximal_binary_length_from_base64(input, length); - } +}; // struct simd16x32 - simdutf_warn_unused result base64_to_binary( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_handling_options = - last_chunk_handling_options::loose) const noexcept override { - return set_best()->base64_to_binary(input, length, output, options, - last_chunk_handling_options); - } +template <> +simdutf_really_inline uint64_t simd16x32::not_in_range( + const uint16_t low, const uint16_t high) const { + const simd16 mask_low = simd16::splat(low); + const simd16 mask_high = simd16::splat(high); + simd16x32 x(simd16((this->chunks[0] > mask_high) | + (this->chunks[0] < mask_low)), + simd16((this->chunks[1] > mask_high) | + (this->chunks[1] < mask_low)), + simd16((this->chunks[2] > mask_high) | + (this->chunks[2] < mask_low)), + simd16((this->chunks[3] > mask_high) | + (this->chunks[3] < mask_low))); + return x.to_bitmask(); +} +/* end file src/simdutf/lsx/simd16-inl.h */ +} // namespace simd +} // unnamed namespace +} // namespace lsx +} // namespace simdutf - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_handling_options = - last_chunk_handling_options::loose) const noexcept override { - return set_best()->base64_to_binary_details(input, length, output, options, - last_chunk_handling_options); - } +#endif // SIMDUTF_LSX_SIMD_H +/* end file src/simdutf/lsx/simd.h */ - simdutf_warn_unused size_t base64_length_from_binary( - size_t length, base64_options options) const noexcept override { - return set_best()->base64_length_from_binary(length, options); - } +/* begin file src/simdutf/lsx/end.h */ +/* end file src/simdutf/lsx/end.h */ - size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept override { - return set_best()->binary_to_base64(input, length, output, options); - } +#endif // SIMDUTF_IMPLEMENTATION_LSX - simdutf_really_inline - detect_best_supported_implementation_on_first_use() noexcept - : implementation("best_supported_detector", - "Detects the best supported implementation and sets it", - 0) {} +#endif // SIMDUTF_LSX_H +/* end file src/simdutf/lsx.h */ +/* begin file src/simdutf/lasx.h */ +#ifndef SIMDUTF_LASX_H +#define SIMDUTF_LASX_H -private: - const implementation *set_best() const noexcept; -}; +#ifdef SIMDUTF_FALLBACK_H + #error "lasx.h must be included before fallback.h" +#endif -static_assert(std::is_trivially_destructible< - detect_best_supported_implementation_on_first_use>::value, - "detect_best_supported_implementation_on_first_use should be " - "trivially destructible"); -static const std::initializer_list & -get_available_implementation_pointers() { - static const std::initializer_list - available_implementation_pointers{ -#if SIMDUTF_IMPLEMENTATION_ICELAKE - get_icelake_singleton(), -#endif -#if SIMDUTF_IMPLEMENTATION_HASWELL - get_haswell_singleton(), -#endif -#if SIMDUTF_IMPLEMENTATION_WESTMERE - get_westmere_singleton(), -#endif -#if SIMDUTF_IMPLEMENTATION_ARM64 - get_arm64_singleton(), +#ifndef SIMDUTF_IMPLEMENTATION_LASX + #define SIMDUTF_IMPLEMENTATION_LASX (SIMDUTF_IS_LASX) #endif -#if SIMDUTF_IMPLEMENTATION_PPC64 - get_ppc64_singleton(), -#endif -#if SIMDUTF_IMPLEMENTATION_RVV - get_rvv_singleton(), -#endif -#if SIMDUTF_IMPLEMENTATION_FALLBACK - get_fallback_singleton(), +#if SIMDUTF_IMPLEMENTATION_LASX && SIMDUTF_IS_LASX + #define SIMDUTF_CAN_ALWAYS_RUN_LASX 1 +#else + #define SIMDUTF_CAN_ALWAYS_RUN_LASX 0 #endif - }; // available_implementation_pointers - return available_implementation_pointers; -} -// So we can return UNSUPPORTED_ARCHITECTURE from the parser when there is no -// support -class unsupported_implementation final : public implementation { -public: - simdutf_warn_unused int detect_encodings(const char *, - size_t) const noexcept override { - return encoding_type::unspecified; - } +#define SIMDUTF_CAN_ALWAYS_RUN_FALLBACK (SIMDUTF_IMPLEMENTATION_FALLBACK) - simdutf_warn_unused bool validate_utf8(const char *, - size_t) const noexcept final override { - return false; // Just refuse to validate. Given that we have a fallback - // implementation - // it seems unlikely that unsupported_implementation will ever be used. If - // it is used, then it will flag all strings as invalid. The alternative is - // to return an error_code from which the user has to figure out whether the - // string is valid UTF-8... which seems like a lot of work just to handle - // the very unlikely case that we have an unsupported implementation. And, - // when it does happen (that we have an unsupported implementation), what - // are the chances that the programmer has a fallback? Given that *we* - // provide the fallback, it implies that the programmer would need a - // fallback for our fallback. - } +#if SIMDUTF_IMPLEMENTATION_LASX - simdutf_warn_unused result validate_utf8_with_errors( - const char *, size_t) const noexcept final override { - return result(error_code::OTHER, 0); - } +namespace simdutf { +/** + * Implementation for LoongArch ASX. + */ +namespace lasx {} // namespace lasx +} // namespace simdutf - simdutf_warn_unused bool - validate_ascii(const char *, size_t) const noexcept final override { - return false; - } +/* begin file src/simdutf/lasx/implementation.h */ +#ifndef SIMDUTF_LASX_IMPLEMENTATION_H +#define SIMDUTF_LASX_IMPLEMENTATION_H - simdutf_warn_unused result validate_ascii_with_errors( - const char *, size_t) const noexcept final override { - return result(error_code::OTHER, 0); - } - simdutf_warn_unused bool - validate_utf16le(const char16_t *, size_t) const noexcept final override { - return false; - } +namespace simdutf { +namespace lasx { - simdutf_warn_unused bool - validate_utf16be(const char16_t *, size_t) const noexcept final override { - return false; - } +namespace { +using namespace simdutf; +} +class implementation final : public simdutf::implementation { +public: + simdutf_really_inline implementation() + : simdutf::implementation("lasx", "LOONGARCH ASX", + internal::instruction_set::LSX | + internal::instruction_set::LASX) {} + simdutf_warn_unused int detect_encodings(const char *input, + size_t length) const noexcept final; + simdutf_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; + simdutf_warn_unused result + validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; + simdutf_warn_unused bool validate_ascii(const char *buf, + size_t len) const noexcept final; + simdutf_warn_unused result + validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; + simdutf_warn_unused bool validate_utf16le(const char16_t *buf, + size_t len) const noexcept final; + simdutf_warn_unused bool validate_utf16be(const char16_t *buf, + size_t len) const noexcept final; simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *, size_t) const noexcept final override { - return result(error_code::OTHER, 0); - } - + const char16_t *buf, size_t len) const noexcept final; simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *, size_t) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused bool - validate_utf32(const char32_t *, size_t) const noexcept final override { - return false; - } - + const char16_t *buf, size_t len) const noexcept final; + simdutf_warn_unused bool validate_utf32(const char32_t *buf, + size_t len) const noexcept final; simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *, size_t) const noexcept final override { - return result(error_code::OTHER, 0); - } - + const char32_t *buf, size_t len) const noexcept final; simdutf_warn_unused size_t convert_latin1_to_utf8( - const char *, size_t, char *) const noexcept final override { - return 0; - } - + const char *buf, size_t len, char *utf8_output) const noexcept final; simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *, size_t, char16_t *) const noexcept final override { - return 0; - } - + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *, size_t, char16_t *) const noexcept final override { - return 0; - } - + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *, size_t, char32_t *) const noexcept final override { - return 0; - } - + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *, size_t, char *) const noexcept final override { - return 0; - } - + const char *buf, size_t len, char *latin1_output) const noexcept final; simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *, size_t, char *) const noexcept final override { - return result(error_code::OTHER, 0); - } - + const char *buf, size_t len, char *latin1_buffer) const noexcept final; simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *, size_t, char *) const noexcept final override { - return 0; - } - + const char *buf, size_t len, char *latin1_output) const noexcept final; simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *, size_t, char16_t *) const noexcept final override { - return 0; - } - + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *, size_t, char16_t *) const noexcept final override { - return 0; - } - + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *, size_t, char16_t *) const noexcept final override { - return result(error_code::OTHER, 0); - } - + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *, size_t, char16_t *) const noexcept final override { - return result(error_code::OTHER, 0); - } - + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *, size_t, char16_t *) const noexcept final override { - return 0; - } - + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *, size_t, char16_t *) const noexcept final override { - return 0; - } - + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *, size_t, char32_t *) const noexcept final override { - return 0; - } - + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *, size_t, char32_t *) const noexcept final override { - return result(error_code::OTHER, 0); - } - + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *, size_t, char32_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf16le_to_latin1( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf16be_to_latin1( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - + const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *, size_t, char *) const noexcept final override { - return result(error_code::OTHER, 0); - } - + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *, size_t, char *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; simdutf_warn_unused size_t convert_utf16le_to_utf8( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; simdutf_warn_unused size_t convert_utf16be_to_utf8( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *, size_t, char *) const noexcept final override { - return result(error_code::OTHER, 0); - } - + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *, size_t, char *) const noexcept final override { - return result(error_code::OTHER, 0); - } - + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf32_to_latin1( - const char32_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused result convert_utf32_to_latin1_with_errors( - const char32_t *, size_t, char *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused size_t convert_valid_utf32_to_latin1( - const char32_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf32_to_utf8( - const char32_t *, size_t, char *) const noexcept final override { - return 0; - } - + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused result + convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused size_t convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *, size_t, char *) const noexcept final override { - return result(error_code::OTHER, 0); - } - + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *, size_t, char *) const noexcept final override { - return 0; - } + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + void change_endianness_utf16(const char16_t *buf, size_t length, + char16_t *output) const noexcept final; + simdutf_warn_unused size_t count_utf16le(const char16_t *buf, + size_t length) const noexcept; + simdutf_warn_unused size_t count_utf16be(const char16_t *buf, + size_t length) const noexcept; + simdutf_warn_unused size_t count_utf8(const char *buf, + size_t length) const noexcept; + simdutf_warn_unused size_t + utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t + utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t + utf16_length_from_utf8(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t + utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t + utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t + utf32_length_from_utf8(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t + latin1_length_from_utf8(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t + latin1_length_from_utf16(size_t length) const noexcept; + simdutf_warn_unused size_t + latin1_length_from_utf32(size_t length) const noexcept; + simdutf_warn_unused size_t + utf32_length_from_latin1(size_t length) const noexcept; + simdutf_warn_unused size_t + utf16_length_from_latin1(size_t length) const noexcept; + simdutf_warn_unused size_t + utf8_length_from_latin1(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t maximal_binary_length_from_base64( + const char *input, size_t length) const noexcept; + simdutf_warn_unused result + base64_to_binary(const char *input, size_t length, char *output, + base64_options options) const noexcept; + simdutf_warn_unused size_t maximal_binary_length_from_base64( + const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused result + base64_to_binary(const char16_t *input, size_t length, char *output, + base64_options options) const noexcept; + simdutf_warn_unused size_t base64_length_from_binary( + size_t length, base64_options options) const noexcept; + size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) const noexcept; - simdutf_warn_unused size_t convert_utf32_to_utf16le( - const char32_t *, size_t, char16_t *) const noexcept final override { - return 0; - } + simdutf_warn_unused virtual result + base64_to_binary(const char *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused virtual full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused virtual result + base64_to_binary(const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused virtual full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; +}; - simdutf_warn_unused size_t convert_utf32_to_utf16be( - const char32_t *, size_t, char16_t *) const noexcept final override { - return 0; - } +} // namespace lasx +} // namespace simdutf - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *, size_t, char16_t *) const noexcept final override { - return result(error_code::OTHER, 0); - } +#endif // SIMDUTF_LASX_IMPLEMENTATION_H +/* end file src/simdutf/lasx/implementation.h */ - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *, size_t, char16_t *) const noexcept final override { - return result(error_code::OTHER, 0); - } +/* begin file src/simdutf/lasx/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "lasx" +// #define SIMDUTF_IMPLEMENTATION lasx +/* end file src/simdutf/lasx/begin.h */ - simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( - const char32_t *, size_t, char16_t *) const noexcept final override { - return 0; - } + // Declarations +/* begin file src/simdutf/lasx/intrinsics.h */ +#ifndef SIMDUTF_LASX_INTRINSICS_H +#define SIMDUTF_LASX_INTRINSICS_H - simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( - const char32_t *, size_t, char16_t *) const noexcept final override { - return 0; - } - simdutf_warn_unused size_t convert_utf16le_to_utf32( - const char16_t *, size_t, char32_t *) const noexcept final override { - return 0; - } +// This should be the correct header whether +// you use visual studio or other compilers. +#include +#include + +#if defined(__loongarch_asx) + #ifdef __clang__ + #define VREGS_PREFIX "$vr" + #define XREGS_PREFIX "$xr" + #else // GCC + #define VREGS_PREFIX "$f" + #define XREGS_PREFIX "$f" + #endif + #define __ALL_REGS \ + "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26," \ + "27,28,29,30,31" +// Convert __m128i to __m256i +static inline __m256i ____m256i(__m128i in) { + __m256i out = __lasx_xvldi(0); + __asm__ volatile(".irp i," __ALL_REGS "\n\t" + " .ifc %[out], " XREGS_PREFIX "\\i \n\t" + " .irp j," __ALL_REGS "\n\t" + " .ifc %[in], " VREGS_PREFIX "\\j \n\t" + " xvpermi.q $xr\\i, $xr\\j, 0x0 \n\t" + " .endif \n\t" + " .endr \n\t" + " .endif \n\t" + ".endr \n\t" + : [out] "+f"(out) + : [in] "f"(in)); + return out; +} +// Convert two __m128i to __m256i +static inline __m256i lasx_set_q(__m128i inhi, __m128i inlo) { + __m256i out; + __asm__ volatile(".irp i," __ALL_REGS "\n\t" + " .ifc %[hi], " VREGS_PREFIX "\\i \n\t" + " .irp j," __ALL_REGS "\n\t" + " .ifc %[lo], " VREGS_PREFIX "\\j \n\t" + " xvpermi.q $xr\\i, $xr\\j, 0x20 \n\t" + " .endif \n\t" + " .endr \n\t" + " .endif \n\t" + ".endr \n\t" + ".ifnc %[out], %[hi] \n\t" + ".irp i," __ALL_REGS "\n\t" + " .ifc %[out], " XREGS_PREFIX "\\i \n\t" + " .irp j," __ALL_REGS "\n\t" + " .ifc %[hi], " VREGS_PREFIX "\\j \n\t" + " xvori.b $xr\\i, $xr\\j, 0 \n\t" + " .endif \n\t" + " .endr \n\t" + " .endif \n\t" + ".endr \n\t" + ".endif \n\t" + : [out] "=f"(out), [hi] "+f"(inhi) + : [lo] "f"(inlo)); + return out; +} +// Convert __m256i low part to __m128i +static inline __m128i lasx_extracti128_lo(__m256i in) { + __m128i out; + __asm__ volatile(".ifnc %[out], %[in] \n\t" + ".irp i," __ALL_REGS "\n\t" + " .ifc %[out], " VREGS_PREFIX "\\i \n\t" + " .irp j," __ALL_REGS "\n\t" + " .ifc %[in], " XREGS_PREFIX "\\j \n\t" + " vori.b $vr\\i, $vr\\j, 0 \n\t" + " .endif \n\t" + " .endr \n\t" + " .endif \n\t" + ".endr \n\t" + ".endif \n\t" + : [out] "=f"(out) + : [in] "f"(in)); + return out; +} +// Convert __m256i high part to __m128i +static inline __m128i lasx_extracti128_hi(__m256i in) { + __m128i out; + __asm__ volatile(".irp i," __ALL_REGS "\n\t" + " .ifc %[out], " VREGS_PREFIX "\\i \n\t" + " .irp j," __ALL_REGS "\n\t" + " .ifc %[in], " XREGS_PREFIX "\\j \n\t" + " xvpermi.q $xr\\i, $xr\\j, 0x11 \n\t" + " .endif \n\t" + " .endr \n\t" + " .endif \n\t" + ".endr \n\t" + : [out] "=f"(out) + : [in] "f"(in)); + return out; +} +#endif - simdutf_warn_unused size_t convert_utf16be_to_utf32( - const char16_t *, size_t, char32_t *) const noexcept final override { - return 0; - } +#endif // SIMDUTF_LASX_INTRINSICS_H +/* end file src/simdutf/lasx/intrinsics.h */ +/* begin file src/simdutf/lasx/bitmanipulation.h */ +#ifndef SIMDUTF_LASX_BITMANIPULATION_H +#define SIMDUTF_LASX_BITMANIPULATION_H - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *, size_t, char32_t *) const noexcept final override { - return result(error_code::OTHER, 0); - } +#include - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *, size_t, char32_t *) const noexcept final override { - return result(error_code::OTHER, 0); - } +namespace simdutf { +namespace lasx { +namespace { - simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( - const char16_t *, size_t, char32_t *) const noexcept final override { - return 0; - } +simdutf_really_inline int count_ones(uint64_t input_num) { + return __lsx_vpickve2gr_w(__lsx_vpcnt_d(__lsx_vreplgr2vr_d(input_num)), 0); +} - simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( - const char16_t *, size_t, char32_t *) const noexcept final override { - return 0; - } +#if SIMDUTF_NEED_TRAILING_ZEROES +simdutf_really_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); +} +#endif - void change_endianness_utf16(const char16_t *, size_t, - char16_t *) const noexcept final override {} +} // unnamed namespace +} // namespace lasx +} // namespace simdutf - simdutf_warn_unused size_t - count_utf16le(const char16_t *, size_t) const noexcept final override { - return 0; - } +#endif // SIMDUTF_LASX_BITMANIPULATION_H +/* end file src/simdutf/lasx/bitmanipulation.h */ +/* begin file src/simdutf/lasx/simd.h */ +#ifndef SIMDUTF_LASX_SIMD_H +#define SIMDUTF_LASX_SIMD_H - simdutf_warn_unused size_t - count_utf16be(const char16_t *, size_t) const noexcept final override { - return 0; - } +#include - simdutf_warn_unused size_t count_utf8(const char *, - size_t) const noexcept final override { - return 0; - } +namespace simdutf { +namespace lasx { +namespace { +namespace simd { - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *, size_t) const noexcept override { - return 0; - } +__attribute__((aligned(32))) static const uint8_t prev_shuf_table[32][32] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + {0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}, + {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, + {0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, + {0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, + 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, + 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, + 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0}, + {15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, + 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, + 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0}, + {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, + 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0}, + {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0}, + {5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0}, + {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0}, + {3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0}, + {2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, +}; - simdutf_warn_unused size_t - latin1_length_from_utf16(size_t) const noexcept override { - return 0; +__attribute__((aligned(32))) static const uint8_t bitsel_mask_table[32][32] = { + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0}}; + +// Forward-declared so they can be used by splat and friends. +template struct base { + __m256i value; + + // Zero constructor + simdutf_really_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdutf_really_inline base(const __m256i _value) : value(_value) {} + // Conversion to SIMD register + simdutf_really_inline operator const __m256i &() const { return this->value; } + simdutf_really_inline operator __m256i &() { return this->value; } + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + if (big_endian) { + __m256i zero = __lasx_xvldi(0); + __m256i in8 = __lasx_xvpermi_d(this->value, 0b11011000); + __m256i inlow = __lasx_xvilvl_b(in8, zero); + __m256i inhigh = __lasx_xvilvh_b(in8, zero); + __lasx_xvst(inlow, reinterpret_cast(ptr), 0); + __lasx_xvst(inhigh, reinterpret_cast(ptr), 32); + } else { + __m256i inlow = __lasx_vext2xv_hu_bu(this->value); + __m256i inhigh = __lasx_vext2xv_hu_bu( + __lasx_xvpermi_q(this->value, this->value, 0b00000001)); + __lasx_xvst(inlow, reinterpret_cast<__m256i *>(ptr), 0); + __lasx_xvst(inhigh, reinterpret_cast<__m256i *>(ptr), 32); + } } + simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { + __m256i in32_0 = __lasx_vext2xv_wu_bu(this->value); + __lasx_xvst(in32_0, reinterpret_cast(ptr), 0); - simdutf_warn_unused size_t - latin1_length_from_utf32(size_t) const noexcept override { - return 0; + __m256i in8_1 = __lasx_xvpermi_d(this->value, 0b00000001); + __m256i in32_1 = __lasx_vext2xv_wu_bu(in8_1); + __lasx_xvst(in32_1, reinterpret_cast(ptr), 32); + + __m256i in8_2 = __lasx_xvpermi_d(this->value, 0b00000010); + __m256i in32_2 = __lasx_vext2xv_wu_bu(in8_2); + __lasx_xvst(in32_2, reinterpret_cast(ptr), 64); + + __m256i in8_3 = __lasx_xvpermi_d(this->value, 0b00000011); + __m256i in32_3 = __lasx_vext2xv_wu_bu(in8_3); + __lasx_xvst(in32_3, reinterpret_cast(ptr), 96); } - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *, size_t) const noexcept override { - return 0; + // Bit operations + simdutf_really_inline Child operator|(const Child other) const { + return __lasx_xvor_v(this->value, other); } - - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *, size_t) const noexcept override { - return 0; + simdutf_really_inline Child operator&(const Child other) const { + return __lasx_xvand_v(this->value, other); } - - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *, size_t) const noexcept override { - return 0; + simdutf_really_inline Child operator^(const Child other) const { + return __lasx_xvxor_v(this->value, other); } - - simdutf_warn_unused size_t - utf32_length_from_utf16le(const char16_t *, size_t) const noexcept override { - return 0; + simdutf_really_inline Child bit_andnot(const Child other) const { + return __lasx_xvandn_v(this->value, other); } - - simdutf_warn_unused size_t - utf32_length_from_utf16be(const char16_t *, size_t) const noexcept override { - return 0; + simdutf_really_inline Child &operator|=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast | other; + return *this_cast; } - - simdutf_warn_unused size_t - utf32_length_from_latin1(size_t) const noexcept override { - return 0; + simdutf_really_inline Child &operator&=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast & other; + return *this_cast; } + simdutf_really_inline Child &operator^=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } +}; - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *, size_t) const noexcept override { - return 0; +template struct simd8; + +template > +struct base8 : base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdutf_really_inline base8() : base>() {} + simdutf_really_inline base8(const __m256i _value) : base>(_value) {} + simdutf_really_inline T first() const { + return __lasx_xvpickve2gr_wu(this->value, 0); } - simdutf_warn_unused size_t - utf16_length_from_latin1(size_t) const noexcept override { - return 0; + simdutf_really_inline T last() const { + return __lasx_xvpickve2gr_wu(this->value, 7); } - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *, size_t) const noexcept override { - return 0; + friend simdutf_really_inline Mask operator==(const simd8 lhs, + const simd8 rhs) { + return __lasx_xvseq_b(lhs, rhs); } - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *, size_t) const noexcept override { - return 0; - } + static const int SIZE = sizeof(base::value); - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *, size_t) const noexcept override { - return 0; + template + simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { + if (!N) + return this->value; + + __m256i zero = __lasx_xvldi(0); + __m256i result, shuf; + if (N < 16) { + shuf = __lasx_xvld(prev_shuf_table[N], 0); + + result = __lasx_xvshuf_b( + __lasx_xvpermi_q(this->value, this->value, 0b00000001), this->value, + shuf); + __m256i srl_prev = __lasx_xvbsrl_v( + __lasx_xvpermi_q(zero, prev_chunk.value, 0b00110001), (16 - N)); + __m256i mask = __lasx_xvld(bitsel_mask_table[N], 0); + result = __lasx_xvbitsel_v(result, srl_prev, mask); + + return result; + } else if (N == 16) { + return __lasx_xvpermi_q(this->value, prev_chunk.value, 0b00100001); + } /*else { + __m256i sll_value = __lasx_xvbsll_v( + __lasx_xvpermi_q(zero, this->value, 0b00000011), (N - 16) % 32); + __m256i mask = __lasx_xvld(bitsel_mask_table[N], 0); + shuf = __lasx_xvld(prev_shuf_table[N], 0); + result = __lasx_xvshuf_b( + __lasx_xvpermi_q(prev_chunk.value, prev_chunk.value, 0b00000001), + prev_chunk.value, shuf); + result = __lasx_xvbitsel_v(sll_value, result, mask); + return result; + }*/ } +}; - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char *, size_t) const noexcept override { - return 0; +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + static simdutf_really_inline simd8 splat(bool _value) { + return __lasx_xvreplgr2vr_b(uint8_t(-(!!_value))); } - simdutf_warn_unused result - base64_to_binary(const char *, size_t, char *, base64_options, - last_chunk_handling_options) const noexcept override { - return result(error_code::OTHER, 0); - } + simdutf_really_inline simd8() : base8() {} + simdutf_really_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdutf_really_inline simd8(bool _value) : base8(splat(_value)) {} - simdutf_warn_unused full_result base64_to_binary_details( - const char *, size_t, char *, base64_options, - last_chunk_handling_options) const noexcept override { - return full_result(error_code::OTHER, 0, 0); + simdutf_really_inline uint32_t to_bitmask() const { + __m256i mask = __lasx_xvmsknz_b(this->value); + uint32_t mask0 = __lasx_xvpickve2gr_wu(mask, 0); + uint32_t mask1 = __lasx_xvpickve2gr_wu(mask, 4); + return (mask0 | (mask1 << 16)); } - - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char16_t *, size_t) const noexcept override { - return 0; + simdutf_really_inline bool any() const { + if (__lasx_xbz_b(this->value)) + return false; + return true; } - - simdutf_warn_unused result - base64_to_binary(const char16_t *, size_t, char *, base64_options, - last_chunk_handling_options) const noexcept override { - return result(error_code::OTHER, 0); + simdutf_really_inline bool none() const { + if (__lasx_xbz_b(this->value)) + return true; + return false; } - - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *, size_t, char *, base64_options, - last_chunk_handling_options) const noexcept override { - return full_result(error_code::OTHER, 0, 0); + simdutf_really_inline bool all() const { + if (__lasx_xbnz_b(this->value)) + return true; + return false; } + simdutf_really_inline simd8 operator~() const { return *this ^ true; } +}; - simdutf_warn_unused size_t - base64_length_from_binary(size_t, base64_options) const noexcept override { - return 0; +template struct base8_numeric : base8 { + static simdutf_really_inline simd8 splat(T _value) { + return __lasx_xvreplgr2vr_b(_value); } - - size_t binary_to_base64(const char *, size_t, char *, - base64_options) const noexcept override { - return 0; + static simdutf_really_inline simd8 zero() { return __lasx_xvldi(0); } + static simdutf_really_inline simd8 load(const T values[32]) { + return __lasx_xvld(reinterpret_cast(values), 0); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdutf_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15); } - unsupported_implementation() - : implementation("unsupported", - "Unsupported CPU (no detected SIMD instructions)", 0) {} -}; + simdutf_really_inline base8_numeric() : base8() {} + simdutf_really_inline base8_numeric(const __m256i _value) + : base8(_value) {} -const unsupported_implementation *get_unsupported_singleton() { - static const unsupported_implementation unsupported_singleton{}; - return &unsupported_singleton; -} -static_assert(std::is_trivially_destructible::value, - "unsupported_singleton should be trivially destructible"); + // Store to array + simdutf_really_inline void store(T dst[32]) const { + return __lasx_xvst(this->value, reinterpret_cast<__m256i *>(dst), 0); + } -size_t available_implementation_list::size() const noexcept { - return internal::get_available_implementation_pointers().size(); -} -const implementation *const * -available_implementation_list::begin() const noexcept { - return internal::get_available_implementation_pointers().begin(); -} -const implementation *const * -available_implementation_list::end() const noexcept { - return internal::get_available_implementation_pointers().end(); -} -const implementation * -available_implementation_list::detect_best_supported() const noexcept { - // They are prelisted in priority order, so we just go down the list - uint32_t supported_instruction_sets = - internal::detect_supported_architectures(); - for (const implementation *impl : - internal::get_available_implementation_pointers()) { - uint32_t required_instruction_sets = impl->required_instruction_sets(); - if ((supported_instruction_sets & required_instruction_sets) == - required_instruction_sets) { - return impl; - } + // Addition/subtraction are the same for signed and unsigned + simdutf_really_inline simd8 operator+(const simd8 other) const { + return __lasx_xvadd_b(this->value, other); + } + simdutf_really_inline simd8 operator-(const simd8 other) const { + return __lasx_xvsub_b(this->value, other); + } + simdutf_really_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdutf_really_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *static_cast *>(this); } - return get_unsupported_singleton(); // this should never happen? -} -const implementation * -detect_best_supported_implementation_on_first_use::set_best() const noexcept { - SIMDUTF_PUSH_DISABLE_WARNINGS - SIMDUTF_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: - // manually verified this is safe - char *force_implementation_name = getenv("SIMDUTF_FORCE_IMPLEMENTATION"); - SIMDUTF_POP_DISABLE_WARNINGS + // Override to distinguish from bool version + simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } - if (force_implementation_name) { - auto force_implementation = - get_available_implementations()[force_implementation_name]; - if (force_implementation) { - return get_active_implementation() = force_implementation; - } else { - // Note: abort() and stderr usage within the library is forbidden. - return get_active_implementation() = get_unsupported_singleton(); - } + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { + __m256i origin = __lasx_xvand_v(this->value, __lasx_xvldi(0x1f)); + return __lasx_xvshuf_b(__lasx_xvldi(0), lookup_table, origin); } - return get_active_implementation() = - get_available_implementations().detect_best_supported(); -} -} // namespace internal + template + simdutf_really_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; -/** - * The list of available implementations compiled into simdutf. - */ -SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list & -get_available_implementations() { - static const internal::available_implementation_list - available_implementations{}; - return available_implementations; -} +// Signed bytes +template <> struct simd8 : base8_numeric { + simdutf_really_inline simd8() : base8_numeric() {} + simdutf_really_inline simd8(const __m256i _value) + : base8_numeric(_value) {} -/** - * The active implementation. - */ -SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr & -get_active_implementation() { -#if SIMDUTF_SINGLE_IMPLEMENTATION - // skip runtime detection - static internal::atomic_ptr active_implementation{ - internal::get_single_implementation()}; - return active_implementation; -#else - static const internal::detect_best_supported_implementation_on_first_use - detect_best_supported_implementation_on_first_use_singleton; - static internal::atomic_ptr active_implementation{ - &detect_best_supported_implementation_on_first_use_singleton}; - return active_implementation; -#endif -} + // Splat constructor + simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + simdutf_really_inline operator simd8() const; + // Member-by-member initialization + simdutf_really_inline + simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15, int8_t v16, int8_t v17, + int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, + int8_t v30, int8_t v31) + : simd8((__m256i)v32i8{v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdutf_really_inline static simd8 + repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15); + } + simdutf_really_inline bool is_ascii() const { + __m256i ascii_mask = __lasx_xvslti_b(this->value, 0); + if (__lasx_xbnz_v(ascii_mask)) + return false; + return true; + } + // Order-sensitive comparisons + simdutf_really_inline simd8 max_val(const simd8 other) const { + return __lasx_xvmax_b(this->value, other); + } + simdutf_really_inline simd8 min_val(const simd8 other) const { + return __lasx_xvmin_b(this->value, other); + } + simdutf_really_inline simd8 operator>(const simd8 other) const { + return __lasx_xvslt_b(other, this->value); + } + simdutf_really_inline simd8 operator<(const simd8 other) const { + return __lasx_xvslt_b(this->value, other); + } +}; -#if SIMDUTF_SINGLE_IMPLEMENTATION -const implementation *get_default_implementation() { - return internal::get_single_implementation(); -} -#else -internal::atomic_ptr &get_default_implementation() { - return get_active_implementation(); -} -#endif -#define SIMDUTF_GET_CURRENT_IMPLEMENTION +// Unsigned bytes +template <> struct simd8 : base8_numeric { + simdutf_really_inline simd8() : base8_numeric() {} + simdutf_really_inline simd8(const __m256i _value) + : base8_numeric(_value) {} + // Splat constructor + simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdutf_really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdutf_really_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, + uint8_t v21, uint8_t v22, uint8_t v23, uint8_t v24, uint8_t v25, + uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, + uint8_t v31) + : simd8((__m256i)v32u8{v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdutf_really_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15); + } -simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept { - return get_default_implementation()->validate_utf8(buf, len); -} -simdutf_warn_unused result validate_utf8_with_errors(const char *buf, - size_t len) noexcept { - return get_default_implementation()->validate_utf8_with_errors(buf, len); -} -simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept { - return get_default_implementation()->validate_ascii(buf, len); -} -simdutf_warn_unused result validate_ascii_with_errors(const char *buf, - size_t len) noexcept { - return get_default_implementation()->validate_ascii_with_errors(buf, len); -} -simdutf_warn_unused size_t convert_utf8_to_utf16( - const char *input, size_t length, char16_t *utf16_output) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_utf8_to_utf16be(input, length, utf16_output); -#else - return convert_utf8_to_utf16le(input, length, utf16_output); -#endif -} -simdutf_warn_unused size_t convert_latin1_to_utf8(const char *buf, size_t len, - char *utf8_output) noexcept { - return get_default_implementation()->convert_latin1_to_utf8(buf, len, - utf8_output); -} -simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) noexcept { - return get_default_implementation()->convert_latin1_to_utf16le(buf, len, - utf16_output); -} -simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) noexcept { - return get_default_implementation()->convert_latin1_to_utf16be(buf, len, - utf16_output); -} -simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *latin1_output) noexcept { - return get_default_implementation()->convert_latin1_to_utf32(buf, len, - latin1_output); -} -simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) noexcept { - return get_default_implementation()->convert_utf8_to_latin1(buf, len, - latin1_output); -} -simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_output) noexcept { - return get_default_implementation()->convert_utf8_to_latin1_with_errors( - buf, len, latin1_output); -} -simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) noexcept { - return get_default_implementation()->convert_valid_utf8_to_latin1( - buf, len, latin1_output); -} -simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *input, size_t length, char16_t *utf16_output) noexcept { - return get_default_implementation()->convert_utf8_to_utf16le(input, length, - utf16_output); -} -simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *input, size_t length, char16_t *utf16_output) noexcept { - return get_default_implementation()->convert_utf8_to_utf16be(input, length, - utf16_output); -} -simdutf_warn_unused result convert_utf8_to_utf16_with_errors( - const char *input, size_t length, char16_t *utf16_output) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_utf8_to_utf16be_with_errors(input, length, utf16_output); -#else - return convert_utf8_to_utf16le_with_errors(input, length, utf16_output); -#endif -} -simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *input, size_t length, char16_t *utf16_output) noexcept { - return get_default_implementation()->convert_utf8_to_utf16le_with_errors( - input, length, utf16_output); -} -simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *input, size_t length, char16_t *utf16_output) noexcept { - return get_default_implementation()->convert_utf8_to_utf16be_with_errors( - input, length, utf16_output); -} -simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *input, size_t length, char32_t *utf32_output) noexcept { - return get_default_implementation()->convert_utf8_to_utf32(input, length, - utf32_output); -} -simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *input, size_t length, char32_t *utf32_output) noexcept { - return get_default_implementation()->convert_utf8_to_utf32_with_errors( - input, length, utf32_output); -} -simdutf_warn_unused bool validate_utf16(const char16_t *buf, - size_t len) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return validate_utf16be(buf, len); -#else - return validate_utf16le(buf, len); -#endif -} -simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) noexcept { - return get_default_implementation()->validate_utf16le(buf, len); -} -simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) noexcept { - return get_default_implementation()->validate_utf16be(buf, len); -} -simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf, - size_t len) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return validate_utf16be_with_errors(buf, len); -#else - return validate_utf16le_with_errors(buf, len); -#endif -} -simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, - size_t len) noexcept { - return get_default_implementation()->validate_utf16le_with_errors(buf, len); -} -simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, - size_t len) noexcept { - return get_default_implementation()->validate_utf16be_with_errors(buf, len); -} -simdutf_warn_unused bool validate_utf32(const char32_t *buf, - size_t len) noexcept { - return get_default_implementation()->validate_utf32(buf, len); -} -simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, - size_t len) noexcept { - return get_default_implementation()->validate_utf32_with_errors(buf, len); -} -simdutf_warn_unused size_t convert_valid_utf8_to_utf16( - const char *input, size_t length, char16_t *utf16_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_valid_utf8_to_utf16be(input, length, utf16_buffer); -#else - return convert_valid_utf8_to_utf16le(input, length, utf16_buffer); -#endif -} -simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *input, size_t length, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_valid_utf8_to_utf16le( - input, length, utf16_buffer); -} -simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *input, size_t length, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_valid_utf8_to_utf16be( - input, length, utf16_buffer); -} -simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *input, size_t length, char32_t *utf32_buffer) noexcept { - return get_default_implementation()->convert_valid_utf8_to_utf32( - input, length, utf32_buffer); -} -simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t *buf, - size_t len, - char *utf8_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_utf16be_to_utf8(buf, len, utf8_buffer); -#else - return convert_utf16le_to_utf8(buf, len, utf8_buffer); -#endif -} -simdutf_warn_unused size_t convert_utf16_to_latin1( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_utf16be_to_latin1(buf, len, latin1_buffer); -#else - return convert_utf16le_to_latin1(buf, len, latin1_buffer); -#endif -} -simdutf_warn_unused size_t convert_latin1_to_utf16( - const char *buf, size_t len, char16_t *utf16_output) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_latin1_to_utf16be(buf, len, utf16_output); -#else - return convert_latin1_to_utf16le(buf, len, utf16_output); -#endif -} -simdutf_warn_unused size_t convert_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { - return get_default_implementation()->convert_utf16be_to_latin1(buf, len, - latin1_buffer); -} -simdutf_warn_unused size_t convert_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { - return get_default_implementation()->convert_utf16le_to_latin1(buf, len, - latin1_buffer); -} -simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { - return get_default_implementation()->convert_valid_utf16be_to_latin1( - buf, len, latin1_buffer); -} -simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { - return get_default_implementation()->convert_valid_utf16le_to_latin1( - buf, len, latin1_buffer); -} -simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { - return get_default_implementation()->convert_utf16le_to_latin1_with_errors( - buf, len, latin1_buffer); -} -simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { - return get_default_implementation()->convert_utf16be_to_latin1_with_errors( - buf, len, latin1_buffer); -} -simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *buf, - size_t len, - char *utf8_buffer) noexcept { - return get_default_implementation()->convert_utf16le_to_utf8(buf, len, - utf8_buffer); -} -simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *buf, - size_t len, - char *utf8_buffer) noexcept { - return get_default_implementation()->convert_utf16be_to_utf8(buf, len, - utf8_buffer); -} -simdutf_warn_unused result convert_utf16_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_utf16be_to_utf8_with_errors(buf, len, utf8_buffer); -#else - return convert_utf16le_to_utf8_with_errors(buf, len, utf8_buffer); -#endif -} -simdutf_warn_unused result convert_utf16_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_utf16be_to_latin1_with_errors(buf, len, latin1_buffer); -#else - return convert_utf16le_to_latin1_with_errors(buf, len, latin1_buffer); -#endif -} -simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) noexcept { - return get_default_implementation()->convert_utf16le_to_utf8_with_errors( - buf, len, utf8_buffer); -} -simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) noexcept { - return get_default_implementation()->convert_utf16be_to_utf8_with_errors( - buf, len, utf8_buffer); -} -simdutf_warn_unused size_t convert_valid_utf16_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_valid_utf16be_to_utf8(buf, len, utf8_buffer); -#else - return convert_valid_utf16le_to_utf8(buf, len, utf8_buffer); -#endif -} -simdutf_warn_unused size_t convert_valid_utf16_to_latin1( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_valid_utf16be_to_latin1(buf, len, latin1_buffer); -#else - return convert_valid_utf16le_to_latin1(buf, len, latin1_buffer); -#endif -} -simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) noexcept { - return get_default_implementation()->convert_valid_utf16le_to_utf8( - buf, len, utf8_buffer); -} -simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) noexcept { - return get_default_implementation()->convert_valid_utf16be_to_utf8( - buf, len, utf8_buffer); -} -simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *buf, - size_t len, - char *utf8_buffer) noexcept { - return get_default_implementation()->convert_utf32_to_utf8(buf, len, - utf8_buffer); -} -simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_buffer) noexcept { - return get_default_implementation()->convert_utf32_to_utf8_with_errors( - buf, len, utf8_buffer); -} -simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) noexcept { - return get_default_implementation()->convert_valid_utf32_to_utf8(buf, len, - utf8_buffer); -} -simdutf_warn_unused size_t convert_utf32_to_utf16( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_utf32_to_utf16be(buf, len, utf16_buffer); -#else - return convert_utf32_to_utf16le(buf, len, utf16_buffer); -#endif -} -simdutf_warn_unused size_t convert_utf32_to_latin1( - const char32_t *input, size_t length, char *latin1_output) noexcept { - return get_default_implementation()->convert_utf32_to_latin1(input, length, - latin1_output); -} -simdutf_warn_unused size_t convert_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_utf32_to_utf16le(buf, len, - utf16_buffer); -} -simdutf_warn_unused size_t convert_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_utf32_to_utf16be(buf, len, - utf16_buffer); -} -simdutf_warn_unused result convert_utf32_to_utf16_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_utf32_to_utf16be_with_errors(buf, len, utf16_buffer); -#else - return convert_utf32_to_utf16le_with_errors(buf, len, utf16_buffer); -#endif -} -simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_utf32_to_utf16le_with_errors( - buf, len, utf16_buffer); -} -simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_utf32_to_utf16be_with_errors( - buf, len, utf16_buffer); -} -simdutf_warn_unused size_t convert_valid_utf32_to_utf16( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_valid_utf32_to_utf16be(buf, len, utf16_buffer); -#else - return convert_valid_utf32_to_utf16le(buf, len, utf16_buffer); -#endif -} -simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_valid_utf32_to_utf16le( - buf, len, utf16_buffer); -} -simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_valid_utf32_to_utf16be( - buf, len, utf16_buffer); -} -simdutf_warn_unused size_t convert_utf16_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_utf16be_to_utf32(buf, len, utf32_buffer); -#else - return convert_utf16le_to_utf32(buf, len, utf32_buffer); -#endif -} -simdutf_warn_unused size_t convert_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { - return get_default_implementation()->convert_utf16le_to_utf32(buf, len, - utf32_buffer); -} -simdutf_warn_unused size_t convert_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { - return get_default_implementation()->convert_utf16be_to_utf32(buf, len, - utf32_buffer); -} -simdutf_warn_unused result convert_utf16_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_utf16be_to_utf32_with_errors(buf, len, utf32_buffer); -#else - return convert_utf16le_to_utf32_with_errors(buf, len, utf32_buffer); -#endif -} -simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { - return get_default_implementation()->convert_utf16le_to_utf32_with_errors( - buf, len, utf32_buffer); -} -simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { - return get_default_implementation()->convert_utf16be_to_utf32_with_errors( - buf, len, utf32_buffer); -} -simdutf_warn_unused size_t convert_valid_utf16_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return convert_valid_utf16be_to_utf32(buf, len, utf32_buffer); -#else - return convert_valid_utf16le_to_utf32(buf, len, utf32_buffer); -#endif -} -simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { - return get_default_implementation()->convert_valid_utf16le_to_utf32( - buf, len, utf32_buffer); -} -simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { - return get_default_implementation()->convert_valid_utf16be_to_utf32( - buf, len, utf32_buffer); -} -void change_endianness_utf16(const char16_t *input, size_t length, - char16_t *output) noexcept { - get_default_implementation()->change_endianness_utf16(input, length, output); -} -simdutf_warn_unused size_t count_utf16(const char16_t *input, - size_t length) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return count_utf16be(input, length); -#else - return count_utf16le(input, length); -#endif -} -simdutf_warn_unused size_t count_utf16le(const char16_t *input, - size_t length) noexcept { - return get_default_implementation()->count_utf16le(input, length); -} -simdutf_warn_unused size_t count_utf16be(const char16_t *input, - size_t length) noexcept { - return get_default_implementation()->count_utf16be(input, length); -} -simdutf_warn_unused size_t count_utf8(const char *input, - size_t length) noexcept { - return get_default_implementation()->count_utf8(input, length); -} -simdutf_warn_unused size_t latin1_length_from_utf8(const char *buf, - size_t len) noexcept { - return get_default_implementation()->latin1_length_from_utf8(buf, len); -} -simdutf_warn_unused size_t latin1_length_from_utf16(size_t len) noexcept { - return get_default_implementation()->latin1_length_from_utf16(len); -} -simdutf_warn_unused size_t latin1_length_from_utf32(size_t len) noexcept { - return get_default_implementation()->latin1_length_from_utf32(len); -} -simdutf_warn_unused size_t utf8_length_from_latin1(const char *buf, - size_t len) noexcept { - return get_default_implementation()->utf8_length_from_latin1(buf, len); -} -simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t *input, - size_t length) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return utf8_length_from_utf16be(input, length); -#else - return utf8_length_from_utf16le(input, length); -#endif -} -simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input, - size_t length) noexcept { - return get_default_implementation()->utf8_length_from_utf16le(input, length); -} -simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input, - size_t length) noexcept { - return get_default_implementation()->utf8_length_from_utf16be(input, length); -} -simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t *input, - size_t length) noexcept { -#if SIMDUTF_IS_BIG_ENDIAN - return utf32_length_from_utf16be(input, length); -#else - return utf32_length_from_utf16le(input, length); -#endif -} -simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input, - size_t length) noexcept { - return get_default_implementation()->utf32_length_from_utf16le(input, length); -} -simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input, - size_t length) noexcept { - return get_default_implementation()->utf32_length_from_utf16be(input, length); -} -simdutf_warn_unused size_t utf16_length_from_utf8(const char *input, - size_t length) noexcept { - return get_default_implementation()->utf16_length_from_utf8(input, length); -} -simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) noexcept { - return get_default_implementation()->utf16_length_from_latin1(length); -} -simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input, - size_t length) noexcept { - return get_default_implementation()->utf8_length_from_utf32(input, length); -} -simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input, - size_t length) noexcept { - return get_default_implementation()->utf16_length_from_utf32(input, length); -} -simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, - size_t length) noexcept { - return get_default_implementation()->utf32_length_from_utf8(input, length); -} - -simdutf_warn_unused size_t -maximal_binary_length_from_base64(const char *input, size_t length) noexcept { - return get_default_implementation()->maximal_binary_length_from_base64( - input, length); + // Saturated math + simdutf_really_inline simd8 + saturating_add(const simd8 other) const { + return __lasx_xvsadd_bu(this->value, other); + } + simdutf_really_inline simd8 + saturating_sub(const simd8 other) const { + return __lasx_xvssub_bu(this->value, other); + } + + // Order-specific operations + simdutf_really_inline simd8 + max_val(const simd8 other) const { + return __lasx_xvmax_bu(*this, other); + } + simdutf_really_inline simd8 + min_val(const simd8 other) const { + return __lasx_xvmin_bu(*this, other); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd8 + lt_bits(const simd8 other) const { + return other.saturating_sub(*this); + } + simdutf_really_inline simd8 + operator<=(const simd8 other) const { + return __lasx_xvsle_bu(*this, other); + } + simdutf_really_inline simd8 + operator>=(const simd8 other) const { + return __lasx_xvsle_bu(other, *this); + } + simdutf_really_inline simd8 + operator>(const simd8 other) const { + return __lasx_xvslt_bu(*this, other); + } + simdutf_really_inline simd8 + operator<(const simd8 other) const { + return __lasx_xvslt_bu(other, *this); + } + + // Bit-specific operations + simdutf_really_inline simd8 bits_not_set() const { + return *this == uint8_t(0); + } + simdutf_really_inline simd8 bits_not_set(simd8 bits) const { + return (*this & bits).bits_not_set(); + } + simdutf_really_inline simd8 any_bits_set() const { + return ~this->bits_not_set(); + } + simdutf_really_inline simd8 any_bits_set(simd8 bits) const { + return ~this->bits_not_set(bits); + } + simdutf_really_inline bool is_ascii() const { + __m256i ascii_mask = __lasx_xvslti_b(this->value, 0); + if (__lasx_xbnz_v(ascii_mask)) + return false; + return true; + } + simdutf_really_inline bool any_bits_set_anywhere() const { + if (__lasx_xbnz_v(this->value)) + return true; + return false; + } + simdutf_really_inline bool any_bits_set_anywhere(simd8 bits) const { + return (*this & bits).any_bits_set_anywhere(); + } + template simdutf_really_inline simd8 shr() const { + return __lasx_xvsrli_b(this->value, N); + } + template simdutf_really_inline simd8 shl() const { + return __lasx_xvslli_b(this->value, N); + } +}; +simdutf_really_inline simd8::operator simd8() const { + return this->value; } -simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_handling_options) noexcept { - return get_default_implementation()->base64_to_binary( - input, length, output, options, last_chunk_handling_options); -} +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, + "LASX kernel should use two registers per 64-byte block."); + simd8 chunks[NUM_CHUNKS]; -simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char16_t *input, size_t length) noexcept { - return get_default_implementation()->maximal_binary_length_from_base64( - input, length); -} + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8 other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed -simdutf_warn_unused result base64_to_binary( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_handling_options) noexcept { - return get_default_implementation()->base64_to_binary( - input, length, output, options, last_chunk_handling_options); -} + simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1) + : chunks{chunk0, chunk1} {} + simdutf_really_inline simd8x64(const T *ptr) + : chunks{simd8::load(ptr), + simd8::load(ptr + sizeof(simd8) / sizeof(T))} {} -template -simdutf_warn_unused result base64_to_binary_safe_impl( - const chartype *input, size_t length, char *output, size_t &outlen, - base64_options options, - last_chunk_handling_options last_chunk_handling_options) noexcept { - static_assert(std::is_same::value || - std::is_same::value, - "Only char and char16_t are supported."); - // The implementation could be nicer, but we expect that most times, the user - // will provide us with a buffer that is large enough. - size_t max_length = maximal_binary_length_from_base64(input, length); - if (outlen >= max_length) { - // fast path - full_result r = get_default_implementation()->base64_to_binary_details( - input, length, output, options, last_chunk_handling_options); - if (r.error != error_code::INVALID_BASE64_CHARACTER && - r.error != error_code::BASE64_EXTRA_BITS) { - outlen = r.output_count; - return {r.error, length}; - } - return r; + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); } - // The output buffer is maybe too small. We will decode a truncated version of - // the input. - size_t outlen3 = outlen / 3 * 3; // round down to multiple of 3 - size_t safe_input = base64_length_from_binary(outlen3, options); - full_result r = get_default_implementation()->base64_to_binary_details( - input, safe_input, output, options, loose); - if (r.error == error_code::INVALID_BASE64_CHARACTER) { - return r; + + simdutf_really_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); } - size_t offset = - (r.error == error_code::BASE64_INPUT_REMAINDER) - ? 1 - : ((r.output_count % 3) == 0 ? 0 : (r.output_count % 3) + 1); - size_t output_index = r.output_count - (r.output_count % 3); - size_t input_index = safe_input; - // offset is a value that is no larger than 3. We backtrack - // by up to offset characters + an undetermined number of - // white space characters. It is expected that the next loop - // runs at most 3 times + the number of white space characters - // in between them, so we are not worried about performance. - while (offset > 0 && input_index > 0) { - chartype c = input[--input_index]; - if (scalar::base64::is_ascii_white_space(c)) { - // skipping - } else { - offset--; - } + + simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { + this->chunks[0] |= other.chunks[0]; + this->chunks[1] |= other.chunks[1]; + return *this; } - size_t remaining_out = outlen - output_index; - const chartype *tail_input = input + input_index; - size_t tail_length = length - input_index; - while (tail_length > 0 && - scalar::base64::is_ascii_white_space(tail_input[tail_length - 1])) { - tail_length--; + + simdutf_really_inline simd8 reduce_or() const { + return this->chunks[0] | this->chunks[1]; } - size_t padding_characts = 0; - if (tail_length > 0 && tail_input[tail_length - 1] == '=') { - tail_length--; - padding_characts++; - while (tail_length > 0 && - scalar::base64::is_ascii_white_space(tail_input[tail_length - 1])) { - tail_length--; - } - if (tail_length > 0 && tail_input[tail_length - 1] == '=') { - tail_length--; - padding_characts++; - } + + simdutf_really_inline bool is_ascii() const { + return this->reduce_or().is_ascii(); } - // this will advance tail_input and tail_length - result rr = scalar::base64::base64_tail_decode_safe( - output + output_index, remaining_out, tail_input, tail_length, - padding_characts, options, last_chunk_handling_options); - outlen = output_index + remaining_out; - if (last_chunk_handling_options != stop_before_partial && - rr.error == error_code::SUCCESS && padding_characts > 0) { - // additional checks - if ((outlen % 3 == 0) || ((outlen % 3) + 1 + padding_characts != 4)) { - rr.error = error_code::INVALID_BASE64_CHARACTER; - } + + template + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 0); + this->chunks[1].template store_ascii_as_utf16(ptr + + sizeof(simd8) * 1); } - if (rr.error == error_code::SUCCESS && - last_chunk_handling_options == stop_before_partial) { - rr.count = tail_input - input; - return rr; + + simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { + this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8) * 0); + this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8) * 1); } - rr.count += input_index; - return rr; -} -simdutf_warn_unused size_t convert_latin1_to_utf8_safe( - const char *buf, size_t len, char *utf8_output, size_t utf8_len) noexcept { - const auto start{utf8_output}; + simdutf_really_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] | mask, this->chunks[1] | mask); + } - while (true) { - // convert_latin1_to_utf8 will never write more than input length * 2 - auto read_len = std::min(len, utf8_len >> 1); - if (read_len <= 16) { - break; - } + simdutf_really_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask) + .to_bitmask(); + } - const auto write_len = - simdutf::convert_latin1_to_utf8(buf, read_len, utf8_output); + simdutf_really_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64(this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1]) + .to_bitmask(); + } - utf8_output += write_len; - utf8_len -= write_len; - buf += read_len; - len -= read_len; + simdutf_really_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask) + .to_bitmask(); } - utf8_output += - scalar::latin1_to_utf8::convert_safe(buf, len, utf8_output, utf8_len); + simdutf_really_inline uint64_t in_range(const T low, const T high) const { + const simd8 mask_low = simd8::splat(low); + const simd8 mask_high = simd8::splat(high); - return utf8_output - start; -} + return simd8x64( + (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), + (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { + const simd8 mask_low = simd8::splat(low); + const simd8 mask_high = simd8::splat(high); + return simd8x64( + (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low), + (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t lt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask) + .to_bitmask(); + } -simdutf_warn_unused result base64_to_binary_safe( - const char *input, size_t length, char *output, size_t &outlen, - base64_options options, - last_chunk_handling_options last_chunk_handling_options) noexcept { - return base64_to_binary_safe_impl(input, length, output, outlen, - options, last_chunk_handling_options); -} -simdutf_warn_unused result base64_to_binary_safe( - const char16_t *input, size_t length, char *output, size_t &outlen, - base64_options options, - last_chunk_handling_options last_chunk_handling_options) noexcept { - return base64_to_binary_safe_impl( - input, length, output, outlen, options, last_chunk_handling_options); -} + simdutf_really_inline uint64_t gt(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t gteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] >= mask, this->chunks[1] >= mask) + .to_bitmask(); + } + simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { + const simd8 mask = simd8::splat(m); + return simd8x64((simd8(__m256i(this->chunks[0])) >= mask), + (simd8(__m256i(this->chunks[1])) >= mask)) + .to_bitmask(); + } +}; // struct simd8x64 -simdutf_warn_unused size_t -base64_length_from_binary(size_t length, base64_options options) noexcept { - return get_default_implementation()->base64_length_from_binary(length, - options); -} +/* begin file src/simdutf/lasx/simd16-inl.h */ +template struct simd16; -size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) noexcept { - return get_default_implementation()->binary_to_base64(input, length, output, - options); -} +template > +struct base16 : base> { + using bitmask_type = uint32_t; -simdutf_warn_unused simdutf::encoding_type -autodetect_encoding(const char *buf, size_t length) noexcept { - return get_default_implementation()->autodetect_encoding(buf, length); -} -simdutf_warn_unused int detect_encodings(const char *buf, - size_t length) noexcept { - return get_default_implementation()->detect_encodings(buf, length); -} -const implementation *builtin_implementation() { - static const implementation *builtin_impl = - get_available_implementations()[SIMDUTF_STRINGIFY( - SIMDUTF_BUILTIN_IMPLEMENTATION)]; - return builtin_impl; -} + simdutf_really_inline base16() : base>() {} + simdutf_really_inline base16(const __m256i _value) + : base>(_value) {} + template + simdutf_really_inline base16(const Pointer *ptr) + : base16(__lasx_xvld(reinterpret_cast(ptr), 0)) {} + friend simdutf_really_inline Mask operator==(const simd16 lhs, + const simd16 rhs) { + return __lasx_xvseq_h(lhs.value, rhs.value); + } -simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length) { - return scalar::utf8::trim_partial_utf8(input, length); -} + /// the size of vector in bytes + static const int SIZE = sizeof(base>::value); -simdutf_warn_unused size_t trim_partial_utf16be(const char16_t *input, - size_t length) { - return scalar::utf16::trim_partial_utf16(input, length); -} + /// the number of elements of type T a vector can hold + static const int ELEMENTS = SIZE / sizeof(T); -simdutf_warn_unused size_t trim_partial_utf16le(const char16_t *input, - size_t length) { - return scalar::utf16::trim_partial_utf16(input, length); -} + template + simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { + if (!N) + return this->value; + + __m256i zero = __lasx_xvldi(0); + __m256i result, shuf; + if (N < 8) { + shuf = __lasx_xvld(prev_shuf_table[N * 2], 0); + + result = __lasx_xvshuf_b( + __lasx_xvpermi_q(this->value, this->value, 0b00000001), this->value, + shuf); + __m256i srl_prev = __lasx_xvbsrl_v( + __lasx_xvpermi_q(zero, prev_chunk, 0b00110001), (16 - N * 2)); + __m256i mask = __lasx_xvld(bitsel_mask_table[N], 0); + result = __lasx_xvbitsel_v(result, srl_prev, mask); + + return result; + } else if (N == 8) { + return __lasx_xvpermi_q(this->value, prev_chunk, 0b00100001); + } else { + __m256i sll_value = __lasx_xvbsll_v( + __lasx_xvpermi_q(zero, this->value, 0b00000011), (N * 2 - 16)); + __m256i mask = __lasx_xvld(bitsel_mask_table[N * 2], 0); + shuf = __lasx_xvld(prev_shuf_table[N * 2], 0); + result = + __lasx_xvshuf_b(__lasx_xvpermi_q(prev_chunk, prev_chunk, 0b00000001), + prev_chunk, shuf); + result = __lasx_xvbitsel_v(sll_value, result, mask); + return result; + } + } +}; -simdutf_warn_unused size_t trim_partial_utf16(const char16_t *input, - size_t length) { -#if SIMDUTF_IS_BIG_ENDIAN - return trim_partial_utf16be(input, length); -#else - return trim_partial_utf16le(input, length); -#endif -} +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd16 : base16 { + static simdutf_really_inline simd16 splat(bool _value) { + return __lasx_xvreplgr2vr_h(uint8_t(-(!!_value))); + } -} // namespace simdutf -/* end file src/implementation.cpp */ -/* begin file src/encoding_types.cpp */ + simdutf_really_inline simd16() : base16() {} + simdutf_really_inline simd16(const __m256i _value) : base16(_value) {} + // Splat constructor + simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} -namespace simdutf { -bool match_system(endianness e) { -#if SIMDUTF_IS_BIG_ENDIAN - return e == endianness::BIG; -#else - return e == endianness::LITTLE; -#endif -} + simdutf_really_inline bitmask_type to_bitmask() const { + __m256i mask = __lasx_xvmsknz_b(this->value); + bitmask_type mask0 = __lasx_xvpickve2gr_wu(mask, 0); + bitmask_type mask1 = __lasx_xvpickve2gr_wu(mask, 4); + return (mask0 | (mask1 << 16)); + } + simdutf_really_inline bool any() const { + if (__lasx_xbz_v(this->value)) + return false; + return true; + } + simdutf_really_inline simd16 operator~() const { return *this ^ true; } +}; -std::string to_string(encoding_type bom) { - switch (bom) { - case UTF16_LE: - return "UTF16 little-endian"; - case UTF16_BE: - return "UTF16 big-endian"; - case UTF32_LE: - return "UTF32 little-endian"; - case UTF32_BE: - return "UTF32 big-endian"; - case UTF8: - return "UTF8"; - case unspecified: - return "unknown"; - default: - return "error"; +template struct base16_numeric : base16 { + static simdutf_really_inline simd16 splat(T _value) { + return __lasx_xvreplgr2vr_h((uint16_t)_value); + } + static simdutf_really_inline simd16 zero() { return __lasx_xvldi(0); } + static simdutf_really_inline simd16 load(const T values[8]) { + return __lasx_xvld(reinterpret_cast(values), 0); } -} -namespace BOM { -// Note that BOM for UTF8 is discouraged. -encoding_type check_bom(const uint8_t *byte, size_t length) { - if (length >= 2 && byte[0] == 0xff and byte[1] == 0xfe) { - if (length >= 4 && byte[2] == 0x00 and byte[3] == 0x0) { - return encoding_type::UTF32_LE; - } else { - return encoding_type::UTF16_LE; - } - } else if (length >= 2 && byte[0] == 0xfe and byte[1] == 0xff) { - return encoding_type::UTF16_BE; - } else if (length >= 4 && byte[0] == 0x00 and byte[1] == 0x00 and - byte[2] == 0xfe and byte[3] == 0xff) { - return encoding_type::UTF32_BE; - } else if (length >= 4 && byte[0] == 0xef and byte[1] == 0xbb and - byte[2] == 0xbf) { - return encoding_type::UTF8; + simdutf_really_inline base16_numeric() : base16() {} + simdutf_really_inline base16_numeric(const __m256i _value) + : base16(_value) {} + + // Store to array + simdutf_really_inline void store(T dst[8]) const { + return __lasx_xvst(this->value, reinterpret_cast<__m256i *>(dst), 0); } - return encoding_type::unspecified; -} -encoding_type check_bom(const char *byte, size_t length) { - return check_bom(reinterpret_cast(byte), length); -} + // Override to distinguish from bool version + simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFFFu; } -size_t bom_byte_size(encoding_type bom) { - switch (bom) { - case UTF16_LE: - return 2; - case UTF16_BE: - return 2; - case UTF32_LE: - return 4; - case UTF32_BE: - return 4; - case UTF8: - return 3; - case unspecified: - return 0; - default: - return 0; + // Addition/subtraction are the same for signed and unsigned + simdutf_really_inline simd16 operator+(const simd16 other) const { + return __lasx_xvadd_h(*this, other); } -} + simdutf_really_inline simd16 operator-(const simd16 other) const { + return __lasx_xvsub_h(*this, other); + } + simdutf_really_inline simd16 &operator+=(const simd16 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdutf_really_inline simd16 &operator-=(const simd16 other) { + *this = *this - other; + return *static_cast *>(this); + } +}; -} // namespace BOM +// Signed code units +template <> struct simd16 : base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} + simdutf_really_inline simd16(const __m256i _value) + : base16_numeric(_value) {} + // Splat constructor + simdutf_really_inline simd16(int16_t _value) : simd16(splat(_value)) {} + // Array constructor + simdutf_really_inline simd16(const int16_t *values) : simd16(load(values)) {} + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} + // Order-sensitive comparisons + simdutf_really_inline simd16 + max_val(const simd16 other) const { + return __lasx_xvmax_h(*this, other); + } + simdutf_really_inline simd16 + min_val(const simd16 other) const { + return __lasx_xvmin_h(*this, other); + } + simdutf_really_inline simd16 + operator>(const simd16 other) const { + return __lasx_xvsle_h(other.value, this->value); + } + simdutf_really_inline simd16 + operator<(const simd16 other) const { + return __lasx_xvslt_h(this->value, other.value); + } +}; + +// Unsigned code units +template <> struct simd16 : base16_numeric { + simdutf_really_inline simd16() : base16_numeric() {} + simdutf_really_inline simd16(const __m256i _value) + : base16_numeric(_value) {} + + // Splat constructor + simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} + // Array constructor + simdutf_really_inline simd16(const uint16_t *values) : simd16(load(values)) {} + simdutf_really_inline simd16(const char16_t *values) + : simd16(load(reinterpret_cast(values))) {} + + // Saturated math + simdutf_really_inline simd16 + saturating_add(const simd16 other) const { + return __lasx_xvsadd_hu(this->value, other.value); + } + simdutf_really_inline simd16 + saturating_sub(const simd16 other) const { + return __lasx_xvssub_hu(this->value, other.value); + } + + // Order-specific operations + simdutf_really_inline simd16 + max_val(const simd16 other) const { + return __lasx_xvmax_hu(this->value, other.value); + } + simdutf_really_inline simd16 + min_val(const simd16 other) const { + return __lasx_xvmin_hu(this->value, other.value); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd16 + gt_bits(const simd16 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdutf_really_inline simd16 + lt_bits(const simd16 other) const { + return other.saturating_sub(*this); + } + simdutf_really_inline simd16 + operator<=(const simd16 other) const { + return __lasx_xvsle_hu(this->value, other.value); + } + simdutf_really_inline simd16 + operator>=(const simd16 other) const { + return __lasx_xvsle_hu(other.value, this->value); + } + simdutf_really_inline simd16 + operator>(const simd16 other) const { + return __lasx_xvslt_hu(other.value, this->value); + } + simdutf_really_inline simd16 + operator<(const simd16 other) const { + return __lasx_xvslt_hu(this->value, other.value); + } + + // Bit-specific operations + simdutf_really_inline simd16 bits_not_set() const { + return *this == uint16_t(0); + } + simdutf_really_inline simd16 bits_not_set(simd16 bits) const { + return (*this & bits).bits_not_set(); + } + simdutf_really_inline simd16 any_bits_set() const { + return ~this->bits_not_set(); + } + simdutf_really_inline simd16 any_bits_set(simd16 bits) const { + return ~this->bits_not_set(bits); + } + + simdutf_really_inline bool any_bits_set_anywhere() const { + if (__lasx_xbnz_v(this->value)) + return true; + return false; + } + simdutf_really_inline bool + any_bits_set_anywhere(simd16 bits) const { + return (*this & bits).any_bits_set_anywhere(); + } + + template simdutf_really_inline simd16 shr() const { + return simd16(__lasx_xvsrli_h(this->value, N)); + } + template simdutf_really_inline simd16 shl() const { + return simd16(__lasx_xvslli_h(this->value, N)); + } + + // Change the endianness + simdutf_really_inline simd16 swap_bytes() const { + return __lasx_xvshuf4i_b(this->value, 0b10110001); + } + + // Pack with the unsigned saturation of two uint16_t code units into single + // uint8_t vector + static simdutf_really_inline simd8 pack(const simd16 &v0, + const simd16 &v1) { + return __lasx_xvpermi_d(__lasx_xvssrlni_bu_h(v1.value, v0.value, 0), + 0b11011000); + } +}; + +template struct simd16x32 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); + static_assert(NUM_CHUNKS == 2, + "LASX kernel should use two registers per 64-byte block."); + simd16 chunks[NUM_CHUNKS]; + + simd16x32(const simd16x32 &o) = delete; // no copy allowed + simd16x32 & + operator=(const simd16 other) = delete; // no assignment allowed + simd16x32() = delete; // no default constructor allowed + + simdutf_really_inline simd16x32(const simd16 chunk0, + const simd16 chunk1) + : chunks{chunk0, chunk1} {} + simdutf_really_inline simd16x32(const T *ptr) + : chunks{simd16::load(ptr), + simd16::load(ptr + sizeof(simd16) / sizeof(T))} {} + + simdutf_really_inline void store(T *ptr) const { + this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); + this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); + } + + simdutf_really_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } + + simdutf_really_inline simd16 reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdutf_really_inline bool is_ascii() const { + return this->reduce_or().is_ascii(); + } + + simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { + this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16) * 0); + this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16)); + } + + simdutf_really_inline simd16x32 bit_or(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] | mask, this->chunks[1] | mask); + } + + simdutf_really_inline void swap_bytes() { + this->chunks[0] = this->chunks[0].swap_bytes(); + this->chunks[1] = this->chunks[1].swap_bytes(); + } + + simdutf_really_inline uint64_t eq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] == mask, this->chunks[1] == mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t eq(const simd16x32 &other) const { + return simd16x32(this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1]) + .to_bitmask(); + } + + simdutf_really_inline uint64_t lteq(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask) + .to_bitmask(); + } + + simdutf_really_inline uint64_t in_range(const T low, const T high) const { + const simd16 mask_low = simd16::splat(low); + const simd16 mask_high = simd16::splat(high); + + return simd16x32( + (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), + (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { + const simd16 mask_low = simd16::splat(static_cast(low - 1)); + const simd16 mask_high = simd16::splat(static_cast(high + 1)); + return simd16x32( + (this->chunks[0] >= mask_high) | (this->chunks[0] <= mask_low), + (this->chunks[1] >= mask_high) | (this->chunks[1] <= mask_low)) + .to_bitmask(); + } + simdutf_really_inline uint64_t lt(const T m) const { + const simd16 mask = simd16::splat(m); + return simd16x32(this->chunks[0] < mask, this->chunks[1] < mask) + .to_bitmask(); + } +}; // struct simd16x32 +/* end file src/simdutf/lasx/simd16-inl.h */ +} // namespace simd +} // unnamed namespace +} // namespace lasx } // namespace simdutf -/* end file src/encoding_types.cpp */ -/* begin file src/error.cpp */ -namespace simdutf { -// deliberately empty -} -/* end file src/error.cpp */ -// The large tables should be included once and they -// should not depend on a kernel. -/* begin file src/tables/utf8_to_utf16_tables.h */ -#ifndef SIMDUTF_UTF8_TO_UTF16_TABLES_H -#define SIMDUTF_UTF8_TO_UTF16_TABLES_H -#include + +#endif // SIMDUTF_LASX_SIMD_H +/* end file src/simdutf/lasx/simd.h */ + +/* begin file src/simdutf/lasx/end.h */ +/* end file src/simdutf/lasx/end.h */ + +#endif // SIMDUTF_IMPLEMENTATION_LASX + +#endif // SIMDUTF_LASX_H +/* end file src/simdutf/lasx.h */ +/* begin file src/simdutf/fallback.h */ +#ifndef SIMDUTF_FALLBACK_H +#define SIMDUTF_FALLBACK_H + + +// Note that fallback.h is always imported last. + +// Default Fallback to on unless a builtin implementation has already been +// selected. +#ifndef SIMDUTF_IMPLEMENTATION_FALLBACK + #if SIMDUTF_CAN_ALWAYS_RUN_ARM64 || SIMDUTF_CAN_ALWAYS_RUN_ICELAKE || \ + SIMDUTF_CAN_ALWAYS_RUN_HASWELL || SIMDUTF_CAN_ALWAYS_RUN_WESTMERE || \ + SIMDUTF_CAN_ALWAYS_RUN_PPC64 || SIMDUTF_CAN_ALWAYS_RUN_RVV || \ + SIMDUTF_CAN_ALWAYS_RUN_LSX || SIMDUTF_CAN_ALWAYS_RUN_LASX + #define SIMDUTF_IMPLEMENTATION_FALLBACK 0 + #else + #define SIMDUTF_IMPLEMENTATION_FALLBACK 1 + #endif +#endif + +#define SIMDUTF_CAN_ALWAYS_RUN_FALLBACK (SIMDUTF_IMPLEMENTATION_FALLBACK) + +#if SIMDUTF_IMPLEMENTATION_FALLBACK namespace simdutf { -namespace { -namespace tables { -namespace utf8_to_utf16 { /** - * utf8bigindex uses about 8 kB - * shufutf8 uses about 3344 B - * - * So we use a bit over 11 kB. It would be - * easy to save about 4 kB by only - * storing the index in utf8bigindex, and - * deriving the consumed bytes otherwise. - * However, this may come at a significant (10% to 20%) - * performance penalty. + * Fallback implementation (runs on any machine). */ +namespace fallback {} // namespace fallback +} // namespace simdutf -const uint8_t shufutf8[209][16] = { - {0, 255, 1, 255, 2, 255, 3, 255, 4, 255, 5, 255, 0, 0, 0, 0}, - {0, 255, 1, 255, 2, 255, 3, 255, 4, 255, 6, 5, 0, 0, 0, 0}, - {0, 255, 1, 255, 2, 255, 3, 255, 5, 4, 6, 255, 0, 0, 0, 0}, - {0, 255, 1, 255, 2, 255, 3, 255, 5, 4, 7, 6, 0, 0, 0, 0}, - {0, 255, 1, 255, 2, 255, 4, 3, 5, 255, 6, 255, 0, 0, 0, 0}, - {0, 255, 1, 255, 2, 255, 4, 3, 5, 255, 7, 6, 0, 0, 0, 0}, - {0, 255, 1, 255, 2, 255, 4, 3, 6, 5, 7, 255, 0, 0, 0, 0}, - {0, 255, 1, 255, 2, 255, 4, 3, 6, 5, 8, 7, 0, 0, 0, 0}, - {0, 255, 1, 255, 3, 2, 4, 255, 5, 255, 6, 255, 0, 0, 0, 0}, - {0, 255, 1, 255, 3, 2, 4, 255, 5, 255, 7, 6, 0, 0, 0, 0}, - {0, 255, 1, 255, 3, 2, 4, 255, 6, 5, 7, 255, 0, 0, 0, 0}, - {0, 255, 1, 255, 3, 2, 4, 255, 6, 5, 8, 7, 0, 0, 0, 0}, - {0, 255, 1, 255, 3, 2, 5, 4, 6, 255, 7, 255, 0, 0, 0, 0}, - {0, 255, 1, 255, 3, 2, 5, 4, 6, 255, 8, 7, 0, 0, 0, 0}, - {0, 255, 1, 255, 3, 2, 5, 4, 7, 6, 8, 255, 0, 0, 0, 0}, - {0, 255, 1, 255, 3, 2, 5, 4, 7, 6, 9, 8, 0, 0, 0, 0}, - {0, 255, 2, 1, 3, 255, 4, 255, 5, 255, 6, 255, 0, 0, 0, 0}, - {0, 255, 2, 1, 3, 255, 4, 255, 5, 255, 7, 6, 0, 0, 0, 0}, - {0, 255, 2, 1, 3, 255, 4, 255, 6, 5, 7, 255, 0, 0, 0, 0}, - {0, 255, 2, 1, 3, 255, 4, 255, 6, 5, 8, 7, 0, 0, 0, 0}, - {0, 255, 2, 1, 3, 255, 5, 4, 6, 255, 7, 255, 0, 0, 0, 0}, - {0, 255, 2, 1, 3, 255, 5, 4, 6, 255, 8, 7, 0, 0, 0, 0}, - {0, 255, 2, 1, 3, 255, 5, 4, 7, 6, 8, 255, 0, 0, 0, 0}, - {0, 255, 2, 1, 3, 255, 5, 4, 7, 6, 9, 8, 0, 0, 0, 0}, - {0, 255, 2, 1, 4, 3, 5, 255, 6, 255, 7, 255, 0, 0, 0, 0}, - {0, 255, 2, 1, 4, 3, 5, 255, 6, 255, 8, 7, 0, 0, 0, 0}, - {0, 255, 2, 1, 4, 3, 5, 255, 7, 6, 8, 255, 0, 0, 0, 0}, - {0, 255, 2, 1, 4, 3, 5, 255, 7, 6, 9, 8, 0, 0, 0, 0}, - {0, 255, 2, 1, 4, 3, 6, 5, 7, 255, 8, 255, 0, 0, 0, 0}, - {0, 255, 2, 1, 4, 3, 6, 5, 7, 255, 9, 8, 0, 0, 0, 0}, - {0, 255, 2, 1, 4, 3, 6, 5, 8, 7, 9, 255, 0, 0, 0, 0}, - {0, 255, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 0, 0, 0, 0}, - {1, 0, 2, 255, 3, 255, 4, 255, 5, 255, 6, 255, 0, 0, 0, 0}, - {1, 0, 2, 255, 3, 255, 4, 255, 5, 255, 7, 6, 0, 0, 0, 0}, - {1, 0, 2, 255, 3, 255, 4, 255, 6, 5, 7, 255, 0, 0, 0, 0}, - {1, 0, 2, 255, 3, 255, 4, 255, 6, 5, 8, 7, 0, 0, 0, 0}, - {1, 0, 2, 255, 3, 255, 5, 4, 6, 255, 7, 255, 0, 0, 0, 0}, - {1, 0, 2, 255, 3, 255, 5, 4, 6, 255, 8, 7, 0, 0, 0, 0}, - {1, 0, 2, 255, 3, 255, 5, 4, 7, 6, 8, 255, 0, 0, 0, 0}, - {1, 0, 2, 255, 3, 255, 5, 4, 7, 6, 9, 8, 0, 0, 0, 0}, - {1, 0, 2, 255, 4, 3, 5, 255, 6, 255, 7, 255, 0, 0, 0, 0}, - {1, 0, 2, 255, 4, 3, 5, 255, 6, 255, 8, 7, 0, 0, 0, 0}, - {1, 0, 2, 255, 4, 3, 5, 255, 7, 6, 8, 255, 0, 0, 0, 0}, - {1, 0, 2, 255, 4, 3, 5, 255, 7, 6, 9, 8, 0, 0, 0, 0}, - {1, 0, 2, 255, 4, 3, 6, 5, 7, 255, 8, 255, 0, 0, 0, 0}, - {1, 0, 2, 255, 4, 3, 6, 5, 7, 255, 9, 8, 0, 0, 0, 0}, - {1, 0, 2, 255, 4, 3, 6, 5, 8, 7, 9, 255, 0, 0, 0, 0}, - {1, 0, 2, 255, 4, 3, 6, 5, 8, 7, 10, 9, 0, 0, 0, 0}, - {1, 0, 3, 2, 4, 255, 5, 255, 6, 255, 7, 255, 0, 0, 0, 0}, - {1, 0, 3, 2, 4, 255, 5, 255, 6, 255, 8, 7, 0, 0, 0, 0}, - {1, 0, 3, 2, 4, 255, 5, 255, 7, 6, 8, 255, 0, 0, 0, 0}, - {1, 0, 3, 2, 4, 255, 5, 255, 7, 6, 9, 8, 0, 0, 0, 0}, - {1, 0, 3, 2, 4, 255, 6, 5, 7, 255, 8, 255, 0, 0, 0, 0}, - {1, 0, 3, 2, 4, 255, 6, 5, 7, 255, 9, 8, 0, 0, 0, 0}, - {1, 0, 3, 2, 4, 255, 6, 5, 8, 7, 9, 255, 0, 0, 0, 0}, - {1, 0, 3, 2, 4, 255, 6, 5, 8, 7, 10, 9, 0, 0, 0, 0}, - {1, 0, 3, 2, 5, 4, 6, 255, 7, 255, 8, 255, 0, 0, 0, 0}, - {1, 0, 3, 2, 5, 4, 6, 255, 7, 255, 9, 8, 0, 0, 0, 0}, - {1, 0, 3, 2, 5, 4, 6, 255, 8, 7, 9, 255, 0, 0, 0, 0}, - {1, 0, 3, 2, 5, 4, 6, 255, 8, 7, 10, 9, 0, 0, 0, 0}, - {1, 0, 3, 2, 5, 4, 7, 6, 8, 255, 9, 255, 0, 0, 0, 0}, - {1, 0, 3, 2, 5, 4, 7, 6, 8, 255, 10, 9, 0, 0, 0, 0}, - {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 255, 0, 0, 0, 0}, - {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 0, 0, 0, 0}, - {0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255}, - {0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 4, 3, 255, 255}, - {0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 5, 4, 3, 255}, - {0, 255, 255, 255, 1, 255, 255, 255, 3, 2, 255, 255, 4, 255, 255, 255}, - {0, 255, 255, 255, 1, 255, 255, 255, 3, 2, 255, 255, 5, 4, 255, 255}, - {0, 255, 255, 255, 1, 255, 255, 255, 3, 2, 255, 255, 6, 5, 4, 255}, - {0, 255, 255, 255, 1, 255, 255, 255, 4, 3, 2, 255, 5, 255, 255, 255}, - {0, 255, 255, 255, 1, 255, 255, 255, 4, 3, 2, 255, 6, 5, 255, 255}, - {0, 255, 255, 255, 1, 255, 255, 255, 4, 3, 2, 255, 7, 6, 5, 255}, - {0, 255, 255, 255, 2, 1, 255, 255, 3, 255, 255, 255, 4, 255, 255, 255}, - {0, 255, 255, 255, 2, 1, 255, 255, 3, 255, 255, 255, 5, 4, 255, 255}, - {0, 255, 255, 255, 2, 1, 255, 255, 3, 255, 255, 255, 6, 5, 4, 255}, - {0, 255, 255, 255, 2, 1, 255, 255, 4, 3, 255, 255, 5, 255, 255, 255}, - {0, 255, 255, 255, 2, 1, 255, 255, 4, 3, 255, 255, 6, 5, 255, 255}, - {0, 255, 255, 255, 2, 1, 255, 255, 4, 3, 255, 255, 7, 6, 5, 255}, - {0, 255, 255, 255, 2, 1, 255, 255, 5, 4, 3, 255, 6, 255, 255, 255}, - {0, 255, 255, 255, 2, 1, 255, 255, 5, 4, 3, 255, 7, 6, 255, 255}, - {0, 255, 255, 255, 2, 1, 255, 255, 5, 4, 3, 255, 8, 7, 6, 255}, - {0, 255, 255, 255, 3, 2, 1, 255, 4, 255, 255, 255, 5, 255, 255, 255}, - {0, 255, 255, 255, 3, 2, 1, 255, 4, 255, 255, 255, 6, 5, 255, 255}, - {0, 255, 255, 255, 3, 2, 1, 255, 4, 255, 255, 255, 7, 6, 5, 255}, - {0, 255, 255, 255, 3, 2, 1, 255, 5, 4, 255, 255, 6, 255, 255, 255}, - {0, 255, 255, 255, 3, 2, 1, 255, 5, 4, 255, 255, 7, 6, 255, 255}, - {0, 255, 255, 255, 3, 2, 1, 255, 5, 4, 255, 255, 8, 7, 6, 255}, - {0, 255, 255, 255, 3, 2, 1, 255, 6, 5, 4, 255, 7, 255, 255, 255}, - {0, 255, 255, 255, 3, 2, 1, 255, 6, 5, 4, 255, 8, 7, 255, 255}, - {0, 255, 255, 255, 3, 2, 1, 255, 6, 5, 4, 255, 9, 8, 7, 255}, - {1, 0, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255, 4, 255, 255, 255}, - {1, 0, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255, 5, 4, 255, 255}, - {1, 0, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255, 6, 5, 4, 255}, - {1, 0, 255, 255, 2, 255, 255, 255, 4, 3, 255, 255, 5, 255, 255, 255}, - {1, 0, 255, 255, 2, 255, 255, 255, 4, 3, 255, 255, 6, 5, 255, 255}, - {1, 0, 255, 255, 2, 255, 255, 255, 4, 3, 255, 255, 7, 6, 5, 255}, - {1, 0, 255, 255, 2, 255, 255, 255, 5, 4, 3, 255, 6, 255, 255, 255}, - {1, 0, 255, 255, 2, 255, 255, 255, 5, 4, 3, 255, 7, 6, 255, 255}, - {1, 0, 255, 255, 2, 255, 255, 255, 5, 4, 3, 255, 8, 7, 6, 255}, - {1, 0, 255, 255, 3, 2, 255, 255, 4, 255, 255, 255, 5, 255, 255, 255}, - {1, 0, 255, 255, 3, 2, 255, 255, 4, 255, 255, 255, 6, 5, 255, 255}, - {1, 0, 255, 255, 3, 2, 255, 255, 4, 255, 255, 255, 7, 6, 5, 255}, - {1, 0, 255, 255, 3, 2, 255, 255, 5, 4, 255, 255, 6, 255, 255, 255}, - {1, 0, 255, 255, 3, 2, 255, 255, 5, 4, 255, 255, 7, 6, 255, 255}, - {1, 0, 255, 255, 3, 2, 255, 255, 5, 4, 255, 255, 8, 7, 6, 255}, - {1, 0, 255, 255, 3, 2, 255, 255, 6, 5, 4, 255, 7, 255, 255, 255}, - {1, 0, 255, 255, 3, 2, 255, 255, 6, 5, 4, 255, 8, 7, 255, 255}, - {1, 0, 255, 255, 3, 2, 255, 255, 6, 5, 4, 255, 9, 8, 7, 255}, - {1, 0, 255, 255, 4, 3, 2, 255, 5, 255, 255, 255, 6, 255, 255, 255}, - {1, 0, 255, 255, 4, 3, 2, 255, 5, 255, 255, 255, 7, 6, 255, 255}, - {1, 0, 255, 255, 4, 3, 2, 255, 5, 255, 255, 255, 8, 7, 6, 255}, - {1, 0, 255, 255, 4, 3, 2, 255, 6, 5, 255, 255, 7, 255, 255, 255}, - {1, 0, 255, 255, 4, 3, 2, 255, 6, 5, 255, 255, 8, 7, 255, 255}, - {1, 0, 255, 255, 4, 3, 2, 255, 6, 5, 255, 255, 9, 8, 7, 255}, - {1, 0, 255, 255, 4, 3, 2, 255, 7, 6, 5, 255, 8, 255, 255, 255}, - {1, 0, 255, 255, 4, 3, 2, 255, 7, 6, 5, 255, 9, 8, 255, 255}, - {1, 0, 255, 255, 4, 3, 2, 255, 7, 6, 5, 255, 10, 9, 8, 255}, - {2, 1, 0, 255, 3, 255, 255, 255, 4, 255, 255, 255, 5, 255, 255, 255}, - {2, 1, 0, 255, 3, 255, 255, 255, 4, 255, 255, 255, 6, 5, 255, 255}, - {2, 1, 0, 255, 3, 255, 255, 255, 4, 255, 255, 255, 7, 6, 5, 255}, - {2, 1, 0, 255, 3, 255, 255, 255, 5, 4, 255, 255, 6, 255, 255, 255}, - {2, 1, 0, 255, 3, 255, 255, 255, 5, 4, 255, 255, 7, 6, 255, 255}, - {2, 1, 0, 255, 3, 255, 255, 255, 5, 4, 255, 255, 8, 7, 6, 255}, - {2, 1, 0, 255, 3, 255, 255, 255, 6, 5, 4, 255, 7, 255, 255, 255}, - {2, 1, 0, 255, 3, 255, 255, 255, 6, 5, 4, 255, 8, 7, 255, 255}, - {2, 1, 0, 255, 3, 255, 255, 255, 6, 5, 4, 255, 9, 8, 7, 255}, - {2, 1, 0, 255, 4, 3, 255, 255, 5, 255, 255, 255, 6, 255, 255, 255}, - {2, 1, 0, 255, 4, 3, 255, 255, 5, 255, 255, 255, 7, 6, 255, 255}, - {2, 1, 0, 255, 4, 3, 255, 255, 5, 255, 255, 255, 8, 7, 6, 255}, - {2, 1, 0, 255, 4, 3, 255, 255, 6, 5, 255, 255, 7, 255, 255, 255}, - {2, 1, 0, 255, 4, 3, 255, 255, 6, 5, 255, 255, 8, 7, 255, 255}, - {2, 1, 0, 255, 4, 3, 255, 255, 6, 5, 255, 255, 9, 8, 7, 255}, - {2, 1, 0, 255, 4, 3, 255, 255, 7, 6, 5, 255, 8, 255, 255, 255}, - {2, 1, 0, 255, 4, 3, 255, 255, 7, 6, 5, 255, 9, 8, 255, 255}, - {2, 1, 0, 255, 4, 3, 255, 255, 7, 6, 5, 255, 10, 9, 8, 255}, - {2, 1, 0, 255, 5, 4, 3, 255, 6, 255, 255, 255, 7, 255, 255, 255}, - {2, 1, 0, 255, 5, 4, 3, 255, 6, 255, 255, 255, 8, 7, 255, 255}, - {2, 1, 0, 255, 5, 4, 3, 255, 6, 255, 255, 255, 9, 8, 7, 255}, - {2, 1, 0, 255, 5, 4, 3, 255, 7, 6, 255, 255, 8, 255, 255, 255}, - {2, 1, 0, 255, 5, 4, 3, 255, 7, 6, 255, 255, 9, 8, 255, 255}, - {2, 1, 0, 255, 5, 4, 3, 255, 7, 6, 255, 255, 10, 9, 8, 255}, - {2, 1, 0, 255, 5, 4, 3, 255, 8, 7, 6, 255, 9, 255, 255, 255}, - {2, 1, 0, 255, 5, 4, 3, 255, 8, 7, 6, 255, 10, 9, 255, 255}, - {2, 1, 0, 255, 5, 4, 3, 255, 8, 7, 6, 255, 11, 10, 9, 255}, - {0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 1, 255, 255, 255, 3, 2, 255, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 1, 255, 255, 255, 4, 3, 2, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 1, 255, 255, 255, 5, 4, 3, 2, 0, 0, 0, 0}, - {0, 255, 255, 255, 2, 1, 255, 255, 3, 255, 255, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 2, 1, 255, 255, 4, 3, 255, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 2, 1, 255, 255, 5, 4, 3, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 2, 1, 255, 255, 6, 5, 4, 3, 0, 0, 0, 0}, - {0, 255, 255, 255, 3, 2, 1, 255, 4, 255, 255, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 3, 2, 1, 255, 5, 4, 255, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 3, 2, 1, 255, 6, 5, 4, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 3, 2, 1, 255, 7, 6, 5, 4, 0, 0, 0, 0}, - {0, 255, 255, 255, 4, 3, 2, 1, 5, 255, 255, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 4, 3, 2, 1, 6, 5, 255, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 4, 3, 2, 1, 7, 6, 5, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 4, 3, 2, 1, 8, 7, 6, 5, 0, 0, 0, 0}, - {1, 0, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 2, 255, 255, 255, 4, 3, 255, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 2, 255, 255, 255, 5, 4, 3, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 2, 255, 255, 255, 6, 5, 4, 3, 0, 0, 0, 0}, - {1, 0, 255, 255, 3, 2, 255, 255, 4, 255, 255, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 3, 2, 255, 255, 5, 4, 255, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 3, 2, 255, 255, 6, 5, 4, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 3, 2, 255, 255, 7, 6, 5, 4, 0, 0, 0, 0}, - {1, 0, 255, 255, 4, 3, 2, 255, 5, 255, 255, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 4, 3, 2, 255, 6, 5, 255, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 4, 3, 2, 255, 7, 6, 5, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 4, 3, 2, 255, 8, 7, 6, 5, 0, 0, 0, 0}, - {1, 0, 255, 255, 5, 4, 3, 2, 6, 255, 255, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 5, 4, 3, 2, 7, 6, 255, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 5, 4, 3, 2, 8, 7, 6, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 5, 4, 3, 2, 9, 8, 7, 6, 0, 0, 0, 0}, - {2, 1, 0, 255, 3, 255, 255, 255, 4, 255, 255, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 3, 255, 255, 255, 5, 4, 255, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 3, 255, 255, 255, 6, 5, 4, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 3, 255, 255, 255, 7, 6, 5, 4, 0, 0, 0, 0}, - {2, 1, 0, 255, 4, 3, 255, 255, 5, 255, 255, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 4, 3, 255, 255, 6, 5, 255, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 4, 3, 255, 255, 7, 6, 5, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 4, 3, 255, 255, 8, 7, 6, 5, 0, 0, 0, 0}, - {2, 1, 0, 255, 5, 4, 3, 255, 6, 255, 255, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 5, 4, 3, 255, 7, 6, 255, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 5, 4, 3, 255, 8, 7, 6, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 5, 4, 3, 255, 9, 8, 7, 6, 0, 0, 0, 0}, - {2, 1, 0, 255, 6, 5, 4, 3, 7, 255, 255, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 6, 5, 4, 3, 8, 7, 255, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 6, 5, 4, 3, 9, 8, 7, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 6, 5, 4, 3, 10, 9, 8, 7, 0, 0, 0, 0}, - {3, 2, 1, 0, 4, 255, 255, 255, 5, 255, 255, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 4, 255, 255, 255, 6, 5, 255, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 4, 255, 255, 255, 7, 6, 5, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 4, 255, 255, 255, 8, 7, 6, 5, 0, 0, 0, 0}, - {3, 2, 1, 0, 5, 4, 255, 255, 6, 255, 255, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 5, 4, 255, 255, 7, 6, 255, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 5, 4, 255, 255, 8, 7, 6, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 5, 4, 255, 255, 9, 8, 7, 6, 0, 0, 0, 0}, - {3, 2, 1, 0, 6, 5, 4, 255, 7, 255, 255, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 6, 5, 4, 255, 8, 7, 255, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 6, 5, 4, 255, 9, 8, 7, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 6, 5, 4, 255, 10, 9, 8, 7, 0, 0, 0, 0}, - {3, 2, 1, 0, 7, 6, 5, 4, 8, 255, 255, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 7, 6, 5, 4, 9, 8, 255, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 7, 6, 5, 4, 10, 9, 8, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 0, 0, 0, 0}}; -/* number of two bytes : 64 */ -/* number of two + three bytes : 145 */ -/* number of two + three + four bytes : 209 */ -const uint8_t utf8bigindex[4096][2] = { - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {145, 3}, {209, 12}, {209, 12}, {209, 12}, {146, 4}, {209, 12}, {149, 4}, - {161, 4}, {64, 4}, {209, 12}, {209, 12}, {209, 12}, {147, 5}, {209, 12}, - {150, 5}, {162, 5}, {65, 5}, {209, 12}, {153, 5}, {165, 5}, {67, 5}, - {177, 5}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, {209, 12}, - {148, 6}, {209, 12}, {151, 6}, {163, 6}, {66, 6}, {209, 12}, {154, 6}, - {166, 6}, {68, 6}, {178, 6}, {74, 6}, {92, 6}, {64, 4}, {209, 12}, - {157, 6}, {169, 6}, {70, 6}, {181, 6}, {76, 6}, {94, 6}, {65, 5}, - {193, 6}, {82, 6}, {100, 6}, {67, 5}, {118, 6}, {73, 5}, {91, 5}, - {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {152, 7}, +/* begin file src/simdutf/fallback/implementation.h */ +#ifndef SIMDUTF_FALLBACK_IMPLEMENTATION_H +#define SIMDUTF_FALLBACK_IMPLEMENTATION_H + + +namespace simdutf { +namespace fallback { + +namespace { +using namespace simdutf; +} + +class implementation final : public simdutf::implementation { +public: + simdutf_really_inline implementation() + : simdutf::implementation("fallback", "Generic fallback implementation", + 0) {} + simdutf_warn_unused int detect_encodings(const char *input, + size_t length) const noexcept final; + simdutf_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; + simdutf_warn_unused result + validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; + simdutf_warn_unused bool validate_ascii(const char *buf, + size_t len) const noexcept final; + simdutf_warn_unused result + validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; + simdutf_warn_unused bool validate_utf16le(const char16_t *buf, + size_t len) const noexcept final; + simdutf_warn_unused bool validate_utf16be(const char16_t *buf, + size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept final; + simdutf_warn_unused bool validate_utf32(const char32_t *buf, + size_t len) const noexcept final; + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept final; + simdutf_warn_unused size_t convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept final; + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused result + convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, + char16_t *utf16_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + simdutf_warn_unused size_t + convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_buffer) const noexcept final; + void change_endianness_utf16(const char16_t *buf, size_t length, + char16_t *output) const noexcept final; + simdutf_warn_unused size_t count_utf16le(const char16_t *buf, + size_t length) const noexcept; + simdutf_warn_unused size_t count_utf16be(const char16_t *buf, + size_t length) const noexcept; + simdutf_warn_unused size_t count_utf8(const char *buf, + size_t length) const noexcept; + simdutf_warn_unused size_t + utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t + utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t + utf16_length_from_utf8(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t + utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t + utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; + simdutf_warn_unused size_t + utf32_length_from_utf8(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t + latin1_length_from_utf8(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t + latin1_length_from_utf16(size_t length) const noexcept; + simdutf_warn_unused size_t + latin1_length_from_utf32(size_t length) const noexcept; + simdutf_warn_unused size_t + utf32_length_from_latin1(size_t length) const noexcept; + simdutf_warn_unused size_t + utf16_length_from_latin1(size_t length) const noexcept; + simdutf_warn_unused size_t + utf8_length_from_latin1(const char *input, size_t length) const noexcept; + simdutf_warn_unused size_t maximal_binary_length_from_base64( + const char *input, size_t length) const noexcept; + simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused size_t maximal_binary_length_from_base64( + const char16_t *input, size_t length) const noexcept; + simdutf_warn_unused result base64_to_binary( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept; + simdutf_warn_unused size_t base64_length_from_binary( + size_t length, base64_options options) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; + size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) const noexcept; +}; +} // namespace fallback +} // namespace simdutf + +#endif // SIMDUTF_FALLBACK_IMPLEMENTATION_H +/* end file src/simdutf/fallback/implementation.h */ + +/* begin file src/simdutf/fallback/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "fallback" +// #define SIMDUTF_IMPLEMENTATION fallback +/* end file src/simdutf/fallback/begin.h */ + + // Declarations +/* begin file src/simdutf/fallback/bitmanipulation.h */ +#ifndef SIMDUTF_FALLBACK_BITMANIPULATION_H +#define SIMDUTF_FALLBACK_BITMANIPULATION_H + +#include + +namespace simdutf { +namespace fallback { +namespace {} // unnamed namespace +} // namespace fallback +} // namespace simdutf + +#endif // SIMDUTF_FALLBACK_BITMANIPULATION_H +/* end file src/simdutf/fallback/bitmanipulation.h */ + +/* begin file src/simdutf/fallback/end.h */ +/* end file src/simdutf/fallback/end.h */ + +#endif // SIMDUTF_IMPLEMENTATION_FALLBACK +#endif // SIMDUTF_FALLBACK_H +/* end file src/simdutf/fallback.h */ + +/* begin file src/scalar/utf8.h */ +#ifndef SIMDUTF_UTF8_H +#define SIMDUTF_UTF8_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8 { +#if SIMDUTF_IMPLEMENTATION_FALLBACK || SIMDUTF_IMPLEMENTATION_RVV +// only used by the fallback kernel. +// credit: based on code from Google Fuchsia (Apache Licensed) +inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept { + const uint8_t *data = reinterpret_cast(buf); + uint64_t pos = 0; + uint32_t code_point = 0; + while (pos < len) { + // check of the next 16 bytes are ascii. + uint64_t next_pos = pos + 16; + if (next_pos <= + len) { // if it is safe to read 16 more bytes, check that they are ascii + uint64_t v1; + std::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + pos = next_pos; + continue; + } + } + unsigned char byte = data[pos]; + + while (byte < 0b10000000) { + if (++pos == len) { + return true; + } + byte = data[pos]; + } + + if ((byte & 0b11100000) == 0b11000000) { + next_pos = pos + 2; + if (next_pos > len) { + return false; + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return false; + } + // range check + code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); + if ((code_point < 0x80) || (0x7ff < code_point)) { + return false; + } + } else if ((byte & 0b11110000) == 0b11100000) { + next_pos = pos + 3; + if (next_pos > len) { + return false; + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return false; + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return false; + } + // range check + code_point = (byte & 0b00001111) << 12 | + (data[pos + 1] & 0b00111111) << 6 | + (data[pos + 2] & 0b00111111); + if ((code_point < 0x800) || (0xffff < code_point) || + (0xd7ff < code_point && code_point < 0xe000)) { + return false; + } + } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000 + next_pos = pos + 4; + if (next_pos > len) { + return false; + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return false; + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return false; + } + if ((data[pos + 3] & 0b11000000) != 0b10000000) { + return false; + } + // range check + code_point = + (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | + (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); + if (code_point <= 0xffff || 0x10ffff < code_point) { + return false; + } + } else { + // we may have a continuation + return false; + } + pos = next_pos; + } + return true; +} +#endif + +inline simdutf_warn_unused result validate_with_errors(const char *buf, + size_t len) noexcept { + const uint8_t *data = reinterpret_cast(buf); + size_t pos = 0; + uint32_t code_point = 0; + while (pos < len) { + // check of the next 16 bytes are ascii. + size_t next_pos = pos + 16; + if (next_pos <= + len) { // if it is safe to read 16 more bytes, check that they are ascii + uint64_t v1; + std::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + pos = next_pos; + continue; + } + } + unsigned char byte = data[pos]; + + while (byte < 0b10000000) { + if (++pos == len) { + return result(error_code::SUCCESS, len); + } + byte = data[pos]; + } + + if ((byte & 0b11100000) == 0b11000000) { + next_pos = pos + 2; + if (next_pos > len) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); + if ((code_point < 0x80) || (0x7ff < code_point)) { + return result(error_code::OVERLONG, pos); + } + } else if ((byte & 0b11110000) == 0b11100000) { + next_pos = pos + 3; + if (next_pos > len) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + code_point = (byte & 0b00001111) << 12 | + (data[pos + 1] & 0b00111111) << 6 | + (data[pos + 2] & 0b00111111); + if ((code_point < 0x800) || (0xffff < code_point)) { + return result(error_code::OVERLONG, pos); + } + if (0xd7ff < code_point && code_point < 0xe000) { + return result(error_code::SURROGATE, pos); + } + } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000 + next_pos = pos + 4; + if (next_pos > len) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 3] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + code_point = + (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | + (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); + if (code_point <= 0xffff) { + return result(error_code::OVERLONG, pos); + } + if (0x10ffff < code_point) { + return result(error_code::TOO_LARGE, pos); + } + } else { + // we either have too many continuation bytes or an invalid leading byte + if ((byte & 0b11000000) == 0b10000000) { + return result(error_code::TOO_LONG, pos); + } else { + return result(error_code::HEADER_BITS, pos); + } + } + pos = next_pos; + } + return result(error_code::SUCCESS, len); +} + +// Finds the previous leading byte starting backward from buf and validates with +// errors from there Used to pinpoint the location of an error when an invalid +// chunk is detected We assume that the stream starts with a leading byte, and +// to check that it is the case, we ask that you pass a pointer to the start of +// the stream (start). +inline simdutf_warn_unused result rewind_and_validate_with_errors( + const char *start, const char *buf, size_t len) noexcept { + // First check that we start with a leading byte + if ((*start & 0b11000000) == 0b10000000) { + return result(error_code::TOO_LONG, 0); + } + size_t extra_len{0}; + // A leading byte cannot be further than 4 bytes away + for (int i = 0; i < 5; i++) { + unsigned char byte = *buf; + if ((byte & 0b11000000) != 0b10000000) { + break; + } else { + buf--; + extra_len++; + } + } + + result res = validate_with_errors(buf, len + extra_len); + res.count -= extra_len; + return res; +} + +inline size_t count_code_points(const char *buf, size_t len) { + const int8_t *p = reinterpret_cast(buf); + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + // -65 is 0b10111111, anything larger in two-complement's should start a new + // code point. + if (p[i] > -65) { + counter++; + } + } + return counter; +} + +inline size_t utf16_length_from_utf8(const char *buf, size_t len) { + const int8_t *p = reinterpret_cast(buf); + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + if (p[i] > -65) { + counter++; + } + if (uint8_t(p[i]) >= 240) { + counter++; + } + } + return counter; +} + +simdutf_warn_unused inline size_t trim_partial_utf8(const char *input, + size_t length) { + if (length < 3) { + switch (length) { + case 2: + if (uint8_t(input[length - 1]) >= 0xc0) { + return length - 1; + } // 2-, 3- and 4-byte characters with only 1 byte left + if (uint8_t(input[length - 2]) >= 0xe0) { + return length - 2; + } // 3- and 4-byte characters with only 2 bytes left + return length; + case 1: + if (uint8_t(input[length - 1]) >= 0xc0) { + return length - 1; + } // 2-, 3- and 4-byte characters with only 1 byte left + return length; + case 0: + return length; + } + } + if (uint8_t(input[length - 1]) >= 0xc0) { + return length - 1; + } // 2-, 3- and 4-byte characters with only 1 byte left + if (uint8_t(input[length - 2]) >= 0xe0) { + return length - 2; + } // 3- and 4-byte characters with only 1 byte left + if (uint8_t(input[length - 3]) >= 0xf0) { + return length - 3; + } // 4-byte characters with only 3 bytes left + return length; +} + +} // namespace utf8 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf8.h */ +/* begin file src/scalar/utf16.h */ +#ifndef SIMDUTF_UTF16_H +#define SIMDUTF_UTF16_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf16 { + +inline simdutf_warn_unused uint16_t swap_bytes(const uint16_t word) { + return uint16_t((word >> 8) | (word << 8)); +} + +template +inline simdutf_warn_unused bool validate(const char16_t *buf, + size_t len) noexcept { + const uint16_t *data = reinterpret_cast(buf); + uint64_t pos = 0; + while (pos < len) { + uint16_t word = + !match_system(big_endian) ? swap_bytes(data[pos]) : data[pos]; + if ((word & 0xF800) == 0xD800) { + if (pos + 1 >= len) { + return false; + } + uint16_t diff = uint16_t(word - 0xD800); + if (diff > 0x3FF) { + return false; + } + uint16_t next_word = + !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if (diff2 > 0x3FF) { + return false; + } + pos += 2; + } else { + pos++; + } + } + return true; +} + +template +inline simdutf_warn_unused result validate_with_errors(const char16_t *buf, + size_t len) noexcept { + const uint16_t *data = reinterpret_cast(buf); + size_t pos = 0; + while (pos < len) { + uint16_t word = + !match_system(big_endian) ? swap_bytes(data[pos]) : data[pos]; + if ((word & 0xF800) == 0xD800) { + if (pos + 1 >= len) { + return result(error_code::SURROGATE, pos); + } + uint16_t diff = uint16_t(word - 0xD800); + if (diff > 0x3FF) { + return result(error_code::SURROGATE, pos); + } + uint16_t next_word = + !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if (diff2 > 0x3FF) { + return result(error_code::SURROGATE, pos); + } + pos += 2; + } else { + pos++; + } + } + return result(error_code::SUCCESS, pos); +} + +template +inline size_t count_code_points(const char16_t *buf, size_t len) { + // We are not BOM aware. + const uint16_t *p = reinterpret_cast(buf); + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i]; + counter += ((word & 0xFC00) != 0xDC00); + } + return counter; +} + +template +inline size_t utf8_length_from_utf16(const char16_t *buf, size_t len) { + // We are not BOM aware. + const uint16_t *p = reinterpret_cast(buf); + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i]; + counter++; // ASCII + counter += static_cast( + word > + 0x7F); // non-ASCII is at least 2 bytes, surrogates are 2*2 == 4 bytes + counter += static_cast((word > 0x7FF && word <= 0xD7FF) || + (word >= 0xE000)); // three-byte + } + return counter; +} + +template +inline size_t utf32_length_from_utf16(const char16_t *buf, size_t len) { + // We are not BOM aware. + const uint16_t *p = reinterpret_cast(buf); + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i]; + counter += ((word & 0xFC00) != 0xDC00); + } + return counter; +} + +inline size_t latin1_length_from_utf16(size_t len) { return len; } + +simdutf_really_inline void change_endianness_utf16(const char16_t *in, + size_t size, char16_t *out) { + const uint16_t *input = reinterpret_cast(in); + uint16_t *output = reinterpret_cast(out); + for (size_t i = 0; i < size; i++) { + *output++ = uint16_t(input[i] >> 8 | input[i] << 8); + } +} + +template +simdutf_warn_unused inline size_t trim_partial_utf16(const char16_t *input, + size_t length) { + if (length <= 1) { + return length; + } + uint16_t last_word = uint16_t(input[length - 1]); + last_word = !match_system(big_endian) ? swap_bytes(last_word) : last_word; + length -= ((last_word & 0xFC00) == 0xD800); + return length; +} + +} // namespace utf16 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf16.h */ +/* begin file src/scalar/utf32.h */ +#ifndef SIMDUTF_UTF32_H +#define SIMDUTF_UTF32_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf32 { + +inline simdutf_warn_unused bool validate(const char32_t *buf, + size_t len) noexcept { + const uint32_t *data = reinterpret_cast(buf); + uint64_t pos = 0; + for (; pos < len; pos++) { + uint32_t word = data[pos]; + if (word > 0x10FFFF || (word >= 0xD800 && word <= 0xDFFF)) { + return false; + } + } + return true; +} + +inline simdutf_warn_unused result validate_with_errors(const char32_t *buf, + size_t len) noexcept { + const uint32_t *data = reinterpret_cast(buf); + size_t pos = 0; + for (; pos < len; pos++) { + uint32_t word = data[pos]; + if (word > 0x10FFFF) { + return result(error_code::TOO_LARGE, pos); + } + if (word >= 0xD800 && word <= 0xDFFF) { + return result(error_code::SURROGATE, pos); + } + } + return result(error_code::SUCCESS, pos); +} + +inline size_t utf8_length_from_utf32(const char32_t *buf, size_t len) { + // We are not BOM aware. + const uint32_t *p = reinterpret_cast(buf); + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + // credit: @ttsugriy for the vectorizable approach + counter++; // ASCII + counter += static_cast(p[i] > 0x7F); // two-byte + counter += static_cast(p[i] > 0x7FF); // three-byte + counter += static_cast(p[i] > 0xFFFF); // four-bytes + } + return counter; +} + +inline size_t utf16_length_from_utf32(const char32_t *buf, size_t len) { + // We are not BOM aware. + const uint32_t *p = reinterpret_cast(buf); + size_t counter{0}; + for (size_t i = 0; i < len; i++) { + counter++; // non-surrogate word + counter += static_cast(p[i] > 0xFFFF); // surrogate pair + } + return counter; +} + +inline size_t latin1_length_from_utf32(size_t len) { + // We are not BOM aware. + return len; // a utf32 codepoint will always represent 1 latin1 character +} + +inline simdutf_warn_unused uint32_t swap_bytes(const uint32_t word) { + return ((word >> 24) & 0xff) | // move byte 3 to byte 0 + ((word << 8) & 0xff0000) | // move byte 1 to byte 2 + ((word >> 8) & 0xff00) | // move byte 2 to byte 1 + ((word << 24) & 0xff000000); // byte 0 to byte 3 +} + +} // namespace utf32 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf32.h */ +/* begin file src/scalar/base64.h */ +#ifndef SIMDUTF_BASE64_H +#define SIMDUTF_BASE64_H + +#include +#include +#include +#include + +namespace simdutf { +namespace scalar { +namespace { +namespace base64 { + +// This function is not expected to be fast. Do not use in long loops. +template bool is_ascii_white_space(char_type c) { + return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; +} + +template bool is_ascii_white_space_or_padding(char_type c) { + return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || + c == '='; +} + +template bool is_eight_byte(char_type c) { + if (sizeof(char_type) == 1) { + return true; + } + return uint8_t(c) == c; +} + +// Returns true upon success. The destination buffer must be large enough. +// This functions assumes that the padding (=) has been removed. +template +full_result +base64_tail_decode(char *dst, const char_type *src, size_t length, + size_t padded_characters, // number of padding characters + // '=', typically 0, 1, 2. + base64_options options, + last_chunk_handling_options last_chunk_options) { + // This looks like 5 branches, but we expect the compiler to resolve this to a + // single branch: + const uint8_t *to_base64 = (options & base64_url) + ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value; + const uint32_t *d0 = (options & base64_url) + ? tables::base64::base64_url::d0 + : tables::base64::base64_default::d0; + const uint32_t *d1 = (options & base64_url) + ? tables::base64::base64_url::d1 + : tables::base64::base64_default::d1; + const uint32_t *d2 = (options & base64_url) + ? tables::base64::base64_url::d2 + : tables::base64::base64_default::d2; + const uint32_t *d3 = (options & base64_url) + ? tables::base64::base64_url::d3 + : tables::base64::base64_default::d3; + + const char_type *srcend = src + length; + const char_type *srcinit = src; + const char *dstinit = dst; + + uint32_t x; + size_t idx; + uint8_t buffer[4]; + while (true) { + while (src + 4 <= srcend && is_eight_byte(src[0]) && + is_eight_byte(src[1]) && is_eight_byte(src[2]) && + is_eight_byte(src[3]) && + (x = d0[uint8_t(src[0])] | d1[uint8_t(src[1])] | + d2[uint8_t(src[2])] | d3[uint8_t(src[3])]) < 0x01FFFFFF) { + if (match_system(endianness::BIG)) { + x = scalar::utf32::swap_bytes(x); + } + std::memcpy(dst, &x, 3); // optimization opportunity: copy 4 bytes + dst += 3; + src += 4; + } + idx = 0; + // we need at least four characters. + while (idx < 4 && src < srcend) { + char_type c = *src; + uint8_t code = to_base64[uint8_t(c)]; + buffer[idx] = uint8_t(code); + if (is_eight_byte(c) && code <= 63) { + idx++; + } else if (code > 64 || !scalar::base64::is_eight_byte(c)) { + return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } else { + // We have a space or a newline. We ignore it. + } + src++; + } + if (idx != 4) { + if (last_chunk_options == last_chunk_handling_options::strict && + (idx != 1) && ((idx + padded_characters) & 3) != 0) { + // The partial chunk was at src - idx + return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial && + (idx != 1) && ((idx + padded_characters) & 3) != 0) { + // Rewind src to before partial chunk + src -= idx; + return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; + } else { + if (idx == 2) { + uint32_t triple = + (uint32_t(buffer[0]) << 3 * 6) + (uint32_t(buffer[1]) << 2 * 6); + if ((last_chunk_options == last_chunk_handling_options::strict) && + (triple & 0xffff)) { + return {BASE64_EXTRA_BITS, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + if (match_system(endianness::BIG)) { + triple <<= 8; + std::memcpy(dst, &triple, 1); + } else { + triple = scalar::utf32::swap_bytes(triple); + triple >>= 8; + std::memcpy(dst, &triple, 1); + } + dst += 1; + } else if (idx == 3) { + uint32_t triple = (uint32_t(buffer[0]) << 3 * 6) + + (uint32_t(buffer[1]) << 2 * 6) + + (uint32_t(buffer[2]) << 1 * 6); + if ((last_chunk_options == last_chunk_handling_options::strict) && + (triple & 0xff)) { + return {BASE64_EXTRA_BITS, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + if (match_system(endianness::BIG)) { + triple <<= 8; + std::memcpy(dst, &triple, 2); + } else { + triple = scalar::utf32::swap_bytes(triple); + triple >>= 8; + std::memcpy(dst, &triple, 2); + } + dst += 2; + } else if (idx == 1) { + return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; + } + } + + uint32_t triple = + (uint32_t(buffer[0]) << 3 * 6) + (uint32_t(buffer[1]) << 2 * 6) + + (uint32_t(buffer[2]) << 1 * 6) + (uint32_t(buffer[3]) << 0 * 6); + if (match_system(endianness::BIG)) { + triple <<= 8; + std::memcpy(dst, &triple, 3); + } else { + triple = scalar::utf32::swap_bytes(triple); + triple >>= 8; + std::memcpy(dst, &triple, 3); + } + dst += 3; + } +} + +// like base64_tail_decode, but it will not write past the end of the output +// buffer. The outlen paramter is modified to reflect the number of bytes +// written. This functions assumes that the padding (=) has been removed. +template +result base64_tail_decode_safe( + char *dst, size_t &outlen, const char_type *&srcr, size_t length, + size_t padded_characters, // number of padding characters '=', typically 0, + // 1, 2. + base64_options options, last_chunk_handling_options last_chunk_options) { + const char_type *src = srcr; + if (length == 0) { + outlen = 0; + return {SUCCESS, 0}; + } + // This looks like 5 branches, but we expect the compiler to resolve this to a + // single branch: + const uint8_t *to_base64 = (options & base64_url) + ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value; + const uint32_t *d0 = (options & base64_url) + ? tables::base64::base64_url::d0 + : tables::base64::base64_default::d0; + const uint32_t *d1 = (options & base64_url) + ? tables::base64::base64_url::d1 + : tables::base64::base64_default::d1; + const uint32_t *d2 = (options & base64_url) + ? tables::base64::base64_url::d2 + : tables::base64::base64_default::d2; + const uint32_t *d3 = (options & base64_url) + ? tables::base64::base64_url::d3 + : tables::base64::base64_default::d3; + + const char_type *srcend = src + length; + const char_type *srcinit = src; + const char *dstinit = dst; + const char *dstend = dst + outlen; + + uint32_t x; + size_t idx; + uint8_t buffer[4]; + while (true) { + while (src + 4 <= srcend && is_eight_byte(src[0]) && + is_eight_byte(src[1]) && is_eight_byte(src[2]) && + is_eight_byte(src[3]) && + (x = d0[uint8_t(src[0])] | d1[uint8_t(src[1])] | + d2[uint8_t(src[2])] | d3[uint8_t(src[3])]) < 0x01FFFFFF) { + if (dstend - dst < 3) { + outlen = size_t(dst - dstinit); + srcr = src; + return {OUTPUT_BUFFER_TOO_SMALL, size_t(src - srcinit)}; + } + if (match_system(endianness::BIG)) { + x = scalar::utf32::swap_bytes(x); + } + std::memcpy(dst, &x, 3); // optimization opportunity: copy 4 bytes + dst += 3; + src += 4; + } + idx = 0; + const char_type *srccur = src; + // We need at least four characters. + while (idx < 4 && src < srcend) { + char_type c = *src; + uint8_t code = to_base64[uint8_t(c)]; + + buffer[idx] = uint8_t(code); + if (is_eight_byte(c) && code <= 63) { + idx++; + } else if (code > 64 || !scalar::base64::is_eight_byte(c)) { + outlen = size_t(dst - dstinit); + srcr = src; + return {INVALID_BASE64_CHARACTER, size_t(src - srcinit)}; + } else { + // We have a space or a newline. We ignore it. + } + src++; + } + if (idx != 4) { + if (last_chunk_options == last_chunk_handling_options::strict && + ((idx + padded_characters) & 3) != 0) { + outlen = size_t(dst - dstinit); + srcr = src; + return {BASE64_INPUT_REMAINDER, size_t(src - srcinit)}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial && + ((idx + padded_characters) & 3) != 0) { + // Rewind src to before partial chunk + srcr = srccur; + outlen = size_t(dst - dstinit); + return {SUCCESS, size_t(dst - dstinit)}; + } else { // loose mode + if (idx == 0) { + // No data left; return success + outlen = size_t(dst - dstinit); + srcr = src; + return {SUCCESS, size_t(dst - dstinit)}; + } else if (idx == 1) { + // Error: Incomplete chunk of length 1 is invalid in loose mode + outlen = size_t(dst - dstinit); + srcr = src; + return {BASE64_INPUT_REMAINDER, size_t(src - srcinit)}; + } else if (idx == 2 || idx == 3) { + // Check if there's enough space in the destination buffer + size_t required_space = (idx == 2) ? 1 : 2; + if (size_t(dstend - dst) < required_space) { + outlen = size_t(dst - dstinit); + srcr = src; + return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit)}; + } + uint32_t triple = 0; + if (idx == 2) { + triple = (uint32_t(buffer[0]) << 18) + (uint32_t(buffer[1]) << 12); + if ((last_chunk_options == last_chunk_handling_options::strict) && + (triple & 0xffff)) { + srcr = src; + return {BASE64_EXTRA_BITS, size_t(src - srcinit)}; + } + // Extract the first byte + triple >>= 16; + dst[0] = static_cast(triple & 0xFF); + dst += 1; + } else if (idx == 3) { + triple = (uint32_t(buffer[0]) << 18) + (uint32_t(buffer[1]) << 12) + + (uint32_t(buffer[2]) << 6); + if ((last_chunk_options == last_chunk_handling_options::strict) && + (triple & 0xff)) { + srcr = src; + return {BASE64_EXTRA_BITS, size_t(src - srcinit)}; + } + // Extract the first two bytes + triple >>= 8; + dst[0] = static_cast((triple >> 8) & 0xFF); + dst[1] = static_cast(triple & 0xFF); + dst += 2; + } + outlen = size_t(dst - dstinit); + srcr = src; + return {SUCCESS, size_t(dst - dstinit)}; + } + } + } + + if (dstend - dst < 3) { + outlen = size_t(dst - dstinit); + srcr = src; + return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit)}; + } + uint32_t triple = (uint32_t(buffer[0]) << 18) + + (uint32_t(buffer[1]) << 12) + (uint32_t(buffer[2]) << 6) + + (uint32_t(buffer[3])); + if (match_system(endianness::BIG)) { + triple <<= 8; + std::memcpy(dst, &triple, 3); + } else { + triple = scalar::utf32::swap_bytes(triple); + triple >>= 8; + std::memcpy(dst, &triple, 3); + } + dst += 3; + } +} + +// Returns the number of bytes written. The destination buffer must be large +// enough. It will add padding (=) if needed. +size_t tail_encode_base64(char *dst, const char *src, size_t srclen, + base64_options options) { + // By default, we use padding if we are not using the URL variant. + // This is check with ((options & base64_url) == 0) which returns true if we + // are not using the URL variant. However, we also allow 'inversion' of the + // convention with the base64_reverse_padding option. If the + // base64_reverse_padding option is set, we use padding if we are using the + // URL variant, and we omit it if we are not using the URL variant. This is + // checked with + // ((options & base64_reverse_padding) == base64_reverse_padding). + bool use_padding = + ((options & base64_url) == 0) ^ + ((options & base64_reverse_padding) == base64_reverse_padding); + // This looks like 3 branches, but we expect the compiler to resolve this to + // a single branch: + const char *e0 = (options & base64_url) ? tables::base64::base64_url::e0 + : tables::base64::base64_default::e0; + const char *e1 = (options & base64_url) ? tables::base64::base64_url::e1 + : tables::base64::base64_default::e1; + const char *e2 = (options & base64_url) ? tables::base64::base64_url::e2 + : tables::base64::base64_default::e2; + char *out = dst; + size_t i = 0; + uint8_t t1, t2, t3; + for (; i + 2 < srclen; i += 3) { + t1 = uint8_t(src[i]); + t2 = uint8_t(src[i + 1]); + t3 = uint8_t(src[i + 2]); + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; + *out++ = e2[t3]; + } + switch (srclen - i) { + case 0: + break; + case 1: + t1 = uint8_t(src[i]); + *out++ = e0[t1]; + *out++ = e1[(t1 & 0x03) << 4]; + if (use_padding) { + *out++ = '='; + *out++ = '='; + } + break; + default: /* case 2 */ + t1 = uint8_t(src[i]); + t2 = uint8_t(src[i + 1]); + *out++ = e0[t1]; + *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; + *out++ = e2[(t2 & 0x0F) << 2]; + if (use_padding) { + *out++ = '='; + } + } + return (size_t)(out - dst); +} + +template +simdutf_warn_unused size_t maximal_binary_length_from_base64( + const char_type *input, size_t length) noexcept { + // We follow https://infra.spec.whatwg.org/#forgiving-base64-decode + size_t padding = 0; + if (length > 0) { + if (input[length - 1] == '=') { + padding++; + if (length > 1 && input[length - 2] == '=') { + padding++; + } + } + } + size_t actual_length = length - padding; + if (actual_length % 4 <= 1) { + return actual_length / 4 * 3; + } + // if we have a valid input, then the remainder must be 2 or 3 adding one or + // two extra bytes. + return actual_length / 4 * 3 + (actual_length % 4) - 1; +} + +simdutf_warn_unused size_t +base64_length_from_binary(size_t length, base64_options options) noexcept { + // By default, we use padding if we are not using the URL variant. + // This is check with ((options & base64_url) == 0) which returns true if we + // are not using the URL variant. However, we also allow 'inversion' of the + // convention with the base64_reverse_padding option. If the + // base64_reverse_padding option is set, we use padding if we are using the + // URL variant, and we omit it if we are not using the URL variant. This is + // checked with + // ((options & base64_reverse_padding) == base64_reverse_padding). + bool use_padding = + ((options & base64_url) == 0) ^ + ((options & base64_reverse_padding) == base64_reverse_padding); + if (!use_padding) { + return length / 3 * 4 + ((length % 3) ? (length % 3) + 1 : 0); + } + return (length + 2) / 3 * + 4; // We use padding to make the length a multiple of 4. +} + +} // namespace base64 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/base64.h */ +/* begin file src/scalar/latin1_to_utf8/latin1_to_utf8.h */ +#ifndef SIMDUTF_LATIN1_TO_UTF8_H +#define SIMDUTF_LATIN1_TO_UTF8_H + +namespace simdutf { +namespace scalar { +namespace { +namespace latin1_to_utf8 { + +inline size_t convert(const char *buf, size_t len, char *utf8_output) { + const unsigned char *data = reinterpret_cast(buf); + size_t pos = 0; + size_t utf8_pos = 0; + while (pos < len) { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= + len) { // if it is safe to read 16 more bytes, check that they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | + v2}; // We are only interested in these bits: 1000 1000 1000 + // 1000, so it makes sense to concatenate everything + if ((v & 0x8080808080808080) == + 0) { // if NONE of these are set, e.g. all of them are zero, then + // everything is ASCII + size_t final_pos = pos + 16; + while (pos < final_pos) { + utf8_output[utf8_pos++] = char(buf[pos]); + pos++; + } + continue; + } + } + + unsigned char byte = data[pos]; + if ((byte & 0x80) == 0) { // if ASCII + // will generate one UTF-8 bytes + utf8_output[utf8_pos++] = char(byte); + pos++; + } else { + // will generate two UTF-8 bytes + utf8_output[utf8_pos++] = char((byte >> 6) | 0b11000000); + utf8_output[utf8_pos++] = char((byte & 0b111111) | 0b10000000); + pos++; + } + } + return utf8_pos; +} + +inline size_t convert_safe(const char *buf, size_t len, char *utf8_output, + size_t utf8_len) { + const unsigned char *data = reinterpret_cast(buf); + size_t pos = 0; + size_t skip_pos = 0; + size_t utf8_pos = 0; + while (pos < len && utf8_pos < utf8_len) { + // try to convert the next block of 16 ASCII bytes + if (pos >= skip_pos && pos + 16 <= len && + utf8_pos + 16 <= utf8_len) { // if it is safe to read 16 more bytes, + // check that they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | + v2}; // We are only interested in these bits: 1000 1000 1000 + // 1000, so it makes sense to concatenate everything + if ((v & 0x8080808080808080) == + 0) { // if NONE of these are set, e.g. all of them are zero, then + // everything is ASCII + ::memcpy(utf8_output + utf8_pos, buf + pos, 16); + utf8_pos += 16; + pos += 16; + } else { + // At least one of the next 16 bytes are not ASCII, we will process them + // one by one + skip_pos = pos + 16; + } + } else { + const auto byte = data[pos]; + if ((byte & 0x80) == 0) { // if ASCII + // will generate one UTF-8 bytes + utf8_output[utf8_pos++] = char(byte); + pos++; + } else if (utf8_pos + 2 <= utf8_len) { + // will generate two UTF-8 bytes + utf8_output[utf8_pos++] = char((byte >> 6) | 0b11000000); + utf8_output[utf8_pos++] = char((byte & 0b111111) | 0b10000000); + pos++; + } else { + break; + } + } + } + return utf8_pos; +} + +} // namespace latin1_to_utf8 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/latin1_to_utf8/latin1_to_utf8.h */ + +namespace simdutf { +bool implementation::supported_by_runtime_system() const { + uint32_t required_instruction_sets = this->required_instruction_sets(); + uint32_t supported_instruction_sets = + internal::detect_supported_architectures(); + return ((supported_instruction_sets & required_instruction_sets) == + required_instruction_sets); +} + +simdutf_warn_unused encoding_type implementation::autodetect_encoding( + const char *input, size_t length) const noexcept { + // If there is a BOM, then we trust it. + auto bom_encoding = simdutf::BOM::check_bom(input, length); + if (bom_encoding != encoding_type::unspecified) { + return bom_encoding; + } + // UTF8 is common, it includes ASCII, and is commonly represented + // without a BOM, so if it fits, go with that. Note that it is still + // possible to get it wrong, we are only 'guessing'. If some has UTF-16 + // data without a BOM, it could pass as UTF-8. + // + // An interesting twist might be to check for UTF-16 ASCII first (every + // other byte is zero). + if (validate_utf8(input, length)) { + return encoding_type::UTF8; + } + // The next most common encoding that might appear without BOM is probably + // UTF-16LE, so try that next. + if ((length % 2) == 0) { + // important: we need to divide by two + if (validate_utf16le(reinterpret_cast(input), + length / 2)) { + return encoding_type::UTF16_LE; + } + } + if ((length % 4) == 0) { + if (validate_utf32(reinterpret_cast(input), length / 4)) { + return encoding_type::UTF32_LE; + } + } + return encoding_type::unspecified; +} + +namespace internal { +// When there is a single implementation, we should not pay a price +// for dispatching to the best implementation. We should just use the +// one we have. This is a compile-time check. +#define SIMDUTF_SINGLE_IMPLEMENTATION \ + (SIMDUTF_IMPLEMENTATION_ICELAKE + SIMDUTF_IMPLEMENTATION_HASWELL + \ + SIMDUTF_IMPLEMENTATION_WESTMERE + SIMDUTF_IMPLEMENTATION_ARM64 + \ + SIMDUTF_IMPLEMENTATION_PPC64 + SIMDUTF_IMPLEMENTATION_LSX + \ + SIMDUTF_IMPLEMENTATION_LASX + SIMDUTF_IMPLEMENTATION_FALLBACK == \ + 1) + +// Static array of known implementations. We are hoping these get baked into the +// executable without requiring a static initializer. + +#if SIMDUTF_IMPLEMENTATION_ICELAKE +static const icelake::implementation *get_icelake_singleton() { + static const icelake::implementation icelake_singleton{}; + return &icelake_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_HASWELL +static const haswell::implementation *get_haswell_singleton() { + static const haswell::implementation haswell_singleton{}; + return &haswell_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_WESTMERE +static const westmere::implementation *get_westmere_singleton() { + static const westmere::implementation westmere_singleton{}; + return &westmere_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_ARM64 +static const arm64::implementation *get_arm64_singleton() { + static const arm64::implementation arm64_singleton{}; + return &arm64_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_PPC64 +static const ppc64::implementation *get_ppc64_singleton() { + static const ppc64::implementation ppc64_singleton{}; + return &ppc64_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_RVV +static const rvv::implementation *get_rvv_singleton() { + static const rvv::implementation rvv_singleton{}; + return &rvv_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_LSX +static const lsx::implementation *get_lsx_singleton() { + static const lsx::implementation lsx_singleton{}; + return &lsx_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_LASX +static const lasx::implementation *get_lasx_singleton() { + static const lasx::implementation lasx_singleton{}; + return &lasx_singleton; +} +#endif +#if SIMDUTF_IMPLEMENTATION_FALLBACK +static const fallback::implementation *get_fallback_singleton() { + static const fallback::implementation fallback_singleton{}; + return &fallback_singleton; +} +#endif + +#if SIMDUTF_SINGLE_IMPLEMENTATION +static const implementation *get_single_implementation() { + return + #if SIMDUTF_IMPLEMENTATION_ICELAKE + get_icelake_singleton(); + #endif + #if SIMDUTF_IMPLEMENTATION_HASWELL + get_haswell_singleton(); + #endif + #if SIMDUTF_IMPLEMENTATION_WESTMERE + get_westmere_singleton(); + #endif + #if SIMDUTF_IMPLEMENTATION_ARM64 + get_arm64_singleton(); + #endif + #if SIMDUTF_IMPLEMENTATION_PPC64 + get_ppc64_singleton(); + #endif + #if SIMDUTF_IMPLEMENTATION_LSX + get_lsx_singleton(); + #endif + #if SIMDUTF_IMPLEMENTATION_LASX + get_lasx_singleton(); + #endif + #if SIMDUTF_IMPLEMENTATION_FALLBACK + get_fallback_singleton(); + #endif +} +#endif + +/** + * @private Detects best supported implementation on first use, and sets it + */ +class detect_best_supported_implementation_on_first_use final + : public implementation { +public: + std::string name() const noexcept final { return set_best()->name(); } + std::string description() const noexcept final { + return set_best()->description(); + } + uint32_t required_instruction_sets() const noexcept final { + return set_best()->required_instruction_sets(); + } + + simdutf_warn_unused int + detect_encodings(const char *input, size_t length) const noexcept override { + return set_best()->detect_encodings(input, length); + } + + simdutf_warn_unused bool + validate_utf8(const char *buf, size_t len) const noexcept final override { + return set_best()->validate_utf8(buf, len); + } + + simdutf_warn_unused result validate_utf8_with_errors( + const char *buf, size_t len) const noexcept final override { + return set_best()->validate_utf8_with_errors(buf, len); + } + + simdutf_warn_unused bool + validate_ascii(const char *buf, size_t len) const noexcept final override { + return set_best()->validate_ascii(buf, len); + } + + simdutf_warn_unused result validate_ascii_with_errors( + const char *buf, size_t len) const noexcept final override { + return set_best()->validate_ascii_with_errors(buf, len); + } + + simdutf_warn_unused bool + validate_utf16le(const char16_t *buf, + size_t len) const noexcept final override { + return set_best()->validate_utf16le(buf, len); + } + + simdutf_warn_unused bool + validate_utf16be(const char16_t *buf, + size_t len) const noexcept final override { + return set_best()->validate_utf16be(buf, len); + } + + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept final override { + return set_best()->validate_utf16le_with_errors(buf, len); + } + + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept final override { + return set_best()->validate_utf16be_with_errors(buf, len); + } + + simdutf_warn_unused bool + validate_utf32(const char32_t *buf, + size_t len) const noexcept final override { + return set_best()->validate_utf32(buf, len); + } + + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept final override { + return set_best()->validate_utf32_with_errors(buf, len); + } + + simdutf_warn_unused size_t + convert_latin1_to_utf8(const char *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_latin1_to_utf8(buf, len, utf8_output); + } + + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_latin1_to_utf16le(buf, len, utf16_output); + } + + simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_latin1_to_utf16be(buf, len, utf16_output); + } + + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, + char32_t *latin1_output) const noexcept final override { + return set_best()->convert_latin1_to_utf32(buf, len, latin1_output); + } + + simdutf_warn_unused size_t + convert_utf8_to_latin1(const char *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf8_to_latin1(buf, len, latin1_output); + } + + simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf8_to_latin1_with_errors(buf, len, + latin1_output); + } + + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_valid_utf8_to_latin1(buf, len, latin1_output); + } + + simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf8_to_utf16le(buf, len, utf16_output); + } + + simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf8_to_utf16be(buf, len, utf16_output); + } + + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf8_to_utf16le_with_errors(buf, len, + utf16_output); + } + + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf8_to_utf16be_with_errors(buf, len, + utf16_output); + } + + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_valid_utf8_to_utf16le(buf, len, utf16_output); + } + + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_valid_utf8_to_utf16be(buf, len, utf16_output); + } + + simdutf_warn_unused size_t + convert_utf8_to_utf32(const char *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_utf8_to_utf32(buf, len, utf32_output); + } + + simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_utf8_to_utf32_with_errors(buf, len, + utf32_output); + } + + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_valid_utf8_to_utf32(buf, len, utf32_output); + } + + simdutf_warn_unused size_t + convert_utf16le_to_latin1(const char16_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf16le_to_latin1(buf, len, latin1_output); + } + + simdutf_warn_unused size_t + convert_utf16be_to_latin1(const char16_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf16be_to_latin1(buf, len, latin1_output); + } + + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf16le_to_latin1_with_errors(buf, len, + latin1_output); + } + + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf16be_to_latin1_with_errors(buf, len, + latin1_output); + } + + simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( + const char16_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_valid_utf16le_to_latin1(buf, len, latin1_output); + } + + simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( + const char16_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_valid_utf16be_to_latin1(buf, len, latin1_output); + } + + simdutf_warn_unused size_t + convert_utf16le_to_utf8(const char16_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_utf16le_to_utf8(buf, len, utf8_output); + } + + simdutf_warn_unused size_t + convert_utf16be_to_utf8(const char16_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_utf16be_to_utf8(buf, len, utf8_output); + } + + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_utf16le_to_utf8_with_errors(buf, len, + utf8_output); + } + + simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_utf16be_to_utf8_with_errors(buf, len, + utf8_output); + } + + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_valid_utf16le_to_utf8(buf, len, utf8_output); + } + + simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_valid_utf16be_to_utf8(buf, len, utf8_output); + } + + simdutf_warn_unused size_t + convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf32_to_latin1(buf, len, latin1_output); + } + + simdutf_warn_unused result convert_utf32_to_latin1_with_errors( + const char32_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf32_to_latin1_with_errors(buf, len, + latin1_output); + } + + simdutf_warn_unused size_t convert_valid_utf32_to_latin1( + const char32_t *buf, size_t len, + char *latin1_output) const noexcept final override { + return set_best()->convert_utf32_to_latin1(buf, len, latin1_output); + } + + simdutf_warn_unused size_t + convert_utf32_to_utf8(const char32_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_utf32_to_utf8(buf, len, utf8_output); + } + + simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_utf32_to_utf8_with_errors(buf, len, utf8_output); + } + + simdutf_warn_unused size_t + convert_valid_utf32_to_utf8(const char32_t *buf, size_t len, + char *utf8_output) const noexcept final override { + return set_best()->convert_valid_utf32_to_utf8(buf, len, utf8_output); + } + + simdutf_warn_unused size_t convert_utf32_to_utf16le( + const char32_t *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf32_to_utf16le(buf, len, utf16_output); + } + + simdutf_warn_unused size_t convert_utf32_to_utf16be( + const char32_t *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf32_to_utf16be(buf, len, utf16_output); + } + + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf32_to_utf16le_with_errors(buf, len, + utf16_output); + } + + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_utf32_to_utf16be_with_errors(buf, len, + utf16_output); + } + + simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( + const char32_t *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_valid_utf32_to_utf16le(buf, len, utf16_output); + } + + simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( + const char32_t *buf, size_t len, + char16_t *utf16_output) const noexcept final override { + return set_best()->convert_valid_utf32_to_utf16be(buf, len, utf16_output); + } + + simdutf_warn_unused size_t convert_utf16le_to_utf32( + const char16_t *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_utf16le_to_utf32(buf, len, utf32_output); + } + + simdutf_warn_unused size_t convert_utf16be_to_utf32( + const char16_t *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_utf16be_to_utf32(buf, len, utf32_output); + } + + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_utf16le_to_utf32_with_errors(buf, len, + utf32_output); + } + + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_utf16be_to_utf32_with_errors(buf, len, + utf32_output); + } + + simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( + const char16_t *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_valid_utf16le_to_utf32(buf, len, utf32_output); + } + + simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( + const char16_t *buf, size_t len, + char32_t *utf32_output) const noexcept final override { + return set_best()->convert_valid_utf16be_to_utf32(buf, len, utf32_output); + } + + void change_endianness_utf16(const char16_t *buf, size_t len, + char16_t *output) const noexcept final override { + set_best()->change_endianness_utf16(buf, len, output); + } + + simdutf_warn_unused size_t + count_utf16le(const char16_t *buf, size_t len) const noexcept final override { + return set_best()->count_utf16le(buf, len); + } + + simdutf_warn_unused size_t + count_utf16be(const char16_t *buf, size_t len) const noexcept final override { + return set_best()->count_utf16be(buf, len); + } + + simdutf_warn_unused size_t + count_utf8(const char *buf, size_t len) const noexcept final override { + return set_best()->count_utf8(buf, len); + } + + simdutf_warn_unused size_t + latin1_length_from_utf8(const char *buf, size_t len) const noexcept override { + return set_best()->latin1_length_from_utf8(buf, len); + } + + simdutf_warn_unused size_t + latin1_length_from_utf16(size_t len) const noexcept override { + return set_best()->latin1_length_from_utf16(len); + } + + simdutf_warn_unused size_t + latin1_length_from_utf32(size_t len) const noexcept override { + return set_best()->latin1_length_from_utf32(len); + } + + simdutf_warn_unused size_t + utf8_length_from_latin1(const char *buf, size_t len) const noexcept override { + return set_best()->utf8_length_from_latin1(buf, len); + } + + simdutf_warn_unused size_t utf8_length_from_utf16le( + const char16_t *buf, size_t len) const noexcept override { + return set_best()->utf8_length_from_utf16le(buf, len); + } + + simdutf_warn_unused size_t utf8_length_from_utf16be( + const char16_t *buf, size_t len) const noexcept override { + return set_best()->utf8_length_from_utf16be(buf, len); + } + + simdutf_warn_unused size_t + utf16_length_from_latin1(size_t len) const noexcept override { + return set_best()->utf16_length_from_latin1(len); + } + + simdutf_warn_unused size_t + utf32_length_from_latin1(size_t len) const noexcept override { + return set_best()->utf32_length_from_latin1(len); + } + + simdutf_warn_unused size_t utf32_length_from_utf16le( + const char16_t *buf, size_t len) const noexcept override { + return set_best()->utf32_length_from_utf16le(buf, len); + } + + simdutf_warn_unused size_t utf32_length_from_utf16be( + const char16_t *buf, size_t len) const noexcept override { + return set_best()->utf32_length_from_utf16be(buf, len); + } + + simdutf_warn_unused size_t + utf16_length_from_utf8(const char *buf, size_t len) const noexcept override { + return set_best()->utf16_length_from_utf8(buf, len); + } + + simdutf_warn_unused size_t utf8_length_from_utf32( + const char32_t *buf, size_t len) const noexcept override { + return set_best()->utf8_length_from_utf32(buf, len); + } + + simdutf_warn_unused size_t utf16_length_from_utf32( + const char32_t *buf, size_t len) const noexcept override { + return set_best()->utf16_length_from_utf32(buf, len); + } + + simdutf_warn_unused size_t + utf32_length_from_utf8(const char *buf, size_t len) const noexcept override { + return set_best()->utf32_length_from_utf8(buf, len); + } + + simdutf_warn_unused size_t maximal_binary_length_from_base64( + const char *input, size_t length) const noexcept override { + return set_best()->maximal_binary_length_from_base64(input, length); + } + + simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_handling_options = + last_chunk_handling_options::loose) const noexcept override { + return set_best()->base64_to_binary(input, length, output, options, + last_chunk_handling_options); + } + + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_handling_options = + last_chunk_handling_options::loose) const noexcept override { + return set_best()->base64_to_binary_details(input, length, output, options, + last_chunk_handling_options); + } + + simdutf_warn_unused size_t maximal_binary_length_from_base64( + const char16_t *input, size_t length) const noexcept override { + return set_best()->maximal_binary_length_from_base64(input, length); + } + + simdutf_warn_unused result base64_to_binary( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_handling_options = + last_chunk_handling_options::loose) const noexcept override { + return set_best()->base64_to_binary(input, length, output, options, + last_chunk_handling_options); + } + + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_handling_options = + last_chunk_handling_options::loose) const noexcept override { + return set_best()->base64_to_binary_details(input, length, output, options, + last_chunk_handling_options); + } + + simdutf_warn_unused size_t base64_length_from_binary( + size_t length, base64_options options) const noexcept override { + return set_best()->base64_length_from_binary(length, options); + } + + size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) const noexcept override { + return set_best()->binary_to_base64(input, length, output, options); + } + + simdutf_really_inline + detect_best_supported_implementation_on_first_use() noexcept + : implementation("best_supported_detector", + "Detects the best supported implementation and sets it", + 0) {} + +private: + const implementation *set_best() const noexcept; +}; + +static_assert(std::is_trivially_destructible< + detect_best_supported_implementation_on_first_use>::value, + "detect_best_supported_implementation_on_first_use should be " + "trivially destructible"); + +static const std::initializer_list & +get_available_implementation_pointers() { + static const std::initializer_list + available_implementation_pointers{ +#if SIMDUTF_IMPLEMENTATION_ICELAKE + get_icelake_singleton(), +#endif +#if SIMDUTF_IMPLEMENTATION_HASWELL + get_haswell_singleton(), +#endif +#if SIMDUTF_IMPLEMENTATION_WESTMERE + get_westmere_singleton(), +#endif +#if SIMDUTF_IMPLEMENTATION_ARM64 + get_arm64_singleton(), +#endif +#if SIMDUTF_IMPLEMENTATION_PPC64 + get_ppc64_singleton(), +#endif +#if SIMDUTF_IMPLEMENTATION_RVV + get_rvv_singleton(), +#endif +#if SIMDUTF_IMPLEMENTATION_LSX + get_lsx_singleton(), +#endif +#if SIMDUTF_IMPLEMENTATION_LASX + get_lasx_singleton(), +#endif +#if SIMDUTF_IMPLEMENTATION_FALLBACK + get_fallback_singleton(), +#endif + }; // available_implementation_pointers + return available_implementation_pointers; +} + +// So we can return UNSUPPORTED_ARCHITECTURE from the parser when there is no +// support +class unsupported_implementation final : public implementation { +public: + simdutf_warn_unused int detect_encodings(const char *, + size_t) const noexcept override { + return encoding_type::unspecified; + } + + simdutf_warn_unused bool validate_utf8(const char *, + size_t) const noexcept final override { + return false; // Just refuse to validate. Given that we have a fallback + // implementation + // it seems unlikely that unsupported_implementation will ever be used. If + // it is used, then it will flag all strings as invalid. The alternative is + // to return an error_code from which the user has to figure out whether the + // string is valid UTF-8... which seems like a lot of work just to handle + // the very unlikely case that we have an unsupported implementation. And, + // when it does happen (that we have an unsupported implementation), what + // are the chances that the programmer has a fallback? Given that *we* + // provide the fallback, it implies that the programmer would need a + // fallback for our fallback. + } + + simdutf_warn_unused result validate_utf8_with_errors( + const char *, size_t) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused bool + validate_ascii(const char *, size_t) const noexcept final override { + return false; + } + + simdutf_warn_unused result validate_ascii_with_errors( + const char *, size_t) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused bool + validate_utf16le(const char16_t *, size_t) const noexcept final override { + return false; + } + + simdutf_warn_unused bool + validate_utf16be(const char16_t *, size_t) const noexcept final override { + return false; + } + + simdutf_warn_unused result validate_utf16le_with_errors( + const char16_t *, size_t) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused result validate_utf16be_with_errors( + const char16_t *, size_t) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused bool + validate_utf32(const char32_t *, size_t) const noexcept final override { + return false; + } + + simdutf_warn_unused result validate_utf32_with_errors( + const char32_t *, size_t) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused size_t convert_latin1_to_utf8( + const char *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *, size_t, char16_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *, size_t, char16_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *, size_t, char32_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_utf8_to_latin1( + const char *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *, size_t, char *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *, size_t, char16_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *, size_t, char16_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *, size_t, char16_t *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *, size_t, char16_t *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *, size_t, char16_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *, size_t, char16_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *, size_t, char32_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *, size_t, char32_t *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *, size_t, char32_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_utf16le_to_latin1( + const char16_t *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_utf16be_to_latin1( + const char16_t *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *, size_t, char *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *, size_t, char *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( + const char16_t *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( + const char16_t *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_utf16le_to_utf8( + const char16_t *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_utf16be_to_utf8( + const char16_t *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *, size_t, char *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *, size_t, char *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_utf32_to_latin1( + const char32_t *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused result convert_utf32_to_latin1_with_errors( + const char32_t *, size_t, char *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused size_t convert_valid_utf32_to_latin1( + const char32_t *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_utf32_to_utf8( + const char32_t *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *, size_t, char *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + const char32_t *, size_t, char *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_utf32_to_utf16le( + const char32_t *, size_t, char16_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_utf32_to_utf16be( + const char32_t *, size_t, char16_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *, size_t, char16_t *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *, size_t, char16_t *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( + const char32_t *, size_t, char16_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( + const char32_t *, size_t, char16_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_utf16le_to_utf32( + const char16_t *, size_t, char32_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_utf16be_to_utf32( + const char16_t *, size_t, char32_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *, size_t, char32_t *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *, size_t, char32_t *) const noexcept final override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( + const char16_t *, size_t, char32_t *) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( + const char16_t *, size_t, char32_t *) const noexcept final override { + return 0; + } + + void change_endianness_utf16(const char16_t *, size_t, + char16_t *) const noexcept final override {} + + simdutf_warn_unused size_t + count_utf16le(const char16_t *, size_t) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t + count_utf16be(const char16_t *, size_t) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t count_utf8(const char *, + size_t) const noexcept final override { + return 0; + } + + simdutf_warn_unused size_t + latin1_length_from_utf8(const char *, size_t) const noexcept override { + return 0; + } + + simdutf_warn_unused size_t + latin1_length_from_utf16(size_t) const noexcept override { + return 0; + } + + simdutf_warn_unused size_t + latin1_length_from_utf32(size_t) const noexcept override { + return 0; + } + simdutf_warn_unused size_t + utf8_length_from_latin1(const char *, size_t) const noexcept override { + return 0; + } + + simdutf_warn_unused size_t + utf8_length_from_utf16le(const char16_t *, size_t) const noexcept override { + return 0; + } + + simdutf_warn_unused size_t + utf8_length_from_utf16be(const char16_t *, size_t) const noexcept override { + return 0; + } + + simdutf_warn_unused size_t + utf32_length_from_utf16le(const char16_t *, size_t) const noexcept override { + return 0; + } + + simdutf_warn_unused size_t + utf32_length_from_utf16be(const char16_t *, size_t) const noexcept override { + return 0; + } + + simdutf_warn_unused size_t + utf32_length_from_latin1(size_t) const noexcept override { + return 0; + } + + simdutf_warn_unused size_t + utf16_length_from_utf8(const char *, size_t) const noexcept override { + return 0; + } + simdutf_warn_unused size_t + utf16_length_from_latin1(size_t) const noexcept override { + return 0; + } + simdutf_warn_unused size_t + utf8_length_from_utf32(const char32_t *, size_t) const noexcept override { + return 0; + } + + simdutf_warn_unused size_t + utf16_length_from_utf32(const char32_t *, size_t) const noexcept override { + return 0; + } + + simdutf_warn_unused size_t + utf32_length_from_utf8(const char *, size_t) const noexcept override { + return 0; + } + + simdutf_warn_unused size_t maximal_binary_length_from_base64( + const char *, size_t) const noexcept override { + return 0; + } + + simdutf_warn_unused result + base64_to_binary(const char *, size_t, char *, base64_options, + last_chunk_handling_options) const noexcept override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused full_result base64_to_binary_details( + const char *, size_t, char *, base64_options, + last_chunk_handling_options) const noexcept override { + return full_result(error_code::OTHER, 0, 0); + } + + simdutf_warn_unused size_t maximal_binary_length_from_base64( + const char16_t *, size_t) const noexcept override { + return 0; + } + + simdutf_warn_unused result + base64_to_binary(const char16_t *, size_t, char *, base64_options, + last_chunk_handling_options) const noexcept override { + return result(error_code::OTHER, 0); + } + + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *, size_t, char *, base64_options, + last_chunk_handling_options) const noexcept override { + return full_result(error_code::OTHER, 0, 0); + } + + simdutf_warn_unused size_t + base64_length_from_binary(size_t, base64_options) const noexcept override { + return 0; + } + + size_t binary_to_base64(const char *, size_t, char *, + base64_options) const noexcept override { + return 0; + } + + unsupported_implementation() + : implementation("unsupported", + "Unsupported CPU (no detected SIMD instructions)", 0) {} +}; + +const unsupported_implementation *get_unsupported_singleton() { + static const unsupported_implementation unsupported_singleton{}; + return &unsupported_singleton; +} +static_assert(std::is_trivially_destructible::value, + "unsupported_singleton should be trivially destructible"); + +size_t available_implementation_list::size() const noexcept { + return internal::get_available_implementation_pointers().size(); +} +const implementation *const * +available_implementation_list::begin() const noexcept { + return internal::get_available_implementation_pointers().begin(); +} +const implementation *const * +available_implementation_list::end() const noexcept { + return internal::get_available_implementation_pointers().end(); +} +const implementation * +available_implementation_list::detect_best_supported() const noexcept { + // They are prelisted in priority order, so we just go down the list + uint32_t supported_instruction_sets = + internal::detect_supported_architectures(); + for (const implementation *impl : + internal::get_available_implementation_pointers()) { + uint32_t required_instruction_sets = impl->required_instruction_sets(); + if ((supported_instruction_sets & required_instruction_sets) == + required_instruction_sets) { + return impl; + } + } + return get_unsupported_singleton(); // this should never happen? +} + +const implementation * +detect_best_supported_implementation_on_first_use::set_best() const noexcept { + SIMDUTF_PUSH_DISABLE_WARNINGS + SIMDUTF_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: + // manually verified this is safe + char *force_implementation_name = getenv("SIMDUTF_FORCE_IMPLEMENTATION"); + SIMDUTF_POP_DISABLE_WARNINGS + + if (force_implementation_name) { + auto force_implementation = + get_available_implementations()[force_implementation_name]; + if (force_implementation) { + return get_active_implementation() = force_implementation; + } else { + // Note: abort() and stderr usage within the library is forbidden. + return get_active_implementation() = get_unsupported_singleton(); + } + } + return get_active_implementation() = + get_available_implementations().detect_best_supported(); +} + +} // namespace internal + +/** + * The list of available implementations compiled into simdutf. + */ +SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list & +get_available_implementations() { + static const internal::available_implementation_list + available_implementations{}; + return available_implementations; +} + +/** + * The active implementation. + */ +SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr & +get_active_implementation() { +#if SIMDUTF_SINGLE_IMPLEMENTATION + // skip runtime detection + static internal::atomic_ptr active_implementation{ + internal::get_single_implementation()}; + return active_implementation; +#else + static const internal::detect_best_supported_implementation_on_first_use + detect_best_supported_implementation_on_first_use_singleton; + static internal::atomic_ptr active_implementation{ + &detect_best_supported_implementation_on_first_use_singleton}; + return active_implementation; +#endif +} + +#if SIMDUTF_SINGLE_IMPLEMENTATION +const implementation *get_default_implementation() { + return internal::get_single_implementation(); +} +#else +internal::atomic_ptr &get_default_implementation() { + return get_active_implementation(); +} +#endif +#define SIMDUTF_GET_CURRENT_IMPLEMENTION + +simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept { + return get_default_implementation()->validate_utf8(buf, len); +} +simdutf_warn_unused result validate_utf8_with_errors(const char *buf, + size_t len) noexcept { + return get_default_implementation()->validate_utf8_with_errors(buf, len); +} +simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept { + return get_default_implementation()->validate_ascii(buf, len); +} +simdutf_warn_unused result validate_ascii_with_errors(const char *buf, + size_t len) noexcept { + return get_default_implementation()->validate_ascii_with_errors(buf, len); +} +simdutf_warn_unused size_t convert_utf8_to_utf16( + const char *input, size_t length, char16_t *utf16_output) noexcept { +#if SIMDUTF_IS_BIG_ENDIAN + return convert_utf8_to_utf16be(input, length, utf16_output); +#else + return convert_utf8_to_utf16le(input, length, utf16_output); +#endif +} +simdutf_warn_unused size_t convert_latin1_to_utf8(const char *buf, size_t len, + char *utf8_output) noexcept { + return get_default_implementation()->convert_latin1_to_utf8(buf, len, + utf8_output); +} +simdutf_warn_unused size_t convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) noexcept { + return get_default_implementation()->convert_latin1_to_utf16le(buf, len, + utf16_output); +} +simdutf_warn_unused size_t convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) noexcept { + return get_default_implementation()->convert_latin1_to_utf16be(buf, len, + utf16_output); +} +simdutf_warn_unused size_t convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *latin1_output) noexcept { + return get_default_implementation()->convert_latin1_to_utf32(buf, len, + latin1_output); +} +simdutf_warn_unused size_t convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) noexcept { + return get_default_implementation()->convert_utf8_to_latin1(buf, len, + latin1_output); +} +simdutf_warn_unused result convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_output) noexcept { + return get_default_implementation()->convert_utf8_to_latin1_with_errors( + buf, len, latin1_output); +} +simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) noexcept { + return get_default_implementation()->convert_valid_utf8_to_latin1( + buf, len, latin1_output); +} +simdutf_warn_unused size_t convert_utf8_to_utf16le( + const char *input, size_t length, char16_t *utf16_output) noexcept { + return get_default_implementation()->convert_utf8_to_utf16le(input, length, + utf16_output); +} +simdutf_warn_unused size_t convert_utf8_to_utf16be( + const char *input, size_t length, char16_t *utf16_output) noexcept { + return get_default_implementation()->convert_utf8_to_utf16be(input, length, + utf16_output); +} +simdutf_warn_unused result convert_utf8_to_utf16_with_errors( + const char *input, size_t length, char16_t *utf16_output) noexcept { +#if SIMDUTF_IS_BIG_ENDIAN + return convert_utf8_to_utf16be_with_errors(input, length, utf16_output); +#else + return convert_utf8_to_utf16le_with_errors(input, length, utf16_output); +#endif +} +simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( + const char *input, size_t length, char16_t *utf16_output) noexcept { + return get_default_implementation()->convert_utf8_to_utf16le_with_errors( + input, length, utf16_output); +} +simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( + const char *input, size_t length, char16_t *utf16_output) noexcept { + return get_default_implementation()->convert_utf8_to_utf16be_with_errors( + input, length, utf16_output); +} +simdutf_warn_unused size_t convert_utf8_to_utf32( + const char *input, size_t length, char32_t *utf32_output) noexcept { + return get_default_implementation()->convert_utf8_to_utf32(input, length, + utf32_output); +} +simdutf_warn_unused result convert_utf8_to_utf32_with_errors( + const char *input, size_t length, char32_t *utf32_output) noexcept { + return get_default_implementation()->convert_utf8_to_utf32_with_errors( + input, length, utf32_output); +} +simdutf_warn_unused bool validate_utf16(const char16_t *buf, + size_t len) noexcept { +#if SIMDUTF_IS_BIG_ENDIAN + return validate_utf16be(buf, len); +#else + return validate_utf16le(buf, len); +#endif +} +simdutf_warn_unused bool validate_utf16le(const char16_t *buf, + size_t len) noexcept { + return get_default_implementation()->validate_utf16le(buf, len); +} +simdutf_warn_unused bool validate_utf16be(const char16_t *buf, + size_t len) noexcept { + return get_default_implementation()->validate_utf16be(buf, len); +} +simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf, + size_t len) noexcept { +#if SIMDUTF_IS_BIG_ENDIAN + return validate_utf16be_with_errors(buf, len); +#else + return validate_utf16le_with_errors(buf, len); +#endif +} +simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, + size_t len) noexcept { + return get_default_implementation()->validate_utf16le_with_errors(buf, len); +} +simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, + size_t len) noexcept { + return get_default_implementation()->validate_utf16be_with_errors(buf, len); +} +simdutf_warn_unused bool validate_utf32(const char32_t *buf, + size_t len) noexcept { + return get_default_implementation()->validate_utf32(buf, len); +} +simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, + size_t len) noexcept { + return get_default_implementation()->validate_utf32_with_errors(buf, len); +} +simdutf_warn_unused size_t convert_valid_utf8_to_utf16( + const char *input, size_t length, char16_t *utf16_buffer) noexcept { +#if SIMDUTF_IS_BIG_ENDIAN + return convert_valid_utf8_to_utf16be(input, length, utf16_buffer); +#else + return convert_valid_utf8_to_utf16le(input, length, utf16_buffer); +#endif +} +simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const char *input, size_t length, char16_t *utf16_buffer) noexcept { + return get_default_implementation()->convert_valid_utf8_to_utf16le( + input, length, utf16_buffer); +} +simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const char *input, size_t length, char16_t *utf16_buffer) noexcept { + return get_default_implementation()->convert_valid_utf8_to_utf16be( + input, length, utf16_buffer); +} +simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const char *input, size_t length, char32_t *utf32_buffer) noexcept { + return get_default_implementation()->convert_valid_utf8_to_utf32( + input, length, utf32_buffer); +} +simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t *buf, + size_t len, + char *utf8_buffer) noexcept { +#if SIMDUTF_IS_BIG_ENDIAN + return convert_utf16be_to_utf8(buf, len, utf8_buffer); +#else + return convert_utf16le_to_utf8(buf, len, utf8_buffer); +#endif +} +simdutf_warn_unused size_t convert_utf16_to_latin1( + const char16_t *buf, size_t len, char *latin1_buffer) noexcept { +#if SIMDUTF_IS_BIG_ENDIAN + return convert_utf16be_to_latin1(buf, len, latin1_buffer); +#else + return convert_utf16le_to_latin1(buf, len, latin1_buffer); +#endif +} +simdutf_warn_unused size_t convert_latin1_to_utf16( + const char *buf, size_t len, char16_t *utf16_output) noexcept { +#if SIMDUTF_IS_BIG_ENDIAN + return convert_latin1_to_utf16be(buf, len, utf16_output); +#else + return convert_latin1_to_utf16le(buf, len, utf16_output); +#endif +} +simdutf_warn_unused size_t convert_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_buffer) noexcept { + return get_default_implementation()->convert_utf16be_to_latin1(buf, len, + latin1_buffer); +} +simdutf_warn_unused size_t convert_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_buffer) noexcept { + return get_default_implementation()->convert_utf16le_to_latin1(buf, len, + latin1_buffer); +} +simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_buffer) noexcept { + return get_default_implementation()->convert_valid_utf16be_to_latin1( + buf, len, latin1_buffer); +} +simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_buffer) noexcept { + return get_default_implementation()->convert_valid_utf16le_to_latin1( + buf, len, latin1_buffer); +} +simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_buffer) noexcept { + return get_default_implementation()->convert_utf16le_to_latin1_with_errors( + buf, len, latin1_buffer); +} +simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_buffer) noexcept { + return get_default_implementation()->convert_utf16be_to_latin1_with_errors( + buf, len, latin1_buffer); +} +simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *buf, + size_t len, + char *utf8_buffer) noexcept { + return get_default_implementation()->convert_utf16le_to_utf8(buf, len, + utf8_buffer); +} +simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *buf, + size_t len, + char *utf8_buffer) noexcept { + return get_default_implementation()->convert_utf16be_to_utf8(buf, len, + utf8_buffer); +} +simdutf_warn_unused result convert_utf16_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) noexcept { +#if SIMDUTF_IS_BIG_ENDIAN + return convert_utf16be_to_utf8_with_errors(buf, len, utf8_buffer); +#else + return convert_utf16le_to_utf8_with_errors(buf, len, utf8_buffer); +#endif +} +simdutf_warn_unused result convert_utf16_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_buffer) noexcept { +#if SIMDUTF_IS_BIG_ENDIAN + return convert_utf16be_to_latin1_with_errors(buf, len, latin1_buffer); +#else + return convert_utf16le_to_latin1_with_errors(buf, len, latin1_buffer); +#endif +} +simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) noexcept { + return get_default_implementation()->convert_utf16le_to_utf8_with_errors( + buf, len, utf8_buffer); +} +simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_buffer) noexcept { + return get_default_implementation()->convert_utf16be_to_utf8_with_errors( + buf, len, utf8_buffer); +} +simdutf_warn_unused size_t convert_valid_utf16_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) noexcept { +#if SIMDUTF_IS_BIG_ENDIAN + return convert_valid_utf16be_to_utf8(buf, len, utf8_buffer); +#else + return convert_valid_utf16le_to_utf8(buf, len, utf8_buffer); +#endif +} +simdutf_warn_unused size_t convert_valid_utf16_to_latin1( + const char16_t *buf, size_t len, char *latin1_buffer) noexcept { +#if SIMDUTF_IS_BIG_ENDIAN + return convert_valid_utf16be_to_latin1(buf, len, latin1_buffer); +#else + return convert_valid_utf16le_to_latin1(buf, len, latin1_buffer); +#endif +} +simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) noexcept { + return get_default_implementation()->convert_valid_utf16le_to_utf8( + buf, len, utf8_buffer); +} +simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_buffer) noexcept { + return get_default_implementation()->convert_valid_utf16be_to_utf8( + buf, len, utf8_buffer); +} +simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *buf, + size_t len, + char *utf8_buffer) noexcept { + return get_default_implementation()->convert_utf32_to_utf8(buf, len, + utf8_buffer); +} +simdutf_warn_unused result convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_buffer) noexcept { + return get_default_implementation()->convert_utf32_to_utf8_with_errors( + buf, len, utf8_buffer); +} +simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_buffer) noexcept { + return get_default_implementation()->convert_valid_utf32_to_utf8(buf, len, + utf8_buffer); +} +simdutf_warn_unused size_t convert_utf32_to_utf16( + const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { +#if SIMDUTF_IS_BIG_ENDIAN + return convert_utf32_to_utf16be(buf, len, utf16_buffer); +#else + return convert_utf32_to_utf16le(buf, len, utf16_buffer); +#endif +} +simdutf_warn_unused size_t convert_utf32_to_latin1( + const char32_t *input, size_t length, char *latin1_output) noexcept { + return get_default_implementation()->convert_utf32_to_latin1(input, length, + latin1_output); +} +simdutf_warn_unused size_t convert_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { + return get_default_implementation()->convert_utf32_to_utf16le(buf, len, + utf16_buffer); +} +simdutf_warn_unused size_t convert_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { + return get_default_implementation()->convert_utf32_to_utf16be(buf, len, + utf16_buffer); +} +simdutf_warn_unused result convert_utf32_to_utf16_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { +#if SIMDUTF_IS_BIG_ENDIAN + return convert_utf32_to_utf16be_with_errors(buf, len, utf16_buffer); +#else + return convert_utf32_to_utf16le_with_errors(buf, len, utf16_buffer); +#endif +} +simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { + return get_default_implementation()->convert_utf32_to_utf16le_with_errors( + buf, len, utf16_buffer); +} +simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { + return get_default_implementation()->convert_utf32_to_utf16be_with_errors( + buf, len, utf16_buffer); +} +simdutf_warn_unused size_t convert_valid_utf32_to_utf16( + const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { +#if SIMDUTF_IS_BIG_ENDIAN + return convert_valid_utf32_to_utf16be(buf, len, utf16_buffer); +#else + return convert_valid_utf32_to_utf16le(buf, len, utf16_buffer); +#endif +} +simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { + return get_default_implementation()->convert_valid_utf32_to_utf16le( + buf, len, utf16_buffer); +} +simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { + return get_default_implementation()->convert_valid_utf32_to_utf16be( + buf, len, utf16_buffer); +} +simdutf_warn_unused size_t convert_utf16_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { +#if SIMDUTF_IS_BIG_ENDIAN + return convert_utf16be_to_utf32(buf, len, utf32_buffer); +#else + return convert_utf16le_to_utf32(buf, len, utf32_buffer); +#endif +} +simdutf_warn_unused size_t convert_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + return get_default_implementation()->convert_utf16le_to_utf32(buf, len, + utf32_buffer); +} +simdutf_warn_unused size_t convert_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + return get_default_implementation()->convert_utf16be_to_utf32(buf, len, + utf32_buffer); +} +simdutf_warn_unused result convert_utf16_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { +#if SIMDUTF_IS_BIG_ENDIAN + return convert_utf16be_to_utf32_with_errors(buf, len, utf32_buffer); +#else + return convert_utf16le_to_utf32_with_errors(buf, len, utf32_buffer); +#endif +} +simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + return get_default_implementation()->convert_utf16le_to_utf32_with_errors( + buf, len, utf32_buffer); +} +simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + return get_default_implementation()->convert_utf16be_to_utf32_with_errors( + buf, len, utf32_buffer); +} +simdutf_warn_unused size_t convert_valid_utf16_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { +#if SIMDUTF_IS_BIG_ENDIAN + return convert_valid_utf16be_to_utf32(buf, len, utf32_buffer); +#else + return convert_valid_utf16le_to_utf32(buf, len, utf32_buffer); +#endif +} +simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + return get_default_implementation()->convert_valid_utf16le_to_utf32( + buf, len, utf32_buffer); +} +simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { + return get_default_implementation()->convert_valid_utf16be_to_utf32( + buf, len, utf32_buffer); +} +void change_endianness_utf16(const char16_t *input, size_t length, + char16_t *output) noexcept { + get_default_implementation()->change_endianness_utf16(input, length, output); +} +simdutf_warn_unused size_t count_utf16(const char16_t *input, + size_t length) noexcept { +#if SIMDUTF_IS_BIG_ENDIAN + return count_utf16be(input, length); +#else + return count_utf16le(input, length); +#endif +} +simdutf_warn_unused size_t count_utf16le(const char16_t *input, + size_t length) noexcept { + return get_default_implementation()->count_utf16le(input, length); +} +simdutf_warn_unused size_t count_utf16be(const char16_t *input, + size_t length) noexcept { + return get_default_implementation()->count_utf16be(input, length); +} +simdutf_warn_unused size_t count_utf8(const char *input, + size_t length) noexcept { + return get_default_implementation()->count_utf8(input, length); +} +simdutf_warn_unused size_t latin1_length_from_utf8(const char *buf, + size_t len) noexcept { + return get_default_implementation()->latin1_length_from_utf8(buf, len); +} +simdutf_warn_unused size_t latin1_length_from_utf16(size_t len) noexcept { + return get_default_implementation()->latin1_length_from_utf16(len); +} +simdutf_warn_unused size_t latin1_length_from_utf32(size_t len) noexcept { + return get_default_implementation()->latin1_length_from_utf32(len); +} +simdutf_warn_unused size_t utf8_length_from_latin1(const char *buf, + size_t len) noexcept { + return get_default_implementation()->utf8_length_from_latin1(buf, len); +} +simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t *input, + size_t length) noexcept { +#if SIMDUTF_IS_BIG_ENDIAN + return utf8_length_from_utf16be(input, length); +#else + return utf8_length_from_utf16le(input, length); +#endif +} +simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input, + size_t length) noexcept { + return get_default_implementation()->utf8_length_from_utf16le(input, length); +} +simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input, + size_t length) noexcept { + return get_default_implementation()->utf8_length_from_utf16be(input, length); +} +simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t *input, + size_t length) noexcept { +#if SIMDUTF_IS_BIG_ENDIAN + return utf32_length_from_utf16be(input, length); +#else + return utf32_length_from_utf16le(input, length); +#endif +} +simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input, + size_t length) noexcept { + return get_default_implementation()->utf32_length_from_utf16le(input, length); +} +simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input, + size_t length) noexcept { + return get_default_implementation()->utf32_length_from_utf16be(input, length); +} +simdutf_warn_unused size_t utf16_length_from_utf8(const char *input, + size_t length) noexcept { + return get_default_implementation()->utf16_length_from_utf8(input, length); +} +simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) noexcept { + return get_default_implementation()->utf16_length_from_latin1(length); +} +simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input, + size_t length) noexcept { + return get_default_implementation()->utf8_length_from_utf32(input, length); +} +simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input, + size_t length) noexcept { + return get_default_implementation()->utf16_length_from_utf32(input, length); +} +simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, + size_t length) noexcept { + return get_default_implementation()->utf32_length_from_utf8(input, length); +} + +simdutf_warn_unused size_t +maximal_binary_length_from_base64(const char *input, size_t length) noexcept { + return get_default_implementation()->maximal_binary_length_from_base64( + input, length); +} + +simdutf_warn_unused result base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_handling_options) noexcept { + return get_default_implementation()->base64_to_binary( + input, length, output, options, last_chunk_handling_options); +} + +simdutf_warn_unused size_t maximal_binary_length_from_base64( + const char16_t *input, size_t length) noexcept { + return get_default_implementation()->maximal_binary_length_from_base64( + input, length); +} + +simdutf_warn_unused result base64_to_binary( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_handling_options) noexcept { + return get_default_implementation()->base64_to_binary( + input, length, output, options, last_chunk_handling_options); +} + +template +simdutf_warn_unused result base64_to_binary_safe_impl( + const chartype *input, size_t length, char *output, size_t &outlen, + base64_options options, + last_chunk_handling_options last_chunk_handling_options) noexcept { + static_assert(std::is_same::value || + std::is_same::value, + "Only char and char16_t are supported."); + // The implementation could be nicer, but we expect that most times, the user + // will provide us with a buffer that is large enough. + size_t max_length = maximal_binary_length_from_base64(input, length); + if (outlen >= max_length) { + // fast path + full_result r = get_default_implementation()->base64_to_binary_details( + input, length, output, options, last_chunk_handling_options); + if (r.error != error_code::INVALID_BASE64_CHARACTER && + r.error != error_code::BASE64_EXTRA_BITS) { + outlen = r.output_count; + if (last_chunk_handling_options == stop_before_partial) { + if ((r.output_count % 3) != 0) { + bool empty_trail = true; + for (size_t i = r.input_count; i < length; i++) { + if (!scalar::base64::is_ascii_white_space_or_padding(input[i])) { + empty_trail = false; + break; + } + } + if (empty_trail) { + r.input_count = length; + } + } + return {r.error, r.input_count}; + } + return {r.error, length}; + } + return r; + } + // The output buffer is maybe too small. We will decode a truncated version of + // the input. + size_t outlen3 = outlen / 3 * 3; // round down to multiple of 3 + size_t safe_input = base64_length_from_binary(outlen3, options); + full_result r = get_default_implementation()->base64_to_binary_details( + input, safe_input, output, options, loose); + if (r.error == error_code::INVALID_BASE64_CHARACTER) { + return r; + } + size_t offset = + (r.error == error_code::BASE64_INPUT_REMAINDER) + ? 1 + : ((r.output_count % 3) == 0 ? 0 : (r.output_count % 3) + 1); + size_t output_index = r.output_count - (r.output_count % 3); + size_t input_index = safe_input; + // offset is a value that is no larger than 3. We backtrack + // by up to offset characters + an undetermined number of + // white space characters. It is expected that the next loop + // runs at most 3 times + the number of white space characters + // in between them, so we are not worried about performance. + while (offset > 0 && input_index > 0) { + chartype c = input[--input_index]; + if (scalar::base64::is_ascii_white_space(c)) { + // skipping + } else { + offset--; + } + } + size_t remaining_out = outlen - output_index; + const chartype *tail_input = input + input_index; + size_t tail_length = length - input_index; + while (tail_length > 0 && + scalar::base64::is_ascii_white_space(tail_input[tail_length - 1])) { + tail_length--; + } + size_t padding_characts = 0; + if (tail_length > 0 && tail_input[tail_length - 1] == '=') { + tail_length--; + padding_characts++; + while (tail_length > 0 && + scalar::base64::is_ascii_white_space(tail_input[tail_length - 1])) { + tail_length--; + } + if (tail_length > 0 && tail_input[tail_length - 1] == '=') { + tail_length--; + padding_characts++; + } + } + // this will advance tail_input and tail_length + result rr = scalar::base64::base64_tail_decode_safe( + output + output_index, remaining_out, tail_input, tail_length, + padding_characts, options, last_chunk_handling_options); + outlen = output_index + remaining_out; + if (last_chunk_handling_options != stop_before_partial && + rr.error == error_code::SUCCESS && padding_characts > 0) { + // additional checks + if ((outlen % 3 == 0) || ((outlen % 3) + 1 + padding_characts != 4)) { + rr.error = error_code::INVALID_BASE64_CHARACTER; + } + } + if (rr.error == error_code::SUCCESS && + last_chunk_handling_options == stop_before_partial) { + if (tail_input > input + input_index) { + rr.count = tail_input - input; + } else if (r.input_count > 0) { + rr.count = r.input_count + rr.count; + } + return rr; + } + rr.count += input_index; + return rr; +} + +simdutf_warn_unused size_t convert_latin1_to_utf8_safe( + const char *buf, size_t len, char *utf8_output, size_t utf8_len) noexcept { + const auto start{utf8_output}; + + while (true) { + // convert_latin1_to_utf8 will never write more than input length * 2 + auto read_len = std::min(len, utf8_len >> 1); + if (read_len <= 16) { + break; + } + + const auto write_len = + simdutf::convert_latin1_to_utf8(buf, read_len, utf8_output); + + utf8_output += write_len; + utf8_len -= write_len; + buf += read_len; + len -= read_len; + } + + utf8_output += + scalar::latin1_to_utf8::convert_safe(buf, len, utf8_output, utf8_len); + + return utf8_output - start; +} + +simdutf_warn_unused result base64_to_binary_safe( + const char *input, size_t length, char *output, size_t &outlen, + base64_options options, + last_chunk_handling_options last_chunk_handling_options) noexcept { + return base64_to_binary_safe_impl(input, length, output, outlen, + options, last_chunk_handling_options); +} +simdutf_warn_unused result base64_to_binary_safe( + const char16_t *input, size_t length, char *output, size_t &outlen, + base64_options options, + last_chunk_handling_options last_chunk_handling_options) noexcept { + return base64_to_binary_safe_impl( + input, length, output, outlen, options, last_chunk_handling_options); +} + +simdutf_warn_unused size_t +base64_length_from_binary(size_t length, base64_options options) noexcept { + return get_default_implementation()->base64_length_from_binary(length, + options); +} + +size_t binary_to_base64(const char *input, size_t length, char *output, + base64_options options) noexcept { + return get_default_implementation()->binary_to_base64(input, length, output, + options); +} + +simdutf_warn_unused simdutf::encoding_type +autodetect_encoding(const char *buf, size_t length) noexcept { + return get_default_implementation()->autodetect_encoding(buf, length); +} +simdutf_warn_unused int detect_encodings(const char *buf, + size_t length) noexcept { + return get_default_implementation()->detect_encodings(buf, length); +} +const implementation *builtin_implementation() { + static const implementation *builtin_impl = + get_available_implementations()[SIMDUTF_STRINGIFY( + SIMDUTF_BUILTIN_IMPLEMENTATION)]; + return builtin_impl; +} + +simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length) { + return scalar::utf8::trim_partial_utf8(input, length); +} + +simdutf_warn_unused size_t trim_partial_utf16be(const char16_t *input, + size_t length) { + return scalar::utf16::trim_partial_utf16(input, length); +} + +simdutf_warn_unused size_t trim_partial_utf16le(const char16_t *input, + size_t length) { + return scalar::utf16::trim_partial_utf16(input, length); +} + +simdutf_warn_unused size_t trim_partial_utf16(const char16_t *input, + size_t length) { +#if SIMDUTF_IS_BIG_ENDIAN + return trim_partial_utf16be(input, length); +#else + return trim_partial_utf16le(input, length); +#endif +} + +} // namespace simdutf +/* end file src/implementation.cpp */ +/* begin file src/encoding_types.cpp */ + +namespace simdutf { +bool match_system(endianness e) { +#if SIMDUTF_IS_BIG_ENDIAN + return e == endianness::BIG; +#else + return e == endianness::LITTLE; +#endif +} + +std::string to_string(encoding_type bom) { + switch (bom) { + case UTF16_LE: + return "UTF16 little-endian"; + case UTF16_BE: + return "UTF16 big-endian"; + case UTF32_LE: + return "UTF32 little-endian"; + case UTF32_BE: + return "UTF32 big-endian"; + case UTF8: + return "UTF8"; + case unspecified: + return "unknown"; + default: + return "error"; + } +} + +namespace BOM { +// Note that BOM for UTF8 is discouraged. +encoding_type check_bom(const uint8_t *byte, size_t length) { + if (length >= 2 && byte[0] == 0xff and byte[1] == 0xfe) { + if (length >= 4 && byte[2] == 0x00 and byte[3] == 0x0) { + return encoding_type::UTF32_LE; + } else { + return encoding_type::UTF16_LE; + } + } else if (length >= 2 && byte[0] == 0xfe and byte[1] == 0xff) { + return encoding_type::UTF16_BE; + } else if (length >= 4 && byte[0] == 0x00 and byte[1] == 0x00 and + byte[2] == 0xfe and byte[3] == 0xff) { + return encoding_type::UTF32_BE; + } else if (length >= 4 && byte[0] == 0xef and byte[1] == 0xbb and + byte[2] == 0xbf) { + return encoding_type::UTF8; + } + return encoding_type::unspecified; +} + +encoding_type check_bom(const char *byte, size_t length) { + return check_bom(reinterpret_cast(byte), length); +} + +size_t bom_byte_size(encoding_type bom) { + switch (bom) { + case UTF16_LE: + return 2; + case UTF16_BE: + return 2; + case UTF32_LE: + return 4; + case UTF32_BE: + return 4; + case UTF8: + return 3; + case unspecified: + return 0; + default: + return 0; + } +} + +} // namespace BOM +} // namespace simdutf +/* end file src/encoding_types.cpp */ +/* begin file src/error.cpp */ +namespace simdutf { +// deliberately empty +} +/* end file src/error.cpp */ +// The large tables should be included once and they +// should not depend on a kernel. +/* begin file src/tables/utf8_to_utf16_tables.h */ +#ifndef SIMDUTF_UTF8_TO_UTF16_TABLES_H +#define SIMDUTF_UTF8_TO_UTF16_TABLES_H +#include + +namespace simdutf { +namespace { +namespace tables { +namespace utf8_to_utf16 { +/** + * utf8bigindex uses about 8 kB + * shufutf8 uses about 3344 B + * + * So we use a bit over 11 kB. It would be + * easy to save about 4 kB by only + * storing the index in utf8bigindex, and + * deriving the consumed bytes otherwise. + * However, this may come at a significant (10% to 20%) + * performance penalty. + */ + +const uint8_t shufutf8[209][16] = { + {0, 255, 1, 255, 2, 255, 3, 255, 4, 255, 5, 255, 0, 0, 0, 0}, + {0, 255, 1, 255, 2, 255, 3, 255, 4, 255, 6, 5, 0, 0, 0, 0}, + {0, 255, 1, 255, 2, 255, 3, 255, 5, 4, 6, 255, 0, 0, 0, 0}, + {0, 255, 1, 255, 2, 255, 3, 255, 5, 4, 7, 6, 0, 0, 0, 0}, + {0, 255, 1, 255, 2, 255, 4, 3, 5, 255, 6, 255, 0, 0, 0, 0}, + {0, 255, 1, 255, 2, 255, 4, 3, 5, 255, 7, 6, 0, 0, 0, 0}, + {0, 255, 1, 255, 2, 255, 4, 3, 6, 5, 7, 255, 0, 0, 0, 0}, + {0, 255, 1, 255, 2, 255, 4, 3, 6, 5, 8, 7, 0, 0, 0, 0}, + {0, 255, 1, 255, 3, 2, 4, 255, 5, 255, 6, 255, 0, 0, 0, 0}, + {0, 255, 1, 255, 3, 2, 4, 255, 5, 255, 7, 6, 0, 0, 0, 0}, + {0, 255, 1, 255, 3, 2, 4, 255, 6, 5, 7, 255, 0, 0, 0, 0}, + {0, 255, 1, 255, 3, 2, 4, 255, 6, 5, 8, 7, 0, 0, 0, 0}, + {0, 255, 1, 255, 3, 2, 5, 4, 6, 255, 7, 255, 0, 0, 0, 0}, + {0, 255, 1, 255, 3, 2, 5, 4, 6, 255, 8, 7, 0, 0, 0, 0}, + {0, 255, 1, 255, 3, 2, 5, 4, 7, 6, 8, 255, 0, 0, 0, 0}, + {0, 255, 1, 255, 3, 2, 5, 4, 7, 6, 9, 8, 0, 0, 0, 0}, + {0, 255, 2, 1, 3, 255, 4, 255, 5, 255, 6, 255, 0, 0, 0, 0}, + {0, 255, 2, 1, 3, 255, 4, 255, 5, 255, 7, 6, 0, 0, 0, 0}, + {0, 255, 2, 1, 3, 255, 4, 255, 6, 5, 7, 255, 0, 0, 0, 0}, + {0, 255, 2, 1, 3, 255, 4, 255, 6, 5, 8, 7, 0, 0, 0, 0}, + {0, 255, 2, 1, 3, 255, 5, 4, 6, 255, 7, 255, 0, 0, 0, 0}, + {0, 255, 2, 1, 3, 255, 5, 4, 6, 255, 8, 7, 0, 0, 0, 0}, + {0, 255, 2, 1, 3, 255, 5, 4, 7, 6, 8, 255, 0, 0, 0, 0}, + {0, 255, 2, 1, 3, 255, 5, 4, 7, 6, 9, 8, 0, 0, 0, 0}, + {0, 255, 2, 1, 4, 3, 5, 255, 6, 255, 7, 255, 0, 0, 0, 0}, + {0, 255, 2, 1, 4, 3, 5, 255, 6, 255, 8, 7, 0, 0, 0, 0}, + {0, 255, 2, 1, 4, 3, 5, 255, 7, 6, 8, 255, 0, 0, 0, 0}, + {0, 255, 2, 1, 4, 3, 5, 255, 7, 6, 9, 8, 0, 0, 0, 0}, + {0, 255, 2, 1, 4, 3, 6, 5, 7, 255, 8, 255, 0, 0, 0, 0}, + {0, 255, 2, 1, 4, 3, 6, 5, 7, 255, 9, 8, 0, 0, 0, 0}, + {0, 255, 2, 1, 4, 3, 6, 5, 8, 7, 9, 255, 0, 0, 0, 0}, + {0, 255, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 0, 0, 0, 0}, + {1, 0, 2, 255, 3, 255, 4, 255, 5, 255, 6, 255, 0, 0, 0, 0}, + {1, 0, 2, 255, 3, 255, 4, 255, 5, 255, 7, 6, 0, 0, 0, 0}, + {1, 0, 2, 255, 3, 255, 4, 255, 6, 5, 7, 255, 0, 0, 0, 0}, + {1, 0, 2, 255, 3, 255, 4, 255, 6, 5, 8, 7, 0, 0, 0, 0}, + {1, 0, 2, 255, 3, 255, 5, 4, 6, 255, 7, 255, 0, 0, 0, 0}, + {1, 0, 2, 255, 3, 255, 5, 4, 6, 255, 8, 7, 0, 0, 0, 0}, + {1, 0, 2, 255, 3, 255, 5, 4, 7, 6, 8, 255, 0, 0, 0, 0}, + {1, 0, 2, 255, 3, 255, 5, 4, 7, 6, 9, 8, 0, 0, 0, 0}, + {1, 0, 2, 255, 4, 3, 5, 255, 6, 255, 7, 255, 0, 0, 0, 0}, + {1, 0, 2, 255, 4, 3, 5, 255, 6, 255, 8, 7, 0, 0, 0, 0}, + {1, 0, 2, 255, 4, 3, 5, 255, 7, 6, 8, 255, 0, 0, 0, 0}, + {1, 0, 2, 255, 4, 3, 5, 255, 7, 6, 9, 8, 0, 0, 0, 0}, + {1, 0, 2, 255, 4, 3, 6, 5, 7, 255, 8, 255, 0, 0, 0, 0}, + {1, 0, 2, 255, 4, 3, 6, 5, 7, 255, 9, 8, 0, 0, 0, 0}, + {1, 0, 2, 255, 4, 3, 6, 5, 8, 7, 9, 255, 0, 0, 0, 0}, + {1, 0, 2, 255, 4, 3, 6, 5, 8, 7, 10, 9, 0, 0, 0, 0}, + {1, 0, 3, 2, 4, 255, 5, 255, 6, 255, 7, 255, 0, 0, 0, 0}, + {1, 0, 3, 2, 4, 255, 5, 255, 6, 255, 8, 7, 0, 0, 0, 0}, + {1, 0, 3, 2, 4, 255, 5, 255, 7, 6, 8, 255, 0, 0, 0, 0}, + {1, 0, 3, 2, 4, 255, 5, 255, 7, 6, 9, 8, 0, 0, 0, 0}, + {1, 0, 3, 2, 4, 255, 6, 5, 7, 255, 8, 255, 0, 0, 0, 0}, + {1, 0, 3, 2, 4, 255, 6, 5, 7, 255, 9, 8, 0, 0, 0, 0}, + {1, 0, 3, 2, 4, 255, 6, 5, 8, 7, 9, 255, 0, 0, 0, 0}, + {1, 0, 3, 2, 4, 255, 6, 5, 8, 7, 10, 9, 0, 0, 0, 0}, + {1, 0, 3, 2, 5, 4, 6, 255, 7, 255, 8, 255, 0, 0, 0, 0}, + {1, 0, 3, 2, 5, 4, 6, 255, 7, 255, 9, 8, 0, 0, 0, 0}, + {1, 0, 3, 2, 5, 4, 6, 255, 8, 7, 9, 255, 0, 0, 0, 0}, + {1, 0, 3, 2, 5, 4, 6, 255, 8, 7, 10, 9, 0, 0, 0, 0}, + {1, 0, 3, 2, 5, 4, 7, 6, 8, 255, 9, 255, 0, 0, 0, 0}, + {1, 0, 3, 2, 5, 4, 7, 6, 8, 255, 10, 9, 0, 0, 0, 0}, + {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 255, 0, 0, 0, 0}, + {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 0, 0, 0, 0}, + {0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255}, + {0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 4, 3, 255, 255}, + {0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 5, 4, 3, 255}, + {0, 255, 255, 255, 1, 255, 255, 255, 3, 2, 255, 255, 4, 255, 255, 255}, + {0, 255, 255, 255, 1, 255, 255, 255, 3, 2, 255, 255, 5, 4, 255, 255}, + {0, 255, 255, 255, 1, 255, 255, 255, 3, 2, 255, 255, 6, 5, 4, 255}, + {0, 255, 255, 255, 1, 255, 255, 255, 4, 3, 2, 255, 5, 255, 255, 255}, + {0, 255, 255, 255, 1, 255, 255, 255, 4, 3, 2, 255, 6, 5, 255, 255}, + {0, 255, 255, 255, 1, 255, 255, 255, 4, 3, 2, 255, 7, 6, 5, 255}, + {0, 255, 255, 255, 2, 1, 255, 255, 3, 255, 255, 255, 4, 255, 255, 255}, + {0, 255, 255, 255, 2, 1, 255, 255, 3, 255, 255, 255, 5, 4, 255, 255}, + {0, 255, 255, 255, 2, 1, 255, 255, 3, 255, 255, 255, 6, 5, 4, 255}, + {0, 255, 255, 255, 2, 1, 255, 255, 4, 3, 255, 255, 5, 255, 255, 255}, + {0, 255, 255, 255, 2, 1, 255, 255, 4, 3, 255, 255, 6, 5, 255, 255}, + {0, 255, 255, 255, 2, 1, 255, 255, 4, 3, 255, 255, 7, 6, 5, 255}, + {0, 255, 255, 255, 2, 1, 255, 255, 5, 4, 3, 255, 6, 255, 255, 255}, + {0, 255, 255, 255, 2, 1, 255, 255, 5, 4, 3, 255, 7, 6, 255, 255}, + {0, 255, 255, 255, 2, 1, 255, 255, 5, 4, 3, 255, 8, 7, 6, 255}, + {0, 255, 255, 255, 3, 2, 1, 255, 4, 255, 255, 255, 5, 255, 255, 255}, + {0, 255, 255, 255, 3, 2, 1, 255, 4, 255, 255, 255, 6, 5, 255, 255}, + {0, 255, 255, 255, 3, 2, 1, 255, 4, 255, 255, 255, 7, 6, 5, 255}, + {0, 255, 255, 255, 3, 2, 1, 255, 5, 4, 255, 255, 6, 255, 255, 255}, + {0, 255, 255, 255, 3, 2, 1, 255, 5, 4, 255, 255, 7, 6, 255, 255}, + {0, 255, 255, 255, 3, 2, 1, 255, 5, 4, 255, 255, 8, 7, 6, 255}, + {0, 255, 255, 255, 3, 2, 1, 255, 6, 5, 4, 255, 7, 255, 255, 255}, + {0, 255, 255, 255, 3, 2, 1, 255, 6, 5, 4, 255, 8, 7, 255, 255}, + {0, 255, 255, 255, 3, 2, 1, 255, 6, 5, 4, 255, 9, 8, 7, 255}, + {1, 0, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255, 4, 255, 255, 255}, + {1, 0, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255, 5, 4, 255, 255}, + {1, 0, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255, 6, 5, 4, 255}, + {1, 0, 255, 255, 2, 255, 255, 255, 4, 3, 255, 255, 5, 255, 255, 255}, + {1, 0, 255, 255, 2, 255, 255, 255, 4, 3, 255, 255, 6, 5, 255, 255}, + {1, 0, 255, 255, 2, 255, 255, 255, 4, 3, 255, 255, 7, 6, 5, 255}, + {1, 0, 255, 255, 2, 255, 255, 255, 5, 4, 3, 255, 6, 255, 255, 255}, + {1, 0, 255, 255, 2, 255, 255, 255, 5, 4, 3, 255, 7, 6, 255, 255}, + {1, 0, 255, 255, 2, 255, 255, 255, 5, 4, 3, 255, 8, 7, 6, 255}, + {1, 0, 255, 255, 3, 2, 255, 255, 4, 255, 255, 255, 5, 255, 255, 255}, + {1, 0, 255, 255, 3, 2, 255, 255, 4, 255, 255, 255, 6, 5, 255, 255}, + {1, 0, 255, 255, 3, 2, 255, 255, 4, 255, 255, 255, 7, 6, 5, 255}, + {1, 0, 255, 255, 3, 2, 255, 255, 5, 4, 255, 255, 6, 255, 255, 255}, + {1, 0, 255, 255, 3, 2, 255, 255, 5, 4, 255, 255, 7, 6, 255, 255}, + {1, 0, 255, 255, 3, 2, 255, 255, 5, 4, 255, 255, 8, 7, 6, 255}, + {1, 0, 255, 255, 3, 2, 255, 255, 6, 5, 4, 255, 7, 255, 255, 255}, + {1, 0, 255, 255, 3, 2, 255, 255, 6, 5, 4, 255, 8, 7, 255, 255}, + {1, 0, 255, 255, 3, 2, 255, 255, 6, 5, 4, 255, 9, 8, 7, 255}, + {1, 0, 255, 255, 4, 3, 2, 255, 5, 255, 255, 255, 6, 255, 255, 255}, + {1, 0, 255, 255, 4, 3, 2, 255, 5, 255, 255, 255, 7, 6, 255, 255}, + {1, 0, 255, 255, 4, 3, 2, 255, 5, 255, 255, 255, 8, 7, 6, 255}, + {1, 0, 255, 255, 4, 3, 2, 255, 6, 5, 255, 255, 7, 255, 255, 255}, + {1, 0, 255, 255, 4, 3, 2, 255, 6, 5, 255, 255, 8, 7, 255, 255}, + {1, 0, 255, 255, 4, 3, 2, 255, 6, 5, 255, 255, 9, 8, 7, 255}, + {1, 0, 255, 255, 4, 3, 2, 255, 7, 6, 5, 255, 8, 255, 255, 255}, + {1, 0, 255, 255, 4, 3, 2, 255, 7, 6, 5, 255, 9, 8, 255, 255}, + {1, 0, 255, 255, 4, 3, 2, 255, 7, 6, 5, 255, 10, 9, 8, 255}, + {2, 1, 0, 255, 3, 255, 255, 255, 4, 255, 255, 255, 5, 255, 255, 255}, + {2, 1, 0, 255, 3, 255, 255, 255, 4, 255, 255, 255, 6, 5, 255, 255}, + {2, 1, 0, 255, 3, 255, 255, 255, 4, 255, 255, 255, 7, 6, 5, 255}, + {2, 1, 0, 255, 3, 255, 255, 255, 5, 4, 255, 255, 6, 255, 255, 255}, + {2, 1, 0, 255, 3, 255, 255, 255, 5, 4, 255, 255, 7, 6, 255, 255}, + {2, 1, 0, 255, 3, 255, 255, 255, 5, 4, 255, 255, 8, 7, 6, 255}, + {2, 1, 0, 255, 3, 255, 255, 255, 6, 5, 4, 255, 7, 255, 255, 255}, + {2, 1, 0, 255, 3, 255, 255, 255, 6, 5, 4, 255, 8, 7, 255, 255}, + {2, 1, 0, 255, 3, 255, 255, 255, 6, 5, 4, 255, 9, 8, 7, 255}, + {2, 1, 0, 255, 4, 3, 255, 255, 5, 255, 255, 255, 6, 255, 255, 255}, + {2, 1, 0, 255, 4, 3, 255, 255, 5, 255, 255, 255, 7, 6, 255, 255}, + {2, 1, 0, 255, 4, 3, 255, 255, 5, 255, 255, 255, 8, 7, 6, 255}, + {2, 1, 0, 255, 4, 3, 255, 255, 6, 5, 255, 255, 7, 255, 255, 255}, + {2, 1, 0, 255, 4, 3, 255, 255, 6, 5, 255, 255, 8, 7, 255, 255}, + {2, 1, 0, 255, 4, 3, 255, 255, 6, 5, 255, 255, 9, 8, 7, 255}, + {2, 1, 0, 255, 4, 3, 255, 255, 7, 6, 5, 255, 8, 255, 255, 255}, + {2, 1, 0, 255, 4, 3, 255, 255, 7, 6, 5, 255, 9, 8, 255, 255}, + {2, 1, 0, 255, 4, 3, 255, 255, 7, 6, 5, 255, 10, 9, 8, 255}, + {2, 1, 0, 255, 5, 4, 3, 255, 6, 255, 255, 255, 7, 255, 255, 255}, + {2, 1, 0, 255, 5, 4, 3, 255, 6, 255, 255, 255, 8, 7, 255, 255}, + {2, 1, 0, 255, 5, 4, 3, 255, 6, 255, 255, 255, 9, 8, 7, 255}, + {2, 1, 0, 255, 5, 4, 3, 255, 7, 6, 255, 255, 8, 255, 255, 255}, + {2, 1, 0, 255, 5, 4, 3, 255, 7, 6, 255, 255, 9, 8, 255, 255}, + {2, 1, 0, 255, 5, 4, 3, 255, 7, 6, 255, 255, 10, 9, 8, 255}, + {2, 1, 0, 255, 5, 4, 3, 255, 8, 7, 6, 255, 9, 255, 255, 255}, + {2, 1, 0, 255, 5, 4, 3, 255, 8, 7, 6, 255, 10, 9, 255, 255}, + {2, 1, 0, 255, 5, 4, 3, 255, 8, 7, 6, 255, 11, 10, 9, 255}, + {0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 1, 255, 255, 255, 3, 2, 255, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 1, 255, 255, 255, 4, 3, 2, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 1, 255, 255, 255, 5, 4, 3, 2, 0, 0, 0, 0}, + {0, 255, 255, 255, 2, 1, 255, 255, 3, 255, 255, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 2, 1, 255, 255, 4, 3, 255, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 2, 1, 255, 255, 5, 4, 3, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 2, 1, 255, 255, 6, 5, 4, 3, 0, 0, 0, 0}, + {0, 255, 255, 255, 3, 2, 1, 255, 4, 255, 255, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 3, 2, 1, 255, 5, 4, 255, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 3, 2, 1, 255, 6, 5, 4, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 3, 2, 1, 255, 7, 6, 5, 4, 0, 0, 0, 0}, + {0, 255, 255, 255, 4, 3, 2, 1, 5, 255, 255, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 4, 3, 2, 1, 6, 5, 255, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 4, 3, 2, 1, 7, 6, 5, 255, 0, 0, 0, 0}, + {0, 255, 255, 255, 4, 3, 2, 1, 8, 7, 6, 5, 0, 0, 0, 0}, + {1, 0, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 2, 255, 255, 255, 4, 3, 255, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 2, 255, 255, 255, 5, 4, 3, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 2, 255, 255, 255, 6, 5, 4, 3, 0, 0, 0, 0}, + {1, 0, 255, 255, 3, 2, 255, 255, 4, 255, 255, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 3, 2, 255, 255, 5, 4, 255, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 3, 2, 255, 255, 6, 5, 4, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 3, 2, 255, 255, 7, 6, 5, 4, 0, 0, 0, 0}, + {1, 0, 255, 255, 4, 3, 2, 255, 5, 255, 255, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 4, 3, 2, 255, 6, 5, 255, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 4, 3, 2, 255, 7, 6, 5, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 4, 3, 2, 255, 8, 7, 6, 5, 0, 0, 0, 0}, + {1, 0, 255, 255, 5, 4, 3, 2, 6, 255, 255, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 5, 4, 3, 2, 7, 6, 255, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 5, 4, 3, 2, 8, 7, 6, 255, 0, 0, 0, 0}, + {1, 0, 255, 255, 5, 4, 3, 2, 9, 8, 7, 6, 0, 0, 0, 0}, + {2, 1, 0, 255, 3, 255, 255, 255, 4, 255, 255, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 3, 255, 255, 255, 5, 4, 255, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 3, 255, 255, 255, 6, 5, 4, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 3, 255, 255, 255, 7, 6, 5, 4, 0, 0, 0, 0}, + {2, 1, 0, 255, 4, 3, 255, 255, 5, 255, 255, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 4, 3, 255, 255, 6, 5, 255, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 4, 3, 255, 255, 7, 6, 5, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 4, 3, 255, 255, 8, 7, 6, 5, 0, 0, 0, 0}, + {2, 1, 0, 255, 5, 4, 3, 255, 6, 255, 255, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 5, 4, 3, 255, 7, 6, 255, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 5, 4, 3, 255, 8, 7, 6, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 5, 4, 3, 255, 9, 8, 7, 6, 0, 0, 0, 0}, + {2, 1, 0, 255, 6, 5, 4, 3, 7, 255, 255, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 6, 5, 4, 3, 8, 7, 255, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 6, 5, 4, 3, 9, 8, 7, 255, 0, 0, 0, 0}, + {2, 1, 0, 255, 6, 5, 4, 3, 10, 9, 8, 7, 0, 0, 0, 0}, + {3, 2, 1, 0, 4, 255, 255, 255, 5, 255, 255, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 4, 255, 255, 255, 6, 5, 255, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 4, 255, 255, 255, 7, 6, 5, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 4, 255, 255, 255, 8, 7, 6, 5, 0, 0, 0, 0}, + {3, 2, 1, 0, 5, 4, 255, 255, 6, 255, 255, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 5, 4, 255, 255, 7, 6, 255, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 5, 4, 255, 255, 8, 7, 6, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 5, 4, 255, 255, 9, 8, 7, 6, 0, 0, 0, 0}, + {3, 2, 1, 0, 6, 5, 4, 255, 7, 255, 255, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 6, 5, 4, 255, 8, 7, 255, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 6, 5, 4, 255, 9, 8, 7, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 6, 5, 4, 255, 10, 9, 8, 7, 0, 0, 0, 0}, + {3, 2, 1, 0, 7, 6, 5, 4, 8, 255, 255, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 7, 6, 5, 4, 9, 8, 255, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 7, 6, 5, 4, 10, 9, 8, 255, 0, 0, 0, 0}, + {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 0, 0, 0, 0}}; +/* number of two bytes : 64 */ +/* number of two + three bytes : 145 */ +/* number of two + three + four bytes : 209 */ +const uint8_t utf8bigindex[4096][2] = { + {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, + {145, 3}, {209, 12}, {209, 12}, {209, 12}, {146, 4}, {209, 12}, {149, 4}, + {161, 4}, {64, 4}, {209, 12}, {209, 12}, {209, 12}, {147, 5}, {209, 12}, + {150, 5}, {162, 5}, {65, 5}, {209, 12}, {153, 5}, {165, 5}, {67, 5}, + {177, 5}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, {209, 12}, + {148, 6}, {209, 12}, {151, 6}, {163, 6}, {66, 6}, {209, 12}, {154, 6}, + {166, 6}, {68, 6}, {178, 6}, {74, 6}, {92, 6}, {64, 4}, {209, 12}, + {157, 6}, {169, 6}, {70, 6}, {181, 6}, {76, 6}, {94, 6}, {65, 5}, + {193, 6}, {82, 6}, {100, 6}, {67, 5}, {118, 6}, {73, 5}, {91, 5}, + {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {152, 7}, {164, 7}, {145, 3}, {209, 12}, {155, 7}, {167, 7}, {69, 7}, {179, 7}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, {170, 7}, {71, 7}, {182, 7}, {77, 7}, {95, 7}, {65, 5}, {194, 7}, {83, 7}, {101, 7}, @@ -10563,5379 +13360,15713 @@ const uint8_t utf8bigindex[4096][2] = { {193, 6}, {82, 6}, {48, 8}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}}; } // namespace utf8_to_utf16 -} // namespace tables +} // namespace tables +} // unnamed namespace +} // namespace simdutf + +#endif // SIMDUTF_UTF8_TO_UTF16_TABLES_H +/* end file src/tables/utf8_to_utf16_tables.h */ +/* begin file src/tables/utf16_to_utf8_tables.h */ +// file generated by scripts/sse_convert_utf16_to_utf8.py +#ifndef SIMDUTF_UTF16_TO_UTF8_TABLES_H +#define SIMDUTF_UTF16_TO_UTF8_TABLES_H + +namespace simdutf { +namespace { +namespace tables { +namespace utf16_to_utf8 { + +// 1 byte for length, 16 bytes for mask +const uint8_t pack_1_2_utf8_bytes[256][17] = { + {16, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, + {15, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80}, + {15, 1, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80}, + {14, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, + {15, 1, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80}, + {14, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, + {14, 1, 0, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, + {13, 0, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {15, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80}, + {14, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80}, + {14, 1, 0, 3, 2, 5, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80}, + {13, 0, 3, 2, 5, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {14, 1, 0, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80}, + {13, 0, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 5, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 5, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {15, 1, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80}, + {14, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, + {14, 1, 0, 3, 2, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, + {13, 0, 3, 2, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {14, 1, 0, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, + {13, 0, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {14, 1, 0, 3, 2, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80}, + {13, 0, 3, 2, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {15, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80}, + {14, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80}, + {14, 1, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80}, + {13, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {14, 1, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80}, + {13, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 5, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 5, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {14, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80}, + {13, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 5, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 5, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 5, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 5, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 5, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 5, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {14, 1, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80}, + {13, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {15, 1, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80}, + {14, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, + {14, 1, 0, 3, 2, 5, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, + {13, 0, 3, 2, 5, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {14, 1, 0, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, + {13, 0, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 5, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 5, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {14, 1, 0, 3, 2, 5, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80}, + {13, 0, 3, 2, 5, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 5, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 5, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 5, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 5, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 5, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 5, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {14, 1, 0, 3, 2, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, + {13, 0, 3, 2, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {14, 1, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80}, + {13, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 5, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 5, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 5, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 5, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 5, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 5, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 5, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 5, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 5, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 5, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 5, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 5, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 5, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 5, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 3, 2, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 3, 2, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 1, 0, 2, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 2, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {15, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80}, + {14, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80}, + {14, 1, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80}, + {13, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {14, 1, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80}, + {13, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {14, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80}, + {13, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 5, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 5, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 5, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 5, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {14, 1, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80}, + {13, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {14, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80}, + {13, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 5, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 5, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 5, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 5, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 5, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 5, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 5, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 5, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 3, 2, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 3, 2, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 1, 0, 2, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 2, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {14, 1, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80}, + {13, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 5, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 5, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 5, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 5, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 5, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 5, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 5, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 5, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 5, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 5, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 5, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 5, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {13, 1, 0, 3, 2, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 3, 2, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 3, 2, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 1, 0, 2, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 2, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {13, 1, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80}, + {12, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 5, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 5, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 2, 5, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 2, 5, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 5, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 5, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 5, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 5, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 3, 2, 5, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 3, 2, 5, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 5, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 5, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 1, 0, 2, 5, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 2, 5, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {12, 1, 0, 3, 2, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, + {11, 0, 3, 2, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 3, 2, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 3, 2, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {11, 1, 0, 2, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 2, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 1, 0, 2, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 2, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {11, 1, 0, 3, 2, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 3, 2, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 1, 0, 3, 2, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 3, 2, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 1, 0, 2, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 2, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 1, 0, 2, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 0, 2, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}}; + +// 1 byte for length, 16 bytes for mask +const uint8_t pack_1_2_3_utf8_bytes[256][17] = { + {12, 2, 3, 1, 6, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80}, + {9, 6, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {11, 3, 1, 6, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80}, + {10, 0, 6, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 2, 3, 1, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 3, 1, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {11, 2, 3, 1, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 3, 1, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 2, 3, 1, 4, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 4, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 3, 1, 4, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 0, 4, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 2, 3, 1, 6, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 6, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 3, 1, 6, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 6, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 2, 3, 1, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 3, 1, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 2, 3, 1, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 2, 3, 1, 4, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 4, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 3, 1, 4, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 4, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {11, 2, 3, 1, 6, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 6, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 3, 1, 6, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 6, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 2, 3, 1, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {10, 2, 3, 1, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 3, 1, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 0, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 2, 3, 1, 4, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 4, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 3, 1, 4, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 4, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 2, 3, 1, 6, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 6, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 3, 1, 6, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 0, 6, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 2, 3, 1, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 3, 1, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 2, 3, 1, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 3, 1, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 2, 3, 1, 4, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 4, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 4, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 4, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 2, 3, 1, 6, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 6, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 3, 1, 6, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 6, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 2, 3, 1, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 3, 1, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 2, 3, 1, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 2, 3, 1, 4, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 4, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 3, 1, 4, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 4, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 2, 3, 1, 6, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {3, 6, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 3, 1, 6, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 6, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 2, 3, 1, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {2, 3, 1, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {5, 2, 3, 1, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 3, 1, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 0, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 2, 3, 1, 4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {1, 4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {3, 3, 1, 4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {2, 0, 4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 2, 3, 1, 6, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 6, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 6, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 6, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 2, 3, 1, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 3, 1, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 0, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 2, 3, 1, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 3, 1, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 0, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 2, 3, 1, 4, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {3, 4, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 3, 1, 4, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 4, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 2, 3, 1, 6, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 6, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 3, 1, 6, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 0, 6, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 2, 3, 1, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {1, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {3, 3, 1, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {2, 0, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 2, 3, 1, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {3, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 3, 1, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 2, 3, 1, 4, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 4, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 3, 1, 4, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 0, 4, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {11, 2, 3, 1, 6, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 6, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {10, 3, 1, 6, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {9, 0, 6, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 2, 3, 1, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {10, 2, 3, 1, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 3, 1, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 0, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 2, 3, 1, 4, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 4, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 3, 1, 4, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 4, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 2, 3, 1, 6, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 6, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 6, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 6, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 2, 3, 1, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {4, 3, 1, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {3, 0, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 2, 3, 1, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 3, 1, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 2, 3, 1, 4, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 4, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 3, 1, 4, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 4, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {10, 2, 3, 1, 6, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 6, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 3, 1, 6, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 0, 6, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 2, 3, 1, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 3, 1, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 2, 3, 1, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 3, 1, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 2, 3, 1, 4, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 4, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 4, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 4, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 2, 3, 1, 6, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 6, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 3, 1, 6, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 6, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 2, 3, 1, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 3, 1, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 2, 3, 1, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 2, 3, 1, 4, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 4, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 3, 1, 4, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 4, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {10, 2, 3, 1, 6, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {7, 6, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {9, 3, 1, 6, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {8, 0, 6, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 2, 3, 1, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 3, 1, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {9, 2, 3, 1, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 3, 1, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {8, 2, 3, 1, 4, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 4, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 4, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 4, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 2, 3, 1, 6, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 6, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 3, 1, 6, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 0, 6, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 2, 3, 1, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {1, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}, + {3, 3, 1, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {2, 0, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 2, 3, 1, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {3, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 3, 1, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 2, 3, 1, 4, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 4, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 3, 1, 4, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 0, 4, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {9, 2, 3, 1, 6, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + {6, 6, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 3, 1, 6, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {7, 0, 6, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 2, 3, 1, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 3, 1, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {8, 2, 3, 1, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 2, 3, 1, 4, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 4, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {6, 3, 1, 4, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {5, 0, 4, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {8, 2, 3, 1, 6, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 6, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {7, 3, 1, 6, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {6, 0, 6, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 2, 3, 1, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {2, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {4, 3, 1, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {3, 0, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {7, 2, 3, 1, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {4, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 3, 1, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {5, 0, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {6, 2, 3, 1, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80}, + {3, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80}, + {5, 3, 1, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}, + {4, 0, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80}}; + +} // namespace utf16_to_utf8 +} // namespace tables +} // unnamed namespace +} // namespace simdutf + +#endif // SIMDUTF_UTF16_TO_UTF8_TABLES_H +/* end file src/tables/utf16_to_utf8_tables.h */ +// End of tables. + +// The scalar routines should be included once. +/* begin file src/scalar/ascii.h */ +#ifndef SIMDUTF_ASCII_H +#define SIMDUTF_ASCII_H + +namespace simdutf { +namespace scalar { +namespace { +namespace ascii { +#if SIMDUTF_IMPLEMENTATION_FALLBACK +// Only used by the fallback kernel. +inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept { + const uint8_t *data = reinterpret_cast(buf); + uint64_t pos = 0; + // process in blocks of 16 bytes when possible + for (; pos + 16 <= len; pos += 16) { + uint64_t v1; + std::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) != 0) { + return false; + } + } + // process the tail byte-by-byte + for (; pos < len; pos++) { + if (data[pos] >= 0b10000000) { + return false; + } + } + return true; +} +#endif + +inline simdutf_warn_unused result validate_with_errors(const char *buf, + size_t len) noexcept { + const uint8_t *data = reinterpret_cast(buf); + size_t pos = 0; + // process in blocks of 16 bytes when possible + for (; pos + 16 <= len; pos += 16) { + uint64_t v1; + std::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) != 0) { + for (; pos < len; pos++) { + if (data[pos] >= 0b10000000) { + return result(error_code::TOO_LARGE, pos); + } + } + } + } + // process the tail byte-by-byte + for (; pos < len; pos++) { + if (data[pos] >= 0b10000000) { + return result(error_code::TOO_LARGE, pos); + } + } + return result(error_code::SUCCESS, pos); +} + +} // namespace ascii +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/ascii.h */ +/* begin file src/scalar/latin1.h */ +#ifndef SIMDUTF_LATIN1_H +#define SIMDUTF_LATIN1_H + +namespace simdutf { +namespace scalar { +namespace { +namespace latin1 { + +inline size_t utf32_length_from_latin1(size_t len) { + // We are not BOM aware. + return len; // a utf32 unit will always represent 1 latin1 character +} + +inline size_t utf8_length_from_latin1(const char *buf, size_t len) { + const uint8_t *c = reinterpret_cast(buf); + size_t answer = 0; + for (size_t i = 0; i < len; i++) { + if ((c[i] >> 7)) { + answer++; + } + } + return answer + len; +} + +inline size_t utf16_length_from_latin1(size_t len) { return len; } + +} // namespace latin1 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/latin1.h */ + +/* begin file src/scalar/utf32_to_utf8/valid_utf32_to_utf8.h */ +#ifndef SIMDUTF_VALID_UTF32_TO_UTF8_H +#define SIMDUTF_VALID_UTF32_TO_UTF8_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf32_to_utf8 { + +#if SIMDUTF_IMPLEMENTATION_FALLBACK || SIMDUTF_IMPLEMENTATION_PPC64 +// only used by the fallback and POWER kernel +inline size_t convert_valid(const char32_t *buf, size_t len, + char *utf8_output) { + const uint32_t *data = reinterpret_cast(buf); + size_t pos = 0; + char *start{utf8_output}; + while (pos < len) { + // try to convert the next block of 2 ASCII characters + if (pos + 2 <= + len) { // if it is safe to read 8 more bytes, check that they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if ((v & 0xFFFFFF80FFFFFF80) == 0) { + *utf8_output++ = char(buf[pos]); + *utf8_output++ = char(buf[pos + 1]); + pos += 2; + continue; + } + } + uint32_t word = data[pos]; + if ((word & 0xFFFFFF80) == 0) { + // will generate one UTF-8 bytes + *utf8_output++ = char(word); + pos++; + } else if ((word & 0xFFFFF800) == 0) { + // will generate two UTF-8 bytes + // we have 0b110XXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else if ((word & 0xFFFF0000) == 0) { + // will generate three UTF-8 bytes + // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else { + // will generate four UTF-8 bytes + // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } + } + return utf8_output - start; +} +#endif // SIMDUTF_IMPLEMENTATION_FALLBACK || SIMDUTF_IMPLEMENTATION_PPC64 + +} // namespace utf32_to_utf8 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf32_to_utf8/valid_utf32_to_utf8.h */ +/* begin file src/scalar/utf32_to_utf8/utf32_to_utf8.h */ +#ifndef SIMDUTF_UTF32_TO_UTF8_H +#define SIMDUTF_UTF32_TO_UTF8_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf32_to_utf8 { + +inline size_t convert(const char32_t *buf, size_t len, char *utf8_output) { + const uint32_t *data = reinterpret_cast(buf); + size_t pos = 0; + char *start{utf8_output}; + while (pos < len) { + // try to convert the next block of 2 ASCII characters + if (pos + 2 <= + len) { // if it is safe to read 8 more bytes, check that they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if ((v & 0xFFFFFF80FFFFFF80) == 0) { + *utf8_output++ = char(buf[pos]); + *utf8_output++ = char(buf[pos + 1]); + pos += 2; + continue; + } + } + uint32_t word = data[pos]; + if ((word & 0xFFFFFF80) == 0) { + // will generate one UTF-8 bytes + *utf8_output++ = char(word); + pos++; + } else if ((word & 0xFFFFF800) == 0) { + // will generate two UTF-8 bytes + // we have 0b110XXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else if ((word & 0xFFFF0000) == 0) { + // will generate three UTF-8 bytes + // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX + if (word >= 0xD800 && word <= 0xDFFF) { + return 0; + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else { + // will generate four UTF-8 bytes + // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX + if (word > 0x10FFFF) { + return 0; + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } + } + return utf8_output - start; +} + +inline result convert_with_errors(const char32_t *buf, size_t len, + char *utf8_output) { + const uint32_t *data = reinterpret_cast(buf); + size_t pos = 0; + char *start{utf8_output}; + while (pos < len) { + // try to convert the next block of 2 ASCII characters + if (pos + 2 <= + len) { // if it is safe to read 8 more bytes, check that they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if ((v & 0xFFFFFF80FFFFFF80) == 0) { + *utf8_output++ = char(buf[pos]); + *utf8_output++ = char(buf[pos + 1]); + pos += 2; + continue; + } + } + uint32_t word = data[pos]; + if ((word & 0xFFFFFF80) == 0) { + // will generate one UTF-8 bytes + *utf8_output++ = char(word); + pos++; + } else if ((word & 0xFFFFF800) == 0) { + // will generate two UTF-8 bytes + // we have 0b110XXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else if ((word & 0xFFFF0000) == 0) { + // will generate three UTF-8 bytes + // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX + if (word >= 0xD800 && word <= 0xDFFF) { + return result(error_code::SURROGATE, pos); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else { + // will generate four UTF-8 bytes + // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX + if (word > 0x10FFFF) { + return result(error_code::TOO_LARGE, pos); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } + } + return result(error_code::SUCCESS, utf8_output - start); +} + +} // namespace utf32_to_utf8 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf32_to_utf8/utf32_to_utf8.h */ + +/* begin file src/scalar/utf32_to_utf16/valid_utf32_to_utf16.h */ +#ifndef SIMDUTF_VALID_UTF32_TO_UTF16_H +#define SIMDUTF_VALID_UTF32_TO_UTF16_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf32_to_utf16 { + +template +inline size_t convert_valid(const char32_t *buf, size_t len, + char16_t *utf16_output) { + const uint32_t *data = reinterpret_cast(buf); + size_t pos = 0; + char16_t *start{utf16_output}; + while (pos < len) { + uint32_t word = data[pos]; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + *utf16_output++ = !match_system(big_endian) + ? char16_t(utf16::swap_bytes(uint16_t(word))) + : char16_t(word); + pos++; + } else { + // will generate a surrogate pair + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if (!match_system(big_endian)) { + high_surrogate = utf16::swap_bytes(high_surrogate); + low_surrogate = utf16::swap_bytes(low_surrogate); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + pos++; + } + } + return utf16_output - start; +} + +} // namespace utf32_to_utf16 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf32_to_utf16/valid_utf32_to_utf16.h */ +/* begin file src/scalar/utf32_to_utf16/utf32_to_utf16.h */ +#ifndef SIMDUTF_UTF32_TO_UTF16_H +#define SIMDUTF_UTF32_TO_UTF16_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf32_to_utf16 { + +template +inline size_t convert(const char32_t *buf, size_t len, char16_t *utf16_output) { + const uint32_t *data = reinterpret_cast(buf); + size_t pos = 0; + char16_t *start{utf16_output}; + while (pos < len) { + uint32_t word = data[pos]; + if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return 0; + } + // will not generate a surrogate pair + *utf16_output++ = !match_system(big_endian) + ? char16_t(utf16::swap_bytes(uint16_t(word))) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return 0; + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if (!match_system(big_endian)) { + high_surrogate = utf16::swap_bytes(high_surrogate); + low_surrogate = utf16::swap_bytes(low_surrogate); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + pos++; + } + return utf16_output - start; +} + +template +inline result convert_with_errors(const char32_t *buf, size_t len, + char16_t *utf16_output) { + const uint32_t *data = reinterpret_cast(buf); + size_t pos = 0; + char16_t *start{utf16_output}; + while (pos < len) { + uint32_t word = data[pos]; + if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return result(error_code::SURROGATE, pos); + } + // will not generate a surrogate pair + *utf16_output++ = !match_system(big_endian) + ? char16_t(utf16::swap_bytes(uint16_t(word))) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return result(error_code::TOO_LARGE, pos); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if (!match_system(big_endian)) { + high_surrogate = utf16::swap_bytes(high_surrogate); + low_surrogate = utf16::swap_bytes(low_surrogate); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + pos++; + } + return result(error_code::SUCCESS, utf16_output - start); +} + +} // namespace utf32_to_utf16 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf32_to_utf16/utf32_to_utf16.h */ + +/* begin file src/scalar/utf16_to_utf8/valid_utf16_to_utf8.h */ +#ifndef SIMDUTF_VALID_UTF16_TO_UTF8_H +#define SIMDUTF_VALID_UTF16_TO_UTF8_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf16_to_utf8 { + +template +inline size_t convert_valid(const char16_t *buf, size_t len, + char *utf8_output) { + const uint16_t *data = reinterpret_cast(buf); + size_t pos = 0; + char *start{utf8_output}; + while (pos < len) { + // try to convert the next block of 4 ASCII characters + if (pos + 4 <= + len) { // if it is safe to read 8 more bytes, check that they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if (!match_system(big_endian)) { + v = (v >> 8) | (v << (64 - 8)); + } + if ((v & 0xFF80FF80FF80FF80) == 0) { + size_t final_pos = pos + 4; + while (pos < final_pos) { + *utf8_output++ = !match_system(big_endian) + ? char(utf16::swap_bytes(buf[pos])) + : char(buf[pos]); + pos++; + } + continue; + } + } + + uint16_t word = + !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; + if ((word & 0xFF80) == 0) { + // will generate one UTF-8 bytes + *utf8_output++ = char(word); + pos++; + } else if ((word & 0xF800) == 0) { + // will generate two UTF-8 bytes + // we have 0b110XXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else if ((word & 0xF800) != 0xD800) { + // will generate three UTF-8 bytes + // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + if (pos + 1 >= len) { + return 0; + } // minimal bound checking + uint16_t next_word = !match_system(big_endian) + ? utf16::swap_bytes(data[pos + 1]) + : data[pos + 1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + uint32_t value = (diff << 10) + diff2 + 0x10000; + // will generate four UTF-8 bytes + // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + pos += 2; + } + } + return utf8_output - start; +} + +} // namespace utf16_to_utf8 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf16_to_utf8/valid_utf16_to_utf8.h */ +/* begin file src/scalar/utf16_to_utf8/utf16_to_utf8.h */ +#ifndef SIMDUTF_UTF16_TO_UTF8_H +#define SIMDUTF_UTF16_TO_UTF8_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf16_to_utf8 { + +template +inline size_t convert(const char16_t *buf, size_t len, char *utf8_output) { + const uint16_t *data = reinterpret_cast(buf); + size_t pos = 0; + char *start{utf8_output}; + while (pos < len) { + // try to convert the next block of 8 bytes + if (pos + 4 <= + len) { // if it is safe to read 8 more bytes, check that they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if (!match_system(big_endian)) { + v = (v >> 8) | (v << (64 - 8)); + } + if ((v & 0xFF80FF80FF80FF80) == 0) { + size_t final_pos = pos + 4; + while (pos < final_pos) { + *utf8_output++ = !match_system(big_endian) + ? char(utf16::swap_bytes(buf[pos])) + : char(buf[pos]); + pos++; + } + continue; + } + } + uint16_t word = + !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; + if ((word & 0xFF80) == 0) { + // will generate one UTF-8 bytes + *utf8_output++ = char(word); + pos++; + } else if ((word & 0xF800) == 0) { + // will generate two UTF-8 bytes + // we have 0b110XXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else if ((word & 0xF800) != 0xD800) { + // will generate three UTF-8 bytes + // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else { + // must be a surrogate pair + if (pos + 1 >= len) { + return 0; + } + uint16_t diff = uint16_t(word - 0xD800); + if (diff > 0x3FF) { + return 0; + } + uint16_t next_word = !match_system(big_endian) + ? utf16::swap_bytes(data[pos + 1]) + : data[pos + 1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if (diff2 > 0x3FF) { + return 0; + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + // will generate four UTF-8 bytes + // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + pos += 2; + } + } + return utf8_output - start; +} + +template +inline result convert_with_errors(const char16_t *buf, size_t len, + char *utf8_output) { + const uint16_t *data = reinterpret_cast(buf); + size_t pos = 0; + char *start{utf8_output}; + while (pos < len) { + // try to convert the next block of 8 bytes + if (pos + 4 <= + len) { // if it is safe to read 8 more bytes, check that they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if (!match_system(big_endian)) + v = (v >> 8) | (v << (64 - 8)); + if ((v & 0xFF80FF80FF80FF80) == 0) { + size_t final_pos = pos + 4; + while (pos < final_pos) { + *utf8_output++ = !match_system(big_endian) + ? char(utf16::swap_bytes(buf[pos])) + : char(buf[pos]); + pos++; + } + continue; + } + } + uint16_t word = + !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; + if ((word & 0xFF80) == 0) { + // will generate one UTF-8 bytes + *utf8_output++ = char(word); + pos++; + } else if ((word & 0xF800) == 0) { + // will generate two UTF-8 bytes + // we have 0b110XXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else if ((word & 0xF800) != 0xD800) { + // will generate three UTF-8 bytes + // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + pos++; + } else { + // must be a surrogate pair + if (pos + 1 >= len) { + return result(error_code::SURROGATE, pos); + } + uint16_t diff = uint16_t(word - 0xD800); + if (diff > 0x3FF) { + return result(error_code::SURROGATE, pos); + } + uint16_t next_word = !match_system(big_endian) + ? utf16::swap_bytes(data[pos + 1]) + : data[pos + 1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if (diff2 > 0x3FF) { + return result(error_code::SURROGATE, pos); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + // will generate four UTF-8 bytes + // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + pos += 2; + } + } + return result(error_code::SUCCESS, utf8_output - start); +} + +} // namespace utf16_to_utf8 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf16_to_utf8/utf16_to_utf8.h */ + +/* begin file src/scalar/utf16_to_utf32/valid_utf16_to_utf32.h */ +#ifndef SIMDUTF_VALID_UTF16_TO_UTF32_H +#define SIMDUTF_VALID_UTF16_TO_UTF32_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf16_to_utf32 { + +template +inline size_t convert_valid(const char16_t *buf, size_t len, + char32_t *utf32_output) { + const uint16_t *data = reinterpret_cast(buf); + size_t pos = 0; + char32_t *start{utf32_output}; + while (pos < len) { + uint16_t word = + !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; + if ((word & 0xF800) != 0xD800) { + // No surrogate pair, extend 16-bit word to 32-bit word + *utf32_output++ = char32_t(word); + pos++; + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + if (pos + 1 >= len) { + return 0; + } // minimal bound checking + uint16_t next_word = !match_system(big_endian) + ? utf16::swap_bytes(data[pos + 1]) + : data[pos + 1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + pos += 2; + } + } + return utf32_output - start; +} + +} // namespace utf16_to_utf32 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf16_to_utf32/valid_utf16_to_utf32.h */ +/* begin file src/scalar/utf16_to_utf32/utf16_to_utf32.h */ +#ifndef SIMDUTF_UTF16_TO_UTF32_H +#define SIMDUTF_UTF16_TO_UTF32_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf16_to_utf32 { + +template +inline size_t convert(const char16_t *buf, size_t len, char32_t *utf32_output) { + const uint16_t *data = reinterpret_cast(buf); + size_t pos = 0; + char32_t *start{utf32_output}; + while (pos < len) { + uint16_t word = + !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; + if ((word & 0xF800) != 0xD800) { + // No surrogate pair, extend 16-bit word to 32-bit word + *utf32_output++ = char32_t(word); + pos++; + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + if (diff > 0x3FF) { + return 0; + } + if (pos + 1 >= len) { + return 0; + } // minimal bound checking + uint16_t next_word = !match_system(big_endian) + ? utf16::swap_bytes(data[pos + 1]) + : data[pos + 1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if (diff2 > 0x3FF) { + return 0; + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + pos += 2; + } + } + return utf32_output - start; +} + +template +inline result convert_with_errors(const char16_t *buf, size_t len, + char32_t *utf32_output) { + const uint16_t *data = reinterpret_cast(buf); + size_t pos = 0; + char32_t *start{utf32_output}; + while (pos < len) { + uint16_t word = + !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; + if ((word & 0xF800) != 0xD800) { + // No surrogate pair, extend 16-bit word to 32-bit word + *utf32_output++ = char32_t(word); + pos++; + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + if (diff > 0x3FF) { + return result(error_code::SURROGATE, pos); + } + if (pos + 1 >= len) { + return result(error_code::SURROGATE, pos); + } // minimal bound checking + uint16_t next_word = !match_system(big_endian) + ? utf16::swap_bytes(data[pos + 1]) + : data[pos + 1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if (diff2 > 0x3FF) { + return result(error_code::SURROGATE, pos); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + pos += 2; + } + } + return result(error_code::SUCCESS, utf32_output - start); +} + +} // namespace utf16_to_utf32 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf16_to_utf32/utf16_to_utf32.h */ + +/* begin file src/scalar/utf8_to_utf16/valid_utf8_to_utf16.h */ +#ifndef SIMDUTF_VALID_UTF8_TO_UTF16_H +#define SIMDUTF_VALID_UTF8_TO_UTF16_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8_to_utf16 { + +template +inline size_t convert_valid(const char *buf, size_t len, + char16_t *utf16_output) { + const uint8_t *data = reinterpret_cast(buf); + size_t pos = 0; + char16_t *start{utf16_output}; + while (pos < len) { + // try to convert the next block of 8 ASCII bytes + if (pos + 8 <= + len) { // if it is safe to read 8 more bytes, check that they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if ((v & 0x8080808080808080) == 0) { + size_t final_pos = pos + 8; + while (pos < final_pos) { + *utf16_output++ = !match_system(big_endian) + ? char16_t(utf16::swap_bytes(buf[pos])) + : char16_t(buf[pos]); + pos++; + } + continue; + } + } + uint8_t leading_byte = data[pos]; // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *utf16_output++ = !match_system(big_endian) + ? char16_t(utf16::swap_bytes(leading_byte)) + : char16_t(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8, it should become + // a single UTF-16 word. + if (pos + 1 >= len) { + break; + } // minimal bound checking + uint16_t code_point = uint16_t(((leading_byte & 0b00011111) << 6) | + (data[pos + 1] & 0b00111111)); + if (!match_system(big_endian)) { + code_point = utf16::swap_bytes(uint16_t(code_point)); + } + *utf16_output++ = char16_t(code_point); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8, it should become + // a single UTF-16 word. + if (pos + 2 >= len) { + break; + } // minimal bound checking + uint16_t code_point = uint16_t(((leading_byte & 0b00001111) << 12) | + ((data[pos + 1] & 0b00111111) << 6) | + (data[pos + 2] & 0b00111111)); + if (!match_system(big_endian)) { + code_point = utf16::swap_bytes(uint16_t(code_point)); + } + *utf16_output++ = char16_t(code_point); + pos += 3; + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. + if (pos + 3 >= len) { + break; + } // minimal bound checking + uint32_t code_point = ((leading_byte & 0b00000111) << 18) | + ((data[pos + 1] & 0b00111111) << 12) | + ((data[pos + 2] & 0b00111111) << 6) | + (data[pos + 3] & 0b00111111); + code_point -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); + if (!match_system(big_endian)) { + high_surrogate = utf16::swap_bytes(high_surrogate); + low_surrogate = utf16::swap_bytes(low_surrogate); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + pos += 4; + } else { + // we may have a continuation but we do not do error checking + return 0; + } + } + return utf16_output - start; +} + +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf8_to_utf16/valid_utf8_to_utf16.h */ +/* begin file src/scalar/utf8_to_utf16/utf8_to_utf16.h */ +#ifndef SIMDUTF_UTF8_TO_UTF16_H +#define SIMDUTF_UTF8_TO_UTF16_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8_to_utf16 { + +template +inline size_t convert(const char *buf, size_t len, char16_t *utf16_output) { + const uint8_t *data = reinterpret_cast(buf); + size_t pos = 0; + char16_t *start{utf16_output}; + while (pos < len) { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= + len) { // if it is safe to read 16 more bytes, check that they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + size_t final_pos = pos + 16; + while (pos < final_pos) { + *utf16_output++ = !match_system(big_endian) + ? char16_t(utf16::swap_bytes(buf[pos])) + : char16_t(buf[pos]); + pos++; + } + continue; + } + } + + uint8_t leading_byte = data[pos]; // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *utf16_output++ = !match_system(big_endian) + ? char16_t(utf16::swap_bytes(leading_byte)) + : char16_t(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8, it should become + // a single UTF-16 word. + if (pos + 1 >= len) { + return 0; + } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return 0; + } + // range check + uint32_t code_point = + (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); + if (code_point < 0x80 || 0x7ff < code_point) { + return 0; + } + if (!match_system(big_endian)) { + code_point = uint32_t(utf16::swap_bytes(uint16_t(code_point))); + } + *utf16_output++ = char16_t(code_point); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8, it should become + // a single UTF-16 word. + if (pos + 2 >= len) { + return 0; + } // minimal bound checking + + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return 0; + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return 0; + } + // range check + uint32_t code_point = (leading_byte & 0b00001111) << 12 | + (data[pos + 1] & 0b00111111) << 6 | + (data[pos + 2] & 0b00111111); + if (code_point < 0x800 || 0xffff < code_point || + (0xd7ff < code_point && code_point < 0xe000)) { + return 0; + } + if (!match_system(big_endian)) { + code_point = uint32_t(utf16::swap_bytes(uint16_t(code_point))); + } + *utf16_output++ = char16_t(code_point); + pos += 3; + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. + if (pos + 3 >= len) { + return 0; + } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return 0; + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return 0; + } + if ((data[pos + 3] & 0b11000000) != 0b10000000) { + return 0; + } + + // range check + uint32_t code_point = (leading_byte & 0b00000111) << 18 | + (data[pos + 1] & 0b00111111) << 12 | + (data[pos + 2] & 0b00111111) << 6 | + (data[pos + 3] & 0b00111111); + if (code_point <= 0xffff || 0x10ffff < code_point) { + return 0; + } + code_point -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); + if (!match_system(big_endian)) { + high_surrogate = utf16::swap_bytes(high_surrogate); + low_surrogate = utf16::swap_bytes(low_surrogate); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + pos += 4; + } else { + return 0; + } + } + return utf16_output - start; +} + +template +inline result convert_with_errors(const char *buf, size_t len, + char16_t *utf16_output) { + const uint8_t *data = reinterpret_cast(buf); + size_t pos = 0; + char16_t *start{utf16_output}; + while (pos < len) { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= + len) { // if it is safe to read 16 more bytes, check that they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + size_t final_pos = pos + 16; + while (pos < final_pos) { + *utf16_output++ = !match_system(big_endian) + ? char16_t(utf16::swap_bytes(buf[pos])) + : char16_t(buf[pos]); + pos++; + } + continue; + } + } + uint8_t leading_byte = data[pos]; // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *utf16_output++ = !match_system(big_endian) + ? char16_t(utf16::swap_bytes(leading_byte)) + : char16_t(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8, it should become + // a single UTF-16 word. + if (pos + 1 >= len) { + return result(error_code::TOO_SHORT, pos); + } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + uint32_t code_point = + (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); + if (code_point < 0x80 || 0x7ff < code_point) { + return result(error_code::OVERLONG, pos); + } + if (!match_system(big_endian)) { + code_point = uint32_t(utf16::swap_bytes(uint16_t(code_point))); + } + *utf16_output++ = char16_t(code_point); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8, it should become + // a single UTF-16 word. + if (pos + 2 >= len) { + return result(error_code::TOO_SHORT, pos); + } // minimal bound checking + + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + uint32_t code_point = (leading_byte & 0b00001111) << 12 | + (data[pos + 1] & 0b00111111) << 6 | + (data[pos + 2] & 0b00111111); + if ((code_point < 0x800) || (0xffff < code_point)) { + return result(error_code::OVERLONG, pos); + } + if (0xd7ff < code_point && code_point < 0xe000) { + return result(error_code::SURROGATE, pos); + } + if (!match_system(big_endian)) { + code_point = uint32_t(utf16::swap_bytes(uint16_t(code_point))); + } + *utf16_output++ = char16_t(code_point); + pos += 3; + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. + if (pos + 3 >= len) { + return result(error_code::TOO_SHORT, pos); + } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 3] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + + // range check + uint32_t code_point = (leading_byte & 0b00000111) << 18 | + (data[pos + 1] & 0b00111111) << 12 | + (data[pos + 2] & 0b00111111) << 6 | + (data[pos + 3] & 0b00111111); + if (code_point <= 0xffff) { + return result(error_code::OVERLONG, pos); + } + if (0x10ffff < code_point) { + return result(error_code::TOO_LARGE, pos); + } + code_point -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); + if (!match_system(big_endian)) { + high_surrogate = utf16::swap_bytes(high_surrogate); + low_surrogate = utf16::swap_bytes(low_surrogate); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + pos += 4; + } else { + // we either have too many continuation bytes or an invalid leading byte + if ((leading_byte & 0b11000000) == 0b10000000) { + return result(error_code::TOO_LONG, pos); + } else { + return result(error_code::HEADER_BITS, pos); + } + } + } + return result(error_code::SUCCESS, utf16_output - start); +} + +/** + * When rewind_and_convert_with_errors is called, we are pointing at 'buf' and + * we have up to len input bytes left, and we encountered some error. It is + * possible that the error is at 'buf' exactly, but it could also be in the + * previous bytes (up to 3 bytes back). + * + * prior_bytes indicates how many bytes, prior to 'buf' may belong to the + * current memory section and can be safely accessed. We prior_bytes to access + * safely up to three bytes before 'buf'. + * + * The caller is responsible to ensure that len > 0. + * + * If the error is believed to have occurred prior to 'buf', the count value + * contain in the result will be SIZE_T - 1, SIZE_T - 2, or SIZE_T - 3. + */ +template +inline result rewind_and_convert_with_errors(size_t prior_bytes, + const char *buf, size_t len, + char16_t *utf16_output) { + size_t extra_len{0}; + // We potentially need to go back in time and find a leading byte. + // In theory '3' would be sufficient, but sometimes the error can go back + // quite far. + size_t how_far_back = prior_bytes; + // size_t how_far_back = 3; // 3 bytes in the past + current position + // if(how_far_back >= prior_bytes) { how_far_back = prior_bytes; } + bool found_leading_bytes{false}; + // important: it is i <= how_far_back and not 'i < how_far_back'. + for (size_t i = 0; i <= how_far_back; i++) { + unsigned char byte = buf[-static_cast(i)]; + found_leading_bytes = ((byte & 0b11000000) != 0b10000000); + if (found_leading_bytes) { + if (i > 0 && byte < 128) { + // If we had to go back and the leading byte is ascii + // then we can stop right away. + return result(error_code::TOO_LONG, 0 - i + 1); + } + buf -= i; + extra_len = i; + break; + } + } + // + // It is possible for this function to return a negative count in its result. + // C++ Standard Section 18.1 defines size_t is in which is described + // in C Standard as . C Standard Section 4.1.5 defines size_t as an + // unsigned integral type of the result of the sizeof operator + // + // An unsigned type will simply wrap round arithmetically (well defined). + // + if (!found_leading_bytes) { + // If how_far_back == 3, we may have four consecutive continuation bytes!!! + // [....] [continuation] [continuation] [continuation] | [buf is + // continuation] Or we possibly have a stream that does not start with a + // leading byte. + return result(error_code::TOO_LONG, 0 - how_far_back); + } + result res = convert_with_errors(buf, len + extra_len, utf16_output); + if (res.error) { + res.count -= extra_len; + } + return res; +} + +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf8_to_utf16/utf8_to_utf16.h */ + +/* begin file src/scalar/utf8_to_utf32/valid_utf8_to_utf32.h */ +#ifndef SIMDUTF_VALID_UTF8_TO_UTF32_H +#define SIMDUTF_VALID_UTF8_TO_UTF32_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8_to_utf32 { + +inline size_t convert_valid(const char *buf, size_t len, + char32_t *utf32_output) { + const uint8_t *data = reinterpret_cast(buf); + size_t pos = 0; + char32_t *start{utf32_output}; + while (pos < len) { + // try to convert the next block of 8 ASCII bytes + if (pos + 8 <= + len) { // if it is safe to read 8 more bytes, check that they are ascii + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if ((v & 0x8080808080808080) == 0) { + size_t final_pos = pos + 8; + while (pos < final_pos) { + *utf32_output++ = char32_t(buf[pos]); + pos++; + } + continue; + } + } + uint8_t leading_byte = data[pos]; // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *utf32_output++ = char32_t(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8 + if (pos + 1 >= len) { + break; + } // minimal bound checking + *utf32_output++ = char32_t(((leading_byte & 0b00011111) << 6) | + (data[pos + 1] & 0b00111111)); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8 + if (pos + 2 >= len) { + break; + } // minimal bound checking + *utf32_output++ = char32_t(((leading_byte & 0b00001111) << 12) | + ((data[pos + 1] & 0b00111111) << 6) | + (data[pos + 2] & 0b00111111)); + pos += 3; + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. + if (pos + 3 >= len) { + break; + } // minimal bound checking + uint32_t code_word = ((leading_byte & 0b00000111) << 18) | + ((data[pos + 1] & 0b00111111) << 12) | + ((data[pos + 2] & 0b00111111) << 6) | + (data[pos + 3] & 0b00111111); + *utf32_output++ = char32_t(code_word); + pos += 4; + } else { + // we may have a continuation but we do not do error checking + return 0; + } + } + return utf32_output - start; +} + +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf8_to_utf32/valid_utf8_to_utf32.h */ +/* begin file src/scalar/utf8_to_utf32/utf8_to_utf32.h */ +#ifndef SIMDUTF_UTF8_TO_UTF32_H +#define SIMDUTF_UTF8_TO_UTF32_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8_to_utf32 { + +inline size_t convert(const char *buf, size_t len, char32_t *utf32_output) { + const uint8_t *data = reinterpret_cast(buf); + size_t pos = 0; + char32_t *start{utf32_output}; + while (pos < len) { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= + len) { // if it is safe to read 16 more bytes, check that they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + size_t final_pos = pos + 16; + while (pos < final_pos) { + *utf32_output++ = char32_t(buf[pos]); + pos++; + } + continue; + } + } + uint8_t leading_byte = data[pos]; // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *utf32_output++ = char32_t(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8 + if (pos + 1 >= len) { + return 0; + } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return 0; + } + // range check + uint32_t code_point = + (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); + if (code_point < 0x80 || 0x7ff < code_point) { + return 0; + } + *utf32_output++ = char32_t(code_point); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8 + if (pos + 2 >= len) { + return 0; + } // minimal bound checking + + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return 0; + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return 0; + } + // range check + uint32_t code_point = (leading_byte & 0b00001111) << 12 | + (data[pos + 1] & 0b00111111) << 6 | + (data[pos + 2] & 0b00111111); + if (code_point < 0x800 || 0xffff < code_point || + (0xd7ff < code_point && code_point < 0xe000)) { + return 0; + } + *utf32_output++ = char32_t(code_point); + pos += 3; + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. + if (pos + 3 >= len) { + return 0; + } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return 0; + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return 0; + } + if ((data[pos + 3] & 0b11000000) != 0b10000000) { + return 0; + } + + // range check + uint32_t code_point = (leading_byte & 0b00000111) << 18 | + (data[pos + 1] & 0b00111111) << 12 | + (data[pos + 2] & 0b00111111) << 6 | + (data[pos + 3] & 0b00111111); + if (code_point <= 0xffff || 0x10ffff < code_point) { + return 0; + } + *utf32_output++ = char32_t(code_point); + pos += 4; + } else { + return 0; + } + } + return utf32_output - start; +} + +inline result convert_with_errors(const char *buf, size_t len, + char32_t *utf32_output) { + const uint8_t *data = reinterpret_cast(buf); + size_t pos = 0; + char32_t *start{utf32_output}; + while (pos < len) { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= + len) { // if it is safe to read 16 more bytes, check that they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + size_t final_pos = pos + 16; + while (pos < final_pos) { + *utf32_output++ = char32_t(buf[pos]); + pos++; + } + continue; + } + } + uint8_t leading_byte = data[pos]; // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *utf32_output++ = char32_t(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8 + if (pos + 1 >= len) { + return result(error_code::TOO_SHORT, pos); + } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + uint32_t code_point = + (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); + if (code_point < 0x80 || 0x7ff < code_point) { + return result(error_code::OVERLONG, pos); + } + *utf32_output++ = char32_t(code_point); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8 + if (pos + 2 >= len) { + return result(error_code::TOO_SHORT, pos); + } // minimal bound checking + + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + // range check + uint32_t code_point = (leading_byte & 0b00001111) << 12 | + (data[pos + 1] & 0b00111111) << 6 | + (data[pos + 2] & 0b00111111); + if (code_point < 0x800 || 0xffff < code_point) { + return result(error_code::OVERLONG, pos); + } + if (0xd7ff < code_point && code_point < 0xe000) { + return result(error_code::SURROGATE, pos); + } + *utf32_output++ = char32_t(code_point); + pos += 3; + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. + if (pos + 3 >= len) { + return result(error_code::TOO_SHORT, pos); + } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + if ((data[pos + 3] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } + + // range check + uint32_t code_point = (leading_byte & 0b00000111) << 18 | + (data[pos + 1] & 0b00111111) << 12 | + (data[pos + 2] & 0b00111111) << 6 | + (data[pos + 3] & 0b00111111); + if (code_point <= 0xffff) { + return result(error_code::OVERLONG, pos); + } + if (0x10ffff < code_point) { + return result(error_code::TOO_LARGE, pos); + } + *utf32_output++ = char32_t(code_point); + pos += 4; + } else { + // we either have too many continuation bytes or an invalid leading byte + if ((leading_byte & 0b11000000) == 0b10000000) { + return result(error_code::TOO_LONG, pos); + } else { + return result(error_code::HEADER_BITS, pos); + } + } + } + return result(error_code::SUCCESS, utf32_output - start); +} + +/** + * When rewind_and_convert_with_errors is called, we are pointing at 'buf' and + * we have up to len input bytes left, and we encountered some error. It is + * possible that the error is at 'buf' exactly, but it could also be in the + * previous bytes location (up to 3 bytes back). + * + * prior_bytes indicates how many bytes, prior to 'buf' may belong to the + * current memory section and can be safely accessed. We prior_bytes to access + * safely up to three bytes before 'buf'. + * + * The caller is responsible to ensure that len > 0. + * + * If the error is believed to have occurred prior to 'buf', the count value + * contain in the result will be SIZE_T - 1, SIZE_T - 2, or SIZE_T - 3. + */ +inline result rewind_and_convert_with_errors(size_t prior_bytes, + const char *buf, size_t len, + char32_t *utf32_output) { + size_t extra_len{0}; + // We potentially need to go back in time and find a leading byte. + size_t how_far_back = 3; // 3 bytes in the past + current position + if (how_far_back > prior_bytes) { + how_far_back = prior_bytes; + } + bool found_leading_bytes{false}; + // important: it is i <= how_far_back and not 'i < how_far_back'. + for (size_t i = 0; i <= how_far_back; i++) { + unsigned char byte = buf[-static_cast(i)]; + found_leading_bytes = ((byte & 0b11000000) != 0b10000000); + if (found_leading_bytes) { + if (i > 0 && byte < 128) { + // If we had to go back and the leading byte is ascii + // then we can stop right away. + return result(error_code::TOO_LONG, 0 - i + 1); + } + buf -= i; + extra_len = i; + break; + } + } + // + // It is possible for this function to return a negative count in its result. + // C++ Standard Section 18.1 defines size_t is in which is described + // in C Standard as . C Standard Section 4.1.5 defines size_t as an + // unsigned integral type of the result of the sizeof operator + // + // An unsigned type will simply wrap round arithmetically (well defined). + // + if (!found_leading_bytes) { + // If how_far_back == 3, we may have four consecutive continuation bytes!!! + // [....] [continuation] [continuation] [continuation] | [buf is + // continuation] Or we possibly have a stream that does not start with a + // leading byte. + return result(error_code::TOO_LONG, 0 - how_far_back); + } + + result res = convert_with_errors(buf, len + extra_len, utf32_output); + if (res.error) { + res.count -= extra_len; + } + return res; +} + +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf8_to_utf32/utf8_to_utf32.h */ + +/* begin file src/scalar/latin1_to_utf16/latin1_to_utf16.h */ +#ifndef SIMDUTF_LATIN1_TO_UTF16_H +#define SIMDUTF_LATIN1_TO_UTF16_H + +namespace simdutf { +namespace scalar { +namespace { +namespace latin1_to_utf16 { + +template +inline size_t convert(const char *buf, size_t len, char16_t *utf16_output) { + const uint8_t *data = reinterpret_cast(buf); + size_t pos = 0; + char16_t *start{utf16_output}; + + while (pos < len) { + uint16_t word = + uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point + *utf16_output++ = + char16_t(match_system(big_endian) ? word : utf16::swap_bytes(word)); + pos++; + } + + return utf16_output - start; +} + +template +inline result convert_with_errors(const char *buf, size_t len, + char16_t *utf16_output) { + const uint8_t *data = reinterpret_cast(buf); + size_t pos = 0; + char16_t *start{utf16_output}; + + while (pos < len) { + uint16_t word = + uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point + *utf16_output++ = + char16_t(match_system(big_endian) ? word : utf16::swap_bytes(word)); + pos++; + } + + return result(error_code::SUCCESS, utf16_output - start); +} + +} // namespace latin1_to_utf16 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/latin1_to_utf16/latin1_to_utf16.h */ +/* begin file src/scalar/latin1_to_utf32/latin1_to_utf32.h */ +#ifndef SIMDUTF_LATIN1_TO_UTF32_H +#define SIMDUTF_LATIN1_TO_UTF32_H + +namespace simdutf { +namespace scalar { +namespace { +namespace latin1_to_utf32 { + +inline size_t convert(const char *buf, size_t len, char32_t *utf32_output) { + const unsigned char *data = reinterpret_cast(buf); + char32_t *start{utf32_output}; + for (size_t i = 0; i < len; i++) { + *utf32_output++ = (char32_t)data[i]; + } + return utf32_output - start; +} + +} // namespace latin1_to_utf32 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/latin1_to_utf32/latin1_to_utf32.h */ + +/* begin file src/scalar/utf8_to_latin1/utf8_to_latin1.h */ +#ifndef SIMDUTF_UTF8_TO_LATIN1_H +#define SIMDUTF_UTF8_TO_LATIN1_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8_to_latin1 { + +inline size_t convert(const char *buf, size_t len, char *latin_output) { + const uint8_t *data = reinterpret_cast(buf); + size_t pos = 0; + char *start{latin_output}; + + while (pos < len) { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= + len) { // if it is safe to read 16 more bytes, check that they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; // We are only interested in these bits: 1000 1000 + // 1000 1000 .... etc + if ((v & 0x8080808080808080) == + 0) { // if NONE of these are set, e.g. all of them are zero, then + // everything is ASCII + size_t final_pos = pos + 16; + while (pos < final_pos) { + *latin_output++ = char(buf[pos]); + pos++; + } + continue; + } + } + + // suppose it is not an all ASCII byte sequence + uint8_t leading_byte = data[pos]; // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *latin_output++ = char(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == + 0b11000000) { // the first three bits indicate: + // We have a two-byte UTF-8 + if (pos + 1 >= len) { + return 0; + } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return 0; + } // checks if the next byte is a valid continuation byte in UTF-8. A + // valid continuation byte starts with 10. + // range check - + uint32_t code_point = + (leading_byte & 0b00011111) << 6 | + (data[pos + 1] & + 0b00111111); // assembles the Unicode code point from the two bytes. + // It does this by discarding the leading 110 and 10 + // bits from the two bytes, shifting the remaining bits + // of the first byte, and then combining the results + // with a bitwise OR operation. + if (code_point < 0x80 || 0xFF < code_point) { + return 0; // We only care about the range 129-255 which is Non-ASCII + // latin1 characters. A code_point beneath 0x80 is invalid as + // it is already covered by bytes whose leading bit is zero. + } + *latin_output++ = char(code_point); + pos += 2; + } else { + return 0; + } + } + return latin_output - start; +} + +inline result convert_with_errors(const char *buf, size_t len, + char *latin_output) { + const uint8_t *data = reinterpret_cast(buf); + size_t pos = 0; + char *start{latin_output}; + + while (pos < len) { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= + len) { // if it is safe to read 16 more bytes, check that they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; // We are only interested in these bits: 1000 1000 + // 1000 1000...etc + if ((v & 0x8080808080808080) == + 0) { // if NONE of these are set, e.g. all of them are zero, then + // everything is ASCII + size_t final_pos = pos + 16; + while (pos < final_pos) { + *latin_output++ = char(buf[pos]); + pos++; + } + continue; + } + } + // suppose it is not an all ASCII byte sequence + uint8_t leading_byte = data[pos]; // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *latin_output++ = char(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == + 0b11000000) { // the first three bits indicate: + // We have a two-byte UTF-8 + if (pos + 1 >= len) { + return result(error_code::TOO_SHORT, pos); + } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return result(error_code::TOO_SHORT, pos); + } // checks if the next byte is a valid continuation byte in UTF-8. A + // valid continuation byte starts with 10. + // range check - + uint32_t code_point = + (leading_byte & 0b00011111) << 6 | + (data[pos + 1] & + 0b00111111); // assembles the Unicode code point from the two bytes. + // It does this by discarding the leading 110 and 10 + // bits from the two bytes, shifting the remaining bits + // of the first byte, and then combining the results + // with a bitwise OR operation. + if (code_point < 0x80) { + return result(error_code::OVERLONG, pos); + } + if (0xFF < code_point) { + return result(error_code::TOO_LARGE, pos); + } // We only care about the range 129-255 which is Non-ASCII latin1 + // characters + *latin_output++ = char(code_point); + pos += 2; + } else if ((leading_byte & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8 + return result(error_code::TOO_LARGE, pos); + } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 + // we have a 4-byte UTF-8 word. + return result(error_code::TOO_LARGE, pos); + } else { + // we either have too many continuation bytes or an invalid leading byte + if ((leading_byte & 0b11000000) == 0b10000000) { + return result(error_code::TOO_LONG, pos); + } + + return result(error_code::HEADER_BITS, pos); + } + } + return result(error_code::SUCCESS, latin_output - start); +} + +inline result rewind_and_convert_with_errors(size_t prior_bytes, + const char *buf, size_t len, + char *latin1_output) { + size_t extra_len{0}; + // We potentially need to go back in time and find a leading byte. + // In theory '3' would be sufficient, but sometimes the error can go back + // quite far. + size_t how_far_back = prior_bytes; + // size_t how_far_back = 3; // 3 bytes in the past + current position + // if(how_far_back >= prior_bytes) { how_far_back = prior_bytes; } + bool found_leading_bytes{false}; + // important: it is i <= how_far_back and not 'i < how_far_back'. + for (size_t i = 0; i <= how_far_back; i++) { + unsigned char byte = buf[-static_cast(i)]; + found_leading_bytes = ((byte & 0b11000000) != 0b10000000); + if (found_leading_bytes) { + if (i > 0 && byte < 128) { + // If we had to go back and the leading byte is ascii + // then we can stop right away. + return result(error_code::TOO_LONG, 0 - i + 1); + } + buf -= i; + extra_len = i; + break; + } + } + // + // It is possible for this function to return a negative count in its result. + // C++ Standard Section 18.1 defines size_t is in which is described + // in C Standard as . C Standard Section 4.1.5 defines size_t as an + // unsigned integral type of the result of the sizeof operator + // + // An unsigned type will simply wrap round arithmetically (well defined). + // + if (!found_leading_bytes) { + // If how_far_back == 3, we may have four consecutive continuation bytes!!! + // [....] [continuation] [continuation] [continuation] | [buf is + // continuation] Or we possibly have a stream that does not start with a + // leading byte. + return result(error_code::TOO_LONG, 0 - how_far_back); + } + result res = convert_with_errors(buf, len + extra_len, latin1_output); + if (res.error) { + res.count -= extra_len; + } + return res; +} + +} // namespace utf8_to_latin1 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf8_to_latin1/utf8_to_latin1.h */ +/* begin file src/scalar/utf16_to_latin1/utf16_to_latin1.h */ +#ifndef SIMDUTF_UTF16_TO_LATIN1_H +#define SIMDUTF_UTF16_TO_LATIN1_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf16_to_latin1 { + +#include // for std::memcpy + +template +inline size_t convert(const char16_t *buf, size_t len, char *latin_output) { + if (len == 0) { + return 0; + } + const uint16_t *data = reinterpret_cast(buf); + size_t pos = 0; + char *current_write = latin_output; + uint16_t word = 0; + uint16_t too_large = 0; + + while (pos < len) { + word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; + too_large |= word; + *current_write++ = char(word & 0xFF); + pos++; + } + if ((too_large & 0xFF00) != 0) { + return 0; + } + + return current_write - latin_output; +} + +template +inline result convert_with_errors(const char16_t *buf, size_t len, + char *latin_output) { + if (len == 0) { + return result(error_code::SUCCESS, 0); + } + const uint16_t *data = reinterpret_cast(buf); + size_t pos = 0; + char *start{latin_output}; + uint16_t word; + + while (pos < len) { + if (pos + 16 <= len) { // if it is safe to read 32 more bytes, check that + // they are Latin1 + uint64_t v1, v2, v3, v4; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + ::memcpy(&v2, data + pos + 4, sizeof(uint64_t)); + ::memcpy(&v3, data + pos + 8, sizeof(uint64_t)); + ::memcpy(&v4, data + pos + 12, sizeof(uint64_t)); + + if (!match_system(big_endian)) { + v1 = (v1 >> 8) | (v1 << (64 - 8)); + } + if (!match_system(big_endian)) { + v2 = (v2 >> 8) | (v2 << (64 - 8)); + } + if (!match_system(big_endian)) { + v3 = (v3 >> 8) | (v3 << (64 - 8)); + } + if (!match_system(big_endian)) { + v4 = (v4 >> 8) | (v4 << (64 - 8)); + } + + if (((v1 | v2 | v3 | v4) & 0xFF00FF00FF00FF00) == 0) { + size_t final_pos = pos + 16; + while (pos < final_pos) { + *latin_output++ = !match_system(big_endian) + ? char(utf16::swap_bytes(data[pos])) + : char(data[pos]); + pos++; + } + continue; + } + } + word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; + if ((word & 0xFF00) == 0) { + *latin_output++ = char(word & 0xFF); + pos++; + } else { + return result(error_code::TOO_LARGE, pos); + } + } + return result(error_code::SUCCESS, latin_output - start); +} + +} // namespace utf16_to_latin1 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf16_to_latin1/utf16_to_latin1.h */ +/* begin file src/scalar/utf32_to_latin1/utf32_to_latin1.h */ +#ifndef SIMDUTF_UTF32_TO_LATIN1_H +#define SIMDUTF_UTF32_TO_LATIN1_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf32_to_latin1 { + +inline size_t convert(const char32_t *buf, size_t len, char *latin1_output) { + const uint32_t *data = reinterpret_cast(buf); + char *start = latin1_output; + uint32_t utf32_char; + size_t pos = 0; + uint32_t too_large = 0; + + while (pos < len) { + utf32_char = (uint32_t)data[pos]; + too_large |= utf32_char; + *latin1_output++ = (char)(utf32_char & 0xFF); + pos++; + } + if ((too_large & 0xFFFFFF00) != 0) { + return 0; + } + return latin1_output - start; +} + +inline result convert_with_errors(const char32_t *buf, size_t len, + char *latin1_output) { + const uint32_t *data = reinterpret_cast(buf); + char *start{latin1_output}; + size_t pos = 0; + while (pos < len) { + if (pos + 2 <= + len) { // if it is safe to read 8 more bytes, check that they are Latin1 + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if ((v & 0xFFFFFF00FFFFFF00) == 0) { + *latin1_output++ = char(buf[pos]); + *latin1_output++ = char(buf[pos + 1]); + pos += 2; + continue; + } + } + uint32_t utf32_char = data[pos]; + if ((utf32_char & 0xFFFFFF00) == + 0) { // Check if the character can be represented in Latin-1 + *latin1_output++ = (char)(utf32_char & 0xFF); + pos++; + } else { + return result(error_code::TOO_LARGE, pos); + }; + } + return result(error_code::SUCCESS, latin1_output - start); +} + +} // namespace utf32_to_latin1 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf32_to_latin1/utf32_to_latin1.h */ + +/* begin file src/scalar/utf8_to_latin1/valid_utf8_to_latin1.h */ +#ifndef SIMDUTF_VALID_UTF8_TO_LATIN1_H +#define SIMDUTF_VALID_UTF8_TO_LATIN1_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf8_to_latin1 { + +inline size_t convert_valid(const char *buf, size_t len, char *latin_output) { + const uint8_t *data = reinterpret_cast(buf); + + size_t pos = 0; + char *start{latin_output}; + + while (pos < len) { + // try to convert the next block of 16 ASCII bytes + if (pos + 16 <= + len) { // if it is safe to read 16 more bytes, check that they are ascii + uint64_t v1; + ::memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | + v2}; // We are only interested in these bits: 1000 1000 1000 + // 1000, so it makes sense to concatenate everything + if ((v & 0x8080808080808080) == + 0) { // if NONE of these are set, e.g. all of them are zero, then + // everything is ASCII + size_t final_pos = pos + 16; + while (pos < final_pos) { + *latin_output++ = char(buf[pos]); + pos++; + } + continue; + } + } + + // suppose it is not an all ASCII byte sequence + uint8_t leading_byte = data[pos]; // leading byte + if (leading_byte < 0b10000000) { + // converting one ASCII byte !!! + *latin_output++ = char(leading_byte); + pos++; + } else if ((leading_byte & 0b11100000) == + 0b11000000) { // the first three bits indicate: + // We have a two-byte UTF-8 + if (pos + 1 >= len) { + break; + } // minimal bound checking + if ((data[pos + 1] & 0b11000000) != 0b10000000) { + return 0; + } // checks if the next byte is a valid continuation byte in UTF-8. A + // valid continuation byte starts with 10. + // range check - + uint32_t code_point = + (leading_byte & 0b00011111) << 6 | + (data[pos + 1] & + 0b00111111); // assembles the Unicode code point from the two bytes. + // It does this by discarding the leading 110 and 10 + // bits from the two bytes, shifting the remaining bits + // of the first byte, and then combining the results + // with a bitwise OR operation. + *latin_output++ = char(code_point); + pos += 2; + } else { + // we may have a continuation but we do not do error checking + return 0; + } + } + return latin_output - start; +} + +} // namespace utf8_to_latin1 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf8_to_latin1/valid_utf8_to_latin1.h */ +/* begin file src/scalar/utf16_to_latin1/valid_utf16_to_latin1.h */ +#ifndef SIMDUTF_VALID_UTF16_TO_LATIN1_H +#define SIMDUTF_VALID_UTF16_TO_LATIN1_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf16_to_latin1 { + +template +inline size_t convert_valid(const char16_t *buf, size_t len, + char *latin_output) { + const uint16_t *data = reinterpret_cast(buf); + size_t pos = 0; + char *start{latin_output}; + uint16_t word = 0; + + while (pos < len) { + word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; + *latin_output++ = char(word); + pos++; + } + + return latin_output - start; +} + +} // namespace utf16_to_latin1 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf16_to_latin1/valid_utf16_to_latin1.h */ +/* begin file src/scalar/utf32_to_latin1/valid_utf32_to_latin1.h */ +#ifndef SIMDUTF_VALID_UTF32_TO_LATIN1_H +#define SIMDUTF_VALID_UTF32_TO_LATIN1_H + +namespace simdutf { +namespace scalar { +namespace { +namespace utf32_to_latin1 { + +inline size_t convert_valid(const char32_t *buf, size_t len, + char *latin1_output) { + const uint32_t *data = reinterpret_cast(buf); + char *start = latin1_output; + uint32_t utf32_char; + size_t pos = 0; + + while (pos < len) { + utf32_char = (uint32_t)data[pos]; + + if (pos + 2 <= + len) { // if it is safe to read 8 more bytes, check that they are Latin1 + uint64_t v; + ::memcpy(&v, data + pos, sizeof(uint64_t)); + if ((v & 0xFFFFFF00FFFFFF00) == 0) { + *latin1_output++ = char(buf[pos]); + *latin1_output++ = char(buf[pos + 1]); + pos += 2; + continue; + } else { + // output can not be represented in latin1 + return 0; + } + } + if ((utf32_char & 0xFFFFFF00) == 0) { + *latin1_output++ = char(utf32_char); + } else { + // output can not be represented in latin1 + return 0; + } + pos++; + } + return latin1_output - start; +} + +} // namespace utf32_to_latin1 +} // unnamed namespace +} // namespace scalar +} // namespace simdutf + +#endif +/* end file src/scalar/utf32_to_latin1/valid_utf32_to_latin1.h */ + +SIMDUTF_PUSH_DISABLE_WARNINGS +SIMDUTF_DISABLE_UNDESIRED_WARNINGS + +#if SIMDUTF_IMPLEMENTATION_ARM64 +/* begin file src/arm64/implementation.cpp */ +/* begin file src/simdutf/arm64/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "arm64" +// #define SIMDUTF_IMPLEMENTATION arm64 +/* end file src/simdutf/arm64/begin.h */ +namespace simdutf { +namespace arm64 { +namespace { +#ifndef SIMDUTF_ARM64_H + #error "arm64.h must be included" +#endif +using namespace simd; + +simdutf_really_inline bool is_ascii(const simd8x64 &input) { + simd8 bits = input.reduce_or(); + return bits.max_val() < 0b10000000u; +} + +simdutf_unused simdutf_really_inline simd8 +must_be_continuation(const simd8 prev1, const simd8 prev2, + const simd8 prev3) { + simd8 is_second_byte = prev1 >= uint8_t(0b11000000u); + simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); + simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); + // Use ^ instead of | for is_*_byte, because ^ is commutative, and the caller + // is using ^ as well. This will work fine because we only have to report + // errors for cases with 0-1 lead bytes. Multiple lead bytes implies 2 + // overlapping multibyte characters, and if that happens, there is guaranteed + // to be at least *one* lead byte that is part of only 1 other multibyte + // character. The error will be detected there. + return is_second_byte ^ is_third_byte ^ is_fourth_byte; +} + +simdutf_really_inline simd8 +must_be_2_3_continuation(const simd8 prev2, + const simd8 prev3) { + simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); + simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); + return is_third_byte ^ is_fourth_byte; +} + +// common functions for utf8 conversions +simdutf_really_inline uint16x4_t convert_utf8_3_byte_to_utf16(uint8x16_t in) { + // Low half contains 10cccccc|1110aaaa + // High half contains 10bbbbbb|10bbbbbb +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint8x16_t sh = simdutf_make_uint8x16_t(0, 2, 3, 5, 6, 8, 9, 11, 1, 1, + 4, 4, 7, 7, 10, 10); +#else + const uint8x16_t sh = {0, 2, 3, 5, 6, 8, 9, 11, 1, 1, 4, 4, 7, 7, 10, 10}; +#endif + uint8x16_t perm = vqtbl1q_u8(in, sh); + // Split into half vectors. + // 10cccccc|1110aaaa + uint8x8_t perm_low = vget_low_u8(perm); // no-op + // 10bbbbbb|10bbbbbb + uint8x8_t perm_high = vget_high_u8(perm); + // xxxxxxxx 10bbbbbb + uint16x4_t mid = vreinterpret_u16_u8(perm_high); // no-op + // xxxxxxxx 1110aaaa + uint16x4_t high = vreinterpret_u16_u8(perm_low); // no-op + // Assemble with shift left insert. + // xxxxxxaa aabbbbbb + uint16x4_t mid_high = vsli_n_u16(mid, high, 6); + // (perm_low << 8) | (perm_low >> 8) + // xxxxxxxx 10cccccc + uint16x4_t low = vreinterpret_u16_u8(vrev16_u8(perm_low)); + // Shift left insert into the low bits + // aaaabbbb bbcccccc + uint16x4_t composed = vsli_n_u16(low, mid_high, 6); + return composed; +} + +simdutf_really_inline uint16x8_t convert_utf8_2_byte_to_utf16(uint8x16_t in) { + // Converts 6 2 byte UTF-8 characters to 6 UTF-16 characters. + // Technically this calculates 8, but 6 does better and happens more often + // (The languages which use these codepoints use ASCII spaces so 8 would need + // to be in the middle of a very long word). + + // 10bbbbbb 110aaaaa + uint16x8_t upper = vreinterpretq_u16_u8(in); + // (in << 8) | (in >> 8) + // 110aaaaa 10bbbbbb + uint16x8_t lower = vreinterpretq_u16_u8(vrev16q_u8(in)); + // 00000000 000aaaaa + uint16x8_t upper_masked = vandq_u16(upper, vmovq_n_u16(0x1F)); + // Assemble with shift left insert. + // 00000aaa aabbbbbb + uint16x8_t composed = vsliq_n_u16(lower, upper_masked, 6); + return composed; +} + +simdutf_really_inline uint16x8_t +convert_utf8_1_to_2_byte_to_utf16(uint8x16_t in, size_t shufutf8_idx) { + // Converts 6 1-2 byte UTF-8 characters to 6 UTF-16 characters. + // This is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. + uint8x16_t sh = vld1q_u8(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[shufutf8_idx])); + // Shuffle + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 110aaaaa 10bbbbbb + uint16x8_t perm = vreinterpretq_u16_u8(vqtbl1q_u8(in, sh)); + // Mask + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 00000000 00bbbbbb + uint16x8_t ascii = vandq_u16(perm, vmovq_n_u16(0x7f)); // 6 or 7 bits + // 1 byte: 00000000 00000000 + // 2 byte: 000aaaaa 00000000 + uint16x8_t highbyte = vandq_u16(perm, vmovq_n_u16(0x1f00)); // 5 bits + // Combine with a shift right accumulate + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 00000aaa aabbbbbb + uint16x8_t composed = vsraq_n_u16(ascii, highbyte, 2); + return composed; +} + +/* begin file src/arm64/arm_validate_utf16.cpp */ +template +const char16_t *arm_validate_utf16(const char16_t *input, size_t size) { + const char16_t *end = input + size; + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); + while (end - input >= 16) { + // 0. Load data: since the validation takes into account only higher + // byte of each word, we compress the two vectors into one which + // consists only the higher bytes. + auto in0 = simd16(input); + auto in1 = + simd16(input + simd16::SIZE / sizeof(char16_t)); + if (!match_system(big_endian)) { + in0 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in0))); + in1 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in1))); + } + const auto t0 = in0.shr<8>(); + const auto t1 = in1.shr<8>(); + const simd8 in = simd16::pack(t0, t1); + // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). + const uint64_t surrogates_wordmask = ((in & v_f8) == v_d8).to_bitmask64(); + if (surrogates_wordmask == 0) { + input += 16; + } else { + // 2. We have some surrogates that have to be distinguished: + // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) + // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) + // + // Fact: high surrogate has 11th bit set (3rd bit in the higher word) + + // V - non-surrogate code units + // V = not surrogates_wordmask + const uint64_t V = ~surrogates_wordmask; + + // H - word-mask for high surrogates: the six highest bits are 0b1101'11 + const auto vH = ((in & v_fc) == v_dc); + const uint64_t H = vH.to_bitmask64(); + + // L - word mask for low surrogates + // L = not H and surrogates_wordmask + const uint64_t L = ~H & surrogates_wordmask; + + const uint64_t a = + L & (H >> 4); // A low surrogate must be followed by high one. + // (A low surrogate placed in the 7th register's word + // is an exception we handle.) + const uint64_t b = + a << 4; // Just mark that the opposite fact is hold, + // thanks to that we have only two masks for valid case. + const uint64_t c = V | a | b; // Combine all the masks into the final one. + if (c == ~0ull) { + // The whole input register contains valid UTF-16, i.e., + // either single code units or proper surrogate pairs. + input += 16; + } else if (c == 0xfffffffffffffffull) { + // The 15 lower code units of the input register contains valid UTF-16. + // The 15th word may be either a low or high surrogate. It the next + // iteration we 1) check if the low surrogate is followed by a high + // one, 2) reject sole high surrogate. + input += 15; + } else { + return nullptr; + } + } + } + return input; +} + +template +const result arm_validate_utf16_with_errors(const char16_t *input, + size_t size) { + const char16_t *start = input; + const char16_t *end = input + size; + + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); + while (input + 16 < end) { + // 0. Load data: since the validation takes into account only higher + // byte of each word, we compress the two vectors into one which + // consists only the higher bytes. + auto in0 = simd16(input); + auto in1 = + simd16(input + simd16::SIZE / sizeof(char16_t)); + + if (!match_system(big_endian)) { + in0 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in0))); + in1 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in1))); + } + const auto t0 = in0.shr<8>(); + const auto t1 = in1.shr<8>(); + const simd8 in = simd16::pack(t0, t1); + // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). + const uint64_t surrogates_wordmask = ((in & v_f8) == v_d8).to_bitmask64(); + if (surrogates_wordmask == 0) { + input += 16; + } else { + // 2. We have some surrogates that have to be distinguished: + // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) + // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) + // + // Fact: high surrogate has 11th bit set (3rd bit in the higher word) + + // V - non-surrogate code units + // V = not surrogates_wordmask + const uint64_t V = ~surrogates_wordmask; + + // H - word-mask for high surrogates: the six highest bits are 0b1101'11 + const auto vH = ((in & v_fc) == v_dc); + const uint64_t H = vH.to_bitmask64(); + + // L - word mask for low surrogates + // L = not H and surrogates_wordmask + const uint64_t L = ~H & surrogates_wordmask; + + const uint64_t a = + L & (H >> 4); // A low surrogate must be followed by high one. + // (A low surrogate placed in the 7th register's word + // is an exception we handle.) + const uint64_t b = + a << 4; // Just mark that the opposite fact is hold, + // thanks to that we have only two masks for valid case. + const uint64_t c = V | a | b; // Combine all the masks into the final one. + if (c == ~0ull) { + // The whole input register contains valid UTF-16, i.e., + // either single code units or proper surrogate pairs. + input += 16; + } else if (c == 0xfffffffffffffffull) { + // The 15 lower code units of the input register contains valid UTF-16. + // The 15th word may be either a low or high surrogate. It the next + // iteration we 1) check if the low surrogate is followed by a high + // one, 2) reject sole high surrogate. + input += 15; + } else { + return result(error_code::SURROGATE, input - start); + } + } + } + return result(error_code::SUCCESS, input - start); +} +/* end file src/arm64/arm_validate_utf16.cpp */ +/* begin file src/arm64/arm_validate_utf32le.cpp */ + +const char32_t *arm_validate_utf32le(const char32_t *input, size_t size) { + const char32_t *end = input + size; + + const uint32x4_t standardmax = vmovq_n_u32(0x10ffff); + const uint32x4_t offset = vmovq_n_u32(0xffff2000); + const uint32x4_t standardoffsetmax = vmovq_n_u32(0xfffff7ff); + uint32x4_t currentmax = vmovq_n_u32(0x0); + uint32x4_t currentoffsetmax = vmovq_n_u32(0x0); + + while (end - input >= 4) { + const uint32x4_t in = vld1q_u32(reinterpret_cast(input)); + currentmax = vmaxq_u32(in, currentmax); + currentoffsetmax = vmaxq_u32(vaddq_u32(in, offset), currentoffsetmax); + input += 4; + } + + uint32x4_t is_zero = + veorq_u32(vmaxq_u32(currentmax, standardmax), standardmax); + if (vmaxvq_u32(is_zero) != 0) { + return nullptr; + } + + is_zero = veorq_u32(vmaxq_u32(currentoffsetmax, standardoffsetmax), + standardoffsetmax); + if (vmaxvq_u32(is_zero) != 0) { + return nullptr; + } + + return input; +} + +const result arm_validate_utf32le_with_errors(const char32_t *input, + size_t size) { + const char32_t *start = input; + const char32_t *end = input + size; + + const uint32x4_t standardmax = vmovq_n_u32(0x10ffff); + const uint32x4_t offset = vmovq_n_u32(0xffff2000); + const uint32x4_t standardoffsetmax = vmovq_n_u32(0xfffff7ff); + uint32x4_t currentmax = vmovq_n_u32(0x0); + uint32x4_t currentoffsetmax = vmovq_n_u32(0x0); + + while (end - input >= 4) { + const uint32x4_t in = vld1q_u32(reinterpret_cast(input)); + currentmax = vmaxq_u32(in, currentmax); + currentoffsetmax = vmaxq_u32(vaddq_u32(in, offset), currentoffsetmax); + + uint32x4_t is_zero = + veorq_u32(vmaxq_u32(currentmax, standardmax), standardmax); + if (vmaxvq_u32(is_zero) != 0) { + return result(error_code::TOO_LARGE, input - start); + } + + is_zero = veorq_u32(vmaxq_u32(currentoffsetmax, standardoffsetmax), + standardoffsetmax); + if (vmaxvq_u32(is_zero) != 0) { + return result(error_code::SURROGATE, input - start); + } + + input += 4; + } + + return result(error_code::SUCCESS, input - start); +} +/* end file src/arm64/arm_validate_utf32le.cpp */ + +/* begin file src/arm64/arm_convert_latin1_to_utf16.cpp */ +template +std::pair +arm_convert_latin1_to_utf16(const char *buf, size_t len, + char16_t *utf16_output) { + const char *end = buf + len; + + while (end - buf >= 16) { + uint8x16_t in8 = vld1q_u8(reinterpret_cast(buf)); + uint16x8_t inlow = vmovl_u8(vget_low_u8(in8)); + if (!match_system(big_endian)) { + inlow = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(inlow))); + } + vst1q_u16(reinterpret_cast(utf16_output), inlow); + uint16x8_t inhigh = vmovl_u8(vget_high_u8(in8)); + if (!match_system(big_endian)) { + inhigh = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(inhigh))); + } + vst1q_u16(reinterpret_cast(utf16_output + 8), inhigh); + utf16_output += 16; + buf += 16; + } + + return std::make_pair(buf, utf16_output); +} +/* end file src/arm64/arm_convert_latin1_to_utf16.cpp */ +/* begin file src/arm64/arm_convert_latin1_to_utf32.cpp */ +std::pair +arm_convert_latin1_to_utf32(const char *buf, size_t len, + char32_t *utf32_output) { + const char *end = buf + len; + + while (end - buf >= 16) { + uint8x16_t in8 = vld1q_u8(reinterpret_cast(buf)); + uint16x8_t in8low = vmovl_u8(vget_low_u8(in8)); + uint32x4_t in16lowlow = vmovl_u16(vget_low_u16(in8low)); + uint32x4_t in16lowhigh = vmovl_u16(vget_high_u16(in8low)); + uint16x8_t in8high = vmovl_u8(vget_high_u8(in8)); + uint32x4_t in8highlow = vmovl_u16(vget_low_u16(in8high)); + uint32x4_t in8highhigh = vmovl_u16(vget_high_u16(in8high)); + vst1q_u32(reinterpret_cast(utf32_output), in16lowlow); + vst1q_u32(reinterpret_cast(utf32_output + 4), in16lowhigh); + vst1q_u32(reinterpret_cast(utf32_output + 8), in8highlow); + vst1q_u32(reinterpret_cast(utf32_output + 12), in8highhigh); + + utf32_output += 16; + buf += 16; + } + + return std::make_pair(buf, utf32_output); +} +/* end file src/arm64/arm_convert_latin1_to_utf32.cpp */ +/* begin file src/arm64/arm_convert_latin1_to_utf8.cpp */ +/* + Returns a pair: the first unprocessed byte from buf and utf8_output + A scalar routing should carry on the conversion of the tail. +*/ +std::pair +arm_convert_latin1_to_utf8(const char *latin1_input, size_t len, + char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char *end = latin1_input + len; + const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080); + // We always write 16 bytes, of which more than the first 8 bytes + // are valid. A safety margin of 8 is more than sufficient. + while (end - latin1_input >= 16 + 8) { + uint8x16_t in8 = vld1q_u8(reinterpret_cast(latin1_input)); + if (vmaxvq_u8(in8) <= 0x7F) { // ASCII fast path!!!! + vst1q_u8(utf8_output, in8); + utf8_output += 16; + latin1_input += 16; + continue; + } + + // We just fallback on UTF-16 code. This could be optimized/simplified + // further. + uint16x8_t in16 = vmovl_u8(vget_low_u8(in8)); + // 1. prepare 2-byte values + // input 8-bit word : [aabb|bbbb] x 8 + // expected output : [1100|00aa|10bb|bbbb] x 8 + const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00); + const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f); + + // t0 = [0000|00aa|bbbb|bb00] + const uint16x8_t t0 = vshlq_n_u16(in16, 2); + // t1 = [0000|00aa|0000|0000] + const uint16x8_t t1 = vandq_u16(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const uint16x8_t t2 = vandq_u16(in16, v_003f); + // t3 = [0000|00aa|00bb|bbbb] + const uint16x8_t t3 = vorrq_u16(t1, t2); + // t4 = [1100|00aa|10bb|bbbb] + const uint16x8_t t4 = vorrq_u16(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); + const uint16x8_t one_byte_bytemask = vcleq_u16(in16, v_007f); + const uint8x16_t utf8_unpacked = + vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, in16, t4)); + // 3. prepare bitmask for 8-bit lookup +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t mask = simdutf_make_uint16x8_t( + 0x0001, 0x0004, 0x0010, 0x0040, 0x0002, 0x0008, 0x0020, 0x0080); +#else + const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040, + 0x0002, 0x0008, 0x0020, 0x0080}; +#endif + uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask)); + // 4. pack the bytes + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; + const uint8x16_t shuffle = vld1q_u8(row + 1); + const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle); + + // 5. store bytes + vst1q_u8(utf8_output, utf8_packed); + // 6. adjust pointers + latin1_input += 8; + utf8_output += row[0]; + + } // while + + return std::make_pair(latin1_input, reinterpret_cast(utf8_output)); +} +/* end file src/arm64/arm_convert_latin1_to_utf8.cpp */ + +/* begin file src/arm64/arm_convert_utf8_to_latin1.cpp */ +// Convert up to 16 bytes from utf8 to utf16 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 16, usually 12). +size_t convert_masked_utf8_to_latin1(const char *input, + uint64_t utf8_end_of_code_point_mask, + char *&latin1_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + uint8x16_t in = vld1q_u8(reinterpret_cast(input)); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + + // We first try a few fast paths. + // The obvious first test is ASCII, which actually consumes the full 16. + if (utf8_end_of_code_point_mask == 0xfff) { + // We process in chunks of 12 bytes + vst1q_u8(reinterpret_cast(latin1_output), in); + latin1_output += 12; // We wrote 12 18-bit characters. + return 12; // We consumed 12 bytes. + } + /// We do not have a fast path available, or the fast path is unimportant, so + /// we fallback. + const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][0]; + + const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][1]; + // this indicates an invalid input: + if (idx >= 64) { + return consumed; + } + // Here we should have (idx < 64), if not, there is a bug in the validation or + // elsewhere. SIX (6) input code-code units this is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. Converts 6 + // 1-2 byte UTF-8 characters to 6 UTF-16 characters. This is a relatively easy + // scenario we process SIX (6) input code-code units. The max length in bytes + // of six code code units spanning between 1 and 2 bytes each is 12 bytes. + uint8x16_t sh = vld1q_u8(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx])); + // Shuffle + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 110aaaaa 10bbbbbb + uint16x8_t perm = vreinterpretq_u16_u8(vqtbl1q_u8(in, sh)); + // Mask + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 00000000 00bbbbbb + uint16x8_t ascii = vandq_u16(perm, vmovq_n_u16(0x7f)); // 6 or 7 bits + // 1 byte: 00000000 00000000 + // 2 byte: 000aaaaa 00000000 + uint16x8_t highbyte = vandq_u16(perm, vmovq_n_u16(0x1f00)); // 5 bits + // Combine with a shift right accumulate + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 00000aaa aabbbbbb + uint16x8_t composed = vsraq_n_u16(ascii, highbyte, 2); + // writing 8 bytes even though we only care about the first 6 bytes. + uint8x8_t latin1_packed = vmovn_u16(composed); + vst1_u8(reinterpret_cast(latin1_output), latin1_packed); + latin1_output += 6; // We wrote 6 bytes. + return consumed; +} +/* end file src/arm64/arm_convert_utf8_to_latin1.cpp */ +/* begin file src/arm64/arm_convert_utf8_to_utf16.cpp */ +// Convert up to 16 bytes from utf8 to utf16 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 16, usually 12). +template +size_t convert_masked_utf8_to_utf16(const char *input, + uint64_t utf8_end_of_code_point_mask, + char16_t *&utf16_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + uint8x16_t in = vld1q_u8(reinterpret_cast(input)); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + + // We first try a few fast paths. + // The obvious first test is ASCII, which actually consumes the full 16. + if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xffff) { + // We process in chunks of 16 bytes + // The routine in simd.h is reused. + simd8 temp{vreinterpretq_s8_u8(in)}; + temp.store_ascii_as_utf16(utf16_output); + utf16_output += 16; // We wrote 16 16-bit characters. + return 16; // We consumed 16 bytes. + } + + // 3 byte sequences are the next most common, as seen in CJK, which has long + // sequences of these. + if (input_utf8_end_of_code_point_mask == 0x924) { + // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte + // UTF-16 code units. + uint16x4_t composed = convert_utf8_3_byte_to_utf16(in); + // Byte swap if necessary + if (!match_system(big_endian)) { + composed = vreinterpret_u16_u8(vrev16_u8(vreinterpret_u8_u16(composed))); + } + vst1_u16(reinterpret_cast(utf16_output), composed); + utf16_output += 4; // We wrote 4 16-bit characters. + return 12; // We consumed 12 bytes. + } + + // 2 byte sequences occur in short bursts in languages like Greek and Russian. + if ((utf8_end_of_code_point_mask & 0xFFF) == 0xaaa) { + // We want to take 6 2-byte UTF-8 code units and turn them into 6 2-byte + // UTF-16 code units. + uint16x8_t composed = convert_utf8_2_byte_to_utf16(in); + // Byte swap if necessary + if (!match_system(big_endian)) { + composed = + vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(composed))); + } + vst1q_u16(reinterpret_cast(utf16_output), composed); + + utf16_output += 6; // We wrote 6 16-bit characters. + return 12; // We consumed 12 bytes. + } + + /// We do not have a fast path available, or the fast path is unimportant, so + /// we fallback. + const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][0]; + + const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][1]; + + if (idx < 64) { + // SIX (6) input code-code units + // Convert to UTF-16 + uint16x8_t composed = convert_utf8_1_to_2_byte_to_utf16(in, idx); + // Byte swap if necessary + if (!match_system(big_endian)) { + composed = + vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(composed))); + } + // Store + vst1q_u16(reinterpret_cast(utf16_output), composed); + utf16_output += 6; // We wrote 6 16-bit characters. + return consumed; + } else if (idx < 145) { + // FOUR (4) input code-code units + // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing. + uint8x16_t sh = vld1q_u8(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx])); + // XXX: depending on the system scalar instructions might be faster. + // 1 byte: 00000000 00000000 0ccccccc + // 2 byte: 00000000 110bbbbb 10cccccc + // 3 byte: 1110aaaa 10bbbbbb 10cccccc + uint32x4_t perm = vreinterpretq_u32_u8(vqtbl1q_u8(in, sh)); + // 1 byte: 00000000 0ccccccc + // 2 byte: xx0bbbbb x0cccccc + // 3 byte: xxbbbbbb x0cccccc + uint16x4_t lowperm = vmovn_u32(perm); + // Partially mask with bic (doesn't require a temporary register unlike and) + // The shift left insert below will clear the top bits. + // 1 byte: 00000000 00000000 + // 2 byte: xx0bbbbb 00000000 + // 3 byte: xxbbbbbb 00000000 + uint16x4_t middlebyte = vbic_u16(lowperm, vmov_n_u16(uint16_t(~0xFF00))); + // ASCII + // 1 byte: 00000000 0ccccccc + // 2+byte: 00000000 00cccccc + uint16x4_t ascii = vand_u16(lowperm, vmov_n_u16(0x7F)); + // Split into narrow vectors. + // 2 byte: 00000000 00000000 + // 3 byte: 00000000 xxxxaaaa + uint16x4_t highperm = vshrn_n_u32(perm, 16); + // Shift right accumulate the middle byte + // 1 byte: 00000000 0ccccccc + // 2 byte: 00xx0bbb bbcccccc + // 3 byte: 00xxbbbb bbcccccc + uint16x4_t middlelow = vsra_n_u16(ascii, middlebyte, 2); + // Shift left and insert the top 4 bits, overwriting the garbage + // 1 byte: 00000000 0ccccccc + // 2 byte: 00000bbb bbcccccc + // 3 byte: aaaabbbb bbcccccc + uint16x4_t composed = vsli_n_u16(middlelow, highperm, 12); + // Byte swap if necessary + if (!match_system(big_endian)) { + composed = vreinterpret_u16_u8(vrev16_u8(vreinterpret_u8_u16(composed))); + } + vst1_u16(reinterpret_cast(utf16_output), composed); + + utf16_output += 4; // We wrote 4 16-bit codepoints + return consumed; + } else if (idx < 209) { + // THREE (3) input code-code units + if (input_utf8_end_of_code_point_mask == 0x888) { + // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte + // UTF-16 pairs. Generating surrogate pairs is a little tricky though, but + // it is easier when we can assume they are all pairs. This version does + // not use the LUT, but 4 byte sequences are less common and the overhead + // of the extra memory access is less important than the early branch + // overhead in shorter sequences. + + // Swap byte pairs + // 10dddddd 10cccccc|10bbbbbb 11110aaa + // 10cccccc 10dddddd|11110aaa 10bbbbbb + uint8x16_t swap = vrev16q_u8(in); + // Shift left 2 bits + // cccccc00 dddddd00 xxxxxxxx bbbbbb00 + uint32x4_t shift = vreinterpretq_u32_u8(vshlq_n_u8(swap, 2)); + // Create a magic number containing the low 2 bits of the trail surrogate + // and all the corrections needed to create the pair. UTF-8 4b prefix = + // -0x0000|0xF000 surrogate offset = -0x0000|0x0040 (0x10000 << 6) + // surrogate high = +0x0000|0xD800 + // surrogate low = +0xDC00|0x0000 + // ------------------------------- + // = +0xDC00|0xE7C0 + uint32x4_t magic = vmovq_n_u32(0xDC00E7C0); + // Generate unadjusted trail surrogate minus lowest 2 bits + // xxxxxxxx xxxxxxxx|11110aaa bbbbbb00 + uint32x4_t trail = + vbslq_u32(vmovq_n_u32(0x0000FF00), vreinterpretq_u32_u8(swap), shift); + // Insert low 2 bits of trail surrogate to magic number for later + // 11011100 00000000 11100111 110000cc + uint16x8_t magic_with_low_2 = + vreinterpretq_u16_u32(vsraq_n_u32(magic, shift, 30)); + // Generate lead surrogate + // xxxxcccc ccdddddd|xxxxxxxx xxxxxxxx + uint32x4_t lead = vreinterpretq_u32_u16( + vsliq_n_u16(vreinterpretq_u16_u8(swap), vreinterpretq_u16_u8(in), 6)); + // Mask out lead + // 000000cc ccdddddd|xxxxxxxx xxxxxxxx + lead = vbicq_u32(lead, vmovq_n_u32(uint32_t(~0x03FFFFFF))); + // Blend pairs + // 000000cc ccdddddd|11110aaa bbbbbb00 + uint16x8_t blend = vreinterpretq_u16_u32( + vbslq_u32(vmovq_n_u32(0x0000FFFF), trail, lead)); + // Add magic number to finish the result + // 110111CC CCDDDDDD|110110AA BBBBBBCC + uint16x8_t composed = vaddq_u16(blend, magic_with_low_2); + // Byte swap if necessary + if (!match_system(big_endian)) { + composed = + vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(composed))); + } + uint16_t buffer[8]; + vst1q_u16(reinterpret_cast(buffer), composed); + for (int k = 0; k < 6; k++) { + utf16_output[k] = buffer[k]; + } // the loop might compiler to a couple of instructions. + utf16_output += 6; // We wrote 3 32-bit surrogate pairs. + return 12; // We consumed 12 bytes. + } + // 3 1-4 byte sequences + uint8x16_t sh = vld1q_u8(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx])); + + // 1 byte: 00000000 00000000 00000000 0ddddddd + // 3 byte: 00000000 00000000 110ccccc 10dddddd + // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd + // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd + uint32x4_t perm = vreinterpretq_u32_u8(vqtbl1q_u8(in, sh)); + // added to fix issue https://github.com/simdutf/simdutf/issues/514 + // We only want to write 2 * 16-bit code units when that is actually what we + // have. Unfortunately, we cannot trust the input. So it is possible to get + // 0xff as an input byte and it should not result in a surrogate pair. We + // need to check for that. + uint32_t permbuffer[4]; + vst1q_u32(permbuffer, perm); + // Mask the low and middle bytes + // 00000000 00000000 00000000 0ddddddd + uint32x4_t ascii = vandq_u32(perm, vmovq_n_u32(0x7f)); + // Because the surrogates need more work, the high surrogate is computed + // first. + uint32x4_t middlehigh = vshlq_n_u32(perm, 2); + // 00000000 00000000 00cccccc 00000000 + uint32x4_t middlebyte = vandq_u32(perm, vmovq_n_u32(0x3F00)); + // Start assembling the sequence. Since the 4th byte is in the same position + // as it would be in a surrogate and there is no dependency, shift left + // instead of right. 3 byte: 00000000 10bbbbxx xxxxxxxx xxxxxxxx 4 byte: + // 11110aaa bbbbbbxx xxxxxxxx xxxxxxxx + uint32x4_t ab = vbslq_u32(vmovq_n_u32(0xFF000000), perm, middlehigh); + // Top 16 bits contains the high ten bits of the surrogate pair before + // correction 3 byte: 00000000 10bbbbcc|cccc0000 00000000 4 byte: 11110aaa + // bbbbbbcc|cccc0000 00000000 - high 10 bits correct w/o correction + uint32x4_t abc = + vbslq_u32(vmovq_n_u32(0xFFFC0000), ab, vshlq_n_u32(middlebyte, 4)); + // Combine the low 6 or 7 bits by a shift right accumulate + // 3 byte: 00000000 00000010|bbbbcccc ccdddddd - low 16 bits correct + // 4 byte: 00000011 110aaabb|bbbbcccc ccdddddd - low 10 bits correct w/o + // correction + uint32x4_t composed = vsraq_n_u32(ascii, abc, 6); + // After this is for surrogates + // Blend the low and high surrogates + // 4 byte: 11110aaa bbbbbbcc|bbbbcccc ccdddddd + uint32x4_t mixed = vbslq_u32(vmovq_n_u32(0xFFFF0000), abc, composed); + // Clear the upper 6 bits of the low surrogate. Don't clear the upper bits + // yet as 0x10000 was not subtracted from the codepoint yet. 4 byte: + // 11110aaa bbbbbbcc|000000cc ccdddddd + uint16x8_t masked_pair = vreinterpretq_u16_u32( + vbicq_u32(mixed, vmovq_n_u32(uint32_t(~0xFFFF03FF)))); + // Correct the remaining UTF-8 prefix, surrogate offset, and add the + // surrogate prefixes in one magic 16-bit addition. similar magic number but + // without the continue byte adjust and halfword swapped UTF-8 4b prefix = + // -0xF000|0x0000 surrogate offset = -0x0040|0x0000 (0x10000 << 6) + // surrogate high = +0xD800|0x0000 + // surrogate low = +0x0000|0xDC00 + // ----------------------------------- + // = +0xE7C0|0xDC00 + uint16x8_t magic = vreinterpretq_u16_u32(vmovq_n_u32(0xE7C0DC00)); + // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD - surrogate pair complete + uint32x4_t surrogates = + vreinterpretq_u32_u16(vaddq_u16(masked_pair, magic)); + // If the high bit is 1 (s32 less than zero), this needs a surrogate pair + uint32x4_t is_pair = vcltzq_s32(vreinterpretq_s32_u32(perm)); + + // Select either the 4 byte surrogate pair or the 2 byte solo codepoint + // 3 byte: 0xxxxxxx xxxxxxxx|bbbbcccc ccdddddd + // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD + uint32x4_t selected = vbslq_u32(is_pair, surrogates, composed); + // Byte swap if necessary + if (!match_system(big_endian)) { + selected = + vreinterpretq_u32_u8(vrev16q_u8(vreinterpretq_u8_u32(selected))); + } + // Attempting to shuffle and store would be complex, just scalarize. + uint32_t buffer[4]; + vst1q_u32(buffer, selected); + // Test for the top bit of the surrogate mask. Remove due to issue 514 + // const uint32_t SURROGATE_MASK = match_system(big_endian) ? 0x80000000 : + // 0x00800000; + for (size_t i = 0; i < 3; i++) { + // Surrogate + // Used to be if (buffer[i] & SURROGATE_MASK) { + // See discussion above. + // patch for issue https://github.com/simdutf/simdutf/issues/514 + if ((permbuffer[i] & 0xf8000000) == 0xf0000000) { + utf16_output[0] = uint16_t(buffer[i] >> 16); + utf16_output[1] = uint16_t(buffer[i] & 0xFFFF); + utf16_output += 2; + } else { + utf16_output[0] = uint16_t(buffer[i] & 0xFFFF); + utf16_output++; + } + } + return consumed; + } else { + // here we know that there is an error but we do not handle errors + return 12; + } +} +/* end file src/arm64/arm_convert_utf8_to_utf16.cpp */ +/* begin file src/arm64/arm_convert_utf8_to_utf32.cpp */ +// Convert up to 12 bytes from utf8 to utf32 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 12). +size_t convert_masked_utf8_to_utf32(const char *input, + uint64_t utf8_end_of_code_point_mask, + char32_t *&utf32_out) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + uint32_t *&utf32_output = reinterpret_cast(utf32_out); + uint8x16_t in = vld1q_u8(reinterpret_cast(input)); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xFFF; + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // + // We first try a few fast paths. + if (utf8_end_of_code_point_mask == 0xfff) { + // We process in chunks of 12 bytes. + // use fast implementation in src/simdutf/arm64/simd.h + // Ideally the compiler can keep the tables in registers. + simd8 temp{vreinterpretq_s8_u8(in)}; + temp.store_ascii_as_utf32_tbl(utf32_out); + utf32_output += 12; // We wrote 12 32-bit characters. + return 12; // We consumed 12 bytes. + } + if (input_utf8_end_of_code_point_mask == 0x924) { + // We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte + // UTF-32 code units. Convert to UTF-16 + uint16x4_t composed_utf16 = convert_utf8_3_byte_to_utf16(in); + // Zero extend and store via ST2 with a zero. + uint16x4x2_t interleaver = {{composed_utf16, vmov_n_u16(0)}}; + vst2_u16(reinterpret_cast(utf32_output), interleaver); + utf32_output += 4; // We wrote 4 32-bit characters. + return 12; // We consumed 12 bytes. + } + + // 2 byte sequences occur in short bursts in languages like Greek and Russian. + if (input_utf8_end_of_code_point_mask == 0xaaa) { + // We want to take 6 2-byte UTF-8 code units and turn them into 6 4-byte + // UTF-32 code units. Convert to UTF-16 + uint16x8_t composed_utf16 = convert_utf8_2_byte_to_utf16(in); + // Zero extend and store via ST2 with a zero. + uint16x8x2_t interleaver = {{composed_utf16, vmovq_n_u16(0)}}; + vst2q_u16(reinterpret_cast(utf32_output), interleaver); + utf32_output += 6; // We wrote 6 32-bit characters. + return 12; // We consumed 12 bytes. + } + /// Either no fast path or an unimportant fast path. + + const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][1]; + + if (idx < 64) { + // SIX (6) input code-code units + // Convert to UTF-16 + uint16x8_t composed_utf16 = convert_utf8_1_to_2_byte_to_utf16(in, idx); + // Zero extend and store with ST2 and zero + uint16x8x2_t interleaver = {{composed_utf16, vmovq_n_u16(0)}}; + vst2q_u16(reinterpret_cast(utf32_output), interleaver); + utf32_output += 6; // We wrote 6 32-bit characters. + return consumed; + } else if (idx < 145) { + // FOUR (4) input code-code units + // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing. + uint8x16_t sh = vld1q_u8(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx])); + // Shuffle + // 1 byte: 00000000 00000000 0ccccccc + // 2 byte: 00000000 110bbbbb 10cccccc + // 3 byte: 1110aaaa 10bbbbbb 10cccccc + uint32x4_t perm = vreinterpretq_u32_u8(vqtbl1q_u8(in, sh)); + // Split + // 00000000 00000000 0ccccccc + uint32x4_t ascii = vandq_u32(perm, vmovq_n_u32(0x7F)); // 6 or 7 bits + // Note: unmasked + // xxxxxxxx aaaaxxxx xxxxxxxx + uint32x4_t high = vshrq_n_u32(perm, 4); // 4 bits + // Use 16 bit bic instead of and. + // The top bits will be corrected later in the bsl + // 00000000 10bbbbbb 00000000 + uint32x4_t middle = vreinterpretq_u32_u16( + vbicq_u16(vreinterpretq_u16_u32(perm), + vmovq_n_u16(uint16_t(~0xff00)))); // 5 or 6 bits + // Combine low and middle with shift right accumulate + // 00000000 00xxbbbb bbcccccc + uint32x4_t lowmid = vsraq_n_u32(ascii, middle, 2); + // Insert top 4 bits from high byte with bitwise select + // 00000000 aaaabbbb bbcccccc + uint32x4_t composed = vbslq_u32(vmovq_n_u32(0x0000F000), high, lowmid); + vst1q_u32(utf32_output, composed); + utf32_output += 4; // We wrote 4 32-bit characters. + return consumed; + } else if (idx < 209) { + // THREE (3) input code-code units + if (input_utf8_end_of_code_point_mask == 0x888) { + // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte + // UTF-32 code units. This uses the same method as the fixed 3 byte + // version, reversing and shift left insert. However, there is no need for + // a shuffle mask now, just rev16 and rev32. + // + // This version does not use the LUT, but 4 byte sequences are less common + // and the overhead of the extra memory access is less important than the + // early branch overhead in shorter sequences, so it comes last. + + // Swap pairs of bytes + // 10dddddd|10cccccc|10bbbbbb|11110aaa + // 10cccccc 10dddddd|11110aaa 10bbbbbb + uint16x8_t swap1 = vreinterpretq_u16_u8(vrev16q_u8(in)); + // Shift left and insert + // xxxxcccc ccdddddd|xxxxxxxa aabbbbbb + uint16x8_t merge1 = vsliq_n_u16(swap1, vreinterpretq_u16_u8(in), 6); + // Swap 16-bit lanes + // xxxxcccc ccdddddd xxxxxxxa aabbbbbb + // xxxxxxxa aabbbbbb xxxxcccc ccdddddd + uint32x4_t swap2 = vreinterpretq_u32_u16(vrev32q_u16(merge1)); + // Shift insert again + // xxxxxxxx xxxaaabb bbbbcccc ccdddddd + uint32x4_t merge2 = vsliq_n_u32(swap2, vreinterpretq_u32_u16(merge1), 12); + // Clear the garbage + // 00000000 000aaabb bbbbcccc ccdddddd + uint32x4_t composed = vandq_u32(merge2, vmovq_n_u32(0x1FFFFF)); + // Store + vst1q_u32(utf32_output, composed); + + utf32_output += 3; // We wrote 3 32-bit characters. + return 12; // We consumed 12 bytes. + } + // Unlike UTF-16, doing a fast codepath doesn't have nearly as much benefit + // due to surrogates no longer being involved. + uint8x16_t sh = vld1q_u8(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx])); + // 1 byte: 00000000 00000000 00000000 0ddddddd + // 2 byte: 00000000 00000000 110ccccc 10dddddd + // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd + // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd + uint32x4_t perm = vreinterpretq_u32_u8(vqtbl1q_u8(in, sh)); + // Ascii + uint32x4_t ascii = vandq_u32(perm, vmovq_n_u32(0x7F)); + uint32x4_t middle = vandq_u32(perm, vmovq_n_u32(0x3f00)); + // When converting the way we do, the 3 byte prefix will be interpreted as + // the 18th bit being set, since the code would interpret the lead byte + // (0b1110bbbb) as a continuation byte (0b10bbbbbb). To fix this, we can + // either xor or do an 8 bit add of the 6th bit shifted right by 1. Since + // NEON has shift right accumulate, we use that. + // 4 byte 3 byte + // 10bbbbbb 1110bbbb + // 00000000 01000000 6th bit + // 00000000 00100000 shift right + // 10bbbbbb 0000bbbb add + // 00bbbbbb 0000bbbb mask + uint8x16_t correction = + vreinterpretq_u8_u32(vandq_u32(perm, vmovq_n_u32(0x00400000))); + uint32x4_t corrected = vreinterpretq_u32_u8( + vsraq_n_u8(vreinterpretq_u8_u32(perm), correction, 1)); + // 00000000 00000000 0000cccc ccdddddd + uint32x4_t cd = vsraq_n_u32(ascii, middle, 2); + // Insert twice + // xxxxxxxx xxxaaabb bbbbxxxx xxxxxxxx + uint32x4_t ab = vbslq_u32(vmovq_n_u32(0x01C0000), vshrq_n_u32(corrected, 6), + vshrq_n_u32(corrected, 4)); + // 00000000 000aaabb bbbbcccc ccdddddd + uint32x4_t composed = vbslq_u32(vmovq_n_u32(0xFFE00FFF), cd, ab); + // Store + vst1q_u32(utf32_output, composed); + utf32_output += 3; // We wrote 3 32-bit characters. + return consumed; + } else { + // here we know that there is an error but we do not handle errors + return 12; + } +} +/* end file src/arm64/arm_convert_utf8_to_utf32.cpp */ + +/* begin file src/arm64/arm_convert_utf16_to_latin1.cpp */ + +template +std::pair +arm_convert_utf16_to_latin1(const char16_t *buf, size_t len, + char *latin1_output) { + const char16_t *end = buf + len; + while (end - buf >= 8) { + uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); + if (!match_system(big_endian)) { + in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); + } + if (vmaxvq_u16(in) <= 0xff) { + // 1. pack the bytes + uint8x8_t latin1_packed = vmovn_u16(in); + // 2. store (8 bytes) + vst1_u8(reinterpret_cast(latin1_output), latin1_packed); + // 3. adjust pointers + buf += 8; + latin1_output += 8; + } else { + return std::make_pair(nullptr, reinterpret_cast(latin1_output)); + } + } // while + return std::make_pair(buf, latin1_output); +} + +template +std::pair +arm_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, + char *latin1_output) { + const char16_t *start = buf; + const char16_t *end = buf + len; + while (end - buf >= 8) { + uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); + if (!match_system(big_endian)) { + in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); + } + if (vmaxvq_u16(in) <= 0xff) { + // 1. pack the bytes + uint8x8_t latin1_packed = vmovn_u16(in); + // 2. store (8 bytes) + vst1_u8(reinterpret_cast(latin1_output), latin1_packed); + // 3. adjust pointers + buf += 8; + latin1_output += 8; + } else { + // Let us do a scalar fallback. + for (int k = 0; k < 8; k++) { + uint16_t word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k]) + : buf[k]; + if (word <= 0xff) { + *latin1_output++ = char(word); + } else { + return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), + latin1_output); + } + } + } + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), + latin1_output); +} +/* end file src/arm64/arm_convert_utf16_to_latin1.cpp */ +/* begin file src/arm64/arm_convert_utf16_to_utf32.cpp */ +/* + The vectorized algorithm works on single SSE register i.e., it + loads eight 16-bit code units. + + We consider three cases: + 1. an input register contains no surrogates and each value + is in range 0x0000 .. 0x07ff. + 2. an input register contains no surrogates and values are + is in range 0x0000 .. 0xffff. + 3. an input register contains surrogates --- i.e. codepoints + can have 16 or 32 bits. + + Ad 1. + + When values are less than 0x0800, it means that a 16-bit code unit + can be converted into: 1) single UTF8 byte (when it is an ASCII + char) or 2) two UTF8 bytes. + + For this case we do only some shuffle to obtain these 2-byte + codes and finally compress the whole SSE register with a single + shuffle. + + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + Ad 2. + + When values fit in 16-bit code units, but are above 0x07ff, then + a single word may produce one, two or three UTF8 bytes. + + We prepare data for all these three cases in two registers. + The first register contains lower two UTF8 bytes (used in all + cases), while the second one contains just the third byte for + the three-UTF8-bytes case. + + Finally these two registers are interleaved forming eight-element + array of 32-bit values. The array spans two SSE registers. + The bytes from the registers are compressed using two shuffles. + + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + + To summarize: + - We need two 256-entry tables that have 8704 bytes in total. +*/ +/* + Returns a pair: the first unprocessed byte from buf and utf8_output + A scalar routing should carry on the conversion of the tail. +*/ +template +std::pair +arm_convert_utf16_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_out) { + uint32_t *utf32_output = reinterpret_cast(utf32_out); + const char16_t *end = buf + len; + + const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800); + const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); + + while (end - buf >= 8) { + uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); + if (!match_system(big_endian)) { + in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); + } + + const uint16x8_t surrogates_bytemask = + vceqq_u16(vandq_u16(in, v_f800), v_d800); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (vmaxvq_u16(surrogates_bytemask) == 0) { + // case: no surrogate pairs, extend all 16-bit code units to 32-bit code + // units + vst1q_u32(utf32_output, vmovl_u16(vget_low_u16(in))); + vst1q_u32(utf32_output + 4, vmovl_high_u16(in)); + utf32_output += 8; + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k]) + : buf[k]; + if ((word & 0xF800) != 0xD800) { + *utf32_output++ = char32_t(word); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k + 1]) + : buf[k + 1]; + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(nullptr, + reinterpret_cast(utf32_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + } + } + buf += k; + } + } // while + return std::make_pair(buf, reinterpret_cast(utf32_output)); +} + +/* + Returns a pair: a result struct and utf8_output. + If there is an error, the count field of the result is the position of the + error. Otherwise, it is the position of the first unprocessed byte in buf + (even if finished). A scalar routing should carry on the conversion of the + tail if needed. +*/ +template +std::pair +arm_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, + char32_t *utf32_out) { + uint32_t *utf32_output = reinterpret_cast(utf32_out); + const char16_t *start = buf; + const char16_t *end = buf + len; + + const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800); + const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); + + while ((end - buf) >= 8) { + uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); + if (!match_system(big_endian)) { + in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); + } + + const uint16x8_t surrogates_bytemask = + vceqq_u16(vandq_u16(in, v_f800), v_d800); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (vmaxvq_u16(surrogates_bytemask) == 0) { + // case: no surrogate pairs, extend all 16-bit code units to 32-bit code + // units + vst1q_u32(utf32_output, vmovl_u16(vget_low_u16(in))); + vst1q_u32(utf32_output + 4, vmovl_high_u16(in)); + utf32_output += 8; + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k]) + : buf[k]; + if ((word & 0xF800) != 0xD800) { + *utf32_output++ = char32_t(word); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k + 1]) + : buf[k + 1]; + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k - 1), + reinterpret_cast(utf32_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + } + } + buf += k; + } + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf32_output)); +} +/* end file src/arm64/arm_convert_utf16_to_utf32.cpp */ +/* begin file src/arm64/arm_convert_utf16_to_utf8.cpp */ +/* + The vectorized algorithm works on single SSE register i.e., it + loads eight 16-bit code units. + + We consider three cases: + 1. an input register contains no surrogates and each value + is in range 0x0000 .. 0x07ff. + 2. an input register contains no surrogates and values are + is in range 0x0000 .. 0xffff. + 3. an input register contains surrogates --- i.e. codepoints + can have 16 or 32 bits. + + Ad 1. + + When values are less than 0x0800, it means that a 16-bit code unit + can be converted into: 1) single UTF8 byte (when it is an ASCII + char) or 2) two UTF8 bytes. + + For this case we do only some shuffle to obtain these 2-byte + codes and finally compress the whole SSE register with a single + shuffle. + + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + Ad 2. + + When values fit in 16-bit code units, but are above 0x07ff, then + a single word may produce one, two or three UTF8 bytes. + + We prepare data for all these three cases in two registers. + The first register contains lower two UTF8 bytes (used in all + cases), while the second one contains just the third byte for + the three-UTF8-bytes case. + + Finally these two registers are interleaved forming eight-element + array of 32-bit values. The array spans two SSE registers. + The bytes from the registers are compressed using two shuffles. + + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + + To summarize: + - We need two 256-entry tables that have 8704 bytes in total. +*/ +/* + Returns a pair: the first unprocessed byte from buf and utf8_output + A scalar routing should carry on the conversion of the tail. +*/ +template +std::pair +arm_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char16_t *end = buf + len; + + const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800); + const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); + const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); + if (!match_system(big_endian)) { + in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); + } + if (vmaxvq_u16(in) <= 0x7F) { // ASCII fast path!!!! + // It is common enough that we have sequences of 16 consecutive ASCII + // characters. + uint16x8_t nextin = + vld1q_u16(reinterpret_cast(buf) + 8); + if (!match_system(big_endian)) { + nextin = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(nextin))); + } + if (vmaxvq_u16(nextin) > 0x7F) { + // 1. pack the bytes + // obviously suboptimal. + uint8x8_t utf8_packed = vmovn_u16(in); + // 2. store (8 bytes) + vst1_u8(utf8_output, utf8_packed); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + in = nextin; + } else { + // 1. pack the bytes + // obviously suboptimal. + uint8x16_t utf8_packed = vmovn_high_u16(vmovn_u16(in), nextin); + // 2. store (16 bytes) + vst1q_u8(utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + } + + if (vmaxvq_u16(in) <= 0x7FF) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00); + const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f); + + // t0 = [000a|aaaa|bbbb|bb00] + const uint16x8_t t0 = vshlq_n_u16(in, 2); + // t1 = [000a|aaaa|0000|0000] + const uint16x8_t t1 = vandq_u16(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const uint16x8_t t2 = vandq_u16(in, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const uint16x8_t t3 = vorrq_u16(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const uint16x8_t t4 = vorrq_u16(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); + const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f); + const uint8x16_t utf8_unpacked = + vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, in, t4)); + // 3. prepare bitmask for 8-bit lookup +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t mask = simdutf_make_uint16x8_t( + 0x0001, 0x0004, 0x0010, 0x0040, 0x0002, 0x0008, 0x0020, 0x0080); +#else + const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040, + 0x0002, 0x0008, 0x0020, 0x0080}; +#endif + uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask)); + // 4. pack the bytes + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; + const uint8x16_t shuffle = vld1q_u8(row + 1); + const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle); + + // 5. store bytes + vst1q_u8(utf8_output, utf8_packed); + + // 6. adjust pointers + buf += 8; + utf8_output += row[0]; + continue; + } + const uint16x8_t surrogates_bytemask = + vceqq_u16(vandq_u16(in, v_f800), v_d800); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (vmaxvq_u16(surrogates_bytemask) == 0) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t dup_even = simdutf_make_uint16x8_t( + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); +#else + const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606, + 0x0808, 0x0a0a, 0x0c0c, 0x0e0e}; +#endif + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) vmovq_n_u16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const uint16x8_t t0 = vreinterpretq_u16_u8( + vqtbl1q_u8(vreinterpretq_u8_u16(in), vreinterpretq_u8_u16(dup_even))); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const uint16x8_t t2 = vorrq_u16(t1, simdutf_vec(0b1000000000000000)); + + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + const uint16x8_t s0 = vshrq_n_u16(in, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + const uint16x8_t s1 = vandq_u16(in, simdutf_vec(0b0000111111000000)); + // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] + const uint16x8_t s1s = vshlq_n_u16(s1, 2); + // [00bb|bbbb|0000|aaaa] + const uint16x8_t s2 = vorrq_u16(s0, s1s); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000)); + const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF); + const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(in, v_07ff); + const uint16x8_t m0 = + vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask); + const uint16x8_t s4 = veorq_u16(s3, m0); +#undef simdutf_vec + + // 4. expand code units 16-bit => 32-bit + const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4)); + const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4)); + + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); + const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f); +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t onemask = simdutf_make_uint16x8_t( + 0x0001, 0x0004, 0x0010, 0x0040, 0x0100, 0x0400, 0x1000, 0x4000); + const uint16x8_t twomask = simdutf_make_uint16x8_t( + 0x0002, 0x0008, 0x0020, 0x0080, 0x0200, 0x0800, 0x2000, 0x8000); +#else + const uint16x8_t onemask = {0x0001, 0x0004, 0x0010, 0x0040, + 0x0100, 0x0400, 0x1000, 0x4000}; + const uint16x8_t twomask = {0x0002, 0x0008, 0x0020, 0x0080, + 0x0200, 0x0800, 0x2000, 0x8000}; +#endif + const uint16x8_t combined = + vorrq_u16(vandq_u16(one_byte_bytemask, onemask), + vandq_u16(one_or_two_bytes_bytemask, twomask)); + const uint16_t mask = vaddvq_u16(combined); + // The following fast path may or may not be beneficial. + /*if(mask == 0) { + // We only have three-byte code units. Use fast path. + const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0}; + const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle); + const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle); + vst1q_u8(utf8_output, utf8_0); + utf8_output += 12; + vst1q_u8(utf8_output, utf8_1); + utf8_output += 12; + buf += 8; + continue; + }*/ + const uint8_t mask0 = uint8_t(mask); + + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const uint8x16_t shuffle0 = vld1q_u8(row0 + 1); + const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0); + + const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const uint8x16_t shuffle1 = vld1q_u8(row1 + 1); + const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1); + + vst1q_u8(utf8_output, utf8_0); + utf8_output += row0[0]; + vst1q_u8(utf8_output, utf8_1); + utf8_output += row1[0]; + + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k]) + : buf[k]; + if ((word & 0xFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xF800) != 0xD800) { + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k + 1]) + : buf[k + 1]; + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(nullptr, + reinterpret_cast(utf8_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + return std::make_pair(buf, reinterpret_cast(utf8_output)); +} + +/* + Returns a pair: a result struct and utf8_output. + If there is an error, the count field of the result is the position of the + error. Otherwise, it is the position of the first unprocessed byte in buf + (even if finished). A scalar routing should carry on the conversion of the + tail if needed. +*/ +template +std::pair +arm_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, + char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char16_t *start = buf; + const char16_t *end = buf + len; + + const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800); + const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); + const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); + if (!match_system(big_endian)) { + in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); + } + if (vmaxvq_u16(in) <= 0x7F) { // ASCII fast path!!!! + // It is common enough that we have sequences of 16 consecutive ASCII + // characters. + uint16x8_t nextin = + vld1q_u16(reinterpret_cast(buf) + 8); + if (!match_system(big_endian)) { + nextin = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(nextin))); + } + if (vmaxvq_u16(nextin) > 0x7F) { + // 1. pack the bytes + // obviously suboptimal. + uint8x8_t utf8_packed = vmovn_u16(in); + // 2. store (8 bytes) + vst1_u8(utf8_output, utf8_packed); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + in = nextin; + } else { + // 1. pack the bytes + // obviously suboptimal. + uint8x16_t utf8_packed = vmovn_high_u16(vmovn_u16(in), nextin); + // 2. store (16 bytes) + vst1q_u8(utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + } + + if (vmaxvq_u16(in) <= 0x7FF) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00); + const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f); + + // t0 = [000a|aaaa|bbbb|bb00] + const uint16x8_t t0 = vshlq_n_u16(in, 2); + // t1 = [000a|aaaa|0000|0000] + const uint16x8_t t1 = vandq_u16(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const uint16x8_t t2 = vandq_u16(in, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const uint16x8_t t3 = vorrq_u16(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const uint16x8_t t4 = vorrq_u16(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); + const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f); + const uint8x16_t utf8_unpacked = + vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, in, t4)); + // 3. prepare bitmask for 8-bit lookup +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t mask = simdutf_make_uint16x8_t( + 0x0001, 0x0004, 0x0010, 0x0040, 0x0002, 0x0008, 0x0020, 0x0080); +#else + const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040, + 0x0002, 0x0008, 0x0020, 0x0080}; +#endif + uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask)); + // 4. pack the bytes + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; + const uint8x16_t shuffle = vld1q_u8(row + 1); + const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle); + + // 5. store bytes + vst1q_u8(utf8_output, utf8_packed); + + // 6. adjust pointers + buf += 8; + utf8_output += row[0]; + continue; + } + const uint16x8_t surrogates_bytemask = + vceqq_u16(vandq_u16(in, v_f800), v_d800); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (vmaxvq_u16(surrogates_bytemask) == 0) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t dup_even = simdutf_make_uint16x8_t( + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); +#else + const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606, + 0x0808, 0x0a0a, 0x0c0c, 0x0e0e}; +#endif + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) vmovq_n_u16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const uint16x8_t t0 = vreinterpretq_u16_u8( + vqtbl1q_u8(vreinterpretq_u8_u16(in), vreinterpretq_u8_u16(dup_even))); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const uint16x8_t t2 = vorrq_u16(t1, simdutf_vec(0b1000000000000000)); + + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + const uint16x8_t s0 = vshrq_n_u16(in, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + const uint16x8_t s1 = vandq_u16(in, simdutf_vec(0b0000111111000000)); + // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] + const uint16x8_t s1s = vshlq_n_u16(s1, 2); + // [00bb|bbbb|0000|aaaa] + const uint16x8_t s2 = vorrq_u16(s0, s1s); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000)); + const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF); + const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(in, v_07ff); + const uint16x8_t m0 = + vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask); + const uint16x8_t s4 = veorq_u16(s3, m0); +#undef simdutf_vec + + // 4. expand code units 16-bit => 32-bit + const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4)); + const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4)); + + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); + const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f); +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t onemask = simdutf_make_uint16x8_t( + 0x0001, 0x0004, 0x0010, 0x0040, 0x0100, 0x0400, 0x1000, 0x4000); + const uint16x8_t twomask = simdutf_make_uint16x8_t( + 0x0002, 0x0008, 0x0020, 0x0080, 0x0200, 0x0800, 0x2000, 0x8000); +#else + const uint16x8_t onemask = {0x0001, 0x0004, 0x0010, 0x0040, + 0x0100, 0x0400, 0x1000, 0x4000}; + const uint16x8_t twomask = {0x0002, 0x0008, 0x0020, 0x0080, + 0x0200, 0x0800, 0x2000, 0x8000}; +#endif + const uint16x8_t combined = + vorrq_u16(vandq_u16(one_byte_bytemask, onemask), + vandq_u16(one_or_two_bytes_bytemask, twomask)); + const uint16_t mask = vaddvq_u16(combined); + // The following fast path may or may not be beneficial. + /*if(mask == 0) { + // We only have three-byte code units. Use fast path. + const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0}; + const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle); + const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle); + vst1q_u8(utf8_output, utf8_0); + utf8_output += 12; + vst1q_u8(utf8_output, utf8_1); + utf8_output += 12; + buf += 8; + continue; + }*/ + const uint8_t mask0 = uint8_t(mask); + + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const uint8x16_t shuffle0 = vld1q_u8(row0 + 1); + const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0); + + const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const uint8x16_t shuffle1 = vld1q_u8(row1 + 1); + const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1); + + vst1q_u8(utf8_output, utf8_0); + utf8_output += row0[0]; + vst1q_u8(utf8_output, utf8_1); + utf8_output += row1[0]; + + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k]) + : buf[k]; + if ((word & 0xFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xF800) != 0xD800) { + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k + 1]) + : buf[k + 1]; + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k - 1), + reinterpret_cast(utf8_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf8_output)); +} +/* end file src/arm64/arm_convert_utf16_to_utf8.cpp */ + +/* begin file src/arm64/arm_base64.cpp */ +/** + * References and further reading: + * + * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the + * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. + * https://arxiv.org/abs/1910.05109 + * + * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 + * Instructions, ACM Transactions on the Web 12 (3), 2018. + * https://arxiv.org/abs/1704.00605 + * + * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. + * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, + * Request for Comments: 4648. + * + * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. + * http://www.alfredklomp.com/programming/sse-base64/. (2014). + * + * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD + * acceleration. https://github.com/aklomp/base64. (2014). + * + * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). + * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ + * + * Nick Kopp. 2013. Base64 Encoding on a GPU. + * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). + */ + +size_t encode_base64(char *dst, const char *src, size_t srclen, + base64_options options) { + // credit: Wojciech Muła + uint8_t *out = (uint8_t *)dst; + constexpr static uint8_t source_table[64] = { + 'A', 'Q', 'g', 'w', 'B', 'R', 'h', 'x', 'C', 'S', 'i', 'y', 'D', + 'T', 'j', 'z', 'E', 'U', 'k', '0', 'F', 'V', 'l', '1', 'G', 'W', + 'm', '2', 'H', 'X', 'n', '3', 'I', 'Y', 'o', '4', 'J', 'Z', 'p', + '5', 'K', 'a', 'q', '6', 'L', 'b', 'r', '7', 'M', 'c', 's', '8', + 'N', 'd', 't', '9', 'O', 'e', 'u', '+', 'P', 'f', 'v', '/', + }; + constexpr static uint8_t source_table_url[64] = { + 'A', 'Q', 'g', 'w', 'B', 'R', 'h', 'x', 'C', 'S', 'i', 'y', 'D', + 'T', 'j', 'z', 'E', 'U', 'k', '0', 'F', 'V', 'l', '1', 'G', 'W', + 'm', '2', 'H', 'X', 'n', '3', 'I', 'Y', 'o', '4', 'J', 'Z', 'p', + '5', 'K', 'a', 'q', '6', 'L', 'b', 'r', '7', 'M', 'c', 's', '8', + 'N', 'd', 't', '9', 'O', 'e', 'u', '-', 'P', 'f', 'v', '_', + }; + const uint8x16_t v3f = vdupq_n_u8(0x3f); +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + // When trying to load a uint8_t array, Visual Studio might + // error with: error C2664: '__n128x4 neon_ld4m_q8(const char *)': + // cannot convert argument 1 from 'const uint8_t [64]' to 'const char * + const uint8x16x4_t table = vld4q_u8( + (reinterpret_cast(options & base64_url) ? source_table_url + : source_table)); +#else + const uint8x16x4_t table = + vld4q_u8((options & base64_url) ? source_table_url : source_table); +#endif + size_t i = 0; + for (; i + 16 * 3 <= srclen; i += 16 * 3) { + const uint8x16x3_t in = vld3q_u8((const uint8_t *)src + i); + uint8x16x4_t result; + result.val[0] = vshrq_n_u8(in.val[0], 2); + result.val[1] = + vandq_u8(vsliq_n_u8(vshrq_n_u8(in.val[1], 4), in.val[0], 4), v3f); + result.val[2] = + vandq_u8(vsliq_n_u8(vshrq_n_u8(in.val[2], 6), in.val[1], 2), v3f); + result.val[3] = vandq_u8(in.val[2], v3f); + result.val[0] = vqtbl4q_u8(table, result.val[0]); + result.val[1] = vqtbl4q_u8(table, result.val[1]); + result.val[2] = vqtbl4q_u8(table, result.val[2]); + result.val[3] = vqtbl4q_u8(table, result.val[3]); + vst4q_u8(out, result); + out += 64; + } + out += scalar::base64::tail_encode_base64((char *)out, src + i, srclen - i, + options); + + return size_t((char *)out - dst); +} + +static inline void compress(uint8x16_t data, uint16_t mask, char *output) { + if (mask == 0) { + vst1q_u8((uint8_t *)output, data); + return; + } + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + uint64x2_t compactmasku64 = {tables::base64::thintable_epi8[mask1], + tables::base64::thintable_epi8[mask2]}; + uint8x16_t compactmask = vreinterpretq_u8_u64(compactmasku64); +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint8x16_t off = + simdutf_make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8); +#else + const uint8x16_t off = {0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8}; +#endif + + compactmask = vaddq_u8(compactmask, off); + uint8x16_t pruned = vqtbl1q_u8(data, compactmask); + + int pop1 = tables::base64::BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + compactmask = vld1q_u8(tables::base64::pshufb_combine_table + pop1 * 8); + uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); + vst1q_u8((uint8_t *)output, answer); +} + +struct block64 { + uint8x16_t chunks[4]; +}; + +static_assert(sizeof(block64) == 64, "block64 is not 64 bytes"); +template uint64_t to_base64_mask(block64 *b, bool *error) { + uint8x16_t v0f = vdupq_n_u8(0xf); + + uint8x16_t underscore0, underscore1, underscore2, underscore3; + if (base64_url) { + underscore0 = vceqq_u8(b->chunks[0], vdupq_n_u8(0x5f)); + underscore1 = vceqq_u8(b->chunks[1], vdupq_n_u8(0x5f)); + underscore2 = vceqq_u8(b->chunks[2], vdupq_n_u8(0x5f)); + underscore3 = vceqq_u8(b->chunks[3], vdupq_n_u8(0x5f)); + } else { + (void)underscore0; + (void)underscore1; + (void)underscore2; + (void)underscore3; + } + + uint8x16_t lo_nibbles0 = vandq_u8(b->chunks[0], v0f); + uint8x16_t lo_nibbles1 = vandq_u8(b->chunks[1], v0f); + uint8x16_t lo_nibbles2 = vandq_u8(b->chunks[2], v0f); + uint8x16_t lo_nibbles3 = vandq_u8(b->chunks[3], v0f); + + // Needed by the decoding step. + uint8x16_t hi_nibbles0 = vshrq_n_u8(b->chunks[0], 4); + uint8x16_t hi_nibbles1 = vshrq_n_u8(b->chunks[1], 4); + uint8x16_t hi_nibbles2 = vshrq_n_u8(b->chunks[2], 4); + uint8x16_t hi_nibbles3 = vshrq_n_u8(b->chunks[3], 4); + uint8x16_t lut_lo; +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + if (base64_url) { + lut_lo = + simdutf_make_uint8x16_t(0x3a, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, + 0x70, 0x61, 0xe1, 0xf4, 0xe5, 0xa5, 0xf4, 0xf4); + } else { + lut_lo = + simdutf_make_uint8x16_t(0x3a, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, + 0x70, 0x61, 0xe1, 0xb4, 0xe5, 0xe5, 0xf4, 0xb4); + } +#else + if (base64_url) { + lut_lo = uint8x16_t{0x3a, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, + 0x70, 0x61, 0xe1, 0xf4, 0xe5, 0xa5, 0xf4, 0xf4}; + } else { + lut_lo = uint8x16_t{0x3a, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, + 0x70, 0x61, 0xe1, 0xb4, 0xe5, 0xe5, 0xf4, 0xb4}; + } +#endif + uint8x16_t lo0 = vqtbl1q_u8(lut_lo, lo_nibbles0); + uint8x16_t lo1 = vqtbl1q_u8(lut_lo, lo_nibbles1); + uint8x16_t lo2 = vqtbl1q_u8(lut_lo, lo_nibbles2); + uint8x16_t lo3 = vqtbl1q_u8(lut_lo, lo_nibbles3); + uint8x16_t lut_hi; +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + if (base64_url) { + lut_hi = + simdutf_make_uint8x16_t(0x11, 0x20, 0x42, 0x80, 0x8, 0x4, 0x8, 0x4, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20); + } else { + lut_hi = + simdutf_make_uint8x16_t(0x11, 0x20, 0x42, 0x80, 0x8, 0x4, 0x8, 0x4, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20); + } +#else + if (base64_url) { + lut_hi = uint8x16_t{0x11, 0x20, 0x42, 0x80, 0x8, 0x4, 0x8, 0x4, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20}; + } else { + lut_hi = uint8x16_t{0x11, 0x20, 0x42, 0x80, 0x8, 0x4, 0x8, 0x4, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20}; + } +#endif + uint8x16_t hi0 = vqtbl1q_u8(lut_hi, hi_nibbles0); + uint8x16_t hi1 = vqtbl1q_u8(lut_hi, hi_nibbles1); + uint8x16_t hi2 = vqtbl1q_u8(lut_hi, hi_nibbles2); + uint8x16_t hi3 = vqtbl1q_u8(lut_hi, hi_nibbles3); + + if (base64_url) { + hi0 = vbicq_u8(hi0, underscore0); + hi1 = vbicq_u8(hi1, underscore1); + hi2 = vbicq_u8(hi2, underscore2); + hi3 = vbicq_u8(hi3, underscore3); + } + + uint8_t checks = + vmaxvq_u8(vorrq_u8(vorrq_u8(vandq_u8(lo0, hi0), vandq_u8(lo1, hi1)), + vorrq_u8(vandq_u8(lo2, hi2), vandq_u8(lo3, hi3)))); +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = + simdutf_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; +#endif + uint64_t badcharmask = 0; + *error = checks > 0x3; + if (checks) { + // Add each of the elements next to each other, successively, to stuff each + // 8 byte mask into one. + uint8x16_t test0 = vtstq_u8(lo0, hi0); + uint8x16_t test1 = vtstq_u8(lo1, hi1); + uint8x16_t test2 = vtstq_u8(lo2, hi2); + uint8x16_t test3 = vtstq_u8(lo3, hi3); + uint8x16_t sum0 = + vpaddq_u8(vandq_u8(test0, bit_mask), vandq_u8(test1, bit_mask)); + uint8x16_t sum1 = + vpaddq_u8(vandq_u8(test2, bit_mask), vandq_u8(test3, bit_mask)); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + badcharmask = vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); + } + // This is the transformation step that can be done while we are waiting for + // sum0 + uint8x16_t roll_lut; +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + if (base64_url) { + roll_lut = + simdutf_make_uint8x16_t(0xe0, 0x11, 0x13, 0x4, 0xbf, 0xbf, 0xb9, 0xb9, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0); + } else { + roll_lut = + simdutf_make_uint8x16_t(0x0, 0x10, 0x13, 0x4, 0xbf, 0xbf, 0xb9, 0xb9, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0); + } +#else + if (base64_url) { + roll_lut = uint8x16_t{0xe0, 0x11, 0x13, 0x4, 0xbf, 0xbf, 0xb9, 0xb9, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; + } else { + roll_lut = uint8x16_t{0x0, 0x10, 0x13, 0x4, 0xbf, 0xbf, 0xb9, 0xb9, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; + } +#endif + uint8x16_t vsecond_last = base64_url ? vdupq_n_u8(0x2d) : vdupq_n_u8(0x2f); + if (base64_url) { + hi_nibbles0 = vbicq_u8(hi_nibbles0, underscore0); + hi_nibbles1 = vbicq_u8(hi_nibbles1, underscore1); + hi_nibbles2 = vbicq_u8(hi_nibbles2, underscore2); + hi_nibbles3 = vbicq_u8(hi_nibbles3, underscore3); + } + uint8x16_t roll0 = vqtbl1q_u8( + roll_lut, vaddq_u8(vceqq_u8(b->chunks[0], vsecond_last), hi_nibbles0)); + uint8x16_t roll1 = vqtbl1q_u8( + roll_lut, vaddq_u8(vceqq_u8(b->chunks[1], vsecond_last), hi_nibbles1)); + uint8x16_t roll2 = vqtbl1q_u8( + roll_lut, vaddq_u8(vceqq_u8(b->chunks[2], vsecond_last), hi_nibbles2)); + uint8x16_t roll3 = vqtbl1q_u8( + roll_lut, vaddq_u8(vceqq_u8(b->chunks[3], vsecond_last), hi_nibbles3)); + b->chunks[0] = vaddq_u8(b->chunks[0], roll0); + b->chunks[1] = vaddq_u8(b->chunks[1], roll1); + b->chunks[2] = vaddq_u8(b->chunks[2], roll2); + b->chunks[3] = vaddq_u8(b->chunks[3], roll3); + return badcharmask; +} + +void copy_block(block64 *b, char *output) { + vst1q_u8((uint8_t *)output, b->chunks[0]); + vst1q_u8((uint8_t *)output + 16, b->chunks[1]); + vst1q_u8((uint8_t *)output + 32, b->chunks[2]); + vst1q_u8((uint8_t *)output + 48, b->chunks[3]); +} + +uint64_t compress_block(block64 *b, uint64_t mask, char *output) { + uint64_t popcounts = + vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); + uint64_t offsets = popcounts * 0x0101010101010101; + compress(b->chunks[0], uint16_t(mask), output); + compress(b->chunks[1], uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF]); + compress(b->chunks[2], uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF]); + compress(b->chunks[3], uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF]); + return offsets >> 56; +} + +// The caller of this function is responsible to ensure that there are 64 bytes +// available from reading at src. The data is read into a block64 structure. +void load_block(block64 *b, const char *src) { + b->chunks[0] = vld1q_u8(reinterpret_cast(src)); + b->chunks[1] = vld1q_u8(reinterpret_cast(src) + 16); + b->chunks[2] = vld1q_u8(reinterpret_cast(src) + 32); + b->chunks[3] = vld1q_u8(reinterpret_cast(src) + 48); +} + +// The caller of this function is responsible to ensure that there are 32 bytes +// available from reading at data. It returns a 16-byte value, narrowing with +// saturation the 16-bit words. +inline uint8x16_t load_satured(const uint16_t *data) { + uint16x8_t in1 = vld1q_u16(data); + uint16x8_t in2 = vld1q_u16(data + 8); + return vqmovn_high_u16(vqmovn_u16(in1), in2); +} + +// The caller of this function is responsible to ensure that there are 128 bytes +// available from reading at src. The data is read into a block64 structure. +void load_block(block64 *b, const char16_t *src) { + b->chunks[0] = load_satured(reinterpret_cast(src)); + b->chunks[1] = load_satured(reinterpret_cast(src) + 16); + b->chunks[2] = load_satured(reinterpret_cast(src) + 32); + b->chunks[3] = load_satured(reinterpret_cast(src) + 48); +} + +// decode 64 bytes and output 48 bytes +void base64_decode_block(char *out, const char *src) { + uint8x16x4_t str = vld4q_u8((uint8_t *)src); + uint8x16x3_t outvec; + outvec.val[0] = + vorrq_u8(vshlq_n_u8(str.val[0], 2), vshrq_n_u8(str.val[1], 4)); + outvec.val[1] = + vorrq_u8(vshlq_n_u8(str.val[1], 4), vshrq_n_u8(str.val[2], 2)); + outvec.val[2] = vorrq_u8(vshlq_n_u8(str.val[2], 6), str.val[3]); + vst3q_u8((uint8_t *)out, outvec); +} + +template +full_result +compress_decode_base64(char *dst, const char_type *src, size_t srclen, + base64_options options, + last_chunk_handling_options last_chunk_options) { + const uint8_t *to_base64 = base64_url ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value; + size_t equallocation = + srclen; // location of the first padding character if any + // skip trailing spaces + while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && + to_base64[uint8_t(src[srclen - 1])] == 64) { + srclen--; + } + size_t equalsigns = 0; + if (srclen > 0 && src[srclen - 1] == '=') { + equallocation = srclen - 1; + srclen--; + equalsigns = 1; + // skip trailing spaces + while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && + to_base64[uint8_t(src[srclen - 1])] == 64) { + srclen--; + } + if (srclen > 0 && src[srclen - 1] == '=') { + equallocation = srclen - 1; + srclen--; + equalsigns = 2; + } + } + if (srclen == 0) { + if (equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, 0, 0}; + } + const char_type *const srcinit = src; + const char *const dstinit = dst; + const char_type *const srcend = src + srclen; + + constexpr size_t block_size = 10; + char buffer[block_size * 64]; + char *bufferptr = buffer; + if (srclen >= 64) { + const char_type *const srcend64 = src + srclen - 64; + while (src <= srcend64) { + block64 b; + load_block(&b, src); + src += 64; + bool error = false; + uint64_t badcharmask = to_base64_mask(&b, &error); + if (badcharmask) { + if (error) { + src -= 64; + while (src < srcend && scalar::base64::is_eight_byte(*src) && + to_base64[uint8_t(*src)] <= 64) { + src++; + } + if (src < srcend) { + // should never happen + } + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + } + + if (badcharmask != 0) { + // optimization opportunity: check for simple masks like those made of + // continuous 1s followed by continuous 0s. And masks containing a + // single bad character. + bufferptr += compress_block(&b, badcharmask, bufferptr); + } else { + // optimization opportunity: if bufferptr == buffer and mask == 0, we + // can avoid the call to compress_block and decode directly. + copy_block(&b, bufferptr); + bufferptr += 64; + } + if (bufferptr >= (block_size - 1) * 64 + buffer) { + for (size_t i = 0; i < (block_size - 1); i++) { + base64_decode_block(dst, buffer + i * 64); + dst += 48; + } + std::memcpy(buffer, buffer + (block_size - 1) * 64, + 64); // 64 might be too much + bufferptr -= (block_size - 1) * 64; + } + } + } + char *buffer_start = buffer; + // Optimization note: if this is almost full, then it is worth our + // time, otherwise, we should just decode directly. + int last_block = (int)((bufferptr - buffer_start) % 64); + if (last_block != 0 && srcend - src + last_block >= 64) { + while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { + uint8_t val = to_base64[uint8_t(*src)]; + *bufferptr = char(val); + if (!scalar::base64::is_eight_byte(*src) || val > 64) { + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + bufferptr += (val <= 63); + src++; + } + } + + for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { + base64_decode_block(dst, buffer_start); + dst += 48; + } + if ((bufferptr - buffer_start) % 64 != 0) { + while (buffer_start + 4 < bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; + triple = scalar::utf32::swap_bytes(triple); + std::memcpy(dst, &triple, 4); + + dst += 3; + buffer_start += 4; + } + if (buffer_start + 4 <= bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; + triple = scalar::utf32::swap_bytes(triple); + std::memcpy(dst, &triple, 3); + + dst += 3; + buffer_start += 4; + } + // we may have 1, 2 or 3 bytes left and we need to decode them so let us + // backtrack + int leftover = int(bufferptr - buffer_start); + while (leftover > 0) { + while (to_base64[uint8_t(*(src - 1))] == 64) { + src--; + } + src--; + leftover--; + } + } + if (src < srcend + equalsigns) { + full_result r = scalar::base64::base64_tail_decode( + dst, src, srcend - src, equalsigns, options, last_chunk_options); + r.input_count += size_t(src - srcinit); + if (r.error == error_code::INVALID_BASE64_CHARACTER || + r.error == error_code::BASE64_EXTRA_BITS) { + return r; + } else { + r.output_count += size_t(dst - dstinit); + } + if (last_chunk_options != stop_before_partial && + r.error == error_code::SUCCESS && equalsigns > 0) { + // additional checks + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + equalsigns != 4)) { + r.error = error_code::INVALID_BASE64_CHARACTER; + r.input_count = equallocation; + } + } + return r; + } + if (equalsigns > 0) { + if ((size_t(dst - dstinit) % 3 == 0) || + ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; + } + } + return {SUCCESS, srclen, size_t(dst - dstinit)}; +} +/* end file src/arm64/arm_base64.cpp */ +/* begin file src/arm64/arm_convert_utf32_to_latin1.cpp */ +std::pair +arm_convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) { + const char32_t *end = buf + len; + while (end - buf >= 8) { + uint32x4_t in1 = vld1q_u32(reinterpret_cast(buf)); + uint32x4_t in2 = vld1q_u32(reinterpret_cast(buf + 4)); + + uint16x8_t utf16_packed = vcombine_u16(vqmovn_u32(in1), vqmovn_u32(in2)); + if (vmaxvq_u16(utf16_packed) <= 0xff) { + // 1. pack the bytes + uint8x8_t latin1_packed = vmovn_u16(utf16_packed); + // 2. store (8 bytes) + vst1_u8(reinterpret_cast(latin1_output), latin1_packed); + // 3. adjust pointers + buf += 8; + latin1_output += 8; + } else { + return std::make_pair(nullptr, reinterpret_cast(latin1_output)); + } + } // while + return std::make_pair(buf, latin1_output); +} + +std::pair +arm_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) { + const char32_t *start = buf; + const char32_t *end = buf + len; + + while (end - buf >= 8) { + uint32x4_t in1 = vld1q_u32(reinterpret_cast(buf)); + uint32x4_t in2 = vld1q_u32(reinterpret_cast(buf + 4)); + + uint16x8_t utf16_packed = vcombine_u16(vqmovn_u32(in1), vqmovn_u32(in2)); + + if (vmaxvq_u16(utf16_packed) <= 0xff) { + // 1. pack the bytes + uint8x8_t latin1_packed = vmovn_u16(utf16_packed); + // 2. store (8 bytes) + vst1_u8(reinterpret_cast(latin1_output), latin1_packed); + // 3. adjust pointers + buf += 8; + latin1_output += 8; + } else { + // Let us do a scalar fallback. + for (int k = 0; k < 8; k++) { + uint32_t word = buf[k]; + if (word <= 0xff) { + *latin1_output++ = char(word); + } else { + return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), + latin1_output); + } + } + } + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), + latin1_output); +} +/* end file src/arm64/arm_convert_utf32_to_latin1.cpp */ +/* begin file src/arm64/arm_convert_utf32_to_utf16.cpp */ +template +std::pair +arm_convert_utf32_to_utf16(const char32_t *buf, size_t len, + char16_t *utf16_out) { + uint16_t *utf16_output = reinterpret_cast(utf16_out); + const char32_t *end = buf + len; + + uint16x4_t forbidden_bytemask = vmov_n_u16(0x0); + + while (end - buf >= 4) { + uint32x4_t in = vld1q_u32(reinterpret_cast(buf)); + + // Check if no bits set above 16th + if (vmaxvq_u32(in) <= 0xFFFF) { + uint16x4_t utf16_packed = vmovn_u32(in); + + const uint16x4_t v_d800 = vmov_n_u16((uint16_t)0xd800); + const uint16x4_t v_dfff = vmov_n_u16((uint16_t)0xdfff); + forbidden_bytemask = vorr_u16(vand_u16(vcle_u16(utf16_packed, v_dfff), + vcge_u16(utf16_packed, v_d800)), + forbidden_bytemask); + + if (!match_system(big_endian)) { + utf16_packed = + vreinterpret_u16_u8(vrev16_u8(vreinterpret_u8_u16(utf16_packed))); + } + vst1_u16(utf16_output, utf16_packed); + utf16_output += 4; + buf += 4; + } else { + size_t forward = 3; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf16_output)); + } + *utf16_output++ = !match_system(big_endian) + ? char16_t(word >> 8 | word << 8) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf16_output)); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if (!match_system(big_endian)) { + high_surrogate = + uint16_t(high_surrogate >> 8 | high_surrogate << 8); + low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + } + buf += k; + } + } + + // check for invalid input + if (vmaxv_u16(forbidden_bytemask) != 0) { + return std::make_pair(nullptr, reinterpret_cast(utf16_output)); + } + + return std::make_pair(buf, reinterpret_cast(utf16_output)); +} + +template +std::pair +arm_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, + char16_t *utf16_out) { + uint16_t *utf16_output = reinterpret_cast(utf16_out); + const char32_t *start = buf; + const char32_t *end = buf + len; + + while (end - buf >= 4) { + uint32x4_t in = vld1q_u32(reinterpret_cast(buf)); + + // Check if no bits set above 16th + if (vmaxvq_u32(in) <= 0xFFFF) { + uint16x4_t utf16_packed = vmovn_u32(in); + + const uint16x4_t v_d800 = vmov_n_u16((uint16_t)0xd800); + const uint16x4_t v_dfff = vmov_n_u16((uint16_t)0xdfff); + const uint16x4_t forbidden_bytemask = vand_u16( + vcle_u16(utf16_packed, v_dfff), vcge_u16(utf16_packed, v_d800)); + if (vmaxv_u16(forbidden_bytemask) != 0) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + reinterpret_cast(utf16_output)); + } + + if (!match_system(big_endian)) { + utf16_packed = + vreinterpret_u16_u8(vrev16_u8(vreinterpret_u8_u16(utf16_packed))); + } + vst1_u16(utf16_output, utf16_packed); + utf16_output += 4; + buf += 4; + } else { + size_t forward = 3; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k), + reinterpret_cast(utf16_output)); + } + *utf16_output++ = !match_system(big_endian) + ? char16_t(word >> 8 | word << 8) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return std::make_pair( + result(error_code::TOO_LARGE, buf - start + k), + reinterpret_cast(utf16_output)); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if (!match_system(big_endian)) { + high_surrogate = + uint16_t(high_surrogate >> 8 | high_surrogate << 8); + low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + } + buf += k; + } + } + + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf16_output)); +} +/* end file src/arm64/arm_convert_utf32_to_utf16.cpp */ +/* begin file src/arm64/arm_convert_utf32_to_utf8.cpp */ +std::pair +arm_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char32_t *end = buf + len; + + const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080); + + uint16x8_t forbidden_bytemask = vmovq_n_u16(0x0); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (buf + 16 + safety_margin < end) { + uint32x4_t in = vld1q_u32(reinterpret_cast(buf)); + uint32x4_t nextin = vld1q_u32(reinterpret_cast(buf + 4)); + + // Check if no bits set above 16th + if (vmaxvq_u32(vorrq_u32(in, nextin)) <= 0xFFFF) { + // Pack UTF-32 to UTF-16 safely (without surrogate pairs) + // Apply UTF-16 => UTF-8 routine (arm_convert_utf16_to_utf8.cpp) + uint16x8_t utf16_packed = vcombine_u16(vmovn_u32(in), vmovn_u32(nextin)); + if (vmaxvq_u16(utf16_packed) <= 0x7F) { // ASCII fast path!!!! + // 1. pack the bytes + // obviously suboptimal. + uint8x8_t utf8_packed = vmovn_u16(utf16_packed); + // 2. store (8 bytes) + vst1_u8(utf8_output, utf8_packed); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + continue; // we are done for this round! + } + + if (vmaxvq_u16(utf16_packed) <= 0x7FF) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00); + const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f); + + // t0 = [000a|aaaa|bbbb|bb00] + const uint16x8_t t0 = vshlq_n_u16(utf16_packed, 2); + // t1 = [000a|aaaa|0000|0000] + const uint16x8_t t1 = vandq_u16(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const uint16x8_t t2 = vandq_u16(utf16_packed, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const uint16x8_t t3 = vorrq_u16(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const uint16x8_t t4 = vorrq_u16(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); + const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f); + const uint8x16_t utf8_unpacked = vreinterpretq_u8_u16( + vbslq_u16(one_byte_bytemask, utf16_packed, t4)); + // 3. prepare bitmask for 8-bit lookup +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t mask = simdutf_make_uint16x8_t( + 0x0001, 0x0004, 0x0010, 0x0040, 0x0002, 0x0008, 0x0020, 0x0080); +#else + const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040, + 0x0002, 0x0008, 0x0020, 0x0080}; +#endif + uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask)); + // 4. pack the bytes + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; + const uint8x16_t shuffle = vld1q_u8(row + 1); + const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle); + + // 5. store bytes + vst1q_u8(utf8_output, utf8_packed); + + // 6. adjust pointers + buf += 8; + utf8_output += row[0]; + continue; + } else { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); + const uint16x8_t v_dfff = vmovq_n_u16((uint16_t)0xdfff); + forbidden_bytemask = + vorrq_u16(vandq_u16(vcleq_u16(utf16_packed, v_dfff), + vcgeq_u16(utf16_packed, v_d800)), + forbidden_bytemask); + +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t dup_even = simdutf_make_uint16x8_t( + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); +#else + const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606, + 0x0808, 0x0a0a, 0x0c0c, 0x0e0e}; +#endif + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- + precompute either byte 1 for case #2 or byte 2 for case #3. Note that + they differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, + taking into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) vmovq_n_u16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const uint16x8_t t0 = + vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(utf16_packed), + vreinterpretq_u8_u16(dup_even))); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const uint16x8_t t2 = vorrq_u16(t1, simdutf_vec(0b1000000000000000)); + + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + const uint16x8_t s0 = vshrq_n_u16(utf16_packed, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + const uint16x8_t s1 = + vandq_u16(utf16_packed, simdutf_vec(0b0000111111000000)); + // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] + const uint16x8_t s1s = vshlq_n_u16(s1, 2); + // [00bb|bbbb|0000|aaaa] + const uint16x8_t s2 = vorrq_u16(s0, s1s); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000)); + const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF); + const uint16x8_t one_or_two_bytes_bytemask = + vcleq_u16(utf16_packed, v_07ff); + const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), + one_or_two_bytes_bytemask); + const uint16x8_t s4 = veorq_u16(s3, m0); +#undef simdutf_vec + + // 4. expand code units 16-bit => 32-bit + const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4)); + const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4)); + + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); + const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f); +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t onemask = simdutf_make_uint16x8_t( + 0x0001, 0x0004, 0x0010, 0x0040, 0x0100, 0x0400, 0x1000, 0x4000); + const uint16x8_t twomask = simdutf_make_uint16x8_t( + 0x0002, 0x0008, 0x0020, 0x0080, 0x0200, 0x0800, 0x2000, 0x8000); +#else + const uint16x8_t onemask = {0x0001, 0x0004, 0x0010, 0x0040, + 0x0100, 0x0400, 0x1000, 0x4000}; + const uint16x8_t twomask = {0x0002, 0x0008, 0x0020, 0x0080, + 0x0200, 0x0800, 0x2000, 0x8000}; +#endif + const uint16x8_t combined = + vorrq_u16(vandq_u16(one_byte_bytemask, onemask), + vandq_u16(one_or_two_bytes_bytemask, twomask)); + const uint16_t mask = vaddvq_u16(combined); + // The following fast path may or may not be beneficial. + /*if(mask == 0) { + // We only have three-byte code units. Use fast path. + const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0}; + const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle); + const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle); + vst1q_u8(utf8_output, utf8_0); + utf8_output += 12; + vst1q_u8(utf8_output, utf8_1); + utf8_output += 12; + buf += 8; + continue; + }*/ + const uint8_t mask0 = uint8_t(mask); + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const uint8x16_t shuffle0 = vld1q_u8(row0 + 1); + const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0); + + const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const uint8x16_t shuffle1 = vld1q_u8(row1 + 1); + const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1); + + vst1q_u8(utf8_output, utf8_0); + utf8_output += row0[0]; + vst1q_u8(utf8_output, utf8_1); + utf8_output += row1[0]; + + buf += 8; + } + // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> + // will produce four UTF-8 bytes. + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + if (word > 0x10FFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + // check for invalid input + if (vmaxvq_u16(forbidden_bytemask) != 0) { + return std::make_pair(nullptr, reinterpret_cast(utf8_output)); + } + return std::make_pair(buf, reinterpret_cast(utf8_output)); +} + +std::pair +arm_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, + char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char32_t *start = buf; + const char32_t *end = buf + len; + + const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (buf + 16 + safety_margin < end) { + uint32x4_t in = vld1q_u32(reinterpret_cast(buf)); + uint32x4_t nextin = vld1q_u32(reinterpret_cast(buf + 4)); + + // Check if no bits set above 16th + if (vmaxvq_u32(vorrq_u32(in, nextin)) <= 0xFFFF) { + // Pack UTF-32 to UTF-16 safely (without surrogate pairs) + // Apply UTF-16 => UTF-8 routine (arm_convert_utf16_to_utf8.cpp) + uint16x8_t utf16_packed = vcombine_u16(vmovn_u32(in), vmovn_u32(nextin)); + if (vmaxvq_u16(utf16_packed) <= 0x7F) { // ASCII fast path!!!! + // 1. pack the bytes + // obviously suboptimal. + uint8x8_t utf8_packed = vmovn_u16(utf16_packed); + // 2. store (8 bytes) + vst1_u8(utf8_output, utf8_packed); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + continue; // we are done for this round! + } + + if (vmaxvq_u16(utf16_packed) <= 0x7FF) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00); + const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f); + + // t0 = [000a|aaaa|bbbb|bb00] + const uint16x8_t t0 = vshlq_n_u16(utf16_packed, 2); + // t1 = [000a|aaaa|0000|0000] + const uint16x8_t t1 = vandq_u16(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const uint16x8_t t2 = vandq_u16(utf16_packed, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const uint16x8_t t3 = vorrq_u16(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const uint16x8_t t4 = vorrq_u16(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); + const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f); + const uint8x16_t utf8_unpacked = vreinterpretq_u8_u16( + vbslq_u16(one_byte_bytemask, utf16_packed, t4)); + // 3. prepare bitmask for 8-bit lookup +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t mask = simdutf_make_uint16x8_t( + 0x0001, 0x0004, 0x0010, 0x0040, 0x0002, 0x0008, 0x0020, 0x0080); +#else + const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040, + 0x0002, 0x0008, 0x0020, 0x0080}; +#endif + uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask)); + // 4. pack the bytes + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; + const uint8x16_t shuffle = vld1q_u8(row + 1); + const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle); + + // 5. store bytes + vst1q_u8(utf8_output, utf8_packed); + + // 6. adjust pointers + buf += 8; + utf8_output += row[0]; + continue; + } else { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + + // check for invalid input + const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); + const uint16x8_t v_dfff = vmovq_n_u16((uint16_t)0xdfff); + const uint16x8_t forbidden_bytemask = vandq_u16( + vcleq_u16(utf16_packed, v_dfff), vcgeq_u16(utf16_packed, v_d800)); + if (vmaxvq_u16(forbidden_bytemask) != 0) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + reinterpret_cast(utf8_output)); + } + +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t dup_even = simdutf_make_uint16x8_t( + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); +#else + const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606, + 0x0808, 0x0a0a, 0x0c0c, 0x0e0e}; +#endif + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- + precompute either byte 1 for case #2 or byte 2 for case #3. Note that + they differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, + taking into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) vmovq_n_u16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const uint16x8_t t0 = + vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(utf16_packed), + vreinterpretq_u8_u16(dup_even))); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const uint16x8_t t2 = vorrq_u16(t1, simdutf_vec(0b1000000000000000)); + + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + const uint16x8_t s0 = vshrq_n_u16(utf16_packed, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + const uint16x8_t s1 = + vandq_u16(utf16_packed, simdutf_vec(0b0000111111000000)); + // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] + const uint16x8_t s1s = vshlq_n_u16(s1, 2); + // [00bb|bbbb|0000|aaaa] + const uint16x8_t s2 = vorrq_u16(s0, s1s); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000)); + const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF); + const uint16x8_t one_or_two_bytes_bytemask = + vcleq_u16(utf16_packed, v_07ff); + const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), + one_or_two_bytes_bytemask); + const uint16x8_t s4 = veorq_u16(s3, m0); +#undef simdutf_vec + + // 4. expand code units 16-bit => 32-bit + const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4)); + const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4)); + + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); + const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f); +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + const uint16x8_t onemask = simdutf_make_uint16x8_t( + 0x0001, 0x0004, 0x0010, 0x0040, 0x0100, 0x0400, 0x1000, 0x4000); + const uint16x8_t twomask = simdutf_make_uint16x8_t( + 0x0002, 0x0008, 0x0020, 0x0080, 0x0200, 0x0800, 0x2000, 0x8000); +#else + const uint16x8_t onemask = {0x0001, 0x0004, 0x0010, 0x0040, + 0x0100, 0x0400, 0x1000, 0x4000}; + const uint16x8_t twomask = {0x0002, 0x0008, 0x0020, 0x0080, + 0x0200, 0x0800, 0x2000, 0x8000}; +#endif + const uint16x8_t combined = + vorrq_u16(vandq_u16(one_byte_bytemask, onemask), + vandq_u16(one_or_two_bytes_bytemask, twomask)); + const uint16_t mask = vaddvq_u16(combined); + // The following fast path may or may not be beneficial. + /*if(mask == 0) { + // We only have three-byte code units. Use fast path. + const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0}; + const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle); + const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle); + vst1q_u8(utf8_output, utf8_0); + utf8_output += 12; + vst1q_u8(utf8_output, utf8_1); + utf8_output += 12; + buf += 8; + continue; + }*/ + const uint8_t mask0 = uint8_t(mask); + + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const uint8x16_t shuffle0 = vld1q_u8(row0 + 1); + const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0); + + const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const uint8x16_t shuffle1 = vld1q_u8(row1 + 1); + const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1); + + vst1q_u8(utf8_output, utf8_0); + utf8_output += row0[0]; + vst1q_u8(utf8_output, utf8_1); + utf8_output += row1[0]; + + buf += 8; + } + // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> + // will produce four UTF-8 bytes. + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k), + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + if (word > 0x10FFFF) { + return std::make_pair( + result(error_code::TOO_LARGE, buf - start + k), + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf8_output)); +} +/* end file src/arm64/arm_convert_utf32_to_utf8.cpp */ + +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* begin file src/generic/buf_block_reader.h */ +namespace simdutf { +namespace arm64 { +namespace { + +// Walks through a buffer in block-sized increments, loading the last part with +// spaces +template struct buf_block_reader { +public: + simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdutf_really_inline size_t block_index(); + simdutf_really_inline bool has_full_block() const; + simdutf_really_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 + * (in which case this function fills the buffer with spaces and returns 0. In + * particular, if len == STEP_SIZE there will be 0 full_blocks and 1 remainder + * block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdutf_really_inline size_t get_remainder(uint8_t *dst) const; + simdutf_really_inline void advance(); + +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdutf_unused static char *format_input_text_64(const uint8_t *text) { + static char *buf = + reinterpret_cast(malloc(sizeof(simd8x64) + 1)); + for (size_t i = 0; i < sizeof(simd8x64); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdutf_unused static char *format_input_text(const simd8x64 &in) { + static char *buf = + reinterpret_cast(malloc(sizeof(simd8x64) + 1)); + in.store(reinterpret_cast(buf)); + for (size_t i = 0; i < sizeof(simd8x64); i++) { + if (buf[i] < ' ') { + buf[i] = '_'; + } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdutf_unused static char *format_mask(uint64_t mask) { + static char *buf = reinterpret_cast(malloc(64 + 1)); + for (size_t i = 0; i < 64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdutf_really_inline +buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) + : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, + idx{0} {} + +template +simdutf_really_inline size_t buf_block_reader::block_index() { + return idx; +} + +template +simdutf_really_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdutf_really_inline const uint8_t * +buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdutf_really_inline size_t +buf_block_reader::get_remainder(uint8_t *dst) const { + if (len == idx) { + return 0; + } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, + STEP_SIZE); // std::memset STEP_SIZE because it is more efficient + // to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdutf_really_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/buf_block_reader.h */ +/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ +namespace simdutf { +namespace arm64 { +namespace { +namespace utf8_validation { + +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} + +// +// Return nonzero if there are incomplete multibyte characters at the end of the +// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. +// +simdutf_really_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they + // ended at EOF): + // ... 1111____ 111_____ 11______ + static const uint8_t max_array[32] = {255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 0b11110000u - 1, + 0b11100000u - 1, + 0b11000000u - 1}; + const simd8 max_value( + &max_array[sizeof(max_array) - sizeof(simd8)]); + return input.gt_bits(max_value); +} + +struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast + // path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is + // too short or a byte value too large in the last bytes: check_special_cases + // only checks for bytes too large in the first of two bytes. + simdutf_really_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an + // ASCII block can't possibly finish them. + this->error |= this->prev_incomplete; + } + + simdutf_really_inline void check_next_input(const simd8x64 &input) { + if (simdutf_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it + // is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = + is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; + } + } + + // do not forget to call check_eof! + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ +/* begin file src/generic/utf8_validation/utf8_validator.h */ +namespace simdutf { +namespace arm64 { +namespace { +namespace utf8_validation { + +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t *input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return !c.errors(); +} + +bool generic_validate_utf8(const char *input, size_t length) { + return generic_validate_utf8( + reinterpret_cast(input), length); +} + +/** + * Validates that the string is actual UTF-8 and stops on errors. + */ +template +result generic_validate_utf8_with_errors(const uint8_t *input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + size_t count{0}; + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + if (c.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(input), + reinterpret_cast(input + count), length - count); + res.count += count; + return res; + } + reader.advance(); + count += 64; + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + if (c.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(input), + reinterpret_cast(input) + count, length - count); + res.count += count; + return res; + } else { + return result(error_code::SUCCESS, length); + } +} + +result generic_validate_utf8_with_errors(const char *input, size_t length) { + return generic_validate_utf8_with_errors( + reinterpret_cast(input), length); +} + +template +bool generic_validate_ascii(const uint8_t *input, size_t length) { + buf_block_reader<64> reader(input, length); + uint8_t blocks[64]{}; + simd::simd8x64 running_or(blocks); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + running_or |= in; + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + running_or |= in; + return running_or.is_ascii(); +} + +bool generic_validate_ascii(const char *input, size_t length) { + return generic_validate_ascii( + reinterpret_cast(input), length); +} + +template +result generic_validate_ascii_with_errors(const uint8_t *input, size_t length) { + buf_block_reader<64> reader(input, length); + size_t count{0}; + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + if (!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); + } + reader.advance(); + + count += 64; + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + if (!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); + } else { + return result(error_code::SUCCESS, length); + } +} + +result generic_validate_ascii_with_errors(const char *input, size_t length) { + return generic_validate_ascii_with_errors( + reinterpret_cast(input), length); +} + +} // namespace utf8_validation +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_validator.h */ +// transcoding from UTF-8 to UTF-16 +/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ + +namespace simdutf { +namespace arm64 { +namespace { +namespace utf8_to_utf16 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} + +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + template + simdutf_really_inline size_t convert(const char *in, size_t size, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (utf8_continuation_mask & 1) { + return 0; // error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = scalar::utf8_to_utf16::convert( + in + pos, size - pos, utf16_output); + if (howmany == 0) { + return 0; + } + utf16_output += howmany; + } + return utf16_output - start; + } + + template + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (errors() || (utf8_continuation_mask & 1)) { + // rewind_and_convert_with_errors will seek a potential error from + // in+pos onward, with the ability to go back up to pos bytes, and + // read size-pos bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + res.count += pos; + return res; + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + res.count += pos; + return res; + } + if (pos < size) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + utf16_output += res.count; + } + } + return result(error_code::SUCCESS, utf16_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ +/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ + +namespace simdutf { +namespace arm64 { +namespace { +namespace utf8_to_utf16 { + +using namespace simd; + +template +simdutf_warn_unused size_t convert_valid(const char *input, size_t size, + char16_t *utf16_output) noexcept { + // The implementation is not specific to haswell and should be moved to the + // generic directory. + size_t pos = 0; + char16_t *start{utf16_output}; + const size_t safety_margin = 16; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + // this loop could be unrolled further. For example, we could process the + // mask far more than 64 bytes. + simd8x64 in(reinterpret_cast(input + pos)); + if (in.is_ascii()) { + in.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // Slow path. We hope that the compiler will recognize that this is a slow + // path. Anything that is not a continuation mask is a 'leading byte', + // that is, the start of a new code point. + uint64_t utf8_continuation_mask = in.lt(-65 + 1); + // -65 is 0b10111111 in two-complement's, so largest possible continuation + // byte + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + // The *start* of code points is not so useful, rather, we want the *end* + // of code points. + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times when using solely + // the slow/regular path, and at least four times if there are fast paths. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + // + // Thus we may allow convert_masked_utf8_to_utf16 to process + // more bytes at a time under a fast-path mode where 16 bytes + // are consumed at once (e.g., when encountering ASCII). + size_t consumed = convert_masked_utf8_to_utf16( + input + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + utf16_output += scalar::utf8_to_utf16::convert_valid( + input + pos, size - pos, utf16_output); + return utf16_output - start; +} + +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ +// transcoding from UTF-8 to UTF-32 +/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ + +namespace simdutf { +namespace arm64 { +namespace { +namespace utf8_to_utf32 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} + +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + simdutf_really_inline size_t convert(const char *in, size_t size, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 words when calling convert_masked_utf8_to_utf32. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 16 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the fourth + // last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (utf8_continuation_mask & 1) { + return 0; // we have an error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = + scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output); + if (howmany == 0) { + return 0; + } + utf32_output += howmany; + } + return utf32_output - start; + } + + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the fourth + // last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (errors() || (utf8_continuation_mask & 1)) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + res.count += pos; + return res; + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + res.count += pos; + return res; + } + if (pos < size) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + utf32_output += res.count; + } + } + return result(error_code::SUCCESS, utf32_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ +/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ + +namespace simdutf { +namespace arm64 { +namespace { +namespace utf8_to_utf32 { + +using namespace simd; + +simdutf_warn_unused size_t convert_valid(const char *input, size_t size, + char32_t *utf32_output) noexcept { + size_t pos = 0; + char32_t *start{utf32_output}; + const size_t safety_margin = 16; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 in(reinterpret_cast(input + pos)); + if (in.is_ascii()) { + in.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // -65 is 0b10111111 in two-complement's, so largest possible continuation + // byte + uint64_t utf8_continuation_mask = in.lt(-65 + 1); + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + size_t max_starting_point = (pos + 64) - 12; + while (pos < max_starting_point) { + size_t consumed = convert_masked_utf8_to_utf32( + input + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + } + } + utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, + utf32_output); + return utf32_output - start; +} + +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +// other functions +/* begin file src/generic/utf16.h */ +namespace simdutf { +namespace arm64 { +namespace { +namespace utf16 { + +template +simdutf_really_inline size_t count_code_points(const char16_t *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input(reinterpret_cast(in + pos)); + if (!match_system(big_endian)) { + input.swap_bytes(); + } + uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF); + count += count_ones(not_pair) / 2; + } + return count + + scalar::utf16::count_code_points(in + pos, size - pos); +} + +template +simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! + for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input(reinterpret_cast(in + pos)); + if (!match_system(big_endian)) { + input.swap_bytes(); + } + uint64_t ascii_mask = input.lteq(0x7F); + uint64_t twobyte_mask = input.lteq(0x7FF); + uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF); + + size_t ascii_count = count_ones(ascii_mask) / 2; + size_t twobyte_count = count_ones(twobyte_mask & ~ascii_mask) / 2; + size_t threebyte_count = count_ones(not_pair_mask & ~twobyte_mask) / 2; + size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2; + count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + + ascii_count; + } + return count + scalar::utf16::utf8_length_from_utf16(in + pos, + size - pos); +} + +template +simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, + size_t size) { + return count_code_points(in, size); +} + +simdutf_really_inline void +change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { + size_t pos = 0; + + while (pos < size / 32 * 32) { + simd16x32 input(reinterpret_cast(in + pos)); + input.swap_bytes(); + input.store(reinterpret_cast(output)); + pos += 32; + output += 32; + } + + scalar::utf16::change_endianness_utf16(in + pos, size - pos, output); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/utf16.h */ +/* begin file src/generic/utf8.h */ + +namespace simdutf { +namespace arm64 { +namespace { +namespace utf8 { + +using namespace simd; + +simdutf_really_inline size_t count_code_points(const char *in, size_t size) { + size_t pos = 0; + size_t count = 0; + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t utf8_continuation_mask = input.gt(-65); + count += count_ones(utf8_continuation_mask); + } + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} + +simdutf_really_inline size_t utf16_length_from_utf8(const char *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + // We count one word for anything that is not a continuation (so + // leading bytes). + count += 64 - count_ones(utf8_continuation_mask); + int64_t utf8_4byte = input.gteq_unsigned(240); + count += count_ones(utf8_4byte); + } + return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); +} +} // namespace utf8 +} // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/utf8.h */ +// transcoding from UTF-8 to Latin 1 +/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ + +namespace simdutf { +namespace arm64 { +namespace { +namespace utf8_to_latin1 { +using namespace simd; + +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // For UTF-8 to Latin 1, we can allow any ASCII character, and any + // continuation byte, but the non-ASCII leading bytes must be 0b11000011 or + // 0b11000010 and nothing else. + // + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + constexpr const uint8_t FORBIDDEN = 0xff; + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + FORBIDDEN, + // 1110____ ________ + FORBIDDEN, + // 1111____ ________ + FORBIDDEN); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, + + // ____0100 ________ + FORBIDDEN, + // ____0101 ________ + FORBIDDEN, + // ____011_ ________ + FORBIDDEN, FORBIDDEN, + + // ____1___ ________ + FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, + // ____1101 ________ + FORBIDDEN, FORBIDDEN, FORBIDDEN); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} + +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + this->error |= check_special_cases(input, prev1); + } + + simdutf_really_inline size_t convert(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 16 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 16; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = + scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output); + if (howmany == 0) { + return 0; + } + latin1_output += howmany; + } + return latin1_output - start; + } + + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from + // in+pos onward, with the ability to go back up to pos bytes, and + // read size-pos bytes forward. + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + res.count += pos; + return res; + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + res.count += pos; + return res; + } + if (pos < size) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + latin1_output += res.count; + } + } + return result(error_code::SUCCESS, latin1_output - start); + } + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_to_latin1 } // unnamed namespace +} // namespace arm64 +} // namespace simdutf +/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ +/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ + +namespace simdutf { +namespace arm64 { +namespace { +namespace utf8_to_latin1 { +using namespace simd; + +simdutf_really_inline size_t convert_valid(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last + // 16 bytes, and if the data is valid, then it is entirely safe because 16 + // UTF-8 bytes generate much more than 8 bytes. However, you cannot generally + // assume that you have valid UTF-8 input, so we are going to go back from the + // end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, it + // is not good enough. + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (pos < size) { + size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, + latin1_output); + latin1_output += howmany; + } + return latin1_output - start; +} + +} // namespace utf8_to_latin1 +} // namespace +} // namespace arm64 +} // namespace simdutf + // namespace simdutf +/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ + +// placeholder scalars + +// +// Implementation-specific overrides +// +namespace simdutf { +namespace arm64 { + +simdutf_warn_unused int +implementation::detect_encodings(const char *input, + size_t length) const noexcept { + // If there is a BOM, then we trust it. + auto bom_encoding = simdutf::BOM::check_bom(input, length); + if (bom_encoding != encoding_type::unspecified) { + return bom_encoding; + } + // todo: reimplement as a one-pass algorithm. + int out = 0; + if (validate_utf8(input, length)) { + out |= encoding_type::UTF8; + } + if ((length % 2) == 0) { + if (validate_utf16le(reinterpret_cast(input), + length / 2)) { + out |= encoding_type::UTF16_LE; + } + } + if ((length % 4) == 0) { + if (validate_utf32(reinterpret_cast(input), length / 4)) { + out |= encoding_type::UTF32_LE; + } + } + return out; +} + +simdutf_warn_unused bool +implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return arm64::utf8_validation::generic_validate_utf8(buf, len); +} + +simdutf_warn_unused result implementation::validate_utf8_with_errors( + const char *buf, size_t len) const noexcept { + return arm64::utf8_validation::generic_validate_utf8_with_errors(buf, len); +} + +simdutf_warn_unused bool +implementation::validate_ascii(const char *buf, size_t len) const noexcept { + return arm64::utf8_validation::generic_validate_ascii(buf, len); +} + +simdutf_warn_unused result implementation::validate_ascii_with_errors( + const char *buf, size_t len) const noexcept { + return arm64::utf8_validation::generic_validate_ascii_with_errors(buf, len); +} + +simdutf_warn_unused bool +implementation::validate_utf16le(const char16_t *buf, + size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid. protected the implementation from nullptr. + return true; + } + const char16_t *tail = arm_validate_utf16(buf, len); + if (tail) { + return scalar::utf16::validate(tail, + len - (tail - buf)); + } else { + return false; + } +} + +simdutf_warn_unused bool +implementation::validate_utf16be(const char16_t *buf, + size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid. protected the implementation from nullptr. + return true; + } + const char16_t *tail = arm_validate_utf16(buf, len); + if (tail) { + return scalar::utf16::validate(tail, len - (tail - buf)); + } else { + return false; + } +} + +simdutf_warn_unused result implementation::validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + result res = arm_validate_utf16_with_errors(buf, len); + if (res.count != len) { + result scalar_res = scalar::utf16::validate_with_errors( + buf + res.count, len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } +} + +simdutf_warn_unused result implementation::validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + result res = arm_validate_utf16_with_errors(buf, len); + if (res.count != len) { + result scalar_res = scalar::utf16::validate_with_errors( + buf + res.count, len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } +} + +simdutf_warn_unused bool +implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid. protected the implementation from nullptr. + return true; + } + const char32_t *tail = arm_validate_utf32le(buf, len); + if (tail) { + return scalar::utf32::validate(tail, len - (tail - buf)); + } else { + return false; + } +} + +simdutf_warn_unused result implementation::validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + result res = arm_validate_utf32le_with_errors(buf, len); + if (res.count != len) { + result scalar_res = + scalar::utf32::validate_with_errors(buf + res.count, len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } +} + +simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + arm_convert_latin1_to_utf8(buf, len, utf8_output); + size_t converted_chars = ret.second - utf8_output; + + if (ret.first != buf + len) { + const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + return converted_chars; +} + +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + arm_convert_latin1_to_utf16(buf, len, utf16_output); + size_t converted_chars = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = + scalar::latin1_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + return converted_chars; +} + +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + arm_convert_latin1_to_utf16(buf, len, utf16_output); + size_t converted_chars = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = + scalar::latin1_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + return converted_chars; +} + +simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + arm_convert_latin1_to_utf32(buf, len, utf32_output); + size_t converted_chars = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + return converted_chars; +} + +simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + utf8_to_latin1::validating_transcoder converter; + return converter.convert(buf, len, latin1_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_output) const noexcept { + utf8_to_latin1::validating_transcoder converter; + return converter.convert_with_errors(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + return arm64::utf8_to_latin1::convert_valid(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert(buf, len, utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert_with_errors(buf, len, + utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert_with_errors(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( + const char *input, size_t size, char16_t *utf16_output) const noexcept { + return utf8_to_utf16::convert_valid(input, size, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( + const char *input, size_t size, char16_t *utf16_output) const noexcept { + return utf8_to_utf16::convert_valid(input, size, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + utf8_to_utf32::validating_transcoder converter; + return converter.convert(buf, len, utf32_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + utf8_to_utf32::validating_transcoder converter; + return converter.convert_with_errors(buf, len, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( + const char *input, size_t size, char32_t *utf32_output) const noexcept { + return utf8_to_utf32::convert_valid(input, size, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + arm_convert_utf16_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + arm_convert_utf16_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result +implementation::convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + arm_convert_utf16_to_latin1_with_errors( + buf, len, latin1_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result +implementation::convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + arm_convert_utf16_to_latin1_with_errors(buf, len, + latin1_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: implement a custom function. + return convert_utf16be_to_latin1(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: implement a custom function. + return convert_utf16le_to_latin1(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + arm_convert_utf16_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + arm_convert_utf16_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + arm_convert_utf16_to_utf8_with_errors(buf, len, + utf8_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + arm_convert_utf16_to_utf8_with_errors(buf, len, + utf8_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf16le_to_utf8(buf, len, utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf16be_to_utf8(buf, len, utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + if (simdutf_unlikely(len == 0)) { + return 0; + } + std::pair ret = + arm_convert_utf32_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + arm_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); + if (ret.first.count != len) { + result scalar_res = scalar::utf32_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + arm_convert_utf16_to_utf32(buf, len, utf32_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + arm_convert_utf16_to_utf32(buf, len, utf32_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + arm_convert_utf16_to_utf32_with_errors(buf, len, + utf32_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf32_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + arm_convert_utf16_to_utf32_with_errors(buf, len, + utf32_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf32_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + arm_convert_utf32_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + arm_convert_utf32_to_latin1_with_errors(buf, len, latin1_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = scalar::utf32_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + arm_convert_utf32_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert_valid( + ret.first, len - (ret.first - buf), ret.second); + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + // optimization opportunity: implement a custom function. + return convert_utf32_to_utf8(buf, len, utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + arm_convert_utf32_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf32_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + arm_convert_utf32_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf32_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + arm_convert_utf32_to_utf16_with_errors(buf, len, + utf16_output); + if (ret.first.count != len) { + result scalar_res = + scalar::utf32_to_utf16::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf16_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + arm_convert_utf32_to_utf16_with_errors(buf, len, + utf16_output); + if (ret.first.count != len) { + result scalar_res = + scalar::utf32_to_utf16::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf16_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return convert_utf32_to_utf16le(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return convert_utf32_to_utf16be(buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_utf16le_to_utf32(buf, len, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_utf16be_to_utf32(buf, len, utf32_output); +} + +void implementation::change_endianness_utf16(const char16_t *input, + size_t length, + char16_t *output) const noexcept { + utf16::change_endianness_utf16(input, length, output); +} + +simdutf_warn_unused size_t implementation::count_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::count_code_points(input, length); +} + +simdutf_warn_unused size_t implementation::count_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::count_code_points(input, length); +} + +simdutf_warn_unused size_t +implementation::count_utf8(const char *input, size_t length) const noexcept { + return utf8::count_code_points(input, length); +} + +simdutf_warn_unused size_t implementation::latin1_length_from_utf8( + const char *buf, size_t len) const noexcept { + return count_utf8(buf, len); +} + +simdutf_warn_unused size_t +implementation::latin1_length_from_utf16(size_t length) const noexcept { + return scalar::utf16::latin1_length_from_utf16(length); +} + +simdutf_warn_unused size_t +implementation::latin1_length_from_utf32(size_t length) const noexcept { + return scalar::utf32::latin1_length_from_utf32(length); +} + +simdutf_warn_unused size_t implementation::utf8_length_from_latin1( + const char *input, size_t length) const noexcept { + // See + // https://lemire.me/blog/2023/05/15/computing-the-utf-8-size-of-a-latin-1-string-quickly-arm-neon-edition/ + // credit to Pete Cawley + const uint8_t *data = reinterpret_cast(input); + uint64_t result = 0; + const int lanes = sizeof(uint8x16_t); + uint8_t rem = length % lanes; + const uint8_t *simd_end = data + (length / lanes) * lanes; + const uint8x16_t threshold = vdupq_n_u8(0x80); + for (; data < simd_end; data += lanes) { + // load 16 bytes + uint8x16_t input_vec = vld1q_u8(data); + // compare to threshold (0x80) + uint8x16_t withhighbit = vcgeq_u8(input_vec, threshold); + // vertical addition + result -= vaddvq_s8(vreinterpretq_s8_u8(withhighbit)); + } + return result + (length / lanes) * lanes + + scalar::latin1::utf8_length_from_latin1((const char *)simd_end, rem); +} + +simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::utf8_length_from_utf16(input, length); +} + +simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::utf8_length_from_utf16(input, length); +} + +simdutf_warn_unused size_t +implementation::utf16_length_from_latin1(size_t length) const noexcept { + return scalar::latin1::utf16_length_from_latin1(length); +} + +simdutf_warn_unused size_t +implementation::utf32_length_from_latin1(size_t length) const noexcept { + return scalar::latin1::utf32_length_from_latin1(length); +} + +simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::utf32_length_from_utf16(input, length); +} + +simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::utf32_length_from_utf16(input, length); +} + +simdutf_warn_unused size_t implementation::utf16_length_from_utf8( + const char *input, size_t length) const noexcept { + return utf8::utf16_length_from_utf8(input, length); +} + +simdutf_warn_unused size_t implementation::utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + const uint32x4_t v_7f = vmovq_n_u32((uint32_t)0x7f); + const uint32x4_t v_7ff = vmovq_n_u32((uint32_t)0x7ff); + const uint32x4_t v_ffff = vmovq_n_u32((uint32_t)0xffff); + const uint32x4_t v_1 = vmovq_n_u32((uint32_t)0x1); + size_t pos = 0; + size_t count = 0; + for (; pos + 4 <= length; pos += 4) { + uint32x4_t in = vld1q_u32(reinterpret_cast(input + pos)); + const uint32x4_t ascii_bytes_bytemask = vcleq_u32(in, v_7f); + const uint32x4_t one_two_bytes_bytemask = vcleq_u32(in, v_7ff); + const uint32x4_t two_bytes_bytemask = + veorq_u32(one_two_bytes_bytemask, ascii_bytes_bytemask); + const uint32x4_t three_bytes_bytemask = + veorq_u32(vcleq_u32(in, v_ffff), one_two_bytes_bytemask); + + const uint16x8_t reduced_ascii_bytes_bytemask = + vreinterpretq_u16_u32(vandq_u32(ascii_bytes_bytemask, v_1)); + const uint16x8_t reduced_two_bytes_bytemask = + vreinterpretq_u16_u32(vandq_u32(two_bytes_bytemask, v_1)); + const uint16x8_t reduced_three_bytes_bytemask = + vreinterpretq_u16_u32(vandq_u32(three_bytes_bytemask, v_1)); + + const uint16x8_t compressed_bytemask0 = + vpaddq_u16(reduced_ascii_bytes_bytemask, reduced_two_bytes_bytemask); + const uint16x8_t compressed_bytemask1 = + vpaddq_u16(reduced_three_bytes_bytemask, reduced_three_bytes_bytemask); + + size_t ascii_count = count_ones( + vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask0), 0)); + size_t two_bytes_count = count_ones( + vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask0), 1)); + size_t three_bytes_count = count_ones( + vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask1), 0)); + + count += 16 - 3 * ascii_count - 2 * two_bytes_count - three_bytes_count; + } + return count + + scalar::utf32::utf8_length_from_utf32(input + pos, length - pos); +} + +simdutf_warn_unused size_t implementation::utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + const uint32x4_t v_ffff = vmovq_n_u32((uint32_t)0xffff); + const uint32x4_t v_1 = vmovq_n_u32((uint32_t)0x1); + size_t pos = 0; + size_t count = 0; + for (; pos + 4 <= length; pos += 4) { + uint32x4_t in = vld1q_u32(reinterpret_cast(input + pos)); + const uint32x4_t surrogate_bytemask = vcgtq_u32(in, v_ffff); + const uint16x8_t reduced_bytemask = + vreinterpretq_u16_u32(vandq_u32(surrogate_bytemask, v_1)); + const uint16x8_t compressed_bytemask = + vpaddq_u16(reduced_bytemask, reduced_bytemask); + size_t surrogate_count = count_ones( + vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask), 0)); + count += 4 + surrogate_count; + } + return count + + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos); +} + +simdutf_warn_unused size_t implementation::utf32_length_from_utf8( + const char *input, size_t length) const noexcept { + return utf8::count_code_points(input, length); +} + +simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( + const char *input, size_t length) const noexcept { + return scalar::base64::maximal_binary_length_from_base64(input, length); +} + +simdutf_warn_unused result implementation::base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return (options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return (options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); +} + +simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( + const char16_t *input, size_t length) const noexcept { + return scalar::base64::maximal_binary_length_from_base64(input, length); +} + +simdutf_warn_unused result implementation::base64_to_binary( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return (options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return (options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); +} + +simdutf_warn_unused size_t implementation::base64_length_from_binary( + size_t length, base64_options options) const noexcept { + return scalar::base64::base64_length_from_binary(length, options); +} + +size_t implementation::binary_to_base64(const char *input, size_t length, + char *output, + base64_options options) const noexcept { + return encode_base64(output, input, length, options); +} + +} // namespace arm64 +} // namespace simdutf + +/* begin file src/simdutf/arm64/end.h */ +/* end file src/simdutf/arm64/end.h */ +/* end file src/arm64/implementation.cpp */ +#endif +#if SIMDUTF_IMPLEMENTATION_FALLBACK +/* begin file src/fallback/implementation.cpp */ +/* begin file src/simdutf/fallback/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "fallback" +// #define SIMDUTF_IMPLEMENTATION fallback +/* end file src/simdutf/fallback/begin.h */ + + + + + + + + +#include +#include + +namespace simdutf { +namespace fallback { + +simdutf_warn_unused int +implementation::detect_encodings(const char *input, + size_t length) const noexcept { + // If there is a BOM, then we trust it. + auto bom_encoding = simdutf::BOM::check_bom(input, length); + if (bom_encoding != encoding_type::unspecified) { + return bom_encoding; + } + // todo: reimplement as a one-pass algorithm. + int out = 0; + if (validate_utf8(input, length)) { + out |= encoding_type::UTF8; + } + if ((length % 2) == 0) { + if (validate_utf16le(reinterpret_cast(input), + length / 2)) { + out |= encoding_type::UTF16_LE; + } + } + if ((length % 4) == 0) { + if (validate_utf32(reinterpret_cast(input), length / 4)) { + out |= encoding_type::UTF32_LE; + } + } + return out; +} + +simdutf_warn_unused bool +implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return scalar::utf8::validate(buf, len); +} + +simdutf_warn_unused result implementation::validate_utf8_with_errors( + const char *buf, size_t len) const noexcept { + return scalar::utf8::validate_with_errors(buf, len); +} + +simdutf_warn_unused bool +implementation::validate_ascii(const char *buf, size_t len) const noexcept { + return scalar::ascii::validate(buf, len); +} + +simdutf_warn_unused result implementation::validate_ascii_with_errors( + const char *buf, size_t len) const noexcept { + return scalar::ascii::validate_with_errors(buf, len); +} + +simdutf_warn_unused bool +implementation::validate_utf16le(const char16_t *buf, + size_t len) const noexcept { + return scalar::utf16::validate(buf, len); +} + +simdutf_warn_unused bool +implementation::validate_utf16be(const char16_t *buf, + size_t len) const noexcept { + return scalar::utf16::validate(buf, len); +} + +simdutf_warn_unused result implementation::validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept { + return scalar::utf16::validate_with_errors(buf, len); +} + +simdutf_warn_unused result implementation::validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept { + return scalar::utf16::validate_with_errors(buf, len); +} + +simdutf_warn_unused bool +implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { + return scalar::utf32::validate(buf, len); +} + +simdutf_warn_unused result implementation::validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept { + return scalar::utf32::validate_with_errors(buf, len); +} + +simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept { + return scalar::latin1_to_utf8::convert(buf, len, utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::latin1_to_utf16::convert(buf, len, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::latin1_to_utf16::convert(buf, len, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::latin1_to_utf32::convert(buf, len, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf8_to_latin1::convert(buf, len, latin1_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf8_to_latin1::convert_with_errors(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf8_to_latin1::convert_valid(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf8_to_utf16::convert(buf, len, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf8_to_utf16::convert(buf, len, + utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf8_to_utf16::convert_with_errors( + buf, len, utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf8_to_utf16::convert_with_errors( + buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf8_to_utf16::convert_valid(buf, len, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf8_to_utf16::convert_valid(buf, len, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::utf8_to_utf32::convert(buf, len, utf32_output); +} + +simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::utf8_to_utf32::convert_with_errors(buf, len, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( + const char *input, size_t size, char32_t *utf32_output) const noexcept { + return scalar::utf8_to_utf32::convert_valid(input, size, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf16_to_latin1::convert(buf, len, + latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf16_to_latin1::convert(buf, len, + latin1_output); +} + +simdutf_warn_unused result +implementation::convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf16_to_latin1::convert_with_errors( + buf, len, latin1_output); +} + +simdutf_warn_unused result +implementation::convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf16_to_latin1::convert_with_errors( + buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf16_to_latin1::convert_valid( + buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf16_to_latin1::convert_valid(buf, len, + latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf16_to_utf8::convert(buf, len, + utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf16_to_utf8::convert(buf, len, utf8_output); +} + +simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf16_to_utf8::convert_with_errors( + buf, len, utf8_output); +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf16_to_utf8::convert_with_errors( + buf, len, utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf16_to_utf8::convert_valid(buf, len, + utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf16_to_utf8::convert_valid(buf, len, + utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf32_to_latin1::convert(buf, len, latin1_output); +} + +simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf32_to_latin1::convert_with_errors(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + return scalar::utf32_to_latin1::convert_valid(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf32_to_utf8::convert(buf, len, utf8_output); +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf32_to_utf8::convert_with_errors(buf, len, utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf32_to_utf8::convert_valid(buf, len, utf8_output); +} + +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf32_to_utf16::convert(buf, len, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf32_to_utf16::convert(buf, len, + utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf32_to_utf16::convert_with_errors( + buf, len, utf16_output); +} + +simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf32_to_utf16::convert_with_errors( + buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf32_to_utf16::convert_valid( + buf, len, utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf32_to_utf16::convert_valid(buf, len, + utf16_output); +} + +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::utf16_to_utf32::convert(buf, len, + utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::utf16_to_utf32::convert(buf, len, + utf32_output); +} + +simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::utf16_to_utf32::convert_with_errors( + buf, len, utf32_output); +} + +simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::utf16_to_utf32::convert_with_errors( + buf, len, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::utf16_to_utf32::convert_valid( + buf, len, utf32_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::utf16_to_utf32::convert_valid(buf, len, + utf32_output); +} + +void implementation::change_endianness_utf16(const char16_t *input, + size_t length, + char16_t *output) const noexcept { + scalar::utf16::change_endianness_utf16(input, length, output); +} + +simdutf_warn_unused size_t implementation::count_utf16le( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::count_code_points(input, length); +} + +simdutf_warn_unused size_t implementation::count_utf16be( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::count_code_points(input, length); +} + +simdutf_warn_unused size_t +implementation::count_utf8(const char *input, size_t length) const noexcept { + return scalar::utf8::count_code_points(input, length); +} + +simdutf_warn_unused size_t implementation::latin1_length_from_utf8( + const char *buf, size_t len) const noexcept { + return scalar::utf8::count_code_points(buf, len); +} + +simdutf_warn_unused size_t +implementation::latin1_length_from_utf16(size_t length) const noexcept { + return scalar::utf16::latin1_length_from_utf16(length); +} + +simdutf_warn_unused size_t +implementation::latin1_length_from_utf32(size_t length) const noexcept { + return length; +} + +simdutf_warn_unused size_t implementation::utf8_length_from_latin1( + const char *input, size_t length) const noexcept { + size_t answer = length; + size_t i = 0; + auto pop = [](uint64_t v) { + return (size_t)(((v >> 7) & UINT64_C(0x0101010101010101)) * + UINT64_C(0x0101010101010101) >> + 56); + }; + for (; i + 32 <= length; i += 32) { + uint64_t v; + memcpy(&v, input + i, 8); + answer += pop(v); + memcpy(&v, input + i + 8, sizeof(v)); + answer += pop(v); + memcpy(&v, input + i + 16, sizeof(v)); + answer += pop(v); + memcpy(&v, input + i + 24, sizeof(v)); + answer += pop(v); + } + for (; i + 8 <= length; i += 8) { + uint64_t v; + memcpy(&v, input + i, sizeof(v)); + answer += pop(v); + } + for (; i + 1 <= length; i += 1) { + answer += static_cast(input[i]) >> 7; + } + return answer; +} + +simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::utf8_length_from_utf16(input, + length); +} + +simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::utf8_length_from_utf16(input, length); +} + +simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::utf32_length_from_utf16(input, + length); +} + +simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::utf32_length_from_utf16(input, length); +} + +simdutf_warn_unused size_t +implementation::utf16_length_from_latin1(size_t length) const noexcept { + return scalar::latin1::utf16_length_from_latin1(length); +} + +simdutf_warn_unused size_t implementation::utf16_length_from_utf8( + const char *input, size_t length) const noexcept { + return scalar::utf8::utf16_length_from_utf8(input, length); +} + +simdutf_warn_unused size_t implementation::utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + return scalar::utf32::utf8_length_from_utf32(input, length); +} + +simdutf_warn_unused size_t implementation::utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + return scalar::utf32::utf16_length_from_utf32(input, length); +} + +simdutf_warn_unused size_t +implementation::utf32_length_from_latin1(size_t length) const noexcept { + return scalar::latin1::utf32_length_from_latin1(length); +} + +simdutf_warn_unused size_t implementation::utf32_length_from_utf8( + const char *input, size_t length) const noexcept { + return scalar::utf8::count_code_points(input, length); +} + +simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( + const char *input, size_t length) const noexcept { + return scalar::base64::maximal_binary_length_from_base64(input, length); +} + +simdutf_warn_unused result implementation::base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + size_t equallocation = + length; // location of the first padding character if any + size_t equalsigns = 0; + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + length -= 1; + equalsigns++; + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + equalsigns++; + length -= 1; + } + } + if (length == 0) { + if (equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation}; + } + return {SUCCESS, 0}; + } + result r = scalar::base64::base64_tail_decode( + output, input, length, equalsigns, options, last_chunk_options); + if (last_chunk_options != stop_before_partial && + r.error == error_code::SUCCESS && equalsigns > 0) { + // additional checks + if ((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation}; + } + } + return r; +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + size_t equallocation = + length; // location of the first padding character if any + size_t equalsigns = 0; + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + length -= 1; + equalsigns++; + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + equalsigns++; + length -= 1; + } + } + if (length == 0) { + if (equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, 0, 0}; + } + full_result r = scalar::base64::base64_tail_decode( + output, input, length, equalsigns, options, last_chunk_options); + if (last_chunk_options != stop_before_partial && + r.error == error_code::SUCCESS && equalsigns > 0) { + // additional checks + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; + } + } + return r; +} + +simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( + const char16_t *input, size_t length) const noexcept { + return scalar::base64::maximal_binary_length_from_base64(input, length); +} + +simdutf_warn_unused result implementation::base64_to_binary( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + size_t equallocation = + length; // location of the first padding character if any + size_t equalsigns = 0; + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + length -= 1; + equalsigns++; + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + equalsigns++; + length -= 1; + } + } + if (length == 0) { + if (equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation}; + } + return {SUCCESS, 0}; + } + result r = scalar::base64::base64_tail_decode( + output, input, length, equalsigns, options, last_chunk_options); + if (last_chunk_options != stop_before_partial && + r.error == error_code::SUCCESS && equalsigns > 0) { + // additional checks + if ((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation}; + } + } + return r; +} + +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + size_t equallocation = + length; // location of the first padding character if any + size_t equalsigns = 0; + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + length -= 1; + equalsigns++; + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + equalsigns++; + length -= 1; + } + } + if (length == 0) { + if (equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, 0, 0}; + } + full_result r = scalar::base64::base64_tail_decode( + output, input, length, equalsigns, options, last_chunk_options); + if (last_chunk_options != stop_before_partial && + r.error == error_code::SUCCESS && equalsigns > 0) { + // additional checks + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; + } + } + return r; +} + +simdutf_warn_unused size_t implementation::base64_length_from_binary( + size_t length, base64_options options) const noexcept { + return scalar::base64::base64_length_from_binary(length, options); +} + +size_t implementation::binary_to_base64(const char *input, size_t length, + char *output, + base64_options options) const noexcept { + return scalar::base64::tail_encode_base64(output, input, length, options); +} +} // namespace fallback } // namespace simdutf -#endif // SIMDUTF_UTF8_TO_UTF16_TABLES_H -/* end file src/tables/utf8_to_utf16_tables.h */ -/* begin file src/tables/utf16_to_utf8_tables.h */ -// file generated by scripts/sse_convert_utf16_to_utf8.py -#ifndef SIMDUTF_UTF16_TO_UTF8_TABLES_H -#define SIMDUTF_UTF16_TO_UTF8_TABLES_H +/* begin file src/simdutf/fallback/end.h */ +/* end file src/simdutf/fallback/end.h */ +/* end file src/fallback/implementation.cpp */ +#endif +#if SIMDUTF_IMPLEMENTATION_ICELAKE +/* begin file src/icelake/implementation.cpp */ + + +/* begin file src/simdutf/icelake/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "icelake" +// #define SIMDUTF_IMPLEMENTATION icelake + +#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE +// nothing needed. +#else +SIMDUTF_TARGET_ICELAKE +#endif + +#if SIMDUTF_GCC11ORMORE // workaround for + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 +// clang-format off +SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) +// clang-format on +#endif // end of workaround +/* end file src/simdutf/icelake/begin.h */ +namespace simdutf { +namespace icelake { +namespace { +#ifndef SIMDUTF_ICELAKE_H + #error "icelake.h must be included" +#endif +/* begin file src/icelake/icelake_utf8_common.inl.cpp */ +// Common procedures for both validating and non-validating conversions from +// UTF-8. +enum block_processing_mode { SIMDUTF_FULL, SIMDUTF_TAIL }; + +using utf8_to_utf16_result = std::pair; +using utf8_to_utf32_result = std::pair; + +/* + process_block_utf8_to_utf16 converts up to 64 bytes from 'in' from UTF-8 + to UTF-16. When tail = SIMDUTF_FULL, then the full input buffer (64 bytes) + might be used. When tail = SIMDUTF_TAIL, we take into account 'gap' which + indicates how many input bytes are relevant. + + Returns true when the result is correct, otherwise it returns false. + + The provided in and out pointers are advanced according to how many input + bytes have been processed, upon success. +*/ +template +simdutf_really_inline bool +process_block_utf8_to_utf16(const char *&in, char16_t *&out, size_t gap) { + // constants + __m512i mask_identity = _mm512_set_epi8( + 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, + 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, + 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, + 8, 7, 6, 5, 4, 3, 2, 1, 0); + __m512i mask_c0c0c0c0 = _mm512_set1_epi32(0xc0c0c0c0); + __m512i mask_80808080 = _mm512_set1_epi32(0x80808080); + __m512i mask_f0f0f0f0 = _mm512_set1_epi32(0xf0f0f0f0); + __m512i mask_dfdfdfdf_tail = _mm512_set_epi64( + 0xffffdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, + 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, + 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf); + __m512i mask_c2c2c2c2 = _mm512_set1_epi32(0xc2c2c2c2); + __m512i mask_ffffffff = _mm512_set1_epi32(0xffffffff); + __m512i mask_d7c0d7c0 = _mm512_set1_epi32(0xd7c0d7c0); + __m512i mask_dc00dc00 = _mm512_set1_epi32(0xdc00dc00); + __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + // Note that 'tail' is a compile-time constant ! + __mmask64 b = + (tail == SIMDUTF_FULL) ? 0xFFFFFFFFFFFFFFFF : (uint64_t(1) << gap) - 1; + __m512i input = (tail == SIMDUTF_FULL) ? _mm512_loadu_si512(in) + : _mm512_maskz_loadu_epi8(b, in); + __mmask64 m1 = (tail == SIMDUTF_FULL) + ? _mm512_cmplt_epu8_mask(input, mask_80808080) + : _mm512_mask_cmplt_epu8_mask(b, input, mask_80808080); + if (_ktestc_mask64_u8(m1, + b)) { // NOT(m1) AND b -- if all zeroes, then all ASCII + // alternatively, we could do 'if (m1 == b) { ' + if (tail == SIMDUTF_FULL) { + in += 64; // consumed 64 bytes + // we convert a full 64-byte block, writing 128 bytes. + __m512i input1 = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(input)); + if (big_endian) { + input1 = _mm512_shuffle_epi8(input1, byteflip); + } + _mm512_storeu_si512(out, input1); + out += 32; + __m512i input2 = + _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(input, 1)); + if (big_endian) { + input2 = _mm512_shuffle_epi8(input2, byteflip); + } + _mm512_storeu_si512(out, input2); + out += 32; + return true; // we are done + } else { + in += gap; + if (gap <= 32) { + __m512i input1 = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(input)); + if (big_endian) { + input1 = _mm512_shuffle_epi8(input1, byteflip); + } + _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << (gap)) - 1), + input1); + out += gap; + } else { + __m512i input1 = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(input)); + if (big_endian) { + input1 = _mm512_shuffle_epi8(input1, byteflip); + } + _mm512_storeu_si512(out, input1); + out += 32; + __m512i input2 = + _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(input, 1)); + if (big_endian) { + input2 = _mm512_shuffle_epi8(input2, byteflip); + } + _mm512_mask_storeu_epi16( + out, __mmask32((uint32_t(1) << (gap - 32)) - 1), input2); + out += gap - 32; + } + return true; // we are done + } + } + // classify characters further + __mmask64 m234 = _mm512_cmp_epu8_mask( + mask_c0c0c0c0, input, + _MM_CMPINT_LE); // 0xc0 <= input, 2, 3, or 4 leading byte + __mmask64 m34 = + _mm512_cmp_epu8_mask(mask_dfdfdfdf_tail, input, + _MM_CMPINT_LT); // 0xdf < input, 3 or 4 leading byte + + __mmask64 milltwobytes = _mm512_mask_cmp_epu8_mask( + m234, input, mask_c2c2c2c2, + _MM_CMPINT_LT); // 0xc0 <= input < 0xc2 (illegal two byte sequence) + // Overlong 2-byte sequence + if (_ktestz_mask64_u8(milltwobytes, milltwobytes) == 0) { + // Overlong 2-byte sequence + return false; + } + if (_ktestz_mask64_u8(m34, m34) == 0) { + // We have a 3-byte sequence and/or a 2-byte sequence, or possibly even a + // 4-byte sequence! + __mmask64 m4 = _mm512_cmp_epu8_mask( + input, mask_f0f0f0f0, + _MM_CMPINT_NLT); // 0xf0 <= zmm0 (4 byte start bytes) + + __mmask64 mask_not_ascii = (tail == SIMDUTF_FULL) + ? _knot_mask64(m1) + : _kand_mask64(_knot_mask64(m1), b); + + __mmask64 mp1 = _kshiftli_mask64(m234, 1); + __mmask64 mp2 = _kshiftli_mask64(m34, 2); + // We could do it as follows... + // if (_kortestz_mask64_u8(m4,m4)) { // compute the bitwise OR of the 64-bit + // masks a and b and return 1 if all zeroes but GCC generates better code + // when we do: + if (m4 == 0) { // compute the bitwise OR of the 64-bit masks a and b and + // return 1 if all zeroes + // Fast path with 1,2,3 bytes + __mmask64 mc = _kor_mask64(mp1, mp2); // expected continuation bytes + __mmask64 m1234 = _kor_mask64(m1, m234); + // mismatched continuation bytes: + if (tail == SIMDUTF_FULL) { + __mmask64 xnormcm1234 = _kxnor_mask64( + mc, + m1234); // XNOR of mc and m1234 should be all zero if they differ + // the presence of a 1 bit indicates that they overlap. + // _kortestz_mask64_u8: compute the bitwise OR of 64-bit masksand return + // 1 if all zeroes. + if (!_kortestz_mask64_u8(xnormcm1234, xnormcm1234)) { + return false; + } + } else { + __mmask64 bxorm1234 = _kxor_mask64(b, m1234); + if (mc != bxorm1234) { + return false; + } + } + // mend: identifying the last bytes of each sequence to be decoded + __mmask64 mend = _kshiftri_mask64(m1234, 1); + if (tail != SIMDUTF_FULL) { + mend = _kor_mask64(mend, (uint64_t(1) << (gap - 1))); + } + + __m512i last_and_third = _mm512_maskz_compress_epi8(mend, mask_identity); + __m512i last_and_thirdu16 = + _mm512_cvtepu8_epi16(_mm512_castsi512_si256(last_and_third)); + + __m512i nonasciitags = _mm512_maskz_mov_epi8( + mask_not_ascii, mask_c0c0c0c0); // ASCII: 00000000 other: 11000000 + __m512i clearedbytes = _mm512_andnot_si512( + nonasciitags, input); // high two bits cleared where not ASCII + __m512i lastbytes = _mm512_maskz_permutexvar_epi8( + 0x5555555555555555, last_and_thirdu16, + clearedbytes); // the last byte of each character + + __mmask64 mask_before_non_ascii = _kshiftri_mask64( + mask_not_ascii, 1); // bytes that precede non-ASCII bytes + __m512i indexofsecondlastbytes = _mm512_add_epi16( + mask_ffffffff, last_and_thirdu16); // indices of the second last bytes + __m512i beforeasciibytes = + _mm512_maskz_mov_epi8(mask_before_non_ascii, clearedbytes); + __m512i secondlastbytes = _mm512_maskz_permutexvar_epi8( + 0x5555555555555555, indexofsecondlastbytes, + beforeasciibytes); // the second last bytes (of two, three byte seq, + // surrogates) + secondlastbytes = + _mm512_slli_epi16(secondlastbytes, 6); // shifted into position + + __m512i indexofthirdlastbytes = _mm512_add_epi16( + mask_ffffffff, + indexofsecondlastbytes); // indices of the second last bytes + __m512i thirdlastbyte = + _mm512_maskz_mov_epi8(m34, + clearedbytes); // only those that are the third + // last byte of a sequence + __m512i thirdlastbytes = _mm512_maskz_permutexvar_epi8( + 0x5555555555555555, indexofthirdlastbytes, + thirdlastbyte); // the third last bytes (of three byte sequences, hi + // surrogate) + thirdlastbytes = + _mm512_slli_epi16(thirdlastbytes, 12); // shifted into position + __m512i Wout = _mm512_ternarylogic_epi32(lastbytes, secondlastbytes, + thirdlastbytes, 254); + // the elements of Wout excluding the last element if it happens to be a + // high surrogate: + + __mmask64 mprocessed = + (tail == SIMDUTF_FULL) + ? _pdep_u64(0xFFFFFFFF, mend) + : _pdep_u64( + 0xFFFFFFFF, + _kand_mask64( + mend, b)); // we adjust mend at the end of the output. + + // Encodings out of range... + { + // the location of 3-byte sequence start bytes in the input + __mmask64 m3 = m34 & (b ^ m4); + // code units in Wout corresponding to 3-byte sequences. + __mmask32 M3 = __mmask32(_pext_u64(m3 << 2, mend)); + __m512i mask_08000800 = _mm512_set1_epi32(0x08000800); + __mmask32 Msmall800 = + _mm512_mask_cmplt_epu16_mask(M3, Wout, mask_08000800); + __m512i mask_d800d800 = _mm512_set1_epi32(0xd800d800); + __m512i Moutminusd800 = _mm512_sub_epi16(Wout, mask_d800d800); + __mmask32 M3s = + _mm512_mask_cmplt_epu16_mask(M3, Moutminusd800, mask_08000800); + if (_kor_mask32(Msmall800, M3s)) { + return false; + } + } + int64_t nout = _mm_popcnt_u64(mprocessed); + in += 64 - _lzcnt_u64(mprocessed); + if (big_endian) { + Wout = _mm512_shuffle_epi8(Wout, byteflip); + } + _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), Wout); + out += nout; + return true; // ok + } + // + // We have a 4-byte sequence, this is the general case. + // Slow! + __mmask64 mp3 = _kshiftli_mask64(m4, 3); + __mmask64 mc = + _kor_mask64(_kor_mask64(mp1, mp2), mp3); // expected continuation bytes + __mmask64 m1234 = _kor_mask64(m1, m234); + + // mend: identifying the last bytes of each sequence to be decoded + __mmask64 mend = + _kor_mask64(_kshiftri_mask64(_kor_mask64(mp3, m1234), 1), mp3); + if (tail != SIMDUTF_FULL) { + mend = _kor_mask64(mend, __mmask64(uint64_t(1) << (gap - 1))); + } + __m512i last_and_third = _mm512_maskz_compress_epi8(mend, mask_identity); + __m512i last_and_thirdu16 = + _mm512_cvtepu8_epi16(_mm512_castsi512_si256(last_and_third)); + + __m512i nonasciitags = _mm512_maskz_mov_epi8( + mask_not_ascii, mask_c0c0c0c0); // ASCII: 00000000 other: 11000000 + __m512i clearedbytes = _mm512_andnot_si512( + nonasciitags, input); // high two bits cleared where not ASCII + __m512i lastbytes = _mm512_maskz_permutexvar_epi8( + 0x5555555555555555, last_and_thirdu16, + clearedbytes); // the last byte of each character + + __mmask64 mask_before_non_ascii = _kshiftri_mask64( + mask_not_ascii, 1); // bytes that precede non-ASCII bytes + __m512i indexofsecondlastbytes = _mm512_add_epi16( + mask_ffffffff, last_and_thirdu16); // indices of the second last bytes + __m512i beforeasciibytes = + _mm512_maskz_mov_epi8(mask_before_non_ascii, clearedbytes); + __m512i secondlastbytes = _mm512_maskz_permutexvar_epi8( + 0x5555555555555555, indexofsecondlastbytes, + beforeasciibytes); // the second last bytes (of two, three byte seq, + // surrogates) + secondlastbytes = + _mm512_slli_epi16(secondlastbytes, 6); // shifted into position + + __m512i indexofthirdlastbytes = _mm512_add_epi16( + mask_ffffffff, + indexofsecondlastbytes); // indices of the second last bytes + __m512i thirdlastbyte = _mm512_maskz_mov_epi8( + m34, + clearedbytes); // only those that are the third last byte of a sequence + __m512i thirdlastbytes = _mm512_maskz_permutexvar_epi8( + 0x5555555555555555, indexofthirdlastbytes, + thirdlastbyte); // the third last bytes (of three byte sequences, hi + // surrogate) + thirdlastbytes = + _mm512_slli_epi16(thirdlastbytes, 12); // shifted into position + __m512i thirdsecondandlastbytes = _mm512_ternarylogic_epi32( + lastbytes, secondlastbytes, thirdlastbytes, 254); + uint64_t Mlo_uint64 = _pext_u64(mp3, mend); + __mmask32 Mlo = __mmask32(Mlo_uint64); + __mmask32 Mhi = __mmask32(Mlo_uint64 >> 1); + __m512i lo_surr_mask = _mm512_maskz_mov_epi16( + Mlo, + mask_dc00dc00); // lo surr: 1101110000000000, other: 0000000000000000 + __m512i shifted4_thirdsecondandlastbytes = + _mm512_srli_epi16(thirdsecondandlastbytes, + 4); // hi surr: 00000WVUTSRQPNML vuts = WVUTS - 1 + __m512i tagged_lo_surrogates = _mm512_or_si512( + thirdsecondandlastbytes, + lo_surr_mask); // lo surr: 110111KJHGFEDCBA, other: unchanged + __m512i Wout = _mm512_mask_add_epi16( + tagged_lo_surrogates, Mhi, shifted4_thirdsecondandlastbytes, + mask_d7c0d7c0); // hi sur: 110110vutsRQPNML, other: unchanged + // the elements of Wout excluding the last element if it happens to be a + // high surrogate: + __mmask32 Mout = ~(Mhi & 0x80000000); + __mmask64 mprocessed = + (tail == SIMDUTF_FULL) + ? _pdep_u64(Mout, mend) + : _pdep_u64( + Mout, + _kand_mask64(mend, + b)); // we adjust mend at the end of the output. + + // mismatched continuation bytes: + if (tail == SIMDUTF_FULL) { + __mmask64 xnormcm1234 = _kxnor_mask64( + mc, m1234); // XNOR of mc and m1234 should be all zero if they differ + // the presence of a 1 bit indicates that they overlap. + // _kortestz_mask64_u8: compute the bitwise OR of 64-bit masksand return 1 + // if all zeroes. + if (!_kortestz_mask64_u8(xnormcm1234, xnormcm1234)) { + return false; + } + } else { + __mmask64 bxorm1234 = _kxor_mask64(b, m1234); + if (mc != bxorm1234) { + return false; + } + } + // Encodings out of range... + { + // the location of 3-byte sequence start bytes in the input + __mmask64 m3 = m34 & (b ^ m4); + // code units in Wout corresponding to 3-byte sequences. + __mmask32 M3 = __mmask32(_pext_u64(m3 << 2, mend)); + __m512i mask_08000800 = _mm512_set1_epi32(0x08000800); + __mmask32 Msmall800 = + _mm512_mask_cmplt_epu16_mask(M3, Wout, mask_08000800); + __m512i mask_d800d800 = _mm512_set1_epi32(0xd800d800); + __m512i Moutminusd800 = _mm512_sub_epi16(Wout, mask_d800d800); + __mmask32 M3s = + _mm512_mask_cmplt_epu16_mask(M3, Moutminusd800, mask_08000800); + __m512i mask_04000400 = _mm512_set1_epi32(0x04000400); + __mmask32 M4s = + _mm512_mask_cmpge_epu16_mask(Mhi, Moutminusd800, mask_04000400); + if (!_kortestz_mask32_u8(M4s, _kor_mask32(Msmall800, M3s))) { + return false; + } + } + in += 64 - _lzcnt_u64(mprocessed); + int64_t nout = _mm_popcnt_u64(mprocessed); + if (big_endian) { + Wout = _mm512_shuffle_epi8(Wout, byteflip); + } + _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), Wout); + out += nout; + return true; // ok + } + // Fast path 2: all ASCII or 2 byte + __mmask64 continuation_or_ascii = (tail == SIMDUTF_FULL) + ? _knot_mask64(m234) + : _kand_mask64(_knot_mask64(m234), b); + // on top of -0xc0 we subtract -2 which we get back later of the + // continuation byte tags + __m512i leading2byte = _mm512_maskz_sub_epi8(m234, input, mask_c2c2c2c2); + __mmask64 leading = tail == (tail == SIMDUTF_FULL) + ? _kor_mask64(m1, m234) + : _kand_mask64(_kor_mask64(m1, m234), + b); // first bytes of each sequence + if (tail == SIMDUTF_FULL) { + __mmask64 xnor234leading = + _kxnor_mask64(_kshiftli_mask64(m234, 1), leading); + if (!_kortestz_mask64_u8(xnor234leading, xnor234leading)) { + return false; + } + } else { + __mmask64 bxorleading = _kxor_mask64(b, leading); + if (_kshiftli_mask64(m234, 1) != bxorleading) { + return false; + } + } + // + if (tail == SIMDUTF_FULL) { + // In the two-byte/ASCII scenario, we are easily latency bound, so we want + // to increment the input buffer as quickly as possible. + // We process 32 bytes unless the byte at index 32 is a continuation byte, + // in which case we include it as well for a total of 33 bytes. + // Note that if x is an ASCII byte, then the following is false: + // int8_t(x) <= int8_t(0xc0) under two's complement. + in += 32; + if (int8_t(*in) <= int8_t(0xc0)) + in++; + // The alternative is to do + // in += 64 - _lzcnt_u64(_pdep_u64(0xFFFFFFFF, continuation_or_ascii)); + // but it requires loading the input, doing the mask computation, and + // converting back the mask to a general register. It just takes too long, + // leaving the processor likely to be idle. + } else { + in += 64 - _lzcnt_u64(_pdep_u64(0xFFFFFFFF, continuation_or_ascii)); + } + __m512i lead = _mm512_maskz_compress_epi8( + leading, leading2byte); // will contain zero for ascii, and the data + lead = _mm512_cvtepu8_epi16( + _mm512_castsi512_si256(lead)); // ... zero extended into code units + __m512i follow = _mm512_maskz_compress_epi8( + continuation_or_ascii, input); // the last bytes of each sequence + follow = _mm512_cvtepu8_epi16( + _mm512_castsi512_si256(follow)); // ... zero extended into code units + lead = _mm512_slli_epi16(lead, 6); // shifted into position + __m512i final = _mm512_add_epi16(follow, lead); // combining lead and follow + + if (big_endian) { + final = _mm512_shuffle_epi8(final, byteflip); + } + if (tail == SIMDUTF_FULL) { + // Next part is UTF-16 specific and can be generalized to UTF-32. + int nout = _mm_popcnt_u32(uint32_t(leading)); + _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), final); + out += nout; // UTF-8 to UTF-16 is only expansionary in this case. + } else { + int nout = int(_mm_popcnt_u64(_pdep_u64(0xFFFFFFFF, leading))); + _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), final); + out += nout; // UTF-8 to UTF-16 is only expansionary in this case. + } + + return true; // we are fine. +} + +/* + utf32_to_utf16_masked converts `count` lower UTF-32 code units + from input `utf32` into UTF-16. It differs from utf32_to_utf16 + in that it 'masks' the writes. + + Returns how many 16-bit code units were stored. + + byteflip is used for flipping 16-bit code units, and it should be + __m512i byteflip = _mm512_setr_epi64( + 0x0607040502030001, + 0x0e0f0c0d0a0b0809, + 0x0607040502030001, + 0x0e0f0c0d0a0b0809, + 0x0607040502030001, + 0x0e0f0c0d0a0b0809, + 0x0607040502030001, + 0x0e0f0c0d0a0b0809 + ); + We pass it to the (always inlined) function to encourage the compiler to + keep the value in a (constant) register. +*/ +template +simdutf_really_inline size_t utf32_to_utf16_masked(const __m512i byteflip, + __m512i utf32, + unsigned int count, + char16_t *output) { + + const __mmask16 valid = uint16_t((1 << count) - 1); + // 1. check if we have any surrogate pairs + const __m512i v_0000_ffff = _mm512_set1_epi32(0x0000ffff); + const __mmask16 sp_mask = + _mm512_mask_cmpgt_epu32_mask(valid, utf32, v_0000_ffff); + + if (sp_mask == 0) { + if (big_endian) { + _mm256_mask_storeu_epi16( + (__m256i *)output, valid, + _mm256_shuffle_epi8(_mm512_cvtepi32_epi16(utf32), + _mm512_castsi512_si256(byteflip))); + + } else { + _mm256_mask_storeu_epi16((__m256i *)output, valid, + _mm512_cvtepi32_epi16(utf32)); + } + return count; + } + + { + // build surrogate pair code units in 32-bit lanes + + // t0 = 8 x [000000000000aaaa|aaaaaabbbbbbbbbb] + const __m512i v_0001_0000 = _mm512_set1_epi32(0x00010000); + const __m512i t0 = _mm512_sub_epi32(utf32, v_0001_0000); + + // t1 = 8 x [000000aaaaaaaaaa|bbbbbbbbbb000000] + const __m512i t1 = _mm512_slli_epi32(t0, 6); + + // t2 = 8 x [000000aaaaaaaaaa|aaaaaabbbbbbbbbb] -- copy hi word from t1 + // to t0 + // 0xe4 = (t1 and v_ffff_0000) or (t0 and not v_ffff_0000) + const __m512i v_ffff_0000 = _mm512_set1_epi32(0xffff0000); + const __m512i t2 = _mm512_ternarylogic_epi32(t1, t0, v_ffff_0000, 0xe4); + + // t2 = 8 x [110110aaaaaaaaaa|110111bbbbbbbbbb] -- copy hi word from t1 + // to t0 + // 0xba = (t2 and not v_fc00_fc000) or v_d800_dc00 + const __m512i v_fc00_fc00 = _mm512_set1_epi32(0xfc00fc00); + const __m512i v_d800_dc00 = _mm512_set1_epi32(0xd800dc00); + const __m512i t3 = + _mm512_ternarylogic_epi32(t2, v_fc00_fc00, v_d800_dc00, 0xba); + const __m512i t4 = _mm512_mask_blend_epi32(sp_mask, utf32, t3); + __m512i t5 = _mm512_ror_epi32(t4, 16); + // Here we want to trim all of the upper 16-bit code units from the 2-byte + // characters represented as 4-byte values. We can compute it from + // sp_mask or the following... It can be more optimized! + const __mmask32 nonzero = _kor_mask32( + 0xaaaaaaaa, _mm512_cmpneq_epi16_mask(t5, _mm512_setzero_si512())); + const __mmask32 nonzero_masked = + _kand_mask32(nonzero, __mmask32((uint64_t(1) << (2 * count)) - 1)); + if (big_endian) { + t5 = _mm512_shuffle_epi8(t5, byteflip); + } + // we deliberately avoid _mm512_mask_compressstoreu_epi16 for portability + // (zen4) + __m512i compressed = _mm512_maskz_compress_epi16(nonzero_masked, t5); + _mm512_mask_storeu_epi16( + output, + (1 << (count + static_cast(count_ones(sp_mask)))) - 1, + compressed); + //_mm512_mask_compressstoreu_epi16(output, nonzero_masked, t5); + } + + return count + static_cast(count_ones(sp_mask)); +} + +/* + utf32_to_utf16 converts `count` lower UTF-32 code units + from input `utf32` into UTF-16. It may overflow. + + Returns how many 16-bit code units were stored. + + byteflip is used for flipping 16-bit code units, and it should be + __m512i byteflip = _mm512_setr_epi64( + 0x0607040502030001, + 0x0e0f0c0d0a0b0809, + 0x0607040502030001, + 0x0e0f0c0d0a0b0809, + 0x0607040502030001, + 0x0e0f0c0d0a0b0809, + 0x0607040502030001, + 0x0e0f0c0d0a0b0809 + ); + We pass it to the (always inlined) function to encourage the compiler to + keep the value in a (constant) register. +*/ +template +simdutf_really_inline size_t utf32_to_utf16(const __m512i byteflip, + __m512i utf32, unsigned int count, + char16_t *output) { + // check if we have any surrogate pairs + const __m512i v_0000_ffff = _mm512_set1_epi32(0x0000ffff); + const __mmask16 sp_mask = _mm512_cmpgt_epu32_mask(utf32, v_0000_ffff); + + if (sp_mask == 0) { + // technically, it should be _mm256_storeu_epi16 + if (big_endian) { + _mm256_storeu_si256( + (__m256i *)output, + _mm256_shuffle_epi8(_mm512_cvtepi32_epi16(utf32), + _mm512_castsi512_si256(byteflip))); + } else { + _mm256_storeu_si256((__m256i *)output, _mm512_cvtepi32_epi16(utf32)); + } + return count; + } + + { + // build surrogate pair code units in 32-bit lanes + + // t0 = 8 x [000000000000aaaa|aaaaaabbbbbbbbbb] + const __m512i v_0001_0000 = _mm512_set1_epi32(0x00010000); + const __m512i t0 = _mm512_sub_epi32(utf32, v_0001_0000); + + // t1 = 8 x [000000aaaaaaaaaa|bbbbbbbbbb000000] + const __m512i t1 = _mm512_slli_epi32(t0, 6); + + // t2 = 8 x [000000aaaaaaaaaa|aaaaaabbbbbbbbbb] -- copy hi word from t1 + // to t0 + // 0xe4 = (t1 and v_ffff_0000) or (t0 and not v_ffff_0000) + const __m512i v_ffff_0000 = _mm512_set1_epi32(0xffff0000); + const __m512i t2 = _mm512_ternarylogic_epi32(t1, t0, v_ffff_0000, 0xe4); + + // t2 = 8 x [110110aaaaaaaaaa|110111bbbbbbbbbb] -- copy hi word from t1 + // to t0 + // 0xba = (t2 and not v_fc00_fc000) or v_d800_dc00 + const __m512i v_fc00_fc00 = _mm512_set1_epi32(0xfc00fc00); + const __m512i v_d800_dc00 = _mm512_set1_epi32(0xd800dc00); + const __m512i t3 = + _mm512_ternarylogic_epi32(t2, v_fc00_fc00, v_d800_dc00, 0xba); + const __m512i t4 = _mm512_mask_blend_epi32(sp_mask, utf32, t3); + __m512i t5 = _mm512_ror_epi32(t4, 16); + const __mmask32 nonzero = _kor_mask32( + 0xaaaaaaaa, _mm512_cmpneq_epi16_mask(t5, _mm512_setzero_si512())); + if (big_endian) { + t5 = _mm512_shuffle_epi8(t5, byteflip); + } + // we deliberately avoid _mm512_mask_compressstoreu_epi16 for portability + // (zen4) + __m512i compressed = _mm512_maskz_compress_epi16(nonzero, t5); + _mm512_mask_storeu_epi16( + output, + (1 << (count + static_cast(count_ones(sp_mask)))) - 1, + compressed); + //_mm512_mask_compressstoreu_epi16(output, nonzero, t5); + } + + return count + static_cast(count_ones(sp_mask)); +} + +/** + * Store the last N bytes of previous followed by 512-N bytes from input. + */ +template __m512i prev(__m512i input, __m512i previous) { + static_assert(N <= 32, "N must be no larger than 32"); + const __m512i movemask = + _mm512_setr_epi32(28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + const __m512i rotated = _mm512_permutex2var_epi32(input, movemask, previous); +#if SIMDUTF_GCC8 || SIMDUTF_GCC9 + constexpr int shift = 16 - N; // workaround for GCC8,9 + return _mm512_alignr_epi8(input, rotated, shift); +#else + return _mm512_alignr_epi8(input, rotated, 16 - N); +#endif // SIMDUTF_GCC8 || SIMDUTF_GCC9 +} + +template +__m512i shuffle_epi128(__m512i v) { + static_assert((idx0 >= 0 && idx0 <= 3), "idx0 must be in range 0..3"); + static_assert((idx1 >= 0 && idx1 <= 3), "idx1 must be in range 0..3"); + static_assert((idx2 >= 0 && idx2 <= 3), "idx2 must be in range 0..3"); + static_assert((idx3 >= 0 && idx3 <= 3), "idx3 must be in range 0..3"); + + constexpr unsigned shuffle = idx0 | (idx1 << 2) | (idx2 << 4) | (idx3 << 6); + return _mm512_shuffle_i32x4(v, v, shuffle); +} + +template constexpr __m512i broadcast_epi128(__m512i v) { + return shuffle_epi128(v); +} + +/** + * Current unused. + */ +template __m512i rotate_by_N_epi8(const __m512i input) { + + // lanes order: 1, 2, 3, 0 => 0b00_11_10_01 + const __m512i permuted = _mm512_shuffle_i32x4(input, input, 0x39); + + return _mm512_alignr_epi8(permuted, input, N); +} + +/* + expanded_utf8_to_utf32 converts expanded UTF-8 characters (`utf8`) + stored at separate 32-bit lanes. + + For each lane we have also a character class (`char_class), given in form + 0x8080800N, where N is 4 highest bits from the leading byte; 0x80 resets + corresponding bytes during pshufb. +*/ +simdutf_really_inline __m512i expanded_utf8_to_utf32(__m512i char_class, + __m512i utf8) { + /* + Input: + - utf8: bytes stored at separate 32-bit code units + - valid: which code units have valid UTF-8 characters + + Bit layout of single word. We show 4 cases for each possible + UTF-8 character encoding. The `?` denotes bits we must not + assume their value. + + |10dd.dddd|10cc.cccc|10bb.bbbb|1111.0aaa| 4-byte char + |????.????|10cc.cccc|10bb.bbbb|1110.aaaa| 3-byte char + |????.????|????.????|10bb.bbbb|110a.aaaa| 2-byte char + |????.????|????.????|????.????|0aaa.aaaa| ASCII char + byte 3 byte 2 byte 1 byte 0 + */ + + /* 1. Reset control bits of continuation bytes and the MSB + of the leading byte; this makes all bytes unsigned (and + does not alter ASCII char). + + |00dd.dddd|00cc.cccc|00bb.bbbb|0111.0aaa| 4-byte char + |00??.????|00cc.cccc|00bb.bbbb|0110.aaaa| 3-byte char + |00??.????|00??.????|00bb.bbbb|010a.aaaa| 2-byte char + |00??.????|00??.????|00??.????|0aaa.aaaa| ASCII char + ^^ ^^ ^^ ^ + */ + __m512i values; + const __m512i v_3f3f_3f7f = _mm512_set1_epi32(0x3f3f3f7f); + values = _mm512_and_si512(utf8, v_3f3f_3f7f); + + /* 2. Swap and join fields A-B and C-D + + |0000.cccc|ccdd.dddd|0001.110a|aabb.bbbb| 4-byte char + |0000.cccc|cc??.????|0001.10aa|aabb.bbbb| 3-byte char + |0000.????|????.????|0001.0aaa|aabb.bbbb| 2-byte char + |0000.????|????.????|000a.aaaa|aa??.????| ASCII char */ + const __m512i v_0140_0140 = _mm512_set1_epi32(0x01400140); + values = _mm512_maddubs_epi16(values, v_0140_0140); + + /* 3. Swap and join fields AB & CD + + |0000.0001|110a.aabb|bbbb.cccc|ccdd.dddd| 4-byte char + |0000.0001|10aa.aabb|bbbb.cccc|cc??.????| 3-byte char + |0000.0001|0aaa.aabb|bbbb.????|????.????| 2-byte char + |0000.000a|aaaa.aa??|????.????|????.????| ASCII char */ + const __m512i v_0001_1000 = _mm512_set1_epi32(0x00011000); + values = _mm512_madd_epi16(values, v_0001_1000); + + /* 4. Shift left the values by variable amounts to reset highest UTF-8 bits + |aaab.bbbb|bccc.cccd|dddd.d000|0000.0000| 4-byte char -- by 11 + |aaaa.bbbb|bbcc.cccc|????.??00|0000.0000| 3-byte char -- by 10 + |aaaa.abbb|bbb?.????|????.???0|0000.0000| 2-byte char -- by 9 + |aaaa.aaa?|????.????|????.????|?000.0000| ASCII char -- by 7 */ + { + /** pshufb + + continuation = 0 + ascii = 7 + _2_bytes = 9 + _3_bytes = 10 + _4_bytes = 11 + + shift_left_v3 = 4 * [ + ascii, # 0000 + ascii, # 0001 + ascii, # 0010 + ascii, # 0011 + ascii, # 0100 + ascii, # 0101 + ascii, # 0110 + ascii, # 0111 + continuation, # 1000 + continuation, # 1001 + continuation, # 1010 + continuation, # 1011 + _2_bytes, # 1100 + _2_bytes, # 1101 + _3_bytes, # 1110 + _4_bytes, # 1111 + ] */ + const __m512i shift_left_v3 = _mm512_setr_epi64( + 0x0707070707070707, 0x0b0a090900000000, 0x0707070707070707, + 0x0b0a090900000000, 0x0707070707070707, 0x0b0a090900000000, + 0x0707070707070707, 0x0b0a090900000000); + + const __m512i shift = _mm512_shuffle_epi8(shift_left_v3, char_class); + values = _mm512_sllv_epi32(values, shift); + } + + /* 5. Shift right the values by variable amounts to reset lowest bits + |0000.0000|000a.aabb|bbbb.cccc|ccdd.dddd| 4-byte char -- by 11 + |0000.0000|0000.0000|aaaa.bbbb|bbcc.cccc| 3-byte char -- by 16 + |0000.0000|0000.0000|0000.0aaa|aabb.bbbb| 2-byte char -- by 21 + |0000.0000|0000.0000|0000.0000|0aaa.aaaa| ASCII char -- by 25 */ + { + // 4 * [25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 21, 21, 16, 11] + const __m512i shift_right = _mm512_setr_epi64( + 0x1919191919191919, 0x0b10151500000000, 0x1919191919191919, + 0x0b10151500000000, 0x1919191919191919, 0x0b10151500000000, + 0x1919191919191919, 0x0b10151500000000); + + const __m512i shift = _mm512_shuffle_epi8(shift_right, char_class); + values = _mm512_srlv_epi32(values, shift); + } + + return values; +} + +simdutf_really_inline __m512i expand_and_identify(__m512i lane0, __m512i lane1, + int &count) { + const __m512i merged = _mm512_mask_mov_epi32(lane0, 0x1000, lane1); + const __m512i expand_ver2 = _mm512_setr_epi64( + 0x0403020103020100, 0x0605040305040302, 0x0807060507060504, + 0x0a09080709080706, 0x0c0b0a090b0a0908, 0x0e0d0c0b0d0c0b0a, + 0x000f0e0d0f0e0d0c, 0x0201000f01000f0e); + const __m512i input = _mm512_shuffle_epi8(merged, expand_ver2); + const __m512i v_0000_00c0 = _mm512_set1_epi32(0xc0); + const __m512i t0 = _mm512_and_si512(input, v_0000_00c0); + const __m512i v_0000_0080 = _mm512_set1_epi32(0x80); + const __mmask16 leading_bytes = _mm512_cmpneq_epu32_mask(t0, v_0000_0080); + count = static_cast(count_ones(leading_bytes)); + return _mm512_mask_compress_epi32(_mm512_setzero_si512(), leading_bytes, + input); +} + +simdutf_really_inline __m512i expand_utf8_to_utf32(__m512i input) { + __m512i char_class = _mm512_srli_epi32(input, 4); + /* char_class = ((input >> 4) & 0x0f) | 0x80808000 */ + const __m512i v_0000_000f = _mm512_set1_epi32(0x0f); + const __m512i v_8080_8000 = _mm512_set1_epi32(0x80808000); + char_class = + _mm512_ternarylogic_epi32(char_class, v_0000_000f, v_8080_8000, 0xea); + return expanded_utf8_to_utf32(char_class, input); +} +/* end file src/icelake/icelake_utf8_common.inl.cpp */ +/* begin file src/icelake/icelake_macros.inl.cpp */ + +/* + This upcoming macro (SIMDUTF_ICELAKE_TRANSCODE16) takes 16 + 4 bytes (of a + UTF-8 string) and loads all possible 4-byte substring into an AVX512 + register. + + For example if we have bytes abcdefgh... we create following 32-bit lanes + + [abcd|bcde|cdef|defg|efgh|...] + ^ ^ + byte 0 of reg byte 63 of reg +*/ +/** pshufb + # lane{0,1,2} have got bytes: [ 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, + 11, 12, 13, 14, 15] # lane3 has got bytes: [ 16, 17, 18, 19, 4, 5, + 6, 8, 9, 10, 11, 12, 13, 14, 15] + + expand_ver2 = [ + # lane 0: + 0, 1, 2, 3, + 1, 2, 3, 4, + 2, 3, 4, 5, + 3, 4, 5, 6, + + # lane 1: + 4, 5, 6, 7, + 5, 6, 7, 8, + 6, 7, 8, 9, + 7, 8, 9, 10, + + # lane 2: + 8, 9, 10, 11, + 9, 10, 11, 12, + 10, 11, 12, 13, + 11, 12, 13, 14, + + # lane 3 order: 13, 14, 15, 16 14, 15, 16, 17, 15, 16, 17, 18, 16, + 17, 18, 19 12, 13, 14, 15, 13, 14, 15, 0, 14, 15, 0, 1, 15, 0, 1, 2, + ] +*/ + +#define SIMDUTF_ICELAKE_TRANSCODE16(LANE0, LANE1, MASKED) \ + { \ + const __m512i merged = _mm512_mask_mov_epi32(LANE0, 0x1000, LANE1); \ + const __m512i expand_ver2 = _mm512_setr_epi64( \ + 0x0403020103020100, 0x0605040305040302, 0x0807060507060504, \ + 0x0a09080709080706, 0x0c0b0a090b0a0908, 0x0e0d0c0b0d0c0b0a, \ + 0x000f0e0d0f0e0d0c, 0x0201000f01000f0e); \ + const __m512i input = _mm512_shuffle_epi8(merged, expand_ver2); \ + \ + __mmask16 leading_bytes; \ + const __m512i v_0000_00c0 = _mm512_set1_epi32(0xc0); \ + const __m512i t0 = _mm512_and_si512(input, v_0000_00c0); \ + const __m512i v_0000_0080 = _mm512_set1_epi32(0x80); \ + leading_bytes = _mm512_cmpneq_epu32_mask(t0, v_0000_0080); \ + \ + __m512i char_class; \ + char_class = _mm512_srli_epi32(input, 4); \ + /* char_class = ((input >> 4) & 0x0f) | 0x80808000 */ \ + const __m512i v_0000_000f = _mm512_set1_epi32(0x0f); \ + const __m512i v_8080_8000 = _mm512_set1_epi32(0x80808000); \ + char_class = \ + _mm512_ternarylogic_epi32(char_class, v_0000_000f, v_8080_8000, 0xea); \ + \ + const int valid_count = static_cast(count_ones(leading_bytes)); \ + const __m512i utf32 = expanded_utf8_to_utf32(char_class, input); \ + \ + const __m512i out = _mm512_mask_compress_epi32(_mm512_setzero_si512(), \ + leading_bytes, utf32); \ + \ + if (UTF32) { \ + if (MASKED) { \ + const __mmask16 valid = uint16_t((1 << valid_count) - 1); \ + _mm512_mask_storeu_epi32((__m512i *)output, valid, out); \ + } else { \ + _mm512_storeu_si512((__m512i *)output, out); \ + } \ + output += valid_count; \ + } else { \ + if (MASKED) { \ + output += utf32_to_utf16_masked( \ + byteflip, out, valid_count, reinterpret_cast(output)); \ + } else { \ + output += utf32_to_utf16( \ + byteflip, out, valid_count, reinterpret_cast(output)); \ + } \ + } \ + } + +#define SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(INPUT, VALID_COUNT, MASKED) \ + { \ + if (UTF32) { \ + if (MASKED) { \ + const __mmask16 valid_mask = uint16_t((1 << VALID_COUNT) - 1); \ + _mm512_mask_storeu_epi32((__m512i *)output, valid_mask, INPUT); \ + } else { \ + _mm512_storeu_si512((__m512i *)output, INPUT); \ + } \ + output += VALID_COUNT; \ + } else { \ + if (MASKED) { \ + output += utf32_to_utf16_masked( \ + byteflip, INPUT, VALID_COUNT, \ + reinterpret_cast(output)); \ + } else { \ + output += \ + utf32_to_utf16(byteflip, INPUT, VALID_COUNT, \ + reinterpret_cast(output)); \ + } \ + } \ + } + +#define SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) \ + if (UTF32) { \ + const __m128i t0 = _mm512_castsi512_si128(utf8); \ + const __m128i t1 = _mm512_extracti32x4_epi32(utf8, 1); \ + const __m128i t2 = _mm512_extracti32x4_epi32(utf8, 2); \ + const __m128i t3 = _mm512_extracti32x4_epi32(utf8, 3); \ + _mm512_storeu_si512((__m512i *)(output + 0 * 16), \ + _mm512_cvtepu8_epi32(t0)); \ + _mm512_storeu_si512((__m512i *)(output + 1 * 16), \ + _mm512_cvtepu8_epi32(t1)); \ + _mm512_storeu_si512((__m512i *)(output + 2 * 16), \ + _mm512_cvtepu8_epi32(t2)); \ + _mm512_storeu_si512((__m512i *)(output + 3 * 16), \ + _mm512_cvtepu8_epi32(t3)); \ + } else { \ + const __m256i h0 = _mm512_castsi512_si256(utf8); \ + const __m256i h1 = _mm512_extracti64x4_epi64(utf8, 1); \ + if (big_endian) { \ + _mm512_storeu_si512( \ + (__m512i *)(output + 0 * 16), \ + _mm512_shuffle_epi8(_mm512_cvtepu8_epi16(h0), byteflip)); \ + _mm512_storeu_si512( \ + (__m512i *)(output + 2 * 16), \ + _mm512_shuffle_epi8(_mm512_cvtepu8_epi16(h1), byteflip)); \ + } else { \ + _mm512_storeu_si512((__m512i *)(output + 0 * 16), \ + _mm512_cvtepu8_epi16(h0)); \ + _mm512_storeu_si512((__m512i *)(output + 2 * 16), \ + _mm512_cvtepu8_epi16(h1)); \ + } \ + } +/* end file src/icelake/icelake_macros.inl.cpp */ +/* begin file src/icelake/icelake_from_valid_utf8.inl.cpp */ +// file included directly + +// File contains conversion procedure from VALID UTF-8 strings. + +/* + valid_utf8_to_fixed_length converts a valid UTF-8 string into UTF-32. + + The `OUTPUT` template type decides what to do with UTF-32: store + it directly or convert into UTF-16 (with AVX512). + + Input: + - str - valid UTF-8 string + - len - string length + - out_buffer - output buffer + + Result: + - pair.first - the first unprocessed input byte + - pair.second - the first unprocessed output word +*/ +template +std::pair +valid_utf8_to_fixed_length(const char *str, size_t len, OUTPUT *dwords) { + constexpr bool UTF32 = std::is_same::value; + constexpr bool UTF16 = std::is_same::value; + static_assert( + UTF32 or UTF16, + "output type has to be uint32_t (for UTF-32) or char16_t (for UTF-16)"); + static_assert(!(UTF32 and big_endian), + "we do not currently support big-endian UTF-32"); + + __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + const char *ptr = str; + const char *end = ptr + len; + + OUTPUT *output = dwords; + /** + * In the main loop, we consume 64 bytes per iteration, + * but we access 64 + 4 bytes. + * We check for ptr + 64 + 64 <= end because + * we want to be do maskless writes without overruns. + */ + while (end - ptr >= 64 + 4) { + const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); + const __m512i v_80 = _mm512_set1_epi8(char(0x80)); + const __mmask64 ascii = _mm512_test_epi8_mask(utf8, v_80); + if (ascii == 0) { + SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) + output += 64; + ptr += 64; + continue; + } + + const __m512i lane0 = broadcast_epi128<0>(utf8); + const __m512i lane1 = broadcast_epi128<1>(utf8); + int valid_count0; + __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); + const __m512i lane2 = broadcast_epi128<2>(utf8); + int valid_count1; + __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); + if (valid_count0 + valid_count1 <= 16) { + vec0 = _mm512_mask_expand_epi32( + vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); + valid_count0 += valid_count1; + vec0 = expand_utf8_to_utf32(vec0); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + } else { + vec0 = expand_utf8_to_utf32(vec0); + vec1 = expand_utf8_to_utf32(vec1); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) + } + const __m512i lane3 = broadcast_epi128<3>(utf8); + int valid_count2; + __m512i vec2 = expand_and_identify(lane2, lane3, valid_count2); + uint32_t tmp1; + ::memcpy(&tmp1, ptr + 64, sizeof(tmp1)); + const __m512i lane4 = _mm512_set1_epi32(tmp1); + int valid_count3; + __m512i vec3 = expand_and_identify(lane3, lane4, valid_count3); + if (valid_count2 + valid_count3 <= 16) { + vec2 = _mm512_mask_expand_epi32( + vec2, __mmask16(((1 << valid_count3) - 1) << valid_count2), vec3); + valid_count2 += valid_count3; + vec2 = expand_utf8_to_utf32(vec2); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) + } else { + vec2 = expand_utf8_to_utf32(vec2); + vec3 = expand_utf8_to_utf32(vec3); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec3, valid_count3, true) + } + ptr += 4 * 16; + } + + if (end - ptr >= 64) { + const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); + const __m512i v_80 = _mm512_set1_epi8(char(0x80)); + const __mmask64 ascii = _mm512_test_epi8_mask(utf8, v_80); + if (ascii == 0) { + SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) + output += 64; + ptr += 64; + } else { + const __m512i lane0 = broadcast_epi128<0>(utf8); + const __m512i lane1 = broadcast_epi128<1>(utf8); + int valid_count0; + __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); + const __m512i lane2 = broadcast_epi128<2>(utf8); + int valid_count1; + __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); + if (valid_count0 + valid_count1 <= 16) { + vec0 = _mm512_mask_expand_epi32( + vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); + valid_count0 += valid_count1; + vec0 = expand_utf8_to_utf32(vec0); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + } else { + vec0 = expand_utf8_to_utf32(vec0); + vec1 = expand_utf8_to_utf32(vec1); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) + } + + const __m512i lane3 = broadcast_epi128<3>(utf8); + SIMDUTF_ICELAKE_TRANSCODE16(lane2, lane3, true) + + ptr += 3 * 16; + } + } + return {ptr, output}; +} + +using utf8_to_utf16_result = std::pair; +/* end file src/icelake/icelake_from_valid_utf8.inl.cpp */ +/* begin file src/icelake/icelake_utf8_validation.inl.cpp */ +// file included directly + +simdutf_really_inline __m512i check_special_cases(__m512i input, + const __m512i prev1) { + __m512i mask1 = _mm512_setr_epi64(0x0202020202020202, 0x4915012180808080, + 0x0202020202020202, 0x4915012180808080, + 0x0202020202020202, 0x4915012180808080, + 0x0202020202020202, 0x4915012180808080); + const __m512i v_0f = _mm512_set1_epi8(0x0f); + __m512i index1 = _mm512_and_si512(_mm512_srli_epi16(prev1, 4), v_0f); + + __m512i byte_1_high = _mm512_shuffle_epi8(mask1, index1); + __m512i mask2 = _mm512_setr_epi64(0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb, + 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb, + 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb, + 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb); + __m512i index2 = _mm512_and_si512(prev1, v_0f); + + __m512i byte_1_low = _mm512_shuffle_epi8(mask2, index2); + __m512i mask3 = + _mm512_setr_epi64(0x101010101010101, 0x1010101babaaee6, 0x101010101010101, + 0x1010101babaaee6, 0x101010101010101, 0x1010101babaaee6, + 0x101010101010101, 0x1010101babaaee6); + __m512i index3 = _mm512_and_si512(_mm512_srli_epi16(input, 4), v_0f); + __m512i byte_2_high = _mm512_shuffle_epi8(mask3, index3); + return _mm512_ternarylogic_epi64(byte_1_high, byte_1_low, byte_2_high, 128); +} + +simdutf_really_inline __m512i check_multibyte_lengths(const __m512i input, + const __m512i prev_input, + const __m512i sc) { + __m512i prev2 = prev<2>(input, prev_input); + __m512i prev3 = prev<3>(input, prev_input); + __m512i is_third_byte = _mm512_subs_epu8( + prev2, _mm512_set1_epi8(0b11100000u - 1)); // Only 111_____ will be > 0 + __m512i is_fourth_byte = _mm512_subs_epu8( + prev3, _mm512_set1_epi8(0b11110000u - 1)); // Only 1111____ will be > 0 + __m512i is_third_or_fourth_byte = + _mm512_or_si512(is_third_byte, is_fourth_byte); + const __m512i v_7f = _mm512_set1_epi8(char(0x7f)); + is_third_or_fourth_byte = _mm512_adds_epu8(v_7f, is_third_or_fourth_byte); + // We want to compute (is_third_or_fourth_byte AND v80) XOR sc. + const __m512i v_80 = _mm512_set1_epi8(char(0x80)); + return _mm512_ternarylogic_epi32(is_third_or_fourth_byte, v_80, sc, + 0b1101010); + //__m512i is_third_or_fourth_byte_mask = + //_mm512_and_si512(is_third_or_fourth_byte, v_80); return + // _mm512_xor_si512(is_third_or_fourth_byte_mask, sc); +} +// +// Return nonzero if there are incomplete multibyte characters at the end of the +// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. +// +simdutf_really_inline __m512i is_incomplete(const __m512i input) { + // If the previous input's last 3 bytes match this, they're too short (they + // ended at EOF): + // ... 1111____ 111_____ 11______ + __m512i max_value = _mm512_setr_epi64(0xffffffffffffffff, 0xffffffffffffffff, + 0xffffffffffffffff, 0xffffffffffffffff, + 0xffffffffffffffff, 0xffffffffffffffff, + 0xffffffffffffffff, 0xbfdfefffffffffff); + return _mm512_subs_epu8(input, max_value); +} + +struct avx512_utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + __m512i error{}; + + // The last input we received + __m512i prev_input_block{}; + // Whether the last input we received was incomplete (used for ASCII fast + // path) + __m512i prev_incomplete{}; + + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const __m512i input, + const __m512i prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + __m512i prev1 = prev<1>(input, prev_input); + __m512i sc = check_special_cases(input, prev1); + this->error = _mm512_or_si512( + check_multibyte_lengths(input, prev_input, sc), this->error); + } + + // The only problem that can happen at EOF is that a multibyte character is + // too short or a byte value too large in the last bytes: check_special_cases + // only checks for bytes too large in the first of two bytes. + simdutf_really_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an + // ASCII block can't possibly finish them. + this->error = _mm512_or_si512(this->error, this->prev_incomplete); + } + + // returns true if ASCII. + simdutf_really_inline bool check_next_input(const __m512i input) { + const __m512i v_80 = _mm512_set1_epi8(char(0x80)); + const __mmask64 ascii = _mm512_test_epi8_mask(input, v_80); + if (ascii == 0) { + this->error = _mm512_or_si512(this->error, this->prev_incomplete); + return true; + } else { + this->check_utf8_bytes(input, this->prev_input_block); + this->prev_incomplete = is_incomplete(input); + this->prev_input_block = input; + return false; + } + } + // do not forget to call check_eof! + simdutf_really_inline bool errors() const { + return _mm512_test_epi8_mask(this->error, this->error) != 0; + } +}; // struct avx512_utf8_checker +/* end file src/icelake/icelake_utf8_validation.inl.cpp */ +/* begin file src/icelake/icelake_from_utf8.inl.cpp */ +// file included directly + +// File contains conversion procedure from possibly invalid UTF-8 strings. + +/** + * Attempts to convert up to len 1-byte code units from in (in UTF-8 format) to + * out. + * Returns the position of the input and output after the processing is + * completed. Upon error, the output is set to null. + */ + +template +utf8_to_utf16_result +fast_avx512_convert_utf8_to_utf16(const char *in, size_t len, char16_t *out) { + const char *const final_in = in + len; + bool result = true; + while (result) { + if (final_in - in >= 64) { + result = process_block_utf8_to_utf16( + in, out, final_in - in); + } else if (in < final_in) { + result = process_block_utf8_to_utf16( + in, out, final_in - in); + } else { + break; + } + } + if (!result) { + out = nullptr; + } + return std::make_pair(in, out); +} + +template +simdutf::result fast_avx512_convert_utf8_to_utf16_with_errors(const char *in, + size_t len, + char16_t *out) { + const char *const init_in = in; + const char16_t *const init_out = out; + const char *const final_in = in + len; + bool result = true; + while (result) { + if (final_in - in >= 64) { + result = process_block_utf8_to_utf16( + in, out, final_in - in); + } else if (in < final_in) { + result = process_block_utf8_to_utf16( + in, out, final_in - in); + } else { + break; + } + } + if (!result) { + size_t pos = size_t(in - init_in); + if (pos < len && (init_in[pos] & 0xc0) == 0x80 && pos >= 64) { + // We must check whether we are the fourth continuation byte + bool c1 = (init_in[pos - 1] & 0xc0) == 0x80; + bool c2 = (init_in[pos - 2] & 0xc0) == 0x80; + bool c3 = (init_in[pos - 3] & 0xc0) == 0x80; + if (c1 && c2 && c3) { + return {simdutf::TOO_LONG, pos}; + } + } + // rewind_and_convert_with_errors will seek a potential error from in + // onward, with the ability to go back up to in - init_in bytes, and read + // final_in - in bytes forward. + simdutf::result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + in - init_in, in, final_in - in, out); + res.count += (in - init_in); + return res; + } else { + return simdutf::result(error_code::SUCCESS, out - init_out); + } +} + +template +// todo: replace with the utf-8 to utf-16 routine adapted to utf-32. This code +// is legacy. +std::pair +validating_utf8_to_fixed_length(const char *str, size_t len, OUTPUT *dwords) { + constexpr bool UTF32 = std::is_same::value; + constexpr bool UTF16 = std::is_same::value; + static_assert( + UTF32 or UTF16, + "output type has to be uint32_t (for UTF-32) or char16_t (for UTF-16)"); + static_assert(!(UTF32 and big_endian), + "we do not currently support big-endian UTF-32"); + + const char *ptr = str; + const char *end = ptr + len; + __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + OUTPUT *output = dwords; + avx512_utf8_checker checker{}; + /** + * In the main loop, we consume 64 bytes per iteration, + * but we access 64 + 4 bytes. + * We use masked writes to avoid overruns, see + * https://github.com/simdutf/simdutf/issues/471 + */ + while (end - ptr >= 64 + 4) { + const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); + if (checker.check_next_input(utf8)) { + SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) + output += 64; + ptr += 64; + continue; + } + const __m512i lane0 = broadcast_epi128<0>(utf8); + const __m512i lane1 = broadcast_epi128<1>(utf8); + int valid_count0; + __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); + const __m512i lane2 = broadcast_epi128<2>(utf8); + int valid_count1; + __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); + if (valid_count0 + valid_count1 <= 16) { + vec0 = _mm512_mask_expand_epi32( + vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); + valid_count0 += valid_count1; + vec0 = expand_utf8_to_utf32(vec0); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + } else { + vec0 = expand_utf8_to_utf32(vec0); + vec1 = expand_utf8_to_utf32(vec1); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) + } + const __m512i lane3 = broadcast_epi128<3>(utf8); + int valid_count2; + __m512i vec2 = expand_and_identify(lane2, lane3, valid_count2); + uint32_t tmp1; + ::memcpy(&tmp1, ptr + 64, sizeof(tmp1)); + const __m512i lane4 = _mm512_set1_epi32(tmp1); + int valid_count3; + __m512i vec3 = expand_and_identify(lane3, lane4, valid_count3); + if (valid_count2 + valid_count3 <= 16) { + vec2 = _mm512_mask_expand_epi32( + vec2, __mmask16(((1 << valid_count3) - 1) << valid_count2), vec3); + valid_count2 += valid_count3; + vec2 = expand_utf8_to_utf32(vec2); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) + } else { + vec2 = expand_utf8_to_utf32(vec2); + vec3 = expand_utf8_to_utf32(vec3); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec3, valid_count3, true) + } + ptr += 4 * 16; + } + const char *validatedptr = ptr; // validated up to ptr + + // For the final pass, we validate 64 bytes, but we only transcode + // 3*16 bytes, so we may end up double-validating 16 bytes. + if (end - ptr >= 64) { + const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); + if (checker.check_next_input(utf8)) { + SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) + output += 64; + ptr += 64; + } else { + const __m512i lane0 = broadcast_epi128<0>(utf8); + const __m512i lane1 = broadcast_epi128<1>(utf8); + int valid_count0; + __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); + const __m512i lane2 = broadcast_epi128<2>(utf8); + int valid_count1; + __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); + if (valid_count0 + valid_count1 <= 16) { + vec0 = _mm512_mask_expand_epi32( + vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); + valid_count0 += valid_count1; + vec0 = expand_utf8_to_utf32(vec0); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + } else { + vec0 = expand_utf8_to_utf32(vec0); + vec1 = expand_utf8_to_utf32(vec1); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) + } + + const __m512i lane3 = broadcast_epi128<3>(utf8); + SIMDUTF_ICELAKE_TRANSCODE16(lane2, lane3, true) + + ptr += 3 * 16; + } + validatedptr += 4 * 16; + } + if (end != validatedptr) { + const __m512i utf8 = + _mm512_maskz_loadu_epi8(~UINT64_C(0) >> (64 - (end - validatedptr)), + (const __m512i *)validatedptr); + checker.check_next_input(utf8); + } + checker.check_eof(); + if (checker.errors()) { + return {ptr, nullptr}; // We found an error. + } + return {ptr, output}; +} + +// Like validating_utf8_to_fixed_length but returns as soon as an error is +// identified todo: replace with the utf-8 to utf-16 routine adapted to utf-32. +// This code is legacy. +template +std::tuple +validating_utf8_to_fixed_length_with_constant_checks(const char *str, + size_t len, + OUTPUT *dwords) { + constexpr bool UTF32 = std::is_same::value; + constexpr bool UTF16 = std::is_same::value; + static_assert( + UTF32 or UTF16, + "output type has to be uint32_t (for UTF-32) or char16_t (for UTF-16)"); + static_assert(!(UTF32 and big_endian), + "we do not currently support big-endian UTF-32"); + + const char *ptr = str; + const char *end = ptr + len; + __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + OUTPUT *output = dwords; + avx512_utf8_checker checker{}; + /** + * In the main loop, we consume 64 bytes per iteration, + * but we access 64 + 4 bytes. + */ + while (end - ptr >= 4 + 64) { + const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); + bool ascii = checker.check_next_input(utf8); + if (checker.errors()) { + return {ptr, output, false}; // We found an error. + } + if (ascii) { + SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) + output += 64; + ptr += 64; + continue; + } + const __m512i lane0 = broadcast_epi128<0>(utf8); + const __m512i lane1 = broadcast_epi128<1>(utf8); + int valid_count0; + __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); + const __m512i lane2 = broadcast_epi128<2>(utf8); + int valid_count1; + __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); + if (valid_count0 + valid_count1 <= 16) { + vec0 = _mm512_mask_expand_epi32( + vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); + valid_count0 += valid_count1; + vec0 = expand_utf8_to_utf32(vec0); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + } else { + vec0 = expand_utf8_to_utf32(vec0); + vec1 = expand_utf8_to_utf32(vec1); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) + } + const __m512i lane3 = broadcast_epi128<3>(utf8); + int valid_count2; + __m512i vec2 = expand_and_identify(lane2, lane3, valid_count2); + uint32_t tmp1; + ::memcpy(&tmp1, ptr + 64, sizeof(tmp1)); + const __m512i lane4 = _mm512_set1_epi32(tmp1); + int valid_count3; + __m512i vec3 = expand_and_identify(lane3, lane4, valid_count3); + if (valid_count2 + valid_count3 <= 16) { + vec2 = _mm512_mask_expand_epi32( + vec2, __mmask16(((1 << valid_count3) - 1) << valid_count2), vec3); + valid_count2 += valid_count3; + vec2 = expand_utf8_to_utf32(vec2); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) + } else { + vec2 = expand_utf8_to_utf32(vec2); + vec3 = expand_utf8_to_utf32(vec3); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec3, valid_count3, true) + } + ptr += 4 * 16; + } + const char *validatedptr = ptr; // validated up to ptr -namespace simdutf { -namespace { -namespace tables { -namespace utf16_to_utf8 { + // For the final pass, we validate 64 bytes, but we only transcode + // 3*16 bytes, so we may end up double-validating 16 bytes. + if (end - ptr >= 64) { + const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); + bool ascii = checker.check_next_input(utf8); + if (checker.errors()) { + return {ptr, output, false}; // We found an error. + } + if (ascii) { + SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) + output += 64; + ptr += 64; + } else { + const __m512i lane0 = broadcast_epi128<0>(utf8); + const __m512i lane1 = broadcast_epi128<1>(utf8); + int valid_count0; + __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); + const __m512i lane2 = broadcast_epi128<2>(utf8); + int valid_count1; + __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); + if (valid_count0 + valid_count1 <= 16) { + vec0 = _mm512_mask_expand_epi32( + vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); + valid_count0 += valid_count1; + vec0 = expand_utf8_to_utf32(vec0); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + } else { + vec0 = expand_utf8_to_utf32(vec0); + vec1 = expand_utf8_to_utf32(vec1); + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) + SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) + } -// 1 byte for length, 16 bytes for mask -const uint8_t pack_1_2_utf8_bytes[256][17] = { - {16, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, - {15, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80}, - {15, 1, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80}, - {14, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {15, 1, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80}, - {14, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {14, 1, 0, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {13, 0, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {15, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80}, - {14, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80}, - {14, 1, 0, 3, 2, 5, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80}, - {13, 0, 3, 2, 5, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {14, 1, 0, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80}, - {13, 0, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 5, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 5, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {15, 1, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80}, - {14, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {14, 1, 0, 3, 2, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {13, 0, 3, 2, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {14, 1, 0, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {13, 0, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {14, 1, 0, 3, 2, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80}, - {13, 0, 3, 2, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {15, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80}, - {14, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80}, - {14, 1, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80}, - {13, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {14, 1, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80}, - {13, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 5, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 5, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {14, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80}, - {13, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 5, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 5, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 5, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 5, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 5, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 5, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {14, 1, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80}, - {13, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {15, 1, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80}, - {14, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {14, 1, 0, 3, 2, 5, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {13, 0, 3, 2, 5, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {14, 1, 0, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {13, 0, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 5, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 5, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {14, 1, 0, 3, 2, 5, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80}, - {13, 0, 3, 2, 5, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 5, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 5, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 5, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 5, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 5, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 5, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {14, 1, 0, 3, 2, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {13, 0, 3, 2, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {14, 1, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80}, - {13, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 5, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 5, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 5, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 5, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 5, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 5, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 5, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 5, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 5, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 5, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 5, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 5, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 5, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 5, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 3, 2, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 3, 2, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 1, 0, 2, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 0, 2, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {15, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80}, - {14, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80}, - {14, 1, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80}, - {13, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {14, 1, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80}, - {13, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {14, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80}, - {13, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 5, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 5, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 5, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 5, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {14, 1, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80}, - {13, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {14, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80}, - {13, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 5, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 5, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 5, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 5, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 5, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 5, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 5, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 5, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 3, 2, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 3, 2, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 1, 0, 2, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 0, 2, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {14, 1, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80}, - {13, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 5, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 5, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 5, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 5, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 5, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 5, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 5, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 5, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 5, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 5, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 5, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 5, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 3, 2, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 3, 2, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 1, 0, 2, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 0, 2, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {13, 1, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 5, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 5, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 5, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 5, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 5, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 5, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 5, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 5, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 3, 2, 5, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 3, 2, 5, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 5, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 5, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 1, 0, 2, 5, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 0, 2, 5, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 3, 2, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 3, 2, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 1, 0, 2, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 0, 2, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {11, 1, 0, 3, 2, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 3, 2, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 1, 0, 3, 2, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 0, 3, 2, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 1, 0, 2, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 0, 2, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 1, 0, 2, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {8, 0, 2, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}}; + const __m512i lane3 = broadcast_epi128<3>(utf8); + SIMDUTF_ICELAKE_TRANSCODE16(lane2, lane3, true) + + ptr += 3 * 16; + } + validatedptr += 4 * 16; + } + if (end != validatedptr) { + const __m512i utf8 = + _mm512_maskz_loadu_epi8(~UINT64_C(0) >> (64 - (end - validatedptr)), + (const __m512i *)validatedptr); + checker.check_next_input(utf8); + } + checker.check_eof(); + if (checker.errors()) { + return {ptr, output, false}; // We found an error. + } + return {ptr, output, true}; +} +/* end file src/icelake/icelake_from_utf8.inl.cpp */ +/* begin file src/icelake/icelake_convert_utf8_to_latin1.inl.cpp */ +// file included directly + +// File contains conversion procedure from possibly invalid UTF-8 strings. + +template +simdutf_really_inline size_t process_block_from_utf8_to_latin1( + const char *buf, size_t len, char *latin_output, __m512i minus64, + __m512i one, __mmask64 *next_leading_ptr, __mmask64 *next_bit6_ptr) { + __mmask64 load_mask = + is_remaining ? _bzhi_u64(~0ULL, (unsigned int)len) : ~0ULL; + __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)buf); + __mmask64 nonascii = _mm512_movepi8_mask(input); + if (nonascii == 0) { + if (*next_leading_ptr) { // If we ended with a leading byte, it is an error. + return 0; // Indicates error + } + is_remaining + ? _mm512_mask_storeu_epi8((__m512i *)latin_output, load_mask, input) + : _mm512_storeu_si512((__m512i *)latin_output, input); + return len; + } + + const __mmask64 leading = _mm512_cmpge_epu8_mask(input, minus64); + + __m512i highbits = _mm512_xor_si512(input, _mm512_set1_epi8(-62)); + __mmask64 invalid_leading_bytes = + _mm512_mask_cmpgt_epu8_mask(leading, highbits, one); + + if (invalid_leading_bytes) { + return 0; // Indicates error + } + + __mmask64 leading_shift = (leading << 1) | *next_leading_ptr; + + if ((nonascii ^ leading) != leading_shift) { + return 0; // Indicates error + } + + const __mmask64 bit6 = _mm512_cmpeq_epi8_mask(highbits, one); + input = + _mm512_mask_sub_epi8(input, (bit6 << 1) | *next_bit6_ptr, input, minus64); + + __mmask64 retain = ~leading & load_mask; + __m512i output = _mm512_maskz_compress_epi8(retain, input); + int64_t written_out = count_ones(retain); + if (written_out == 0) { + return 0; // Indicates error + } + *next_bit6_ptr = bit6 >> 63; + *next_leading_ptr = leading >> 63; + + __mmask64 store_mask = ~UINT64_C(0) >> (64 - written_out); + + _mm512_mask_storeu_epi8((__m512i *)latin_output, store_mask, output); + + return written_out; +} + +size_t utf8_to_latin1_avx512(const char *&inbuf, size_t len, + char *&inlatin_output) { + const char *buf = inbuf; + char *latin_output = inlatin_output; + char *start = latin_output; + size_t pos = 0; + __m512i minus64 = _mm512_set1_epi8(-64); // 11111111111 ... 1100 0000 + __m512i one = _mm512_set1_epi8(1); + __mmask64 next_leading = 0; + __mmask64 next_bit6 = 0; + + while (pos + 64 <= len) { + size_t written = process_block_from_utf8_to_latin1( + buf + pos, 64, latin_output, minus64, one, &next_leading, &next_bit6); + if (written == 0) { + inlatin_output = latin_output; + inbuf = buf + pos - next_leading; + return 0; // Indicates error at pos or after, or just before pos (too + // short error) + } + latin_output += written; + pos += 64; + } + + if (pos < len) { + size_t remaining = len - pos; + size_t written = process_block_from_utf8_to_latin1( + buf + pos, remaining, latin_output, minus64, one, &next_leading, + &next_bit6); + if (written == 0) { + inbuf = buf + pos - next_leading; + inlatin_output = latin_output; + return 0; // Indicates error at pos or after, or just before pos (too + // short error) + } + latin_output += written; + } + if (next_leading) { + inbuf = buf + len - next_leading; + inlatin_output = latin_output; + return 0; // Indicates error at end of buffer + } + inlatin_output = latin_output; + inbuf += len; + return size_t(latin_output - start); +} +/* end file src/icelake/icelake_convert_utf8_to_latin1.inl.cpp */ +/* begin file src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp */ +// file included directly + +// File contains conversion procedure from valid UTF-8 strings. + +template +simdutf_really_inline size_t process_valid_block_from_utf8_to_latin1( + const char *buf, size_t len, char *latin_output, __m512i minus64, + __m512i one, __mmask64 *next_leading_ptr, __mmask64 *next_bit6_ptr) { + __mmask64 load_mask = + is_remaining ? _bzhi_u64(~0ULL, (unsigned int)len) : ~0ULL; + __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)buf); + __mmask64 nonascii = _mm512_movepi8_mask(input); + + if (nonascii == 0) { + is_remaining + ? _mm512_mask_storeu_epi8((__m512i *)latin_output, load_mask, input) + : _mm512_storeu_si512((__m512i *)latin_output, input); + return len; + } -// 1 byte for length, 16 bytes for mask -const uint8_t pack_1_2_3_utf8_bytes[256][17] = { - {12, 2, 3, 1, 6, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80}, - {9, 6, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {11, 3, 1, 6, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 6, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 2, 3, 1, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 3, 1, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {11, 2, 3, 1, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80}, - {8, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {10, 3, 1, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 0, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {10, 2, 3, 1, 4, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 4, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 3, 1, 4, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 0, 4, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 2, 3, 1, 6, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {6, 6, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 3, 1, 6, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 6, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 2, 3, 1, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 3, 1, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {8, 2, 3, 1, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 2, 3, 1, 4, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 4, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 3, 1, 4, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 0, 4, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {11, 2, 3, 1, 6, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80}, - {8, 6, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {10, 3, 1, 6, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 0, 6, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 2, 3, 1, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {10, 2, 3, 1, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 3, 1, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 0, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 2, 3, 1, 4, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 4, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 3, 1, 4, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 4, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {10, 2, 3, 1, 6, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 6, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 3, 1, 6, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {8, 0, 6, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 2, 3, 1, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 3, 1, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 0, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 2, 3, 1, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {6, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 3, 1, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 2, 3, 1, 4, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 4, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 4, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 4, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 2, 3, 1, 6, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {6, 6, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 3, 1, 6, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 6, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 2, 3, 1, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 3, 1, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {8, 2, 3, 1, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 2, 3, 1, 4, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 4, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 3, 1, 4, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 0, 4, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 2, 3, 1, 6, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {3, 6, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 3, 1, 6, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 6, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 2, 3, 1, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {2, 3, 1, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {5, 2, 3, 1, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {4, 3, 1, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 0, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {4, 2, 3, 1, 4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {1, 4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {3, 3, 1, 4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {2, 0, 4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {8, 2, 3, 1, 6, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 6, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 6, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 6, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 2, 3, 1, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {4, 3, 1, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 0, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {7, 2, 3, 1, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 3, 1, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 0, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 2, 3, 1, 4, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {3, 4, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 3, 1, 4, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 4, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 2, 3, 1, 6, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 6, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 3, 1, 6, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 0, 6, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 2, 3, 1, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {1, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {3, 3, 1, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {2, 0, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 2, 3, 1, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {3, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 3, 1, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 2, 3, 1, 4, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 4, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {4, 3, 1, 4, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 0, 4, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {11, 2, 3, 1, 6, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80}, - {8, 6, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {10, 3, 1, 6, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 0, 6, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 2, 3, 1, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {10, 2, 3, 1, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 3, 1, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 0, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 2, 3, 1, 4, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 4, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 3, 1, 4, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 4, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 2, 3, 1, 6, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 6, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 6, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 6, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 2, 3, 1, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {4, 3, 1, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {3, 0, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {7, 2, 3, 1, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 3, 1, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 0, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 2, 3, 1, 4, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 4, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 3, 1, 4, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 4, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {10, 2, 3, 1, 6, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 6, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 3, 1, 6, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {8, 0, 6, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 2, 3, 1, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 3, 1, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 0, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 2, 3, 1, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {6, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 3, 1, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 2, 3, 1, 4, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 4, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 4, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 4, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 2, 3, 1, 6, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {6, 6, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 3, 1, 6, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 6, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 2, 3, 1, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 3, 1, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {8, 2, 3, 1, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 2, 3, 1, 4, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 4, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 3, 1, 4, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 0, 4, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {10, 2, 3, 1, 6, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 6, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 3, 1, 6, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {8, 0, 6, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 2, 3, 1, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 3, 1, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 0, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 2, 3, 1, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {6, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 3, 1, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 2, 3, 1, 4, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 4, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 4, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 4, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 2, 3, 1, 6, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 6, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 3, 1, 6, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 0, 6, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 2, 3, 1, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {1, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {3, 3, 1, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {2, 0, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 2, 3, 1, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {3, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 3, 1, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 2, 3, 1, 4, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 4, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {4, 3, 1, 4, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 0, 4, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {9, 2, 3, 1, 6, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {6, 6, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 3, 1, 6, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 6, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 2, 3, 1, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 3, 1, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {8, 2, 3, 1, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 2, 3, 1, 4, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 4, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 3, 1, 4, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 0, 4, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 2, 3, 1, 6, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 6, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 6, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 6, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 2, 3, 1, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {4, 3, 1, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 0, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {7, 2, 3, 1, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 3, 1, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 0, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 2, 3, 1, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {3, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 3, 1, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}}; + __mmask64 leading = _mm512_cmpge_epu8_mask(input, minus64); + + __m512i highbits = _mm512_xor_si512(input, _mm512_set1_epi8(-62)); + + *next_leading_ptr = leading >> 63; + + __mmask64 bit6 = _mm512_cmpeq_epi8_mask(highbits, one); + input = + _mm512_mask_sub_epi8(input, (bit6 << 1) | *next_bit6_ptr, input, minus64); + *next_bit6_ptr = bit6 >> 63; + + __mmask64 retain = ~leading & load_mask; + __m512i output = _mm512_maskz_compress_epi8(retain, input); + int64_t written_out = count_ones(retain); + if (written_out == 0) { + return 0; // Indicates error + } + __mmask64 store_mask = ~UINT64_C(0) >> (64 - written_out); + // Optimization opportunity: sometimes, masked writes are not needed. + _mm512_mask_storeu_epi8((__m512i *)latin_output, store_mask, output); + return written_out; +} -} // namespace utf16_to_utf8 -} // namespace tables -} // unnamed namespace -} // namespace simdutf +size_t valid_utf8_to_latin1_avx512(const char *buf, size_t len, + char *latin_output) { + char *start = latin_output; + size_t pos = 0; + __m512i minus64 = _mm512_set1_epi8(-64); // 11111111111 ... 1100 0000 + __m512i one = _mm512_set1_epi8(1); + __mmask64 next_leading = 0; + __mmask64 next_bit6 = 0; -#endif // SIMDUTF_UTF16_TO_UTF8_TABLES_H -/* end file src/tables/utf16_to_utf8_tables.h */ -// End of tables. + while (pos + 64 <= len) { + size_t written = process_valid_block_from_utf8_to_latin1( + buf + pos, 64, latin_output, minus64, one, &next_leading, &next_bit6); + latin_output += written; + pos += 64; + } -// The scalar routines should be included once. -/* begin file src/scalar/ascii.h */ -#ifndef SIMDUTF_ASCII_H -#define SIMDUTF_ASCII_H + if (pos < len) { + size_t remaining = len - pos; + size_t written = process_valid_block_from_utf8_to_latin1( + buf + pos, remaining, latin_output, minus64, one, &next_leading, + &next_bit6); + latin_output += written; + } -namespace simdutf { -namespace scalar { -namespace { -namespace ascii { -#if SIMDUTF_IMPLEMENTATION_FALLBACK -// Only used by the fallback kernel. -inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept { - const uint8_t *data = reinterpret_cast(buf); - uint64_t pos = 0; - // process in blocks of 16 bytes when possible - for (; pos + 16 <= len; pos += 16) { - uint64_t v1; - std::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) != 0) { - return false; + return (size_t)(latin_output - start); +} +/* end file src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp */ +/* begin file src/icelake/icelake_convert_utf16_to_latin1.inl.cpp */ +// file included directly +template +size_t icelake_convert_utf16_to_latin1(const char16_t *buf, size_t len, + char *latin1_output) { + const char16_t *end = buf + len; + __m512i v_0xFF = _mm512_set1_epi16(0xff); + __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + __m512i shufmask = _mm512_set_epi8( + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, + 36, 34, 32, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + while (end - buf >= 32) { + __m512i in = _mm512_loadu_si512((__m512i *)buf); + if (big_endian) { + in = _mm512_shuffle_epi8(in, byteflip); + } + if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) { + return 0; + } + _mm256_storeu_si256( + (__m256i *)latin1_output, + _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in))); + latin1_output += 32; + buf += 32; + } + if (buf < end) { + uint32_t mask(uint32_t(1 << (end - buf)) - 1); + __m512i in = _mm512_maskz_loadu_epi16(mask, buf); + if (big_endian) { + in = _mm512_shuffle_epi8(in, byteflip); + } + if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) { + return 0; + } + _mm256_mask_storeu_epi8( + latin1_output, mask, + _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in))); + } + return len; +} + +template +std::pair +icelake_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, + char *latin1_output) { + const char16_t *end = buf + len; + const char16_t *start = buf; + __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + __m512i v_0xFF = _mm512_set1_epi16(0xff); + __m512i shufmask = _mm512_set_epi8( + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, + 36, 34, 32, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + while (end - buf >= 32) { + __m512i in = _mm512_loadu_si512((__m512i *)buf); + if (big_endian) { + in = _mm512_shuffle_epi8(in, byteflip); + } + if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) { + uint16_t word; + while ((word = (big_endian ? scalar::utf16::swap_bytes(uint16_t(*buf)) + : uint16_t(*buf))) <= 0xff) { + *latin1_output++ = uint8_t(word); + buf++; + } + return std::make_pair(result(error_code::TOO_LARGE, buf - start), + latin1_output); + } + _mm256_storeu_si256( + (__m256i *)latin1_output, + _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in))); + latin1_output += 32; + buf += 32; + } + if (buf < end) { + uint32_t mask(uint32_t(1 << (end - buf)) - 1); + __m512i in = _mm512_maskz_loadu_epi16(mask, buf); + if (big_endian) { + in = _mm512_shuffle_epi8(in, byteflip); + } + if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) { + + uint16_t word; + while ((word = (big_endian ? scalar::utf16::swap_bytes(uint16_t(*buf)) + : uint16_t(*buf))) <= 0xff) { + *latin1_output++ = uint8_t(word); + buf++; + } + return std::make_pair(result(error_code::TOO_LARGE, buf - start), + latin1_output); + } + _mm256_mask_storeu_epi8( + latin1_output, mask, + _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in))); + } + return std::make_pair(result(error_code::SUCCESS, len), latin1_output); +} +/* end file src/icelake/icelake_convert_utf16_to_latin1.inl.cpp */ +/* begin file src/icelake/icelake_convert_utf16_to_utf8.inl.cpp */ +// file included directly + +/** + * This function converts the input (inbuf, inlen), assumed to be valid + * UTF16 (little endian) into UTF-8 (to outbuf). The number of code units + * written is written to 'outlen' and the function reports the number of input + * word consumed. + */ +template +size_t utf16_to_utf8_avx512i(const char16_t *inbuf, size_t inlen, + unsigned char *outbuf, size_t *outlen) { + __m512i in; + __mmask32 inmask = _cvtu32_mask32(0x7fffffff); + __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + const char16_t *const inbuf_orig = inbuf; + const unsigned char *const outbuf_orig = outbuf; + int adjust = 0; + int carry = 0; + + while (inlen >= 32) { + in = _mm512_loadu_si512(inbuf); + if (big_endian) { + in = _mm512_shuffle_epi8(in, byteflip); + } + inlen -= 31; + lastiteration: + inbuf += 31; + + failiteration: + const __mmask32 is234byte = _mm512_mask_cmp_epu16_mask( + inmask, in, _mm512_set1_epi16(0x0080), _MM_CMPINT_NLT); + + if (_ktestz_mask32_u8(inmask, is234byte)) { + // fast path for ASCII only + _mm512_mask_cvtepi16_storeu_epi8(outbuf, inmask, in); + outbuf += 31; + carry = 0; + + if (inlen < 32) { + goto tail; + } else { + continue; + } + } + + const __mmask32 is12byte = + _mm512_cmp_epu16_mask(in, _mm512_set1_epi16(0x0800), _MM_CMPINT_LT); + + if (_ktestc_mask32_u8(is12byte, inmask)) { + // fast path for 1 and 2 byte only + + const __m512i twobytes = _mm512_ternarylogic_epi32( + _mm512_slli_epi16(in, 8), _mm512_srli_epi16(in, 6), + _mm512_set1_epi16(0x3f3f), 0xa8); // (A|B)&C + in = _mm512_mask_add_epi16(in, is234byte, twobytes, + _mm512_set1_epi16(int16_t(0x80c0))); + const __m512i cmpmask = + _mm512_mask_blend_epi16(inmask, _mm512_set1_epi16(int16_t(0xffff)), + _mm512_set1_epi16(0x0800)); + const __mmask64 smoosh = + _mm512_cmp_epu8_mask(in, cmpmask, _MM_CMPINT_NLT); + const __m512i out = _mm512_maskz_compress_epi8(smoosh, in); + _mm512_mask_storeu_epi8(outbuf, + _cvtu64_mask64(_pext_u64(_cvtmask64_u64(smoosh), + _cvtmask64_u64(smoosh))), + out); + outbuf += 31 + _mm_popcnt_u32(_cvtmask32_u32(is234byte)); + carry = 0; + + if (inlen < 32) { + goto tail; + } else { + continue; + } + } + __m512i lo = _mm512_cvtepu16_epi32(_mm512_castsi512_si256(in)); + __m512i hi = _mm512_cvtepu16_epi32(_mm512_extracti32x8_epi32(in, 1)); + + __m512i taglo = _mm512_set1_epi32(0x8080e000); + __m512i taghi = taglo; + + const __m512i fc00masked = + _mm512_and_epi32(in, _mm512_set1_epi16(int16_t(0xfc00))); + const __mmask32 hisurr = _mm512_mask_cmp_epu16_mask( + inmask, fc00masked, _mm512_set1_epi16(int16_t(0xd800)), _MM_CMPINT_EQ); + const __mmask32 losurr = _mm512_cmp_epu16_mask( + fc00masked, _mm512_set1_epi16(int16_t(0xdc00)), _MM_CMPINT_EQ); + + int carryout = 0; + if (!_kortestz_mask32_u8(hisurr, losurr)) { + // handle surrogates + + __m512i los = _mm512_alignr_epi32(hi, lo, 1); + __m512i his = _mm512_alignr_epi32(lo, hi, 1); + + const __mmask32 hisurrhi = _kshiftri_mask32(hisurr, 16); + taglo = _mm512_mask_mov_epi32(taglo, __mmask16(hisurr), + _mm512_set1_epi32(0x808080f0)); + taghi = _mm512_mask_mov_epi32(taghi, __mmask16(hisurrhi), + _mm512_set1_epi32(0x808080f0)); + + lo = _mm512_mask_slli_epi32(lo, __mmask16(hisurr), lo, 10); + hi = _mm512_mask_slli_epi32(hi, __mmask16(hisurrhi), hi, 10); + los = _mm512_add_epi32(los, _mm512_set1_epi32(0xfca02400)); + his = _mm512_add_epi32(his, _mm512_set1_epi32(0xfca02400)); + lo = _mm512_mask_add_epi32(lo, __mmask16(hisurr), lo, los); + hi = _mm512_mask_add_epi32(hi, __mmask16(hisurrhi), hi, his); + + carryout = _cvtu32_mask32(_kshiftri_mask32(hisurr, 30)); + + const uint32_t h = _cvtmask32_u32(hisurr); + const uint32_t l = _cvtmask32_u32(losurr); + // check for mismatched surrogates + if ((h + h + carry) ^ l) { + const uint32_t lonohi = l & ~(h + h + carry); + const uint32_t hinolo = h & ~(l >> 1); + inlen = _tzcnt_u32(hinolo | lonohi); + inmask = __mmask32(0x7fffffff & ((1U << inlen) - 1)); + in = _mm512_maskz_mov_epi16(inmask, in); + adjust = (int)inlen - 31; + inlen = 0; + goto failiteration; + } + } + + hi = _mm512_maskz_mov_epi32(_cvtu32_mask16(0x7fff), hi); + carry = carryout; + + __m512i mslo = + _mm512_multishift_epi64_epi8(_mm512_set1_epi64(0x20262c3200060c12), lo); + + __m512i mshi = + _mm512_multishift_epi64_epi8(_mm512_set1_epi64(0x20262c3200060c12), hi); + + const __mmask32 outmask = __mmask32(_kandn_mask64(losurr, inmask)); + const __mmask64 outmhi = _kshiftri_mask64(outmask, 16); + + const __mmask32 is1byte = __mmask32(_knot_mask64(is234byte)); + const __mmask64 is1bhi = _kshiftri_mask64(is1byte, 16); + const __mmask64 is12bhi = _kshiftri_mask64(is12byte, 16); + + taglo = _mm512_mask_mov_epi32(taglo, __mmask16(is12byte), + _mm512_set1_epi32(0x80c00000)); + taghi = _mm512_mask_mov_epi32(taghi, __mmask16(is12bhi), + _mm512_set1_epi32(0x80c00000)); + __m512i magiclo = _mm512_mask_blend_epi32(__mmask16(outmask), + _mm512_set1_epi32(0xffffffff), + _mm512_set1_epi32(0x00010101)); + __m512i magichi = _mm512_mask_blend_epi32(__mmask16(outmhi), + _mm512_set1_epi32(0xffffffff), + _mm512_set1_epi32(0x00010101)); + + magiclo = _mm512_mask_blend_epi32(__mmask16(outmask), + _mm512_set1_epi32(0xffffffff), + _mm512_set1_epi32(0x00010101)); + magichi = _mm512_mask_blend_epi32(__mmask16(outmhi), + _mm512_set1_epi32(0xffffffff), + _mm512_set1_epi32(0x00010101)); + + mslo = _mm512_ternarylogic_epi32(mslo, _mm512_set1_epi32(0x3f3f3f3f), taglo, + 0xea); // A&B|C + mshi = _mm512_ternarylogic_epi32(mshi, _mm512_set1_epi32(0x3f3f3f3f), taghi, + 0xea); + mslo = _mm512_mask_slli_epi32(mslo, __mmask16(is1byte), lo, 24); + + mshi = _mm512_mask_slli_epi32(mshi, __mmask16(is1bhi), hi, 24); + + const __mmask64 wantlo = + _mm512_cmp_epu8_mask(mslo, magiclo, _MM_CMPINT_NLT); + const __mmask64 wanthi = + _mm512_cmp_epu8_mask(mshi, magichi, _MM_CMPINT_NLT); + const __m512i outlo = _mm512_maskz_compress_epi8(wantlo, mslo); + const __m512i outhi = _mm512_maskz_compress_epi8(wanthi, mshi); + const uint64_t wantlo_uint64 = _cvtmask64_u64(wantlo); + const uint64_t wanthi_uint64 = _cvtmask64_u64(wanthi); + + uint64_t advlo = _mm_popcnt_u64(wantlo_uint64); + uint64_t advhi = _mm_popcnt_u64(wanthi_uint64); + + _mm512_mask_storeu_epi8( + outbuf, _cvtu64_mask64(_pext_u64(wantlo_uint64, wantlo_uint64)), outlo); + _mm512_mask_storeu_epi8( + outbuf + advlo, _cvtu64_mask64(_pext_u64(wanthi_uint64, wanthi_uint64)), + outhi); + outbuf += advlo + advhi; + } + outbuf += -adjust; + +tail: + if (inlen != 0) { + // We must have inlen < 31. + inmask = _cvtu32_mask32((1U << inlen) - 1); + in = _mm512_maskz_loadu_epi16(inmask, inbuf); + if (big_endian) { + in = _mm512_shuffle_epi8(in, byteflip); + } + adjust = (int)inlen - 31; + inlen = 0; + goto lastiteration; + } + *outlen = (outbuf - outbuf_orig) + adjust; + return ((inbuf - inbuf_orig) + adjust); +} +/* end file src/icelake/icelake_convert_utf16_to_utf8.inl.cpp */ +/* begin file src/icelake/icelake_convert_utf16_to_utf32.inl.cpp */ +// file included directly + +/* + Returns a pair: the first unprocessed byte from buf and utf32_output + A scalar routing should carry on the conversion of the tail. +*/ +template +std::tuple +convert_utf16_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_output) { + const char16_t *end = buf + len; + const __m512i v_fc00 = _mm512_set1_epi16((uint16_t)0xfc00); + const __m512i v_d800 = _mm512_set1_epi16((uint16_t)0xd800); + const __m512i v_dc00 = _mm512_set1_epi16((uint16_t)0xdc00); + __mmask32 carry{0}; + const __m512i byteflip = _mm512_setr_epi64( + 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, + 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + while (std::distance(buf, end) >= 32) { + // Always safe because buf + 32 <= end so that end - buf >= 32 bytes: + __m512i in = _mm512_loadu_si512((__m512i *)buf); + if (big_endian) { + in = _mm512_shuffle_epi8(in, byteflip); + } + + // H - bitmask for high surrogates + const __mmask32 H = + _mm512_cmpeq_epi16_mask(_mm512_and_si512(in, v_fc00), v_d800); + // H - bitmask for low surrogates + const __mmask32 L = + _mm512_cmpeq_epi16_mask(_mm512_and_si512(in, v_fc00), v_dc00); + + if ((H | L)) { + // surrogate pair(s) in a register + const __mmask32 V = + (L ^ + (carry | (H << 1))); // A high surrogate must be followed by low one + // and a low one must be preceded by a high one. + // If valid, V should be equal to 0 + + if (V == 0) { + // valid case + /* + Input surrogate pair: + |1101.11aa.aaaa.aaaa|1101.10bb.bbbb.bbbb| + low surrogate high surrogate + */ + /* 1. Expand all code units to 32-bit code units + in + |0000.0000.0000.0000.1101.11aa.aaaa.aaaa|0000.0000.0000.0000.1101.10bb.bbbb.bbbb| + */ + const __m512i first = _mm512_cvtepu16_epi32(_mm512_castsi512_si256(in)); + const __m512i second = + _mm512_cvtepu16_epi32(_mm512_extracti32x8_epi32(in, 1)); + + /* 2. Shift by one 16-bit word to align low surrogates with high + surrogates in + |0000.0000.0000.0000.1101.11aa.aaaa.aaaa|0000.0000.0000.0000.1101.10bb.bbbb.bbbb| + shifted + |????.????.????.????.????.????.????.????|0000.0000.0000.0000.1101.11aa.aaaa.aaaa| + */ + const __m512i shifted_first = _mm512_alignr_epi32(second, first, 1); + const __m512i shifted_second = + _mm512_alignr_epi32(_mm512_setzero_si512(), second, 1); + + /* 3. Align all high surrogates in first and second by shifting to the + left by 10 bits + |0000.0000.0000.0000.1101.11aa.aaaa.aaaa|0000.0011.0110.bbbb.bbbb.bb00.0000.0000| + */ + const __m512i aligned_first = + _mm512_mask_slli_epi32(first, (__mmask16)H, first, 10); + const __m512i aligned_second = + _mm512_mask_slli_epi32(second, (__mmask16)(H >> 16), second, 10); + + /* 4. Remove surrogate prefixes and add offset 0x10000 by adding in, + shifted and constant in + |0000.0000.0000.0000.1101.11aa.aaaa.aaaa|0000.0011.0110.bbbb.bbbb.bb00.0000.0000| + shifted + |????.????.????.????.????.????.????.????|0000.0000.0000.0000.1101.11aa.aaaa.aaaa| + constant|1111.1100.1010.0000.0010.0100.0000.0000|1111.1100.1010.0000.0010.0100.0000.0000| + */ + const __m512i constant = _mm512_set1_epi32((uint32_t)0xfca02400); + const __m512i added_first = _mm512_mask_add_epi32( + aligned_first, (__mmask16)H, aligned_first, shifted_first); + const __m512i utf32_first = _mm512_mask_add_epi32( + added_first, (__mmask16)H, added_first, constant); + + const __m512i added_second = + _mm512_mask_add_epi32(aligned_second, (__mmask16)(H >> 16), + aligned_second, shifted_second); + const __m512i utf32_second = _mm512_mask_add_epi32( + added_second, (__mmask16)(H >> 16), added_second, constant); + + // 5. Store all valid UTF-32 code units (low surrogate positions and + // 32nd word are invalid) + const __mmask32 valid = ~L & 0x7fffffff; + // We deliberately do a _mm512_maskz_compress_epi32 followed by + // storeu_epi32 to ease performance portability to Zen 4. + const __m512i compressed_first = + _mm512_maskz_compress_epi32((__mmask16)(valid), utf32_first); + const size_t howmany1 = count_ones((uint16_t)(valid)); + _mm512_storeu_si512((__m512i *)utf32_output, compressed_first); + utf32_output += howmany1; + const __m512i compressed_second = + _mm512_maskz_compress_epi32((__mmask16)(valid >> 16), utf32_second); + const size_t howmany2 = count_ones((uint16_t)(valid >> 16)); + // The following could be unsafe in some cases? + //_mm512_storeu_epi32((__m512i *) utf32_output, compressed_second); + _mm512_mask_storeu_epi32((__m512i *)utf32_output, + __mmask16((1 << howmany2) - 1), + compressed_second); + utf32_output += howmany2; + // Only process 31 code units, but keep track if the 31st word is a high + // surrogate as a carry + buf += 31; + carry = (H >> 30) & 0x1; + } else { + // invalid case + return std::make_tuple(buf + carry, utf32_output, false); + } + } else { + // no surrogates + // extend all thirty-two 16-bit code units to thirty-two 32-bit code units + _mm512_storeu_si512((__m512i *)(utf32_output), + _mm512_cvtepu16_epi32(_mm512_castsi512_si256(in))); + _mm512_storeu_si512( + (__m512i *)(utf32_output) + 1, + _mm512_cvtepu16_epi32(_mm512_extracti32x8_epi32(in, 1))); + utf32_output += 32; + buf += 32; + carry = 0; + } + } // while + return std::make_tuple(buf + carry, utf32_output, true); +} +/* end file src/icelake/icelake_convert_utf16_to_utf32.inl.cpp */ +/* begin file src/icelake/icelake_convert_utf32_to_latin1.inl.cpp */ +// file included directly +size_t icelake_convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) { + const char32_t *end = buf + len; + __m512i v_0xFF = _mm512_set1_epi32(0xff); + __m512i shufmask = _mm512_set_epi8( + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, + 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4, 0); + while (end - buf >= 16) { + __m512i in = _mm512_loadu_si512((__m512i *)buf); + if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) { + return 0; } + _mm_storeu_si128( + (__m128i *)latin1_output, + _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in))); + latin1_output += 16; + buf += 16; } - // process the tail byte-by-byte - for (; pos < len; pos++) { - if (data[pos] >= 0b10000000) { - return false; + if (buf < end) { + uint16_t mask = uint16_t((1 << (end - buf)) - 1); + __m512i in = _mm512_maskz_loadu_epi32(mask, buf); + if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) { + return 0; } + _mm_mask_storeu_epi8( + latin1_output, mask, + _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in))); } - return true; + return len; } -#endif -inline simdutf_warn_unused result validate_with_errors(const char *buf, - size_t len) noexcept { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - // process in blocks of 16 bytes when possible - for (; pos + 16 <= len; pos += 16) { - uint64_t v1; - std::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) != 0) { - for (; pos < len; pos++) { - if (data[pos] >= 0b10000000) { - return result(error_code::TOO_LARGE, pos); - } +std::pair +icelake_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) { + const char32_t *end = buf + len; + const char32_t *start = buf; + __m512i v_0xFF = _mm512_set1_epi32(0xff); + __m512i shufmask = _mm512_set_epi8( + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, + 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4, 0); + while (end - buf >= 16) { + __m512i in = _mm512_loadu_si512((__m512i *)buf); + if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) { + while (uint32_t(*buf) <= 0xff) { + *latin1_output++ = uint8_t(*buf++); } + return std::make_pair(result(error_code::TOO_LARGE, buf - start), + latin1_output); } + _mm_storeu_si128( + (__m128i *)latin1_output, + _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in))); + latin1_output += 16; + buf += 16; } - // process the tail byte-by-byte - for (; pos < len; pos++) { - if (data[pos] >= 0b10000000) { - return result(error_code::TOO_LARGE, pos); + if (buf < end) { + uint16_t mask = uint16_t((1 << (end - buf)) - 1); + __m512i in = _mm512_maskz_loadu_epi32(mask, buf); + if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) { + while (uint32_t(*buf) <= 0xff) { + *latin1_output++ = uint8_t(*buf++); + } + return std::make_pair(result(error_code::TOO_LARGE, buf - start), + latin1_output); } + _mm_mask_storeu_epi8( + latin1_output, mask, + _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in))); } - return result(error_code::SUCCESS, pos); + return std::make_pair(result(error_code::SUCCESS, len), latin1_output); } +/* end file src/icelake/icelake_convert_utf32_to_latin1.inl.cpp */ +/* begin file src/icelake/icelake_convert_utf32_to_utf8.inl.cpp */ +// file included directly -} // namespace ascii -} // unnamed namespace -} // namespace scalar -} // namespace simdutf +// Todo: currently, this is just the haswell code, optimize for icelake kernel. +std::pair +avx512_convert_utf32_to_utf8(const char32_t *buf, size_t len, + char *utf8_output) { + const char32_t *end = buf + len; + const __m256i v_0000 = _mm256_setzero_si256(); + const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); + const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80); + const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800); + const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080); + const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff); + __m256i running_max = _mm256_setzero_si256(); + __m256i forbidden_bytemask = _mm256_setzero_si256(); -#endif -/* end file src/scalar/ascii.h */ -/* begin file src/scalar/latin1.h */ -#ifndef SIMDUTF_LATIN1_H -#define SIMDUTF_LATIN1_H + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 -namespace simdutf { -namespace scalar { -namespace { -namespace latin1 { + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + __m256i in = _mm256_loadu_si256((__m256i *)buf); + __m256i nextin = _mm256_loadu_si256((__m256i *)buf + 1); + running_max = _mm256_max_epu32(_mm256_max_epu32(in, running_max), nextin); -inline size_t utf32_length_from_latin1(size_t len) { - // We are not BOM aware. - return len; // a utf32 unit will always represent 1 latin1 character -} + // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned + // saturation + __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff), + _mm256_and_si256(nextin, v_7fffffff)); + in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000); -inline size_t utf8_length_from_latin1(const char *buf, size_t len) { - const uint8_t *c = reinterpret_cast(buf); - size_t answer = 0; - for (size_t i = 0; i < len; i++) { - if ((c[i] >> 7)) { - answer++; + // Try to apply UTF-16 => UTF-8 routine on 256 bits + // (haswell/avx2_convert_utf16_to_utf8.cpp) + + if (_mm256_testz_si256(in_16, v_ff80)) { // ASCII fast path!!!! + // 1. pack the bytes + const __m128i utf8_packed = _mm_packus_epi16( + _mm256_castsi256_si128(in_16), _mm256_extractf128_si256(in_16, 1)); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! } - } - return answer + len; -} + // no bits set above 7th bit + const __m256i one_byte_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000); + const uint32_t one_byte_bitmask = + static_cast(_mm256_movemask_epi8(one_byte_bytemask)); -inline size_t utf16_length_from_latin1(size_t len) { return len; } + // no bits set above 11th bit + const __m256i one_or_two_bytes_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000); + const uint32_t one_or_two_bytes_bitmask = + static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); + if (one_or_two_bytes_bitmask == 0xffffffff) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); + const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); -} // namespace latin1 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf + // t0 = [000a|aaaa|bbbb|bb00] + const __m256i t0 = _mm256_slli_epi16(in_16, 2); + // t1 = [000a|aaaa|0000|0000] + const __m256i t1 = _mm256_and_si256(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const __m256i t2 = _mm256_and_si256(in_16, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const __m256i t3 = _mm256_or_si256(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m256i t4 = _mm256_or_si256(t3, v_c080); -#endif -/* end file src/scalar/latin1.h */ + // 2. merge ASCII and 2-byte codewords + const __m256i utf8_unpacked = + _mm256_blendv_epi8(t4, in_16, one_byte_bytemask); -/* begin file src/scalar/utf32_to_utf8/valid_utf32_to_utf8.h */ -#ifndef SIMDUTF_VALID_UTF32_TO_UTF8_H -#define SIMDUTF_VALID_UTF32_TO_UTF8_H + // 3. prepare bitmask for 8-bit lookup + const uint32_t M0 = one_byte_bitmask & 0x55555555; + const uint32_t M1 = M0 >> 7; + const uint32_t M2 = (M1 | M0) & 0x00ff00ff; + // 4. pack the bytes -namespace simdutf { -namespace scalar { -namespace { -namespace utf32_to_utf8 { + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; + const uint8_t *row_2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> + 16)][0]; -#if SIMDUTF_IMPLEMENTATION_FALLBACK || SIMDUTF_IMPLEMENTATION_PPC64 -// only used by the fallback and POWER kernel -inline size_t convert_valid(const char32_t *buf, size_t len, - char *utf8_output) { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{utf8_output}; - while (pos < len) { - // try to convert the next block of 2 ASCII characters - if (pos + 2 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if ((v & 0xFFFFFF80FFFFFF80) == 0) { - *utf8_output++ = char(buf[pos]); - *utf8_output++ = char(buf[pos + 1]); - pos += 2; - continue; - } - } - uint32_t word = data[pos]; - if ((word & 0xFFFFFF80) == 0) { - // will generate one UTF-8 bytes - *utf8_output++ = char(word); - pos++; - } else if ((word & 0xFFFFF800) == 0) { - // will generate two UTF-8 bytes - // we have 0b110XXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else if ((word & 0xFFFF0000) == 0) { - // will generate three UTF-8 bytes - // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else { - // will generate four UTF-8 bytes - // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; + const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); + const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); + + const __m256i utf8_packed = _mm256_shuffle_epi8( + utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); + // 5. store bytes + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_castsi256_si128(utf8_packed)); + utf8_output += row[0]; + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_extractf128_si256(utf8_packed, 1)); + utf8_output += row_2[0]; + + // 6. adjust pointers + buf += 16; + continue; } - } - return utf8_output - start; -} -#endif // SIMDUTF_IMPLEMENTATION_FALLBACK || SIMDUTF_IMPLEMENTATION_PPC64 + // Must check for overflow in packing + const __m256i saturation_bytemask = _mm256_cmpeq_epi32( + _mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000); + const uint32_t saturation_bitmask = + static_cast(_mm256_movemask_epi8(saturation_bytemask)); + if (saturation_bitmask == 0xffffffff) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800); + forbidden_bytemask = _mm256_or_si256( + forbidden_bytemask, + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800)); -} // namespace utf32_to_utf8 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf + const __m256i dup_even = _mm256_setr_epi16( + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); -#endif -/* end file src/scalar/utf32_to_utf8/valid_utf32_to_utf8.h */ -/* begin file src/scalar/utf32_to_utf8/utf32_to_utf8.h */ -#ifndef SIMDUTF_UTF32_TO_UTF8_H -#define SIMDUTF_UTF32_TO_UTF8_H + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes -namespace simdutf { -namespace scalar { -namespace { -namespace utf32_to_utf8 { + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. -inline size_t convert(const char32_t *buf, size_t len, char *utf8_output) { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{utf8_output}; - while (pos < len) { - // try to convert the next block of 2 ASCII characters - if (pos + 2 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if ((v & 0xFFFFFF80FFFFFF80) == 0) { - *utf8_output++ = char(buf[pos]); - *utf8_output++ = char(buf[pos + 1]); - pos += 2; - continue; - } - } - uint32_t word = data[pos]; - if ((word & 0xFFFFFF80) == 0) { - // will generate one UTF-8 bytes - *utf8_output++ = char(word); - pos++; - } else if ((word & 0xFFFFF800) == 0) { - // will generate two UTF-8 bytes - // we have 0b110XXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else if ((word & 0xFFFF0000) == 0) { - // will generate three UTF-8 bytes - // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX - if (word >= 0xD800 && word <= 0xDFFF) { - return 0; - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else { - // will generate four UTF-8 bytes - // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX - if (word > 0x10FFFF) { - return 0; - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } - } - return utf8_output - start; -} + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. -inline result convert_with_errors(const char32_t *buf, size_t len, - char *utf8_output) { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{utf8_output}; - while (pos < len) { - // try to convert the next block of 2 ASCII characters - if (pos + 2 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if ((v & 0xFFFFFF80FFFFFF80) == 0) { - *utf8_output++ = char(buf[pos]); - *utf8_output++ = char(buf[pos + 1]); - pos += 2; + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); + + // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] + const __m256i s0 = _mm256_srli_epi16(in_16, 4); + // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] + const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); + // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] + const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); + // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); + const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, + simdutf_vec(0b0100000000000000)); + const __m256i s4 = _mm256_xor_si256(s3, m0); +#undef simdutf_vec + + // 4. expand code units 16-bit => 32-bit + const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); + const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); + + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint32_t mask = (one_byte_bitmask & 0x55555555) | + (one_or_two_bytes_bitmask & 0xaaaaaaaa); + // Due to the wider registers, the following path is less likely to be + // useful. + /*if(mask == 0) { + // We only have three-byte code units. Use fast path. + const __m256i shuffle = + _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, + 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = + _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = + _mm256_shuffle_epi8(out1, shuffle); + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; continue; - } - } - uint32_t word = data[pos]; - if ((word & 0xFFFFFF80) == 0) { - // will generate one UTF-8 bytes - *utf8_output++ = char(word); - pos++; - } else if ((word & 0xFFFFF800) == 0) { - // will generate two UTF-8 bytes - // we have 0b110XXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else if ((word & 0xFFFF0000) == 0) { - // will generate three UTF-8 bytes - // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX - if (word >= 0xD800 && word <= 0xDFFF) { - return result(error_code::SURROGATE, pos); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; + }*/ + const uint8_t mask0 = uint8_t(mask); + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); + const __m128i utf8_0 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); + + const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); + const __m128i utf8_1 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); + + const uint8_t mask2 = static_cast(mask >> 16); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; + const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); + const __m128i utf8_2 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); + + const uint8_t mask3 = static_cast(mask >> 24); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; + const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); + const __m128i utf8_3 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); + + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); + utf8_output += row0[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + utf8_output += row1[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_2); + utf8_output += row2[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_3); + utf8_output += row3[0]; + buf += 16; } else { - // will generate four UTF-8 bytes - // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX - if (word > 0x10FFFF) { - return result(error_code::TOO_LARGE, pos); + // case: at least one 32-bit word is larger than 0xFFFF <=> it will + // produce four UTF-8 bytes. Let us do a scalar fallback. It may seem + // wasteful to use scalar code, but being efficient with SIMD may require + // large, non-trivial tables? + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { // 1-byte (ASCII) + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { // 2-byte + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { // 3-byte + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, utf8_output); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { // 4-byte + if (word > 0x10FFFF) { + return std::make_pair(nullptr, utf8_output); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; + buf += k; } + } // while + + // check for invalid input + const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff); + if (static_cast(_mm256_movemask_epi8(_mm256_cmpeq_epi32( + _mm256_max_epu32(running_max, v_10ffff), v_10ffff))) != 0xffffffff) { + return std::make_pair(nullptr, utf8_output); } - return result(error_code::SUCCESS, utf8_output - start); + + if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != 0) { + return std::make_pair(nullptr, utf8_output); + } + + return std::make_pair(buf, utf8_output); } -} // namespace utf32_to_utf8 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf +// Todo: currently, this is just the haswell code, optimize for icelake kernel. +std::pair +avx512_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, + char *utf8_output) { + const char32_t *end = buf + len; + const char32_t *start = buf; -#endif -/* end file src/scalar/utf32_to_utf8/utf32_to_utf8.h */ + const __m256i v_0000 = _mm256_setzero_si256(); + const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); + const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80); + const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800); + const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080); + const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff); + const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff); -/* begin file src/scalar/utf32_to_utf16/valid_utf32_to_utf16.h */ -#ifndef SIMDUTF_VALID_UTF32_TO_UTF16_H -#define SIMDUTF_VALID_UTF32_TO_UTF16_H + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 -namespace simdutf { -namespace scalar { -namespace { -namespace utf32_to_utf16 { + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + __m256i in = _mm256_loadu_si256((__m256i *)buf); + __m256i nextin = _mm256_loadu_si256((__m256i *)buf + 1); + // Check for too large input + const __m256i max_input = + _mm256_max_epu32(_mm256_max_epu32(in, nextin), v_10ffff); + if (static_cast(_mm256_movemask_epi8( + _mm256_cmpeq_epi32(max_input, v_10ffff))) != 0xffffffff) { + return std::make_pair(result(error_code::TOO_LARGE, buf - start), + utf8_output); + } -template -inline size_t convert_valid(const char32_t *buf, size_t len, - char16_t *utf16_output) { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - while (pos < len) { - uint32_t word = data[pos]; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - *utf16_output++ = !match_system(big_endian) - ? char16_t(utf16::swap_bytes(uint16_t(word))) - : char16_t(word); - pos++; - } else { - // will generate a surrogate pair - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (!match_system(big_endian)) { - high_surrogate = utf16::swap_bytes(high_surrogate); - low_surrogate = utf16::swap_bytes(low_surrogate); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - pos++; + // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned + // saturation + __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff), + _mm256_and_si256(nextin, v_7fffffff)); + in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000); + + // Try to apply UTF-16 => UTF-8 routine on 256 bits + // (haswell/avx2_convert_utf16_to_utf8.cpp) + + if (_mm256_testz_si256(in_16, v_ff80)) { // ASCII fast path!!!! + // 1. pack the bytes + const __m128i utf8_packed = _mm_packus_epi16( + _mm256_castsi256_si128(in_16), _mm256_extractf128_si256(in_16, 1)); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! } - } - return utf16_output - start; -} + // no bits set above 7th bit + const __m256i one_byte_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000); + const uint32_t one_byte_bitmask = + static_cast(_mm256_movemask_epi8(one_byte_bytemask)); -} // namespace utf32_to_utf16 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf + // no bits set above 11th bit + const __m256i one_or_two_bytes_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000); + const uint32_t one_or_two_bytes_bitmask = + static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); + if (one_or_two_bytes_bitmask == 0xffffffff) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); + const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); -#endif -/* end file src/scalar/utf32_to_utf16/valid_utf32_to_utf16.h */ -/* begin file src/scalar/utf32_to_utf16/utf32_to_utf16.h */ -#ifndef SIMDUTF_UTF32_TO_UTF16_H -#define SIMDUTF_UTF32_TO_UTF16_H + // t0 = [000a|aaaa|bbbb|bb00] + const __m256i t0 = _mm256_slli_epi16(in_16, 2); + // t1 = [000a|aaaa|0000|0000] + const __m256i t1 = _mm256_and_si256(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const __m256i t2 = _mm256_and_si256(in_16, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const __m256i t3 = _mm256_or_si256(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m256i t4 = _mm256_or_si256(t3, v_c080); -namespace simdutf { -namespace scalar { -namespace { -namespace utf32_to_utf16 { + // 2. merge ASCII and 2-byte codewords + const __m256i utf8_unpacked = + _mm256_blendv_epi8(t4, in_16, one_byte_bytemask); -template -inline size_t convert(const char32_t *buf, size_t len, char16_t *utf16_output) { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - while (pos < len) { - uint32_t word = data[pos]; - if ((word & 0xFFFF0000) == 0) { - if (word >= 0xD800 && word <= 0xDFFF) { - return 0; - } - // will not generate a surrogate pair - *utf16_output++ = !match_system(big_endian) - ? char16_t(utf16::swap_bytes(uint16_t(word))) - : char16_t(word); - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return 0; - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (!match_system(big_endian)) { - high_surrogate = utf16::swap_bytes(high_surrogate); - low_surrogate = utf16::swap_bytes(low_surrogate); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); + // 3. prepare bitmask for 8-bit lookup + const uint32_t M0 = one_byte_bitmask & 0x55555555; + const uint32_t M1 = M0 >> 7; + const uint32_t M2 = (M1 | M0) & 0x00ff00ff; + // 4. pack the bytes + + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; + const uint8_t *row_2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> + 16)][0]; + + const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); + const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); + + const __m256i utf8_packed = _mm256_shuffle_epi8( + utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); + // 5. store bytes + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_castsi256_si128(utf8_packed)); + utf8_output += row[0]; + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_extractf128_si256(utf8_packed, 1)); + utf8_output += row_2[0]; + + // 6. adjust pointers + buf += 16; + continue; } - pos++; - } - return utf16_output - start; -} + // Must check for overflow in packing + const __m256i saturation_bytemask = _mm256_cmpeq_epi32( + _mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000); + const uint32_t saturation_bitmask = + static_cast(_mm256_movemask_epi8(saturation_bytemask)); + if (saturation_bitmask == 0xffffffff) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes -template -inline result convert_with_errors(const char32_t *buf, size_t len, - char16_t *utf16_output) { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - while (pos < len) { - uint32_t word = data[pos]; - if ((word & 0xFFFF0000) == 0) { - if (word >= 0xD800 && word <= 0xDFFF) { - return result(error_code::SURROGATE, pos); - } - // will not generate a surrogate pair - *utf16_output++ = !match_system(big_endian) - ? char16_t(utf16::swap_bytes(uint16_t(word))) - : char16_t(word); - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return result(error_code::TOO_LARGE, pos); - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (!match_system(big_endian)) { - high_surrogate = utf16::swap_bytes(high_surrogate); - low_surrogate = utf16::swap_bytes(low_surrogate); + // Check for illegal surrogate code units + const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800); + const __m256i forbidden_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800); + if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != + 0x0) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + utf8_output); } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - } - pos++; - } - return result(error_code::SUCCESS, utf16_output - start); -} -} // namespace utf32_to_utf16 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf + const __m256i dup_even = _mm256_setr_epi16( + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); -#endif -/* end file src/scalar/utf32_to_utf16/utf32_to_utf16.h */ + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes -/* begin file src/scalar/utf16_to_utf8/valid_utf16_to_utf8.h */ -#ifndef SIMDUTF_VALID_UTF16_TO_UTF8_H -#define SIMDUTF_VALID_UTF16_TO_UTF8_H + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. -namespace simdutf { -namespace scalar { -namespace { -namespace utf16_to_utf8 { + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. -template -inline size_t convert_valid(const char16_t *buf, size_t len, - char *utf8_output) { - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{utf8_output}; - while (pos < len) { - // try to convert the next block of 4 ASCII characters - if (pos + 4 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if (!match_system(big_endian)) { - v = (v >> 8) | (v << (64 - 8)); + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); + + // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] + const __m256i s0 = _mm256_srli_epi16(in_16, 4); + // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] + const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); + // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] + const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); + // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); + const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, + simdutf_vec(0b0100000000000000)); + const __m256i s4 = _mm256_xor_si256(s3, m0); +#undef simdutf_vec + + // 4. expand code units 16-bit => 32-bit + const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); + const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); + + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint32_t mask = (one_byte_bitmask & 0x55555555) | + (one_or_two_bytes_bitmask & 0xaaaaaaaa); + // Due to the wider registers, the following path is less likely to be + // useful. + /*if(mask == 0) { + // We only have three-byte code units. Use fast path. + const __m256i shuffle = + _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, + 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = + _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = + _mm256_shuffle_epi8(out1, shuffle); + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; + continue; + }*/ + const uint8_t mask0 = uint8_t(mask); + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); + const __m128i utf8_0 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); + + const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); + const __m128i utf8_1 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); + + const uint8_t mask2 = static_cast(mask >> 16); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; + const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); + const __m128i utf8_2 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); + + const uint8_t mask3 = static_cast(mask >> 24); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; + const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); + const __m128i utf8_3 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); + + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); + utf8_output += row0[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + utf8_output += row1[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_2); + utf8_output += row2[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_3); + utf8_output += row3[0]; + buf += 16; + } else { + // case: at least one 32-bit word is larger than 0xFFFF <=> it will + // produce four UTF-8 bytes. Let us do a scalar fallback. It may seem + // wasteful to use scalar code, but being efficient with SIMD may require + // large, non-trivial tables? + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); } - if ((v & 0xFF80FF80FF80FF80) == 0) { - size_t final_pos = pos + 4; - while (pos < final_pos) { - *utf8_output++ = !match_system(big_endian) - ? char(utf16::swap_bytes(buf[pos])) - : char(buf[pos]); - pos++; + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { // 1-byte (ASCII) + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { // 2-byte + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { // 3-byte + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k), utf8_output); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { // 4-byte + if (word > 0x10FFFF) { + return std::make_pair( + result(error_code::TOO_LARGE, buf - start + k), utf8_output); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); } - continue; } + buf += k; } + } // while - uint16_t word = - !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; - if ((word & 0xFF80) == 0) { - // will generate one UTF-8 bytes - *utf8_output++ = char(word); - pos++; - } else if ((word & 0xF800) == 0) { - // will generate two UTF-8 bytes - // we have 0b110XXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else if ((word & 0xF800) != 0xD800) { - // will generate three UTF-8 bytes - // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - if (pos + 1 >= len) { - return 0; - } // minimal bound checking - uint16_t next_word = !match_system(big_endian) - ? utf16::swap_bytes(data[pos + 1]) - : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - uint32_t value = (diff << 10) + diff2 + 0x10000; - // will generate four UTF-8 bytes - // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((value >> 18) | 0b11110000); - *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((value & 0b111111) | 0b10000000); - pos += 2; - } - } - return utf8_output - start; + return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); } +/* end file src/icelake/icelake_convert_utf32_to_utf8.inl.cpp */ +/* begin file src/icelake/icelake_convert_utf32_to_utf16.inl.cpp */ +// file included directly + +// Todo: currently, this is just the haswell code, optimize for icelake kernel. +template +std::pair +avx512_convert_utf32_to_utf16(const char32_t *buf, size_t len, + char16_t *utf16_output) { + const char32_t *end = buf + len; -} // namespace utf16_to_utf8 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + __m256i forbidden_bytemask = _mm256_setzero_si256(); -#endif -/* end file src/scalar/utf16_to_utf8/valid_utf16_to_utf8.h */ -/* begin file src/scalar/utf16_to_utf8/utf16_to_utf8.h */ -#ifndef SIMDUTF_UTF16_TO_UTF8_H -#define SIMDUTF_UTF16_TO_UTF8_H + while (end - buf >= std::ptrdiff_t(8 + safety_margin)) { + __m256i in = _mm256_loadu_si256((__m256i *)buf); -namespace simdutf { -namespace scalar { -namespace { -namespace utf16_to_utf8 { + const __m256i v_00000000 = _mm256_setzero_si256(); + const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000); -template -inline size_t convert(const char16_t *buf, size_t len, char *utf8_output) { - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{utf8_output}; - while (pos < len) { - // try to convert the next block of 8 bytes - if (pos + 4 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if (!match_system(big_endian)) { - v = (v >> 8) | (v << (64 - 8)); - } - if ((v & 0xFF80FF80FF80FF80) == 0) { - size_t final_pos = pos + 4; - while (pos < final_pos) { - *utf8_output++ = !match_system(big_endian) - ? char(utf16::swap_bytes(buf[pos])) - : char(buf[pos]); - pos++; - } - continue; + // no bits set above 16th bit <=> can pack to UTF16 without surrogate pairs + const __m256i saturation_bytemask = + _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000); + const uint32_t saturation_bitmask = + static_cast(_mm256_movemask_epi8(saturation_bytemask)); + + if (saturation_bitmask == 0xffffffff) { + const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800); + const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800); + forbidden_bytemask = _mm256_or_si256( + forbidden_bytemask, + _mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800)); + + __m128i utf16_packed = _mm_packus_epi32(_mm256_castsi256_si128(in), + _mm256_extractf128_si256(in, 1)); + if (big_endian) { + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + utf16_packed = _mm_shuffle_epi8(utf16_packed, swap); } - } - uint16_t word = - !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; - if ((word & 0xFF80) == 0) { - // will generate one UTF-8 bytes - *utf8_output++ = char(word); - pos++; - } else if ((word & 0xF800) == 0) { - // will generate two UTF-8 bytes - // we have 0b110XXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else if ((word & 0xF800) != 0xD800) { - // will generate three UTF-8 bytes - // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; + _mm_storeu_si128((__m128i *)utf16_output, utf16_packed); + utf16_output += 8; + buf += 8; } else { - // must be a surrogate pair - if (pos + 1 >= len) { - return 0; - } - uint16_t diff = uint16_t(word - 0xD800); - if (diff > 0x3FF) { - return 0; + size_t forward = 7; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); } - uint16_t next_word = !match_system(big_endian) - ? utf16::swap_bytes(data[pos + 1]) - : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if (diff2 > 0x3FF) { - return 0; + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, utf16_output); + } + *utf16_output++ = + big_endian + ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return std::make_pair(nullptr, utf16_output); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if (big_endian) { + high_surrogate = + uint16_t((high_surrogate >> 8) | (high_surrogate << 8)); + low_surrogate = + uint16_t((low_surrogate >> 8) | (low_surrogate << 8)); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } } - uint32_t value = (diff << 10) + diff2 + 0x10000; - // will generate four UTF-8 bytes - // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((value >> 18) | 0b11110000); - *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((value & 0b111111) | 0b10000000); - pos += 2; + buf += k; } } - return utf8_output - start; + + // check for invalid input + if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != 0) { + return std::make_pair(nullptr, utf16_output); + } + + return std::make_pair(buf, utf16_output); } +// Todo: currently, this is just the haswell code, optimize for icelake kernel. template -inline result convert_with_errors(const char16_t *buf, size_t len, - char *utf8_output) { - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{utf8_output}; - while (pos < len) { - // try to convert the next block of 8 bytes - if (pos + 4 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if (!match_system(big_endian)) - v = (v >> 8) | (v << (64 - 8)); - if ((v & 0xFF80FF80FF80FF80) == 0) { - size_t final_pos = pos + 4; - while (pos < final_pos) { - *utf8_output++ = !match_system(big_endian) - ? char(utf16::swap_bytes(buf[pos])) - : char(buf[pos]); - pos++; - } - continue; +std::pair +avx512_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, + char16_t *utf16_output) { + const char32_t *start = buf; + const char32_t *end = buf + len; + + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf >= std::ptrdiff_t(8 + safety_margin)) { + __m256i in = _mm256_loadu_si256((__m256i *)buf); + + const __m256i v_00000000 = _mm256_setzero_si256(); + const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000); + + // no bits set above 16th bit <=> can pack to UTF16 without surrogate pairs + const __m256i saturation_bytemask = + _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000); + const uint32_t saturation_bitmask = + static_cast(_mm256_movemask_epi8(saturation_bytemask)); + + if (saturation_bitmask == 0xffffffff) { + const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800); + const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800); + const __m256i forbidden_bytemask = + _mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800); + if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != + 0x0) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + utf16_output); } - } - uint16_t word = - !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; - if ((word & 0xFF80) == 0) { - // will generate one UTF-8 bytes - *utf8_output++ = char(word); - pos++; - } else if ((word & 0xF800) == 0) { - // will generate two UTF-8 bytes - // we have 0b110XXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else if ((word & 0xF800) != 0xD800) { - // will generate three UTF-8 bytes - // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else { - // must be a surrogate pair - if (pos + 1 >= len) { - return result(error_code::SURROGATE, pos); + + __m128i utf16_packed = _mm_packus_epi32(_mm256_castsi256_si128(in), + _mm256_extractf128_si256(in, 1)); + if (big_endian) { + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + utf16_packed = _mm_shuffle_epi8(utf16_packed, swap); } - uint16_t diff = uint16_t(word - 0xD800); - if (diff > 0x3FF) { - return result(error_code::SURROGATE, pos); + _mm_storeu_si128((__m128i *)utf16_output, utf16_packed); + utf16_output += 8; + buf += 8; + } else { + size_t forward = 7; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); } - uint16_t next_word = !match_system(big_endian) - ? utf16::swap_bytes(data[pos + 1]) - : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if (diff2 > 0x3FF) { - return result(error_code::SURROGATE, pos); + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k), utf16_output); + } + *utf16_output++ = + big_endian + ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return std::make_pair( + result(error_code::TOO_LARGE, buf - start + k), utf16_output); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if (big_endian) { + high_surrogate = + uint16_t((high_surrogate >> 8) | (high_surrogate << 8)); + low_surrogate = + uint16_t((low_surrogate >> 8) | (low_surrogate << 8)); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } } - uint32_t value = (diff << 10) + diff2 + 0x10000; - // will generate four UTF-8 bytes - // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((value >> 18) | 0b11110000); - *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((value & 0b111111) | 0b10000000); - pos += 2; + buf += k; } } - return result(error_code::SUCCESS, utf8_output - start); + + return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output); } +/* end file src/icelake/icelake_convert_utf32_to_utf16.inl.cpp */ +/* begin file src/icelake/icelake_ascii_validation.inl.cpp */ +// file included directly -} // namespace utf16_to_utf8 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf +bool validate_ascii(const char *buf, size_t len) { + const char *end = buf + len; + const __m512i ascii = _mm512_set1_epi8((uint8_t)0x80); + __m512i running_or = _mm512_setzero_si512(); + for (; end - buf >= 64; buf += 64) { + const __m512i utf8 = _mm512_loadu_si512((const __m512i *)buf); + running_or = _mm512_ternarylogic_epi32(running_or, utf8, ascii, + 0xf8); // running_or | (utf8 & ascii) + } + if (buf < end) { + const __m512i utf8 = _mm512_maskz_loadu_epi8( + (uint64_t(1) << (end - buf)) - 1, (const __m512i *)buf); + running_or = _mm512_ternarylogic_epi32(running_or, utf8, ascii, + 0xf8); // running_or | (utf8 & ascii) + } + return (_mm512_test_epi8_mask(running_or, running_or) == 0); +} +/* end file src/icelake/icelake_ascii_validation.inl.cpp */ +/* begin file src/icelake/icelake_utf32_validation.inl.cpp */ +// file included directly -#endif -/* end file src/scalar/utf16_to_utf8/utf16_to_utf8.h */ +const char32_t *validate_utf32(const char32_t *buf, size_t len) { + if (len < 16) { + return buf; + } + const char32_t *end = buf + len - 16; -/* begin file src/scalar/utf16_to_utf32/valid_utf16_to_utf32.h */ -#ifndef SIMDUTF_VALID_UTF16_TO_UTF32_H -#define SIMDUTF_VALID_UTF16_TO_UTF32_H + const __m512i offset = _mm512_set1_epi32((uint32_t)0xffff2000); + __m512i currentmax = _mm512_setzero_si512(); + __m512i currentoffsetmax = _mm512_setzero_si512(); -namespace simdutf { -namespace scalar { -namespace { -namespace utf16_to_utf32 { + while (buf <= end) { + __m512i utf32 = _mm512_loadu_si512((const __m512i *)buf); + buf += 16; + currentoffsetmax = + _mm512_max_epu32(_mm512_add_epi32(utf32, offset), currentoffsetmax); + currentmax = _mm512_max_epu32(utf32, currentmax); + } -template -inline size_t convert_valid(const char16_t *buf, size_t len, - char32_t *utf32_output) { - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char32_t *start{utf32_output}; - while (pos < len) { - uint16_t word = - !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; - if ((word & 0xF800) != 0xD800) { - // No surrogate pair, extend 16-bit word to 32-bit word - *utf32_output++ = char32_t(word); - pos++; - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - if (pos + 1 >= len) { - return 0; - } // minimal bound checking - uint16_t next_word = !match_system(big_endian) - ? utf16::swap_bytes(data[pos + 1]) - : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - pos += 2; - } + const __m512i standardmax = _mm512_set1_epi32((uint32_t)0x10ffff); + const __m512i standardoffsetmax = _mm512_set1_epi32((uint32_t)0xfffff7ff); + __m512i is_zero = + _mm512_xor_si512(_mm512_max_epu32(currentmax, standardmax), standardmax); + if (_mm512_test_epi8_mask(is_zero, is_zero) != 0) { + return nullptr; } - return utf32_output - start; + is_zero = _mm512_xor_si512( + _mm512_max_epu32(currentoffsetmax, standardoffsetmax), standardoffsetmax); + if (_mm512_test_epi8_mask(is_zero, is_zero) != 0) { + return nullptr; + } + + return buf; } +/* end file src/icelake/icelake_utf32_validation.inl.cpp */ +/* begin file src/icelake/icelake_convert_latin1_to_utf8.inl.cpp */ +// file included directly -} // namespace utf16_to_utf32 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf +static inline size_t latin1_to_utf8_avx512_vec(__m512i input, size_t input_len, + char *utf8_output, + int mask_output) { + __mmask64 nonascii = _mm512_movepi8_mask(input); + size_t output_size = input_len + (size_t)count_ones(nonascii); -#endif -/* end file src/scalar/utf16_to_utf32/valid_utf16_to_utf32.h */ -/* begin file src/scalar/utf16_to_utf32/utf16_to_utf32.h */ -#ifndef SIMDUTF_UTF16_TO_UTF32_H -#define SIMDUTF_UTF16_TO_UTF32_H + // Mask to denote whether the byte is a leading byte that is not ascii + __mmask64 sixth = _mm512_cmpge_epu8_mask( + input, _mm512_set1_epi8(-64)); // binary representation of -64: 1100 0000 -namespace simdutf { -namespace scalar { -namespace { -namespace utf16_to_utf32 { + const uint64_t alternate_bits = UINT64_C(0x5555555555555555); + uint64_t ascii = ~nonascii; + // the bits in ascii are inverted and zeros are interspersed in between them + uint64_t maskA = ~_pdep_u64(ascii, alternate_bits); + uint64_t maskB = ~_pdep_u64(ascii >> 32, alternate_bits); -template -inline size_t convert(const char16_t *buf, size_t len, char32_t *utf32_output) { - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char32_t *start{utf32_output}; - while (pos < len) { - uint16_t word = - !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; - if ((word & 0xF800) != 0xD800) { - // No surrogate pair, extend 16-bit word to 32-bit word - *utf32_output++ = char32_t(word); - pos++; - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - if (diff > 0x3FF) { - return 0; - } - if (pos + 1 >= len) { - return 0; - } // minimal bound checking - uint16_t next_word = !match_system(big_endian) - ? utf16::swap_bytes(data[pos + 1]) - : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if (diff2 > 0x3FF) { - return 0; - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - pos += 2; - } - } - return utf32_output - start; -} + // interleave bytes from top and bottom halves (abcd...ABCD -> aAbBcCdD) + __m512i input_interleaved = _mm512_permutexvar_epi8( + _mm512_set_epi32(0x3f1f3e1e, 0x3d1d3c1c, 0x3b1b3a1a, 0x39193818, + 0x37173616, 0x35153414, 0x33133212, 0x31113010, + 0x2f0f2e0e, 0x2d0d2c0c, 0x2b0b2a0a, 0x29092808, + 0x27072606, 0x25052404, 0x23032202, 0x21012000), + input); -template -inline result convert_with_errors(const char16_t *buf, size_t len, - char32_t *utf32_output) { - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char32_t *start{utf32_output}; - while (pos < len) { - uint16_t word = - !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; - if ((word & 0xF800) != 0xD800) { - // No surrogate pair, extend 16-bit word to 32-bit word - *utf32_output++ = char32_t(word); - pos++; - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - if (diff > 0x3FF) { - return result(error_code::SURROGATE, pos); - } - if (pos + 1 >= len) { - return result(error_code::SURROGATE, pos); - } // minimal bound checking - uint16_t next_word = !match_system(big_endian) - ? utf16::swap_bytes(data[pos + 1]) - : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if (diff2 > 0x3FF) { - return result(error_code::SURROGATE, pos); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - pos += 2; - } - } - return result(error_code::SUCCESS, utf32_output - start); -} + // double size of each byte, and insert the leading byte 1100 0010 -} // namespace utf16_to_utf32 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf + /* + upscale the bytes to 16-bit value, adding the 0b11000000 leading byte in the + process. We adjust for the bytes that have their two most significant bits. + This takes care of the first 32 bytes, assuming we interleaved the bytes. */ + __m512i outputA = + _mm512_shldi_epi16(input_interleaved, _mm512_set1_epi8(-62), 8); + outputA = _mm512_mask_add_epi16( + outputA, (__mmask32)sixth, outputA, + _mm512_set1_epi16(1 - 0x4000)); // 1- 0x4000 = 1100 0000 0000 0001???? -#endif -/* end file src/scalar/utf16_to_utf32/utf16_to_utf32.h */ + // in the second 32-bit half, set first or second option based on whether + // original input is leading byte (second case) or not (first case) + __m512i leadingB = + _mm512_mask_blend_epi16((__mmask32)(sixth >> 32), + _mm512_set1_epi16(0x00c2), // 0000 0000 1101 0010 + _mm512_set1_epi16(0x40c3)); // 0100 0000 1100 0011 + __m512i outputB = _mm512_ternarylogic_epi32( + input_interleaved, leadingB, _mm512_set1_epi16((short)0xff00), + (240 & 170) ^ 204); // (input_interleaved & 0xff00) ^ leadingB -/* begin file src/scalar/utf8_to_utf16/valid_utf8_to_utf16.h */ -#ifndef SIMDUTF_VALID_UTF8_TO_UTF16_H -#define SIMDUTF_VALID_UTF8_TO_UTF16_H + // prune redundant bytes + outputA = _mm512_maskz_compress_epi8(maskA, outputA); + outputB = _mm512_maskz_compress_epi8(maskB, outputB); -namespace simdutf { -namespace scalar { -namespace { -namespace utf8_to_utf16 { + size_t output_sizeA = (size_t)count_ones((uint32_t)nonascii) + 32; -template -inline size_t convert_valid(const char *buf, size_t len, - char16_t *utf16_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - while (pos < len) { - // try to convert the next block of 8 ASCII bytes - if (pos + 8 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if ((v & 0x8080808080808080) == 0) { - size_t final_pos = pos + 8; - while (pos < final_pos) { - *utf16_output++ = !match_system(big_endian) - ? char16_t(utf16::swap_bytes(buf[pos])) - : char16_t(buf[pos]); - pos++; - } - continue; - } - } - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *utf16_output++ = !match_system(big_endian) - ? char16_t(utf16::swap_bytes(leading_byte)) - : char16_t(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == 0b11000000) { - // We have a two-byte UTF-8, it should become - // a single UTF-16 word. - if (pos + 1 >= len) { - break; - } // minimal bound checking - uint16_t code_point = uint16_t(((leading_byte & 0b00011111) << 6) | - (data[pos + 1] & 0b00111111)); - if (!match_system(big_endian)) { - code_point = utf16::swap_bytes(uint16_t(code_point)); - } - *utf16_output++ = char16_t(code_point); - pos += 2; - } else if ((leading_byte & 0b11110000) == 0b11100000) { - // We have a three-byte UTF-8, it should become - // a single UTF-16 word. - if (pos + 2 >= len) { - break; - } // minimal bound checking - uint16_t code_point = uint16_t(((leading_byte & 0b00001111) << 12) | - ((data[pos + 1] & 0b00111111) << 6) | - (data[pos + 2] & 0b00111111)); - if (!match_system(big_endian)) { - code_point = utf16::swap_bytes(uint16_t(code_point)); - } - *utf16_output++ = char16_t(code_point); - pos += 3; - } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 - // we have a 4-byte UTF-8 word. - if (pos + 3 >= len) { - break; - } // minimal bound checking - uint32_t code_point = ((leading_byte & 0b00000111) << 18) | - ((data[pos + 1] & 0b00111111) << 12) | - ((data[pos + 2] & 0b00111111) << 6) | - (data[pos + 3] & 0b00111111); - code_point -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); - if (!match_system(big_endian)) { - high_surrogate = utf16::swap_bytes(high_surrogate); - low_surrogate = utf16::swap_bytes(low_surrogate); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - pos += 4; + if (mask_output) { + if (input_len > 32) { // is the second half of the input vector used? + __mmask64 write_mask = _bzhi_u64(~0ULL, (unsigned int)output_sizeA); + _mm512_mask_storeu_epi8(utf8_output, write_mask, outputA); + utf8_output += output_sizeA; + write_mask = _bzhi_u64(~0ULL, (unsigned int)(output_size - output_sizeA)); + _mm512_mask_storeu_epi8(utf8_output, write_mask, outputB); } else { - // we may have a continuation but we do not do error checking - return 0; + __mmask64 write_mask = _bzhi_u64(~0ULL, (unsigned int)output_size); + _mm512_mask_storeu_epi8(utf8_output, write_mask, outputA); } + } else { + _mm512_storeu_si512(utf8_output, outputA); + utf8_output += output_sizeA; + _mm512_storeu_si512(utf8_output, outputB); } - return utf16_output - start; + return output_size; } -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf +static inline size_t latin1_to_utf8_avx512_branch(__m512i input, + char *utf8_output) { + __mmask64 nonascii = _mm512_movepi8_mask(input); + if (nonascii) { + return latin1_to_utf8_avx512_vec(input, 64, utf8_output, 0); + } else { + _mm512_storeu_si512(utf8_output, input); + return 64; + } +} -#endif -/* end file src/scalar/utf8_to_utf16/valid_utf8_to_utf16.h */ -/* begin file src/scalar/utf8_to_utf16/utf8_to_utf16.h */ -#ifndef SIMDUTF_UTF8_TO_UTF16_H -#define SIMDUTF_UTF8_TO_UTF16_H +size_t latin1_to_utf8_avx512_start(const char *buf, size_t len, + char *utf8_output) { + char *start = utf8_output; + size_t pos = 0; + // if there's at least 128 bytes remaining, we don't need to mask the output + for (; pos + 128 <= len; pos += 64) { + __m512i input = _mm512_loadu_si512((__m512i *)(buf + pos)); + utf8_output += latin1_to_utf8_avx512_branch(input, utf8_output); + } + // in the last 128 bytes, the first 64 may require masking the output + if (pos + 64 <= len) { + __m512i input = _mm512_loadu_si512((__m512i *)(buf + pos)); + utf8_output += latin1_to_utf8_avx512_vec(input, 64, utf8_output, 1); + pos += 64; + } + // with the last 64 bytes, the input also needs to be masked + if (pos < len) { + __mmask64 load_mask = _bzhi_u64(~0ULL, (unsigned int)(len - pos)); + __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)(buf + pos)); + utf8_output += latin1_to_utf8_avx512_vec(input, len - pos, utf8_output, 1); + } + return (size_t)(utf8_output - start); +} +/* end file src/icelake/icelake_convert_latin1_to_utf8.inl.cpp */ +/* begin file src/icelake/icelake_convert_latin1_to_utf16.inl.cpp */ +// file included directly +template +size_t icelake_convert_latin1_to_utf16(const char *latin1_input, size_t len, + char16_t *utf16_output) { + size_t rounded_len = len & ~0x1F; // Round down to nearest multiple of 32 -namespace simdutf { -namespace scalar { -namespace { -namespace utf8_to_utf16 { + __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + for (size_t i = 0; i < rounded_len; i += 32) { + // Load 32 Latin1 characters into a 256-bit register + __m256i in = _mm256_loadu_si256((__m256i *)&latin1_input[i]); + // Zero extend each set of 8 Latin1 characters to 32 16-bit integers + __m512i out = _mm512_cvtepu8_epi16(in); + if (big_endian) { + out = _mm512_shuffle_epi8(out, byteflip); + } + // Store the results back to memory + _mm512_storeu_si512((__m512i *)&utf16_output[i], out); + } + if (rounded_len != len) { + uint32_t mask = uint32_t(1 << (len - rounded_len)) - 1; + __m256i in = _mm256_maskz_loadu_epi8(mask, latin1_input + rounded_len); -template -inline size_t convert(const char *buf, size_t len, char16_t *utf16_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - size_t final_pos = pos + 16; - while (pos < final_pos) { - *utf16_output++ = !match_system(big_endian) - ? char16_t(utf16::swap_bytes(buf[pos])) - : char16_t(buf[pos]); - pos++; - } - continue; - } + // Zero extend each set of 8 Latin1 characters to 32 16-bit integers + __m512i out = _mm512_cvtepu8_epi16(in); + if (big_endian) { + out = _mm512_shuffle_epi8(out, byteflip); } + // Store the results back to memory + _mm512_mask_storeu_epi16(utf16_output + rounded_len, mask, out); + } - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *utf16_output++ = !match_system(big_endian) - ? char16_t(utf16::swap_bytes(leading_byte)) - : char16_t(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == 0b11000000) { - // We have a two-byte UTF-8, it should become - // a single UTF-16 word. - if (pos + 1 >= len) { - return 0; - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } - // range check - uint32_t code_point = - (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); - if (code_point < 0x80 || 0x7ff < code_point) { - return 0; - } - if (!match_system(big_endian)) { - code_point = uint32_t(utf16::swap_bytes(uint16_t(code_point))); - } - *utf16_output++ = char16_t(code_point); - pos += 2; - } else if ((leading_byte & 0b11110000) == 0b11100000) { - // We have a three-byte UTF-8, it should become - // a single UTF-16 word. - if (pos + 2 >= len) { - return 0; - } // minimal bound checking + return len; +} +/* end file src/icelake/icelake_convert_latin1_to_utf16.inl.cpp */ +/* begin file src/icelake/icelake_convert_latin1_to_utf32.inl.cpp */ +std::pair +avx512_convert_latin1_to_utf32(const char *buf, size_t len, + char32_t *utf32_output) { + size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16 - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return 0; - } - // range check - uint32_t code_point = (leading_byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); - if (code_point < 0x800 || 0xffff < code_point || - (0xd7ff < code_point && code_point < 0xe000)) { - return 0; - } - if (!match_system(big_endian)) { - code_point = uint32_t(utf16::swap_bytes(uint16_t(code_point))); - } - *utf16_output++ = char16_t(code_point); - pos += 3; - } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 - // we have a 4-byte UTF-8 word. - if (pos + 3 >= len) { - return 0; - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return 0; - } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { - return 0; - } + for (size_t i = 0; i < rounded_len; i += 16) { + // Load 16 Latin1 characters into a 128-bit register + __m128i in = _mm_loadu_si128((__m128i *)&buf[i]); - // range check - uint32_t code_point = (leading_byte & 0b00000111) << 18 | - (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | - (data[pos + 3] & 0b00111111); - if (code_point <= 0xffff || 0x10ffff < code_point) { - return 0; - } - code_point -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); - if (!match_system(big_endian)) { - high_surrogate = utf16::swap_bytes(high_surrogate); - low_surrogate = utf16::swap_bytes(low_surrogate); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - pos += 4; - } else { - return 0; - } + // Zero extend each set of 8 Latin1 characters to 16 32-bit integers using + // vpmovzxbd + __m512i out = _mm512_cvtepu8_epi32(in); + + // Store the results back to memory + _mm512_storeu_si512((__m512i *)&utf32_output[i], out); } - return utf16_output - start; + + // Return pointers pointing to where we left off + return std::make_pair(buf + rounded_len, utf32_output + rounded_len); } +/* end file src/icelake/icelake_convert_latin1_to_utf32.inl.cpp */ +/* begin file src/icelake/icelake_base64.inl.cpp */ +// file included directly +/** + * References and further reading: + * + * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the + * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. + * https://arxiv.org/abs/1910.05109 + * + * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 + * Instructions, ACM Transactions on the Web 12 (3), 2018. + * https://arxiv.org/abs/1704.00605 + * + * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. + * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, + * Request for Comments: 4648. + * + * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. + * http://www.alfredklomp.com/programming/sse-base64/. (2014). + * + * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD + * acceleration. https://github.com/aklomp/base64. (2014). + * + * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). + * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ + * + * Nick Kopp. 2013. Base64 Encoding on a GPU. + * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). + */ -template -inline result convert_with_errors(const char *buf, size_t len, - char16_t *utf16_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - size_t final_pos = pos + 16; - while (pos < final_pos) { - *utf16_output++ = !match_system(big_endian) - ? char16_t(utf16::swap_bytes(buf[pos])) - : char16_t(buf[pos]); - pos++; - } - continue; - } - } - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *utf16_output++ = !match_system(big_endian) - ? char16_t(utf16::swap_bytes(leading_byte)) - : char16_t(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == 0b11000000) { - // We have a two-byte UTF-8, it should become - // a single UTF-16 word. - if (pos + 1 >= len) { - return result(error_code::TOO_SHORT, pos); - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - uint32_t code_point = - (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); - if (code_point < 0x80 || 0x7ff < code_point) { - return result(error_code::OVERLONG, pos); - } - if (!match_system(big_endian)) { - code_point = uint32_t(utf16::swap_bytes(uint16_t(code_point))); - } - *utf16_output++ = char16_t(code_point); - pos += 2; - } else if ((leading_byte & 0b11110000) == 0b11100000) { - // We have a three-byte UTF-8, it should become - // a single UTF-16 word. - if (pos + 2 >= len) { - return result(error_code::TOO_SHORT, pos); - } // minimal bound checking +struct block64 { + __m512i chunks[1]; +}; - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - uint32_t code_point = (leading_byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); - if ((code_point < 0x800) || (0xffff < code_point)) { - return result(error_code::OVERLONG, pos); - } - if (0xd7ff < code_point && code_point < 0xe000) { - return result(error_code::SURROGATE, pos); - } - if (!match_system(big_endian)) { - code_point = uint32_t(utf16::swap_bytes(uint16_t(code_point))); - } - *utf16_output++ = char16_t(code_point); - pos += 3; - } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 - // we have a 4-byte UTF-8 word. - if (pos + 3 >= len) { - return result(error_code::TOO_SHORT, pos); - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } +template +size_t encode_base64(char *dst, const char *src, size_t srclen, + base64_options options) { + // credit: Wojciech Muła + const uint8_t *input = (const uint8_t *)src; - // range check - uint32_t code_point = (leading_byte & 0b00000111) << 18 | - (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | - (data[pos + 3] & 0b00111111); - if (code_point <= 0xffff) { - return result(error_code::OVERLONG, pos); - } - if (0x10ffff < code_point) { - return result(error_code::TOO_LARGE, pos); - } - code_point -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); - if (!match_system(big_endian)) { - high_surrogate = utf16::swap_bytes(high_surrogate); - low_surrogate = utf16::swap_bytes(low_surrogate); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - pos += 4; - } else { - // we either have too many continuation bytes or an invalid leading byte - if ((leading_byte & 0b11000000) == 0b10000000) { - return result(error_code::TOO_LONG, pos); - } else { - return result(error_code::HEADER_BITS, pos); - } - } + uint8_t *out = (uint8_t *)dst; + static const char *lookup_tbl = + base64_url + ? "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" + : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + + const __m512i shuffle_input = _mm512_setr_epi32( + 0x01020001, 0x04050304, 0x07080607, 0x0a0b090a, 0x0d0e0c0d, 0x10110f10, + 0x13141213, 0x16171516, 0x191a1819, 0x1c1d1b1c, 0x1f201e1f, 0x22232122, + 0x25262425, 0x28292728, 0x2b2c2a2b, 0x2e2f2d2e); + const __m512i lookup = + _mm512_loadu_si512(reinterpret_cast(lookup_tbl)); + const __m512i multi_shifts = _mm512_set1_epi64(UINT64_C(0x3036242a1016040a)); + size_t size = srclen; + __mmask64 input_mask = 0xffffffffffff; // (1 << 48) - 1 + while (size >= 48) { + const __m512i v = _mm512_maskz_loadu_epi8( + input_mask, reinterpret_cast(input)); + const __m512i in = _mm512_permutexvar_epi8(shuffle_input, v); + const __m512i indices = _mm512_multishift_epi64_epi8(multi_shifts, in); + const __m512i result = _mm512_permutexvar_epi8(indices, lookup); + _mm512_storeu_si512(reinterpret_cast<__m512i *>(out), result); + out += 64; + input += 48; + size -= 48; + } + input_mask = ((__mmask64)1 << size) - 1; + const __m512i v = _mm512_maskz_loadu_epi8( + input_mask, reinterpret_cast(input)); + const __m512i in = _mm512_permutexvar_epi8(shuffle_input, v); + const __m512i indices = _mm512_multishift_epi64_epi8(multi_shifts, in); + bool padding_needed = + (((options & base64_url) == 0) ^ + ((options & base64_reverse_padding) == base64_reverse_padding)); + size_t padding_amount = ((size % 3) > 0) ? (3 - (size % 3)) : 0; + size_t output_len = ((size + 2) / 3) * 4; + size_t non_padded_output_len = output_len - padding_amount; + if (!padding_needed) { + output_len = non_padded_output_len; } - return result(error_code::SUCCESS, utf16_output - start); + __mmask64 output_mask = output_len == 64 ? (__mmask64)UINT64_MAX + : ((__mmask64)1 << output_len) - 1; + __m512i result = _mm512_mask_permutexvar_epi8( + _mm512_set1_epi8('='), ((__mmask64)1 << non_padded_output_len) - 1, + indices, lookup); + _mm512_mask_storeu_epi8(reinterpret_cast<__m512i *>(out), output_mask, + result); + return (size_t)(out - (uint8_t *)dst) + output_len; } -/** - * When rewind_and_convert_with_errors is called, we are pointing at 'buf' and - * we have up to len input bytes left, and we encountered some error. It is - * possible that the error is at 'buf' exactly, but it could also be in the - * previous bytes (up to 3 bytes back). - * - * prior_bytes indicates how many bytes, prior to 'buf' may belong to the - * current memory section and can be safely accessed. We prior_bytes to access - * safely up to three bytes before 'buf'. - * - * The caller is responsible to ensure that len > 0. - * - * If the error is believed to have occurred prior to 'buf', the count value - * contain in the result will be SIZE_T - 1, SIZE_T - 2, or SIZE_T - 3. - */ -template -inline result rewind_and_convert_with_errors(size_t prior_bytes, - const char *buf, size_t len, - char16_t *utf16_output) { - size_t extra_len{0}; - // We potentially need to go back in time and find a leading byte. - // In theory '3' would be sufficient, but sometimes the error can go back - // quite far. - size_t how_far_back = prior_bytes; - // size_t how_far_back = 3; // 3 bytes in the past + current position - // if(how_far_back >= prior_bytes) { how_far_back = prior_bytes; } - bool found_leading_bytes{false}; - // important: it is i <= how_far_back and not 'i < how_far_back'. - for (size_t i = 0; i <= how_far_back; i++) { - unsigned char byte = buf[-static_cast(i)]; - found_leading_bytes = ((byte & 0b11000000) != 0b10000000); - if (found_leading_bytes) { - if (i > 0 && byte < 128) { - // If we had to go back and the leading byte is ascii - // then we can stop right away. - return result(error_code::TOO_LONG, 0 - i + 1); - } - buf -= i; - extra_len = i; - break; - } +template +static inline uint64_t to_base64_mask(block64 *b, uint64_t *error, + uint64_t input_mask = UINT64_MAX) { + __m512i input = b->chunks[0]; + const __m512i ascii_space_tbl = _mm512_set_epi8( + 0, 0, 13, 12, 0, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 13, 12, 0, 10, + 9, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 13, 12, 0, 10, 9, 0, 0, 0, 0, 0, 0, + 0, 0, 32, 0, 0, 13, 12, 0, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 32); + __m512i lookup0; + if (base64_url) { + lookup0 = _mm512_set_epi8( + -128, -128, -128, -128, -128, -128, 61, 60, 59, 58, 57, 56, 55, 54, 53, + 52, -128, -128, 62, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -1, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -1, + -128, -128, -1, -1, -128, -128, -128, -128, -128, -128, -128, -128, -1); + } else { + lookup0 = _mm512_set_epi8( + -128, -128, -128, -128, -128, -128, 61, 60, 59, 58, 57, 56, 55, 54, 53, + 52, 63, -128, -128, -128, 62, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -1, -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -1, -128, + -128, -1, -1, -128, -128, -128, -128, -128, -128, -128, -128, -128); } - // - // It is possible for this function to return a negative count in its result. - // C++ Standard Section 18.1 defines size_t is in which is described - // in C Standard as . C Standard Section 4.1.5 defines size_t as an - // unsigned integral type of the result of the sizeof operator - // - // An unsigned type will simply wrap round arithmetically (well defined). - // - if (!found_leading_bytes) { - // If how_far_back == 3, we may have four consecutive continuation bytes!!! - // [....] [continuation] [continuation] [continuation] | [buf is - // continuation] Or we possibly have a stream that does not start with a - // leading byte. - return result(error_code::TOO_LONG, 0 - how_far_back); + __m512i lookup1; + if (base64_url) { + lookup1 = _mm512_set_epi8( + -128, -128, -128, -128, -128, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, + 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, -128, + 63, -128, -128, -128, -128, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, + 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -128); + } else { + lookup1 = _mm512_set_epi8( + -128, -128, -128, -128, -128, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, + 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, -128, + -128, -128, -128, -128, -128, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -128); } - result res = convert_with_errors(buf, len + extra_len, utf16_output); - if (res.error) { - res.count -= extra_len; + + const __m512i translated = _mm512_permutex2var_epi8(lookup0, input, lookup1); + const __m512i combined = _mm512_or_si512(translated, input); + const __mmask64 mask = _mm512_movepi8_mask(combined) & input_mask; + if (mask) { + const __mmask64 spaces = + _mm512_cmpeq_epi8_mask(_mm512_shuffle_epi8(ascii_space_tbl, input), + input) & + input_mask; + *error = (mask ^ spaces); } - return res; -} + b->chunks[0] = translated; -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf + return mask | (~input_mask); +} -#endif -/* end file src/scalar/utf8_to_utf16/utf8_to_utf16.h */ +static inline void copy_block(block64 *b, char *output) { + _mm512_storeu_si512(reinterpret_cast<__m512i *>(output), b->chunks[0]); +} -/* begin file src/scalar/utf8_to_utf32/valid_utf8_to_utf32.h */ -#ifndef SIMDUTF_VALID_UTF8_TO_UTF32_H -#define SIMDUTF_VALID_UTF8_TO_UTF32_H +static inline uint64_t compress_block(block64 *b, uint64_t mask, char *output) { + uint64_t nmask = ~mask; + __m512i c = _mm512_maskz_compress_epi8(nmask, b->chunks[0]); + _mm512_storeu_si512(reinterpret_cast<__m512i *>(output), c); + return _mm_popcnt_u64(nmask); +} -namespace simdutf { -namespace scalar { -namespace { -namespace utf8_to_utf32 { +// The caller of this function is responsible to ensure that there are 64 bytes +// available from reading at src. The data is read into a block64 structure. +static inline void load_block(block64 *b, const char *src) { + b->chunks[0] = _mm512_loadu_si512(reinterpret_cast(src)); +} -inline size_t convert_valid(const char *buf, size_t len, - char32_t *utf32_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char32_t *start{utf32_output}; - while (pos < len) { - // try to convert the next block of 8 ASCII bytes - if (pos + 8 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if ((v & 0x8080808080808080) == 0) { - size_t final_pos = pos + 8; - while (pos < final_pos) { - *utf32_output++ = char32_t(buf[pos]); - pos++; - } - continue; - } - } - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *utf32_output++ = char32_t(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == 0b11000000) { - // We have a two-byte UTF-8 - if (pos + 1 >= len) { - break; - } // minimal bound checking - *utf32_output++ = char32_t(((leading_byte & 0b00011111) << 6) | - (data[pos + 1] & 0b00111111)); - pos += 2; - } else if ((leading_byte & 0b11110000) == 0b11100000) { - // We have a three-byte UTF-8 - if (pos + 2 >= len) { - break; - } // minimal bound checking - *utf32_output++ = char32_t(((leading_byte & 0b00001111) << 12) | - ((data[pos + 1] & 0b00111111) << 6) | - (data[pos + 2] & 0b00111111)); - pos += 3; - } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 - // we have a 4-byte UTF-8 word. - if (pos + 3 >= len) { - break; - } // minimal bound checking - uint32_t code_word = ((leading_byte & 0b00000111) << 18) | - ((data[pos + 1] & 0b00111111) << 12) | - ((data[pos + 2] & 0b00111111) << 6) | - (data[pos + 3] & 0b00111111); - *utf32_output++ = char32_t(code_word); - pos += 4; - } else { - // we may have a continuation but we do not do error checking - return 0; - } - } - return utf32_output - start; +static inline void load_block_partial(block64 *b, const char *src, + __mmask64 input_mask) { + b->chunks[0] = _mm512_maskz_loadu_epi8( + input_mask, reinterpret_cast(src)); } -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf +// The caller of this function is responsible to ensure that there are 128 bytes +// available from reading at src. The data is read into a block64 structure. +static inline void load_block(block64 *b, const char16_t *src) { + __m512i m1 = _mm512_loadu_si512(reinterpret_cast(src)); + __m512i m2 = _mm512_loadu_si512(reinterpret_cast(src + 32)); + __m512i p = _mm512_packus_epi16(m1, m2); + b->chunks[0] = + _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7), p); +} -#endif -/* end file src/scalar/utf8_to_utf32/valid_utf8_to_utf32.h */ -/* begin file src/scalar/utf8_to_utf32/utf8_to_utf32.h */ -#ifndef SIMDUTF_UTF8_TO_UTF32_H -#define SIMDUTF_UTF8_TO_UTF32_H +static inline void load_block_partial(block64 *b, const char16_t *src, + __mmask64 input_mask) { + __m512i m1 = _mm512_maskz_loadu_epi16((__mmask32)input_mask, + reinterpret_cast(src)); + __m512i m2 = + _mm512_maskz_loadu_epi16((__mmask32)(input_mask >> 32), + reinterpret_cast(src + 32)); + __m512i p = _mm512_packus_epi16(m1, m2); + b->chunks[0] = + _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7), p); +} -namespace simdutf { -namespace scalar { -namespace { -namespace utf8_to_utf32 { +static inline void base64_decode(char *out, __m512i str) { + const __m512i merge_ab_and_bc = + _mm512_maddubs_epi16(str, _mm512_set1_epi32(0x01400140)); + const __m512i merged = + _mm512_madd_epi16(merge_ab_and_bc, _mm512_set1_epi32(0x00011000)); + const __m512i pack = _mm512_set_epi8( + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 61, 62, 56, 57, 58, + 52, 53, 54, 48, 49, 50, 44, 45, 46, 40, 41, 42, 36, 37, 38, 32, 33, 34, + 28, 29, 30, 24, 25, 26, 20, 21, 22, 16, 17, 18, 12, 13, 14, 8, 9, 10, 4, + 5, 6, 0, 1, 2); + const __m512i shuffled = _mm512_permutexvar_epi8(pack, merged); + _mm512_mask_storeu_epi8( + (__m512i *)out, 0xffffffffffff, + shuffled); // mask would be 0xffffffffffff since we write 48 bytes. +} +// decode 64 bytes and output 48 bytes +static inline void base64_decode_block(char *out, const char *src) { + base64_decode(out, + _mm512_loadu_si512(reinterpret_cast(src))); +} +static inline void base64_decode_block(char *out, block64 *b) { + base64_decode(out, b->chunks[0]); +} -inline size_t convert(const char *buf, size_t len, char32_t *utf32_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char32_t *start{utf32_output}; - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - size_t final_pos = pos + 16; - while (pos < final_pos) { - *utf32_output++ = char32_t(buf[pos]); - pos++; - } - continue; - } +template +full_result +compress_decode_base64(char *dst, const chartype *src, size_t srclen, + base64_options options, + last_chunk_handling_options last_chunk_options) { + (void)options; + const uint8_t *to_base64 = base64_url ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value; + size_t equallocation = + srclen; // location of the first padding character if any + size_t equalsigns = 0; + // skip trailing spaces + while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && + to_base64[uint8_t(src[srclen - 1])] == 64) { + srclen--; + } + if (srclen > 0 && src[srclen - 1] == '=') { + equallocation = srclen - 1; + srclen--; + equalsigns = 1; + // skip trailing spaces + while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && + to_base64[uint8_t(src[srclen - 1])] == 64) { + srclen--; } - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *utf32_output++ = char32_t(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == 0b11000000) { - // We have a two-byte UTF-8 - if (pos + 1 >= len) { - return 0; - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } - // range check - uint32_t code_point = - (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); - if (code_point < 0x80 || 0x7ff < code_point) { - return 0; - } - *utf32_output++ = char32_t(code_point); - pos += 2; - } else if ((leading_byte & 0b11110000) == 0b11100000) { - // We have a three-byte UTF-8 - if (pos + 2 >= len) { - return 0; - } // minimal bound checking + if (srclen > 0 && src[srclen - 1] == '=') { + equallocation = srclen - 1; + srclen--; + equalsigns = 2; + } + } + if (srclen == 0) { + if (equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, 0, 0}; + } + const chartype *const srcinit = src; + const char *const dstinit = dst; + const chartype *const srcend = src + srclen; - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return 0; - } - // range check - uint32_t code_point = (leading_byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); - if (code_point < 0x800 || 0xffff < code_point || - (0xd7ff < code_point && code_point < 0xe000)) { - return 0; - } - *utf32_output++ = char32_t(code_point); - pos += 3; - } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 - // we have a 4-byte UTF-8 word. - if (pos + 3 >= len) { - return 0; - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return 0; + // figure out why block_size == 2 is sometimes best??? + constexpr size_t block_size = 6; + char buffer[block_size * 64]; + char *bufferptr = buffer; + if (srclen >= 64) { + const chartype *const srcend64 = src + srclen - 64; + while (src <= srcend64) { + block64 b; + load_block(&b, src); + src += 64; + uint64_t error = 0; + uint64_t badcharmask = to_base64_mask(&b, &error); + if (error) { + src -= 64; + size_t error_offset = _tzcnt_u64(error); + return {error_code::INVALID_BASE64_CHARACTER, + size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { - return 0; + if (badcharmask != 0) { + // optimization opportunity: check for simple masks like those made of + // continuous 1s followed by continuous 0s. And masks containing a + // single bad character. + bufferptr += compress_block(&b, badcharmask, bufferptr); + } else if (bufferptr != buffer) { + copy_block(&b, bufferptr); + bufferptr += 64; + } else { + base64_decode_block(dst, &b); + dst += 48; } - - // range check - uint32_t code_point = (leading_byte & 0b00000111) << 18 | - (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | - (data[pos + 3] & 0b00111111); - if (code_point <= 0xffff || 0x10ffff < code_point) { - return 0; + if (bufferptr >= (block_size - 1) * 64 + buffer) { + for (size_t i = 0; i < (block_size - 1); i++) { + base64_decode_block(dst, buffer + i * 64); + dst += 48; + } + std::memcpy(buffer, buffer + (block_size - 1) * 64, + 64); // 64 might be too much + bufferptr -= (block_size - 1) * 64; } - *utf32_output++ = char32_t(code_point); - pos += 4; - } else { - return 0; } } - return utf32_output - start; -} -inline result convert_with_errors(const char *buf, size_t len, - char32_t *utf32_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char32_t *start{utf32_output}; - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - size_t final_pos = pos + 16; - while (pos < final_pos) { - *utf32_output++ = char32_t(buf[pos]); - pos++; - } - continue; - } + int last_block_len = (int)(srcend - src); + if (last_block_len != 0) { + __mmask64 input_mask = ((__mmask64)1 << last_block_len) - 1; + block64 b; + load_block_partial(&b, src, input_mask); + uint64_t error = 0; + uint64_t badcharmask = to_base64_mask(&b, &error, input_mask); + if (error) { + size_t error_offset = _tzcnt_u64(error); + return {error_code::INVALID_BASE64_CHARACTER, + size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; } - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *utf32_output++ = char32_t(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == 0b11000000) { - // We have a two-byte UTF-8 - if (pos + 1 >= len) { - return result(error_code::TOO_SHORT, pos); - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - uint32_t code_point = - (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); - if (code_point < 0x80 || 0x7ff < code_point) { - return result(error_code::OVERLONG, pos); - } - *utf32_output++ = char32_t(code_point); - pos += 2; - } else if ((leading_byte & 0b11110000) == 0b11100000) { - // We have a three-byte UTF-8 - if (pos + 2 >= len) { - return result(error_code::TOO_SHORT, pos); - } // minimal bound checking + src += last_block_len; + bufferptr += compress_block(&b, badcharmask, bufferptr); + } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - uint32_t code_point = (leading_byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); - if (code_point < 0x800 || 0xffff < code_point) { - return result(error_code::OVERLONG, pos); - } - if (0xd7ff < code_point && code_point < 0xe000) { - return result(error_code::SURROGATE, pos); - } - *utf32_output++ = char32_t(code_point); - pos += 3; - } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 - // we have a 4-byte UTF-8 word. - if (pos + 3 >= len) { - return result(error_code::TOO_SHORT, pos); - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } + char *buffer_start = buffer; + for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { + base64_decode_block(dst, buffer_start); + dst += 48; + } - // range check - uint32_t code_point = (leading_byte & 0b00000111) << 18 | - (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | - (data[pos + 3] & 0b00111111); - if (code_point <= 0xffff) { - return result(error_code::OVERLONG, pos); - } - if (0x10ffff < code_point) { - return result(error_code::TOO_LARGE, pos); - } - *utf32_output++ = char32_t(code_point); - pos += 4; - } else { - // we either have too many continuation bytes or an invalid leading byte - if ((leading_byte & 0b11000000) == 0b10000000) { - return result(error_code::TOO_LONG, pos); + if ((bufferptr - buffer_start) != 0) { + size_t rem = (bufferptr - buffer_start); + int idx = rem % 4; + __mmask64 mask = ((__mmask64)1 << rem) - 1; + __m512i input = _mm512_maskz_loadu_epi8(mask, buffer_start); + size_t output_len = (rem / 4) * 3; + __mmask64 output_mask = mask >> (rem - output_len); + const __m512i merge_ab_and_bc = + _mm512_maddubs_epi16(input, _mm512_set1_epi32(0x01400140)); + const __m512i merged = + _mm512_madd_epi16(merge_ab_and_bc, _mm512_set1_epi32(0x00011000)); + const __m512i pack = _mm512_set_epi8( + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 61, 62, 56, 57, 58, + 52, 53, 54, 48, 49, 50, 44, 45, 46, 40, 41, 42, 36, 37, 38, 32, 33, 34, + 28, 29, 30, 24, 25, 26, 20, 21, 22, 16, 17, 18, 12, 13, 14, 8, 9, 10, 4, + 5, 6, 0, 1, 2); + const __m512i shuffled = _mm512_permutexvar_epi8(pack, merged); + + if (last_chunk_options == last_chunk_handling_options::strict && + (idx != 1) && ((idx + equalsigns) & 3) != 0) { + // The partial chunk was at src - idx + _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); + dst += output_len; + return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial && + (idx != 1) && ((idx + equalsigns) & 3) != 0) { + // Rewind src to before partial chunk + _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); + dst += output_len; + src -= idx; + } else { + if (idx == 2) { + if (last_chunk_options == last_chunk_handling_options::strict) { + uint32_t triple = (uint32_t(bufferptr[-2]) << 3 * 6) + + (uint32_t(bufferptr[-1]) << 2 * 6); + if (triple & 0xffff) { + _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); + dst += output_len; + return {BASE64_EXTRA_BITS, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + } + output_mask = (output_mask << 1) | 1; + output_len += 1; + _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); + dst += output_len; + } else if (idx == 3) { + if (last_chunk_options == last_chunk_handling_options::strict) { + uint32_t triple = (uint32_t(bufferptr[-3]) << 3 * 6) + + (uint32_t(bufferptr[-2]) << 2 * 6) + + (uint32_t(bufferptr[-1]) << 1 * 6); + if (triple & 0xff) { + _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); + dst += output_len; + return {BASE64_EXTRA_BITS, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + } + output_mask = (output_mask << 2) | 3; + output_len += 2; + _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); + dst += output_len; + } else if (idx == 1) { + _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); + dst += output_len; + return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), + size_t(dst - dstinit)}; } else { - return result(error_code::HEADER_BITS, pos); + _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); + dst += output_len; } } - } - return result(error_code::SUCCESS, utf32_output - start); -} -/** - * When rewind_and_convert_with_errors is called, we are pointing at 'buf' and - * we have up to len input bytes left, and we encountered some error. It is - * possible that the error is at 'buf' exactly, but it could also be in the - * previous bytes location (up to 3 bytes back). - * - * prior_bytes indicates how many bytes, prior to 'buf' may belong to the - * current memory section and can be safely accessed. We prior_bytes to access - * safely up to three bytes before 'buf'. - * - * The caller is responsible to ensure that len > 0. - * - * If the error is believed to have occurred prior to 'buf', the count value - * contain in the result will be SIZE_T - 1, SIZE_T - 2, or SIZE_T - 3. - */ -inline result rewind_and_convert_with_errors(size_t prior_bytes, - const char *buf, size_t len, - char32_t *utf32_output) { - size_t extra_len{0}; - // We potentially need to go back in time and find a leading byte. - size_t how_far_back = 3; // 3 bytes in the past + current position - if (how_far_back > prior_bytes) { - how_far_back = prior_bytes; - } - bool found_leading_bytes{false}; - // important: it is i <= how_far_back and not 'i < how_far_back'. - for (size_t i = 0; i <= how_far_back; i++) { - unsigned char byte = buf[-static_cast(i)]; - found_leading_bytes = ((byte & 0b11000000) != 0b10000000); - if (found_leading_bytes) { - if (i > 0 && byte < 128) { - // If we had to go back and the leading byte is ascii - // then we can stop right away. - return result(error_code::TOO_LONG, 0 - i + 1); + if (last_chunk_options != stop_before_partial && equalsigns > 0) { + size_t output_count = size_t(dst - dstinit); + if ((output_count % 3 == 0) || + ((output_count % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, output_count}; } - buf -= i; - extra_len = i; - break; } - } - // - // It is possible for this function to return a negative count in its result. - // C++ Standard Section 18.1 defines size_t is in which is described - // in C Standard as . C Standard Section 4.1.5 defines size_t as an - // unsigned integral type of the result of the sizeof operator - // - // An unsigned type will simply wrap round arithmetically (well defined). - // - if (!found_leading_bytes) { - // If how_far_back == 3, we may have four consecutive continuation bytes!!! - // [....] [continuation] [continuation] [continuation] | [buf is - // continuation] Or we possibly have a stream that does not start with a - // leading byte. - return result(error_code::TOO_LONG, 0 - how_far_back); + + return {SUCCESS, srclen, size_t(dst - dstinit)}; } - result res = convert_with_errors(buf, len + extra_len, utf32_output); - if (res.error) { - res.count -= extra_len; + if (equalsigns > 0) { + if ((size_t(dst - dstinit) % 3 == 0) || + ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; + } } - return res; + return {SUCCESS, srclen, size_t(dst - dstinit)}; } +/* end file src/icelake/icelake_base64.inl.cpp */ -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf8_to_utf32/utf8_to_utf32.h */ +#include -/* begin file src/scalar/latin1_to_utf16/latin1_to_utf16.h */ -#ifndef SIMDUTF_LATIN1_TO_UTF16_H -#define SIMDUTF_LATIN1_TO_UTF16_H +} // namespace +} // namespace icelake +} // namespace simdutf namespace simdutf { -namespace scalar { -namespace { -namespace latin1_to_utf16 { - -template -inline size_t convert(const char *buf, size_t len, char16_t *utf16_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; +namespace icelake { - while (pos < len) { - uint16_t word = - uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point - *utf16_output++ = - char16_t(match_system(big_endian) ? word : utf16::swap_bytes(word)); - pos++; +simdutf_warn_unused int +implementation::detect_encodings(const char *input, + size_t length) const noexcept { + // If there is a BOM, then we trust it. + auto bom_encoding = simdutf::BOM::check_bom(input, length); + // todo: convert to a one-pass algorithm + if (bom_encoding != encoding_type::unspecified) { + return bom_encoding; } - - return utf16_output - start; + int out = 0; + if (validate_utf8(input, length)) { + out |= encoding_type::UTF8; + } + if ((length % 2) == 0) { + if (validate_utf16le(reinterpret_cast(input), + length / 2)) { + out |= encoding_type::UTF16_LE; + } + } + if ((length % 4) == 0) { + if (validate_utf32(reinterpret_cast(input), length / 4)) { + out |= encoding_type::UTF32_LE; + } + } + return out; } -template -inline result convert_with_errors(const char *buf, size_t len, - char16_t *utf16_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - - while (pos < len) { - uint16_t word = - uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point - *utf16_output++ = - char16_t(match_system(big_endian) ? word : utf16::swap_bytes(word)); - pos++; +simdutf_warn_unused bool +implementation::validate_utf8(const char *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + return true; } - - return result(error_code::SUCCESS, utf16_output - start); + avx512_utf8_checker checker{}; + const char *ptr = buf; + const char *end = ptr + len; + for (; end - ptr >= 64; ptr += 64) { + const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); + checker.check_next_input(utf8); + } + if (end != ptr) { + const __m512i utf8 = _mm512_maskz_loadu_epi8( + ~UINT64_C(0) >> (64 - (end - ptr)), (const __m512i *)ptr); + checker.check_next_input(utf8); + } + checker.check_eof(); + return !checker.errors(); } -} // namespace latin1_to_utf16 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/latin1_to_utf16/latin1_to_utf16.h */ -/* begin file src/scalar/latin1_to_utf32/latin1_to_utf32.h */ -#ifndef SIMDUTF_LATIN1_TO_UTF32_H -#define SIMDUTF_LATIN1_TO_UTF32_H - -namespace simdutf { -namespace scalar { -namespace { -namespace latin1_to_utf32 { - -inline size_t convert(const char *buf, size_t len, char32_t *utf32_output) { - const unsigned char *data = reinterpret_cast(buf); - char32_t *start{utf32_output}; - for (size_t i = 0; i < len; i++) { - *utf32_output++ = (char32_t)data[i]; +simdutf_warn_unused result implementation::validate_utf8_with_errors( + const char *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, len); } - return utf32_output - start; + avx512_utf8_checker checker{}; + const char *ptr = buf; + const char *end = ptr + len; + size_t count{0}; + for (; end - ptr >= 64; ptr += 64) { + const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); + checker.check_next_input(utf8); + if (checker.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(buf), + reinterpret_cast(buf + count), len - count); + res.count += count; + return res; + } + count += 64; + } + if (end != ptr) { + const __m512i utf8 = _mm512_maskz_loadu_epi8( + ~UINT64_C(0) >> (64 - (end - ptr)), (const __m512i *)ptr); + checker.check_next_input(utf8); + } + checker.check_eof(); + if (checker.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(buf), + reinterpret_cast(buf + count), len - count); + res.count += count; + return res; + } + return result(error_code::SUCCESS, len); } -} // namespace latin1_to_utf32 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/latin1_to_utf32/latin1_to_utf32.h */ - -/* begin file src/scalar/utf8_to_latin1/utf8_to_latin1.h */ -#ifndef SIMDUTF_UTF8_TO_LATIN1_H -#define SIMDUTF_UTF8_TO_LATIN1_H +simdutf_warn_unused bool +implementation::validate_ascii(const char *buf, size_t len) const noexcept { + return icelake::validate_ascii(buf, len); +} -namespace simdutf { -namespace scalar { -namespace { -namespace utf8_to_latin1 { +simdutf_warn_unused result implementation::validate_ascii_with_errors( + const char *buf, size_t len) const noexcept { + const char *buf_orig = buf; + const char *end = buf + len; + const __m512i ascii = _mm512_set1_epi8((uint8_t)0x80); + for (; end - buf >= 64; buf += 64) { + const __m512i input = _mm512_loadu_si512((const __m512i *)buf); + __mmask64 notascii = _mm512_cmp_epu8_mask(input, ascii, _MM_CMPINT_NLT); + if (notascii) { + return result(error_code::TOO_LARGE, + buf - buf_orig + _tzcnt_u64(notascii)); + } + } + if (end != buf) { + const __m512i input = _mm512_maskz_loadu_epi8( + ~UINT64_C(0) >> (64 - (end - buf)), (const __m512i *)buf); + __mmask64 notascii = _mm512_cmp_epu8_mask(input, ascii, _MM_CMPINT_NLT); + if (notascii) { + return result(error_code::TOO_LARGE, + buf - buf_orig + _tzcnt_u64(notascii)); + } + } + return result(error_code::SUCCESS, len); +} -inline size_t convert(const char *buf, size_t len, char *latin_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{latin_output}; +simdutf_warn_unused bool +implementation::validate_utf16le(const char16_t *buf, + size_t len) const noexcept { + const char16_t *end = buf + len; - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; // We are only interested in these bits: 1000 1000 - // 1000 1000 .... etc - if ((v & 0x8080808080808080) == - 0) { // if NONE of these are set, e.g. all of them are zero, then - // everything is ASCII - size_t final_pos = pos + 16; - while (pos < final_pos) { - *latin_output++ = char(buf[pos]); - pos++; - } - continue; + for (; end - buf >= 32;) { + __m512i in = _mm512_loadu_si512((__m512i *)buf); + __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); + __mmask32 surrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); + if (surrogates) { + __mmask32 highsurrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); + __mmask32 lowsurrogates = surrogates ^ highsurrogates; + // high must be followed by low + if ((highsurrogates << 1) != lowsurrogates) { + return false; + } + bool ends_with_high = ((highsurrogates & 0x80000000) != 0); + if (ends_with_high) { + buf += 31; // advance only by 31 code units so that we start with the + // high surrogate on the next round. + } else { + buf += 32; + } + } else { + buf += 32; + } + } + if (buf < end) { + __m512i in = + _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf); + __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); + __mmask32 surrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); + if (surrogates) { + __mmask32 highsurrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); + __mmask32 lowsurrogates = surrogates ^ highsurrogates; + // high must be followed by low + if ((highsurrogates << 1) != lowsurrogates) { + return false; } } + } + return true; +} - // suppose it is not an all ASCII byte sequence - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *latin_output++ = char(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == - 0b11000000) { // the first three bits indicate: - // We have a two-byte UTF-8 - if (pos + 1 >= len) { - return 0; - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } // checks if the next byte is a valid continuation byte in UTF-8. A - // valid continuation byte starts with 10. - // range check - - uint32_t code_point = - (leading_byte & 0b00011111) << 6 | - (data[pos + 1] & - 0b00111111); // assembles the Unicode code point from the two bytes. - // It does this by discarding the leading 110 and 10 - // bits from the two bytes, shifting the remaining bits - // of the first byte, and then combining the results - // with a bitwise OR operation. - if (code_point < 0x80 || 0xFF < code_point) { - return 0; // We only care about the range 129-255 which is Non-ASCII - // latin1 characters. A code_point beneath 0x80 is invalid as - // it is already covered by bytes whose leading bit is zero. +simdutf_warn_unused bool +implementation::validate_utf16be(const char16_t *buf, + size_t len) const noexcept { + const char16_t *end = buf + len; + const __m512i byteflip = _mm512_setr_epi64( + 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, + 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + for (; end - buf >= 32;) { + __m512i in = + _mm512_shuffle_epi8(_mm512_loadu_si512((__m512i *)buf), byteflip); + __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); + __mmask32 surrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); + if (surrogates) { + __mmask32 highsurrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); + __mmask32 lowsurrogates = surrogates ^ highsurrogates; + // high must be followed by low + if ((highsurrogates << 1) != lowsurrogates) { + return false; + } + bool ends_with_high = ((highsurrogates & 0x80000000) != 0); + if (ends_with_high) { + buf += 31; // advance only by 31 code units so that we start with the + // high surrogate on the next round. + } else { + buf += 32; } - *latin_output++ = char(code_point); - pos += 2; } else { - return 0; + buf += 32; } } - return latin_output - start; + if (buf < end) { + __m512i in = _mm512_shuffle_epi8( + _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf), + byteflip); + __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); + __mmask32 surrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); + if (surrogates) { + __mmask32 highsurrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); + __mmask32 lowsurrogates = surrogates ^ highsurrogates; + // high must be followed by low + if ((highsurrogates << 1) != lowsurrogates) { + return false; + } + } + } + return true; } -inline result convert_with_errors(const char *buf, size_t len, - char *latin_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{latin_output}; - - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; // We are only interested in these bits: 1000 1000 - // 1000 1000...etc - if ((v & 0x8080808080808080) == - 0) { // if NONE of these are set, e.g. all of them are zero, then - // everything is ASCII - size_t final_pos = pos + 16; - while (pos < final_pos) { - *latin_output++ = char(buf[pos]); - pos++; - } - continue; +simdutf_warn_unused result implementation::validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept { + const char16_t *start_buf = buf; + const char16_t *end = buf + len; + for (; end - buf >= 32;) { + __m512i in = _mm512_loadu_si512((__m512i *)buf); + __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); + __mmask32 surrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); + if (surrogates) { + __mmask32 highsurrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); + __mmask32 lowsurrogates = surrogates ^ highsurrogates; + // high must be followed by low + if ((highsurrogates << 1) != lowsurrogates) { + uint32_t extra_low = _tzcnt_u32(lowsurrogates & ~(highsurrogates << 1)); + uint32_t extra_high = + _tzcnt_u32(highsurrogates & ~(lowsurrogates >> 1)); + return result(error_code::SURROGATE, + (buf - start_buf) + + (extra_low < extra_high ? extra_low : extra_high)); } - } - // suppose it is not an all ASCII byte sequence - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *latin_output++ = char(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == - 0b11000000) { // the first three bits indicate: - // We have a two-byte UTF-8 - if (pos + 1 >= len) { - return result(error_code::TOO_SHORT, pos); - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } // checks if the next byte is a valid continuation byte in UTF-8. A - // valid continuation byte starts with 10. - // range check - - uint32_t code_point = - (leading_byte & 0b00011111) << 6 | - (data[pos + 1] & - 0b00111111); // assembles the Unicode code point from the two bytes. - // It does this by discarding the leading 110 and 10 - // bits from the two bytes, shifting the remaining bits - // of the first byte, and then combining the results - // with a bitwise OR operation. - if (code_point < 0x80) { - return result(error_code::OVERLONG, pos); + bool ends_with_high = ((highsurrogates & 0x80000000) != 0); + if (ends_with_high) { + buf += 31; // advance only by 31 code units so that we start with the + // high surrogate on the next round. + } else { + buf += 32; } - if (0xFF < code_point) { - return result(error_code::TOO_LARGE, pos); - } // We only care about the range 129-255 which is Non-ASCII latin1 - // characters - *latin_output++ = char(code_point); - pos += 2; - } else if ((leading_byte & 0b11110000) == 0b11100000) { - // We have a three-byte UTF-8 - return result(error_code::TOO_LARGE, pos); - } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 - // we have a 4-byte UTF-8 word. - return result(error_code::TOO_LARGE, pos); } else { - // we either have too many continuation bytes or an invalid leading byte - if ((leading_byte & 0b11000000) == 0b10000000) { - return result(error_code::TOO_LONG, pos); + buf += 32; + } + } + if (buf < end) { + __m512i in = + _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf); + __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); + __mmask32 surrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); + if (surrogates) { + __mmask32 highsurrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); + __mmask32 lowsurrogates = surrogates ^ highsurrogates; + // high must be followed by low + if ((highsurrogates << 1) != lowsurrogates) { + uint32_t extra_low = _tzcnt_u32(lowsurrogates & ~(highsurrogates << 1)); + uint32_t extra_high = + _tzcnt_u32(highsurrogates & ~(lowsurrogates >> 1)); + return result(error_code::SURROGATE, + (buf - start_buf) + + (extra_low < extra_high ? extra_low : extra_high)); } - - return result(error_code::HEADER_BITS, pos); } } - return result(error_code::SUCCESS, latin_output - start); + return result(error_code::SUCCESS, len); } -inline result rewind_and_convert_with_errors(size_t prior_bytes, - const char *buf, size_t len, - char *latin1_output) { - size_t extra_len{0}; - // We potentially need to go back in time and find a leading byte. - // In theory '3' would be sufficient, but sometimes the error can go back - // quite far. - size_t how_far_back = prior_bytes; - // size_t how_far_back = 3; // 3 bytes in the past + current position - // if(how_far_back >= prior_bytes) { how_far_back = prior_bytes; } - bool found_leading_bytes{false}; - // important: it is i <= how_far_back and not 'i < how_far_back'. - for (size_t i = 0; i <= how_far_back; i++) { - unsigned char byte = buf[-static_cast(i)]; - found_leading_bytes = ((byte & 0b11000000) != 0b10000000); - if (found_leading_bytes) { - if (i > 0 && byte < 128) { - // If we had to go back and the leading byte is ascii - // then we can stop right away. - return result(error_code::TOO_LONG, 0 - i + 1); +simdutf_warn_unused result implementation::validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept { + const char16_t *start_buf = buf; + const char16_t *end = buf + len; + const __m512i byteflip = _mm512_setr_epi64( + 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, + 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + for (; end - buf >= 32;) { + __m512i in = + _mm512_shuffle_epi8(_mm512_loadu_si512((__m512i *)buf), byteflip); + __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); + __mmask32 surrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); + if (surrogates) { + __mmask32 highsurrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); + __mmask32 lowsurrogates = surrogates ^ highsurrogates; + // high must be followed by low + if ((highsurrogates << 1) != lowsurrogates) { + uint32_t extra_low = _tzcnt_u32(lowsurrogates & ~(highsurrogates << 1)); + uint32_t extra_high = + _tzcnt_u32(highsurrogates & ~(lowsurrogates >> 1)); + return result(error_code::SURROGATE, + (buf - start_buf) + + (extra_low < extra_high ? extra_low : extra_high)); } - buf -= i; - extra_len = i; - break; + bool ends_with_high = ((highsurrogates & 0x80000000) != 0); + if (ends_with_high) { + buf += 31; // advance only by 31 code units so that we start with the + // high surrogate on the next round. + } else { + buf += 32; + } + } else { + buf += 32; } } - // - // It is possible for this function to return a negative count in its result. - // C++ Standard Section 18.1 defines size_t is in which is described - // in C Standard as . C Standard Section 4.1.5 defines size_t as an - // unsigned integral type of the result of the sizeof operator - // - // An unsigned type will simply wrap round arithmetically (well defined). - // - if (!found_leading_bytes) { - // If how_far_back == 3, we may have four consecutive continuation bytes!!! - // [....] [continuation] [continuation] [continuation] | [buf is - // continuation] Or we possibly have a stream that does not start with a - // leading byte. - return result(error_code::TOO_LONG, 0 - how_far_back); + if (buf < end) { + __m512i in = _mm512_shuffle_epi8( + _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf), + byteflip); + __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); + __mmask32 surrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); + if (surrogates) { + __mmask32 highsurrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); + __mmask32 lowsurrogates = surrogates ^ highsurrogates; + // high must be followed by low + if ((highsurrogates << 1) != lowsurrogates) { + uint32_t extra_low = _tzcnt_u32(lowsurrogates & ~(highsurrogates << 1)); + uint32_t extra_high = + _tzcnt_u32(highsurrogates & ~(lowsurrogates >> 1)); + return result(error_code::SURROGATE, + (buf - start_buf) + + (extra_low < extra_high ? extra_low : extra_high)); + } + } } - result res = convert_with_errors(buf, len + extra_len, latin1_output); - if (res.error) { - res.count -= extra_len; + return result(error_code::SUCCESS, len); +} + +simdutf_warn_unused bool +implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { + const char32_t *tail = icelake::validate_utf32(buf, len); + if (tail) { + return scalar::utf32::validate(tail, len - (tail - buf)); + } else { + // we come here if there was an error, or buf was nullptr which may happen + // for empty input. + return len == 0; } - return res; } -} // namespace utf8_to_latin1 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf +simdutf_warn_unused result implementation::validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept { + const char32_t *buf_orig = buf; + if (len >= 16) { + const char32_t *end = buf + len - 16; + while (buf <= end) { + __m512i utf32 = _mm512_loadu_si512((const __m512i *)buf); + __mmask16 outside_range = _mm512_cmp_epu32_mask( + utf32, _mm512_set1_epi32(0x10ffff), _MM_CMPINT_GT); -#endif -/* end file src/scalar/utf8_to_latin1/utf8_to_latin1.h */ -/* begin file src/scalar/utf16_to_latin1/utf16_to_latin1.h */ -#ifndef SIMDUTF_UTF16_TO_LATIN1_H -#define SIMDUTF_UTF16_TO_LATIN1_H + __m512i utf32_off = + _mm512_add_epi32(utf32, _mm512_set1_epi32(0xffff2000)); -namespace simdutf { -namespace scalar { -namespace { -namespace utf16_to_latin1 { + __mmask16 surrogate_range = _mm512_cmp_epu32_mask( + utf32_off, _mm512_set1_epi32(0xfffff7ff), _MM_CMPINT_GT); + if ((outside_range | surrogate_range)) { + auto outside_idx = _tzcnt_u32(outside_range); + auto surrogate_idx = _tzcnt_u32(surrogate_range); -#include // for std::memcpy + if (outside_idx < surrogate_idx) { + return result(error_code::TOO_LARGE, buf - buf_orig + outside_idx); + } -template -inline size_t convert(const char16_t *buf, size_t len, char *latin_output) { - if (len == 0) { - return 0; - } - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *current_write = latin_output; - uint16_t word = 0; - uint16_t too_large = 0; + return result(error_code::SURROGATE, buf - buf_orig + surrogate_idx); + } - while (pos < len) { - word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; - too_large |= word; - *current_write++ = char(word & 0xFF); - pos++; + buf += 16; + } } - if ((too_large & 0xFF00) != 0) { - return 0; + if (len > 0) { + __m512i utf32 = _mm512_maskz_loadu_epi32( + __mmask16((1U << (buf_orig + len - buf)) - 1), (const __m512i *)buf); + __mmask16 outside_range = _mm512_cmp_epu32_mask( + utf32, _mm512_set1_epi32(0x10ffff), _MM_CMPINT_GT); + __m512i utf32_off = _mm512_add_epi32(utf32, _mm512_set1_epi32(0xffff2000)); + + __mmask16 surrogate_range = _mm512_cmp_epu32_mask( + utf32_off, _mm512_set1_epi32(0xfffff7ff), _MM_CMPINT_GT); + if ((outside_range | surrogate_range)) { + auto outside_idx = _tzcnt_u32(outside_range); + auto surrogate_idx = _tzcnt_u32(surrogate_range); + + if (outside_idx < surrogate_idx) { + return result(error_code::TOO_LARGE, buf - buf_orig + outside_idx); + } + + return result(error_code::SURROGATE, buf - buf_orig + surrogate_idx); + } } - return current_write - latin_output; + return result(error_code::SUCCESS, len); } -template -inline result convert_with_errors(const char16_t *buf, size_t len, - char *latin_output) { - if (len == 0) { - return result(error_code::SUCCESS, 0); - } - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{latin_output}; - uint16_t word; +simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept { + return icelake::latin1_to_utf8_avx512_start(buf, len, utf8_output); +} - while (pos < len) { - if (pos + 16 <= len) { // if it is safe to read 32 more bytes, check that - // they are Latin1 - uint64_t v1, v2, v3, v4; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - ::memcpy(&v2, data + pos + 4, sizeof(uint64_t)); - ::memcpy(&v3, data + pos + 8, sizeof(uint64_t)); - ::memcpy(&v4, data + pos + 12, sizeof(uint64_t)); +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return icelake_convert_latin1_to_utf16(buf, len, + utf16_output); +} - if (!match_system(big_endian)) { - v1 = (v1 >> 8) | (v1 << (64 - 8)); - } - if (!match_system(big_endian)) { - v2 = (v2 >> 8) | (v2 << (64 - 8)); - } - if (!match_system(big_endian)) { - v3 = (v3 >> 8) | (v3 << (64 - 8)); - } - if (!match_system(big_endian)) { - v4 = (v4 >> 8) | (v4 << (64 - 8)); - } +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return icelake_convert_latin1_to_utf16(buf, len, + utf16_output); +} - if (((v1 | v2 | v3 | v4) & 0xFF00FF00FF00FF00) == 0) { - size_t final_pos = pos + 16; - while (pos < final_pos) { - *latin_output++ = !match_system(big_endian) - ? char(utf16::swap_bytes(data[pos])) - : char(data[pos]); - pos++; - } - continue; - } - } - word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; - if ((word & 0xFF00) == 0) { - *latin_output++ = char(word & 0xFF); - pos++; - } else { - return result(error_code::TOO_LARGE, pos); +simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + avx512_convert_latin1_to_utf32(buf, len, utf32_output); + if (ret.first == nullptr) { + return 0; + } + size_t converted_chars = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_converted_chars == 0) { + return 0; } + converted_chars += scalar_converted_chars; } - return result(error_code::SUCCESS, latin_output - start); + return converted_chars; } -} // namespace utf16_to_latin1 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf +simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + return icelake::utf8_to_latin1_avx512(buf, len, latin1_output); +} -#endif -/* end file src/scalar/utf16_to_latin1/utf16_to_latin1.h */ -/* begin file src/scalar/utf32_to_latin1/utf32_to_latin1.h */ -#ifndef SIMDUTF_UTF32_TO_LATIN1_H -#define SIMDUTF_UTF32_TO_LATIN1_H +simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_output) const noexcept { + // First, try to convert as much as possible using the SIMD implementation. + const char *obuf = buf; + char *olatin1_output = latin1_output; + size_t written = icelake::utf8_to_latin1_avx512(obuf, len, olatin1_output); -namespace simdutf { -namespace scalar { -namespace { -namespace utf32_to_latin1 { + // If we have completely converted the string + if (obuf == buf + len) { + return {simdutf::SUCCESS, written}; + } + size_t pos = obuf - buf; + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, buf + pos, len - pos, latin1_output); + res.count += pos; + return res; +} -inline size_t convert(const char32_t *buf, size_t len, char *latin1_output) { - const uint32_t *data = reinterpret_cast(buf); - char *start = latin1_output; - uint32_t utf32_char; - size_t pos = 0; - uint32_t too_large = 0; +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + return icelake::valid_utf8_to_latin1_avx512(buf, len, latin1_output); +} - while (pos < len) { - utf32_char = (uint32_t)data[pos]; - too_large |= utf32_char; - *latin1_output++ = (char)(utf32_char & 0xFF); - pos++; - } - if ((too_large & 0xFFFFFF00) != 0) { +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16_result ret = + fast_avx512_convert_utf8_to_utf16(buf, len, + utf16_output); + if (ret.second == nullptr) { return 0; } - return latin1_output - start; + return ret.second - utf16_output; } -inline result convert_with_errors(const char32_t *buf, size_t len, - char *latin1_output) { - const uint32_t *data = reinterpret_cast(buf); - char *start{latin1_output}; - size_t pos = 0; - while (pos < len) { - if (pos + 2 <= - len) { // if it is safe to read 8 more bytes, check that they are Latin1 - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if ((v & 0xFFFFFF00FFFFFF00) == 0) { - *latin1_output++ = char(buf[pos]); - *latin1_output++ = char(buf[pos + 1]); - pos += 2; - continue; - } - } - uint32_t utf32_char = data[pos]; - if ((utf32_char & 0xFFFFFF00) == - 0) { // Check if the character can be represented in Latin-1 - *latin1_output++ = (char)(utf32_char & 0xFF); - pos++; - } else { - return result(error_code::TOO_LARGE, pos); - }; +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16_result ret = fast_avx512_convert_utf8_to_utf16( + buf, len, utf16_output); + if (ret.second == nullptr) { + return 0; } - return result(error_code::SUCCESS, latin1_output - start); + return ret.second - utf16_output; } -} // namespace utf32_to_latin1 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf32_to_latin1/utf32_to_latin1.h */ - -/* begin file src/scalar/utf8_to_latin1/valid_utf8_to_latin1.h */ -#ifndef SIMDUTF_VALID_UTF8_TO_LATIN1_H -#define SIMDUTF_VALID_UTF8_TO_LATIN1_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf8_to_latin1 { +simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return fast_avx512_convert_utf8_to_utf16_with_errors( + buf, len, utf16_output); +} -inline size_t convert_valid(const char *buf, size_t len, char *latin_output) { - const uint8_t *data = reinterpret_cast(buf); +simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + return fast_avx512_convert_utf8_to_utf16_with_errors( + buf, len, utf16_output); +} - size_t pos = 0; - char *start{latin_output}; +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16_result ret = + icelake::valid_utf8_to_fixed_length( + buf, len, utf16_output); + size_t saved_bytes = ret.second - utf16_output; + const char *end = buf + len; + if (ret.first == end) { + return saved_bytes; + } - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | - v2}; // We are only interested in these bits: 1000 1000 1000 - // 1000, so it makes sense to concatenate everything - if ((v & 0x8080808080808080) == - 0) { // if NONE of these are set, e.g. all of them are zero, then - // everything is ASCII - size_t final_pos = pos + 16; - while (pos < final_pos) { - *latin_output++ = char(buf[pos]); - pos++; - } - continue; - } - } + // Note: AVX512 procedure looks up 4 bytes forward, and + // correctly converts multi-byte chars even if their + // continuation bytes lie outsiede 16-byte window. + // It meas, we have to skip continuation bytes from + // the beginning ret.first, as they were already consumed. + while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) { + ret.first += 1; + } - // suppose it is not an all ASCII byte sequence - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *latin_output++ = char(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == - 0b11000000) { // the first three bits indicate: - // We have a two-byte UTF-8 - if (pos + 1 >= len) { - break; - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } // checks if the next byte is a valid continuation byte in UTF-8. A - // valid continuation byte starts with 10. - // range check - - uint32_t code_point = - (leading_byte & 0b00011111) << 6 | - (data[pos + 1] & - 0b00111111); // assembles the Unicode code point from the two bytes. - // It does this by discarding the leading 110 and 10 - // bits from the two bytes, shifting the remaining bits - // of the first byte, and then combining the results - // with a bitwise OR operation. - *latin_output++ = char(code_point); - pos += 2; - } else { - // we may have a continuation but we do not do error checking + if (ret.first != end) { + const size_t scalar_saved_bytes = + scalar::utf8_to_utf16::convert_valid( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { return 0; } + saved_bytes += scalar_saved_bytes; } - return latin_output - start; -} - -} // namespace utf8_to_latin1 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf -#endif -/* end file src/scalar/utf8_to_latin1/valid_utf8_to_latin1.h */ -/* begin file src/scalar/utf16_to_latin1/valid_utf16_to_latin1.h */ -#ifndef SIMDUTF_VALID_UTF16_TO_LATIN1_H -#define SIMDUTF_VALID_UTF16_TO_LATIN1_H + return saved_bytes; +} -namespace simdutf { -namespace scalar { -namespace { -namespace utf16_to_latin1 { +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16_result ret = + icelake::valid_utf8_to_fixed_length( + buf, len, utf16_output); + size_t saved_bytes = ret.second - utf16_output; + const char *end = buf + len; + if (ret.first == end) { + return saved_bytes; + } -template -inline size_t convert_valid(const char16_t *buf, size_t len, - char *latin_output) { - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{latin_output}; - uint16_t word = 0; + // Note: AVX512 procedure looks up 4 bytes forward, and + // correctly converts multi-byte chars even if their + // continuation bytes lie outsiede 16-byte window. + // It meas, we have to skip continuation bytes from + // the beginning ret.first, as they were already consumed. + while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) { + ret.first += 1; + } - while (pos < len) { - word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; - *latin_output++ = char(word); - pos++; + if (ret.first != end) { + const size_t scalar_saved_bytes = + scalar::utf8_to_utf16::convert_valid( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; } - return latin_output - start; + return saved_bytes; } -} // namespace utf16_to_latin1 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf +simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_out) const noexcept { + uint32_t *utf32_output = reinterpret_cast(utf32_out); + utf8_to_utf32_result ret = + icelake::validating_utf8_to_fixed_length( + buf, len, utf32_output); + if (ret.second == nullptr) + return 0; -#endif -/* end file src/scalar/utf16_to_latin1/valid_utf16_to_latin1.h */ -/* begin file src/scalar/utf32_to_latin1/valid_utf32_to_latin1.h */ -#ifndef SIMDUTF_VALID_UTF32_TO_LATIN1_H -#define SIMDUTF_VALID_UTF32_TO_LATIN1_H + size_t saved_bytes = ret.second - utf32_output; + const char *end = buf + len; + if (ret.first == end) { + return saved_bytes; + } -namespace simdutf { -namespace scalar { -namespace { -namespace utf32_to_latin1 { + // Note: the AVX512 procedure looks up 4 bytes forward, and + // correctly converts multi-byte chars even if their + // continuation bytes lie outside 16-byte window. + // It means, we have to skip continuation bytes from + // the beginning ret.first, as they were already consumed. + while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) { + ret.first += 1; + } + if (ret.first != end) { + const size_t scalar_saved_bytes = scalar::utf8_to_utf32::convert( + ret.first, len - (ret.first - buf), utf32_out + saved_bytes); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } -inline size_t convert_valid(const char32_t *buf, size_t len, - char *latin1_output) { - const uint32_t *data = reinterpret_cast(buf); - char *start = latin1_output; - uint32_t utf32_char; - size_t pos = 0; + return saved_bytes; +} - while (pos < len) { - utf32_char = (uint32_t)data[pos]; +simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32) const noexcept { + if (simdutf_unlikely(len == 0)) { + return {error_code::SUCCESS, 0}; + } + uint32_t *utf32_output = reinterpret_cast(utf32); + auto ret = icelake::validating_utf8_to_fixed_length_with_constant_checks< + endianness::LITTLE, uint32_t>(buf, len, utf32_output); - if (pos + 2 <= - len) { // if it is safe to read 8 more bytes, check that they are Latin1 - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if ((v & 0xFFFFFF00FFFFFF00) == 0) { - *latin1_output++ = char(buf[pos]); - *latin1_output++ = char(buf[pos + 1]); - pos += 2; - continue; - } else { - // output can not be represented in latin1 - return 0; + if (!std::get<2>(ret)) { + size_t pos = std::get<0>(ret) - buf; + // We might have an error that occurs right before pos. + // This is only a concern if buf[pos] is not a continuation byte. + if ((buf[pos] & 0xc0) != 0x80 && pos >= 64) { + pos -= 1; + } else if ((buf[pos] & 0xc0) == 0x80 && pos >= 64) { + // We must check whether we are the fourth continuation byte + bool c1 = (buf[pos - 1] & 0xc0) == 0x80; + bool c2 = (buf[pos - 2] & 0xc0) == 0x80; + bool c3 = (buf[pos - 3] & 0xc0) == 0x80; + if (c1 && c2 && c3) { + return {simdutf::TOO_LONG, pos}; } } - if ((utf32_char & 0xFFFFFF00) == 0) { - *latin1_output++ = char(utf32_char); + // todo: we reset the output to utf32 instead of using std::get<2.(ret) as + // you'd expect. that is because + // validating_utf8_to_fixed_length_with_constant_checks may have processed + // data beyond the error. + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, buf + pos, len - pos, utf32); + res.count += pos; + return res; + } + size_t saved_bytes = std::get<1>(ret) - utf32_output; + const char *end = buf + len; + if (std::get<0>(ret) == end) { + return {simdutf::SUCCESS, saved_bytes}; + } + + // Note: the AVX512 procedure looks up 4 bytes forward, and + // correctly converts multi-byte chars even if their + // continuation bytes lie outside 16-byte window. + // It means, we have to skip continuation bytes from + // the beginning ret.first, as they were already consumed. + while (std::get<0>(ret) != end and + ((uint8_t(*std::get<0>(ret)) & 0xc0) == 0x80)) { + std::get<0>(ret) += 1; + } + + if (std::get<0>(ret) != end) { + auto scalar_result = scalar::utf8_to_utf32::convert_with_errors( + std::get<0>(ret), len - (std::get<0>(ret) - buf), + reinterpret_cast(utf32_output) + saved_bytes); + if (scalar_result.error != simdutf::SUCCESS) { + scalar_result.count += (std::get<0>(ret) - buf); } else { - // output can not be represented in latin1 - return 0; + scalar_result.count += saved_bytes; } - pos++; + return scalar_result; } - return latin1_output - start; -} -} // namespace utf32_to_latin1 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf + return {simdutf::SUCCESS, size_t(std::get<1>(ret) - utf32_output)}; +} -#endif -/* end file src/scalar/utf32_to_latin1/valid_utf32_to_latin1.h */ +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_out) const noexcept { + uint32_t *utf32_output = reinterpret_cast(utf32_out); + utf8_to_utf32_result ret = + icelake::valid_utf8_to_fixed_length( + buf, len, utf32_output); + size_t saved_bytes = ret.second - utf32_output; + const char *end = buf + len; + if (ret.first == end) { + return saved_bytes; + } -SIMDUTF_PUSH_DISABLE_WARNINGS -SIMDUTF_DISABLE_UNDESIRED_WARNINGS + // Note: AVX512 procedure looks up 4 bytes forward, and + // correctly converts multi-byte chars even if their + // continuation bytes lie outsiede 16-byte window. + // It meas, we have to skip continuation bytes from + // the beginning ret.first, as they were already consumed. + while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) { + ret.first += 1; + } -#if SIMDUTF_IMPLEMENTATION_ARM64 -/* begin file src/arm64/implementation.cpp */ -/* begin file src/simdutf/arm64/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "arm64" -// #define SIMDUTF_IMPLEMENTATION arm64 -/* end file src/simdutf/arm64/begin.h */ -namespace simdutf { -namespace arm64 { -namespace { -#ifndef SIMDUTF_ARM64_H - #error "arm64.h must be included" -#endif -using namespace simd; + if (ret.first != end) { + const size_t scalar_saved_bytes = scalar::utf8_to_utf32::convert_valid( + ret.first, len - (ret.first - buf), utf32_out + saved_bytes); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } -simdutf_really_inline bool is_ascii(const simd8x64 &input) { - simd8 bits = input.reduce_or(); - return bits.max_val() < 0b10000000u; + return saved_bytes; } -simdutf_unused simdutf_really_inline simd8 -must_be_continuation(const simd8 prev1, const simd8 prev2, - const simd8 prev3) { - simd8 is_second_byte = prev1 >= uint8_t(0b11000000u); - simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); - simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); - // Use ^ instead of | for is_*_byte, because ^ is commutative, and the caller - // is using ^ as well. This will work fine because we only have to report - // errors for cases with 0-1 lead bytes. Multiple lead bytes implies 2 - // overlapping multibyte characters, and if that happens, there is guaranteed - // to be at least *one* lead byte that is part of only 1 other multibyte - // character. The error will be detected there. - return is_second_byte ^ is_third_byte ^ is_fourth_byte; +simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + return icelake_convert_utf16_to_latin1(buf, len, + latin1_output); } -simdutf_really_inline simd8 -must_be_2_3_continuation(const simd8 prev2, - const simd8 prev3) { - simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); - simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); - return is_third_byte ^ is_fourth_byte; +simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + return icelake_convert_utf16_to_latin1(buf, len, + latin1_output); } -// common functions for utf8 conversions -simdutf_really_inline uint16x4_t convert_utf8_3_byte_to_utf16(uint8x16_t in) { - // Low half contains 10cccccc|1110aaaa - // High half contains 10bbbbbb|10bbbbbb -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint8x16_t sh = simdutf_make_uint8x16_t(0, 2, 3, 5, 6, 8, 9, 11, 1, 1, - 4, 4, 7, 7, 10, 10); -#else - const uint8x16_t sh = {0, 2, 3, 5, 6, 8, 9, 11, 1, 1, 4, 4, 7, 7, 10, 10}; -#endif - uint8x16_t perm = vqtbl1q_u8(in, sh); - // Split into half vectors. - // 10cccccc|1110aaaa - uint8x8_t perm_low = vget_low_u8(perm); // no-op - // 10bbbbbb|10bbbbbb - uint8x8_t perm_high = vget_high_u8(perm); - // xxxxxxxx 10bbbbbb - uint16x4_t mid = vreinterpret_u16_u8(perm_high); // no-op - // xxxxxxxx 1110aaaa - uint16x4_t high = vreinterpret_u16_u8(perm_low); // no-op - // Assemble with shift left insert. - // xxxxxxaa aabbbbbb - uint16x4_t mid_high = vsli_n_u16(mid, high, 6); - // (perm_low << 8) | (perm_low >> 8) - // xxxxxxxx 10cccccc - uint16x4_t low = vreinterpret_u16_u8(vrev16_u8(perm_low)); - // Shift left insert into the low bits - // aaaabbbb bbcccccc - uint16x4_t composed = vsli_n_u16(low, mid_high, 6); - return composed; +simdutf_warn_unused result +implementation::convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + return icelake_convert_utf16_to_latin1_with_errors( + buf, len, latin1_output) + .first; } -simdutf_really_inline uint16x8_t convert_utf8_2_byte_to_utf16(uint8x16_t in) { - // Converts 6 2 byte UTF-8 characters to 6 UTF-16 characters. - // Technically this calculates 8, but 6 does better and happens more often - // (The languages which use these codepoints use ASCII spaces so 8 would need - // to be in the middle of a very long word). - - // 10bbbbbb 110aaaaa - uint16x8_t upper = vreinterpretq_u16_u8(in); - // (in << 8) | (in >> 8) - // 110aaaaa 10bbbbbb - uint16x8_t lower = vreinterpretq_u16_u8(vrev16q_u8(in)); - // 00000000 000aaaaa - uint16x8_t upper_masked = vandq_u16(upper, vmovq_n_u16(0x1F)); - // Assemble with shift left insert. - // 00000aaa aabbbbbb - uint16x8_t composed = vsliq_n_u16(lower, upper_masked, 6); - return composed; +simdutf_warn_unused result +implementation::convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + return icelake_convert_utf16_to_latin1_with_errors( + buf, len, latin1_output) + .first; } -simdutf_really_inline uint16x8_t -convert_utf8_1_to_2_byte_to_utf16(uint8x16_t in, size_t shufutf8_idx) { - // Converts 6 1-2 byte UTF-8 characters to 6 UTF-16 characters. - // This is a relatively easy scenario - // we process SIX (6) input code-code units. The max length in bytes of six - // code code units spanning between 1 and 2 bytes each is 12 bytes. - uint8x16_t sh = vld1q_u8(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[shufutf8_idx])); - // Shuffle - // 1 byte: 00000000 0bbbbbbb - // 2 byte: 110aaaaa 10bbbbbb - uint16x8_t perm = vreinterpretq_u16_u8(vqtbl1q_u8(in, sh)); - // Mask - // 1 byte: 00000000 0bbbbbbb - // 2 byte: 00000000 00bbbbbb - uint16x8_t ascii = vandq_u16(perm, vmovq_n_u16(0x7f)); // 6 or 7 bits - // 1 byte: 00000000 00000000 - // 2 byte: 000aaaaa 00000000 - uint16x8_t highbyte = vandq_u16(perm, vmovq_n_u16(0x1f00)); // 5 bits - // Combine with a shift right accumulate - // 1 byte: 00000000 0bbbbbbb - // 2 byte: 00000aaa aabbbbbb - uint16x8_t composed = vsraq_n_u16(ascii, highbyte, 2); - return composed; +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: implement custom function + return convert_utf16be_to_latin1(buf, len, latin1_output); } -/* begin file src/arm64/arm_validate_utf16.cpp */ -template -const char16_t *arm_validate_utf16(const char16_t *input, size_t size) { - const char16_t *end = input + size; - const auto v_d8 = simd8::splat(0xd8); - const auto v_f8 = simd8::splat(0xf8); - const auto v_fc = simd8::splat(0xfc); - const auto v_dc = simd8::splat(0xdc); - while (end - input >= 16) { - // 0. Load data: since the validation takes into account only higher - // byte of each word, we compress the two vectors into one which - // consists only the higher bytes. - auto in0 = simd16(input); - auto in1 = - simd16(input + simd16::SIZE / sizeof(char16_t)); - if (!match_system(big_endian)) { - in0 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in0))); - in1 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in1))); - } - const auto t0 = in0.shr<8>(); - const auto t1 = in1.shr<8>(); - const simd8 in = simd16::pack(t0, t1); - // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). - const uint64_t surrogates_wordmask = ((in & v_f8) == v_d8).to_bitmask64(); - if (surrogates_wordmask == 0) { - input += 16; - } else { - // 2. We have some surrogates that have to be distinguished: - // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) - // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) - // - // Fact: high surrogate has 11th bit set (3rd bit in the higher word) - - // V - non-surrogate code units - // V = not surrogates_wordmask - const uint64_t V = ~surrogates_wordmask; - - // H - word-mask for high surrogates: the six highest bits are 0b1101'11 - const auto vH = ((in & v_fc) == v_dc); - const uint64_t H = vH.to_bitmask64(); - - // L - word mask for low surrogates - // L = not H and surrogates_wordmask - const uint64_t L = ~H & surrogates_wordmask; - - const uint64_t a = - L & (H >> 4); // A low surrogate must be followed by high one. - // (A low surrogate placed in the 7th register's word - // is an exception we handle.) - const uint64_t b = - a << 4; // Just mark that the opposite fact is hold, - // thanks to that we have only two masks for valid case. - const uint64_t c = V | a | b; // Combine all the masks into the final one. - if (c == ~0ull) { - // The whole input register contains valid UTF-16, i.e., - // either single code units or proper surrogate pairs. - input += 16; - } else if (c == 0xfffffffffffffffull) { - // The 15 lower code units of the input register contains valid UTF-16. - // The 15th word may be either a low or high surrogate. It the next - // iteration we 1) check if the low surrogate is followed by a high - // one, 2) reject sole high surrogate. - input += 15; - } else { - return nullptr; - } - } - } - return input; +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: implement custom function + return convert_utf16le_to_latin1(buf, len, latin1_output); } -template -const result arm_validate_utf16_with_errors(const char16_t *input, - size_t size) { - const char16_t *start = input; - const char16_t *end = input + size; - - const auto v_d8 = simd8::splat(0xd8); - const auto v_f8 = simd8::splat(0xf8); - const auto v_fc = simd8::splat(0xfc); - const auto v_dc = simd8::splat(0xdc); - while (input + 16 < end) { - // 0. Load data: since the validation takes into account only higher - // byte of each word, we compress the two vectors into one which - // consists only the higher bytes. - auto in0 = simd16(input); - auto in1 = - simd16(input + simd16::SIZE / sizeof(char16_t)); - - if (!match_system(big_endian)) { - in0 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in0))); - in1 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in1))); - } - const auto t0 = in0.shr<8>(); - const auto t1 = in1.shr<8>(); - const simd8 in = simd16::pack(t0, t1); - // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). - const uint64_t surrogates_wordmask = ((in & v_f8) == v_d8).to_bitmask64(); - if (surrogates_wordmask == 0) { - input += 16; - } else { - // 2. We have some surrogates that have to be distinguished: - // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) - // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) - // - // Fact: high surrogate has 11th bit set (3rd bit in the higher word) - - // V - non-surrogate code units - // V = not surrogates_wordmask - const uint64_t V = ~surrogates_wordmask; - - // H - word-mask for high surrogates: the six highest bits are 0b1101'11 - const auto vH = ((in & v_fc) == v_dc); - const uint64_t H = vH.to_bitmask64(); - - // L - word mask for low surrogates - // L = not H and surrogates_wordmask - const uint64_t L = ~H & surrogates_wordmask; - - const uint64_t a = - L & (H >> 4); // A low surrogate must be followed by high one. - // (A low surrogate placed in the 7th register's word - // is an exception we handle.) - const uint64_t b = - a << 4; // Just mark that the opposite fact is hold, - // thanks to that we have only two masks for valid case. - const uint64_t c = V | a | b; // Combine all the masks into the final one. - if (c == ~0ull) { - // The whole input register contains valid UTF-16, i.e., - // either single code units or proper surrogate pairs. - input += 16; - } else if (c == 0xfffffffffffffffull) { - // The 15 lower code units of the input register contains valid UTF-16. - // The 15th word may be either a low or high surrogate. It the next - // iteration we 1) check if the low surrogate is followed by a high - // one, 2) reject sole high surrogate. - input += 15; - } else { - return result(error_code::SURROGATE, input - start); - } - } +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + size_t outlen; + size_t inlen = utf16_to_utf8_avx512i( + buf, len, (unsigned char *)utf8_output, &outlen); + if (inlen != len) { + return 0; } - return result(error_code::SUCCESS, input - start); + return outlen; } -/* end file src/arm64/arm_validate_utf16.cpp */ -/* begin file src/arm64/arm_validate_utf32le.cpp */ - -const char32_t *arm_validate_utf32le(const char32_t *input, size_t size) { - const char32_t *end = input + size; - const uint32x4_t standardmax = vmovq_n_u32(0x10ffff); - const uint32x4_t offset = vmovq_n_u32(0xffff2000); - const uint32x4_t standardoffsetmax = vmovq_n_u32(0xfffff7ff); - uint32x4_t currentmax = vmovq_n_u32(0x0); - uint32x4_t currentoffsetmax = vmovq_n_u32(0x0); - - while (end - input >= 4) { - const uint32x4_t in = vld1q_u32(reinterpret_cast(input)); - currentmax = vmaxq_u32(in, currentmax); - currentoffsetmax = vmaxq_u32(vaddq_u32(in, offset), currentoffsetmax); - input += 4; +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + size_t outlen; + size_t inlen = utf16_to_utf8_avx512i( + buf, len, (unsigned char *)utf8_output, &outlen); + if (inlen != len) { + return 0; } + return outlen; +} - uint32x4_t is_zero = - veorq_u32(vmaxq_u32(currentmax, standardmax), standardmax); - if (vmaxvq_u32(is_zero) != 0) { - return nullptr; +simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + size_t outlen; + size_t inlen = utf16_to_utf8_avx512i( + buf, len, (unsigned char *)utf8_output, &outlen); + if (inlen != len) { + result res = scalar::utf16_to_utf8::convert_with_errors( + buf + inlen, len - inlen, utf8_output + outlen); + res.count += inlen; + return res; } + return {simdutf::SUCCESS, outlen}; +} - is_zero = veorq_u32(vmaxq_u32(currentoffsetmax, standardoffsetmax), - standardoffsetmax); - if (vmaxvq_u32(is_zero) != 0) { - return nullptr; +simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + size_t outlen; + size_t inlen = utf16_to_utf8_avx512i( + buf, len, (unsigned char *)utf8_output, &outlen); + if (inlen != len) { + result res = scalar::utf16_to_utf8::convert_with_errors( + buf + inlen, len - inlen, utf8_output + outlen); + res.count += inlen; + return res; } + return {simdutf::SUCCESS, outlen}; +} - return input; +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf16le_to_utf8(buf, len, utf8_output); } -const result arm_validate_utf32le_with_errors(const char32_t *input, - size_t size) { - const char32_t *start = input; - const char32_t *end = input + size; +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf16be_to_utf8(buf, len, utf8_output); +} - const uint32x4_t standardmax = vmovq_n_u32(0x10ffff); - const uint32x4_t offset = vmovq_n_u32(0xffff2000); - const uint32x4_t standardoffsetmax = vmovq_n_u32(0xfffff7ff); - uint32x4_t currentmax = vmovq_n_u32(0x0); - uint32x4_t currentoffsetmax = vmovq_n_u32(0x0); +simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + return icelake_convert_utf32_to_latin1(buf, len, latin1_output); +} - while (end - input >= 4) { - const uint32x4_t in = vld1q_u32(reinterpret_cast(input)); - currentmax = vmaxq_u32(in, currentmax); - currentoffsetmax = vmaxq_u32(vaddq_u32(in, offset), currentoffsetmax); +simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + return icelake_convert_utf32_to_latin1_with_errors(buf, len, latin1_output) + .first; +} - uint32x4_t is_zero = - veorq_u32(vmaxq_u32(currentmax, standardmax), standardmax); - if (vmaxvq_u32(is_zero) != 0) { - return result(error_code::TOO_LARGE, input - start); - } +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + return icelake_convert_utf32_to_latin1(buf, len, latin1_output); +} - is_zero = veorq_u32(vmaxq_u32(currentoffsetmax, standardoffsetmax), - standardoffsetmax); - if (vmaxvq_u32(is_zero) != 0) { - return result(error_code::SURROGATE, input - start); +simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + avx512_convert_utf32_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; } - - input += 4; + saved_bytes += scalar_saved_bytes; } - - return result(error_code::SUCCESS, input - start); + return saved_bytes; } -/* end file src/arm64/arm_validate_utf32le.cpp */ - -/* begin file src/arm64/arm_convert_latin1_to_utf16.cpp */ -template -std::pair -arm_convert_latin1_to_utf16(const char *buf, size_t len, - char16_t *utf16_output) { - const char *end = buf + len; - while (end - buf >= 16) { - uint8x16_t in8 = vld1q_u8(reinterpret_cast(buf)); - uint16x8_t inlow = vmovl_u8(vget_low_u8(in8)); - if (!match_system(big_endian)) { - inlow = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(inlow))); - } - vst1q_u16(reinterpret_cast(utf16_output), inlow); - uint16x8_t inhigh = vmovl_u8(vget_high_u8(in8)); - if (!match_system(big_endian)) { - inhigh = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(inhigh))); +simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + icelake::avx512_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); + if (ret.first.count != len) { + result scalar_res = scalar::utf32_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; } - vst1q_u16(reinterpret_cast(utf16_output + 8), inhigh); - utf16_output += 16; - buf += 16; } - - return std::make_pair(buf, utf16_output); + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; } -/* end file src/arm64/arm_convert_latin1_to_utf16.cpp */ -/* begin file src/arm64/arm_convert_latin1_to_utf32.cpp */ -std::pair -arm_convert_latin1_to_utf32(const char *buf, size_t len, - char32_t *utf32_output) { - const char *end = buf + len; - while (end - buf >= 16) { - uint8x16_t in8 = vld1q_u8(reinterpret_cast(buf)); - uint16x8_t in8low = vmovl_u8(vget_low_u8(in8)); - uint32x4_t in16lowlow = vmovl_u16(vget_low_u16(in8low)); - uint32x4_t in16lowhigh = vmovl_u16(vget_high_u16(in8low)); - uint16x8_t in8high = vmovl_u8(vget_high_u8(in8)); - uint32x4_t in8highlow = vmovl_u16(vget_low_u16(in8high)); - uint32x4_t in8highhigh = vmovl_u16(vget_high_u16(in8high)); - vst1q_u32(reinterpret_cast(utf32_output), in16lowlow); - vst1q_u32(reinterpret_cast(utf32_output + 4), in16lowhigh); - vst1q_u32(reinterpret_cast(utf32_output + 8), in8highlow); - vst1q_u32(reinterpret_cast(utf32_output + 12), in8highhigh); +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf32_to_utf8(buf, len, utf8_output); +} - utf32_output += 16; - buf += 16; +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + avx512_convert_utf32_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; } - - return std::make_pair(buf, utf32_output); -} -/* end file src/arm64/arm_convert_latin1_to_utf32.cpp */ -/* begin file src/arm64/arm_convert_latin1_to_utf8.cpp */ -/* - Returns a pair: the first unprocessed byte from buf and utf8_output - A scalar routing should carry on the conversion of the tail. -*/ -std::pair -arm_convert_latin1_to_utf8(const char *latin1_input, size_t len, - char *utf8_out) { - uint8_t *utf8_output = reinterpret_cast(utf8_out); - const char *end = latin1_input + len; - const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080); - // We always write 16 bytes, of which more than the first 8 bytes - // are valid. A safety margin of 8 is more than sufficient. - while (end - latin1_input >= 16 + 8) { - uint8x16_t in8 = vld1q_u8(reinterpret_cast(latin1_input)); - if (vmaxvq_u8(in8) <= 0x7F) { // ASCII fast path!!!! - vst1q_u8(utf8_output, in8); - utf8_output += 16; - latin1_input += 16; - continue; + size_t saved_bytes = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf32_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} - // We just fallback on UTF-16 code. This could be optimized/simplified - // further. - uint16x8_t in16 = vmovl_u8(vget_low_u8(in8)); - // 1. prepare 2-byte values - // input 8-bit word : [aabb|bbbb] x 8 - // expected output : [1100|00aa|10bb|bbbb] x 8 - const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00); - const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f); +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + avx512_convert_utf32_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf32_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} - // t0 = [0000|00aa|bbbb|bb00] - const uint16x8_t t0 = vshlq_n_u16(in16, 2); - // t1 = [0000|00aa|0000|0000] - const uint16x8_t t1 = vandq_u16(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const uint16x8_t t2 = vandq_u16(in16, v_003f); - // t3 = [0000|00aa|00bb|bbbb] - const uint16x8_t t3 = vorrq_u16(t1, t2); - // t4 = [1100|00aa|10bb|bbbb] - const uint16x8_t t4 = vorrq_u16(t3, v_c080); - // 2. merge ASCII and 2-byte codewords - const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); - const uint16x8_t one_byte_bytemask = vcleq_u16(in16, v_007f); - const uint8x16_t utf8_unpacked = - vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, in16, t4)); - // 3. prepare bitmask for 8-bit lookup -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t mask = simdutf_make_uint16x8_t( - 0x0001, 0x0004, 0x0010, 0x0040, 0x0002, 0x0008, 0x0020, 0x0080); -#else - const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040, - 0x0002, 0x0008, 0x0020, 0x0080}; -#endif - uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask)); - // 4. pack the bytes - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; - const uint8x16_t shuffle = vld1q_u8(row + 1); - const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle); +simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + avx512_convert_utf32_to_utf16_with_errors( + buf, len, utf16_output); + if (ret.first.count != len) { + result scalar_res = + scalar::utf32_to_utf16::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf16_output; // Set count to the number of 8-bit code units written + return ret.first; +} - // 5. store bytes - vst1q_u8(utf8_output, utf8_packed); - // 6. adjust pointers - latin1_input += 8; - utf8_output += row[0]; +simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + avx512_convert_utf32_to_utf16_with_errors(buf, len, + utf16_output); + if (ret.first.count != len) { + result scalar_res = + scalar::utf32_to_utf16::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf16_output; // Set count to the number of 8-bit code units written + return ret.first; +} - } // while +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return convert_utf32_to_utf16le(buf, len, utf16_output); +} - return std::make_pair(latin1_input, reinterpret_cast(utf8_output)); +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return convert_utf32_to_utf16be(buf, len, utf16_output); } -/* end file src/arm64/arm_convert_latin1_to_utf8.cpp */ -/* begin file src/arm64/arm_convert_utf8_to_latin1.cpp */ -// Convert up to 16 bytes from utf8 to utf16 using a mask indicating the -// end of the code points. Only the least significant 12 bits of the mask -// are accessed. -// It returns how many bytes were consumed (up to 16, usually 12). -size_t convert_masked_utf8_to_latin1(const char *input, - uint64_t utf8_end_of_code_point_mask, - char *&latin1_output) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - uint8x16_t in = vld1q_u8(reinterpret_cast(input)); - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xfff; - // - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::tuple ret = + icelake::convert_utf16_to_utf32(buf, len, + utf32_output); + if (!std::get<2>(ret)) { + return 0; + } + size_t saved_bytes = std::get<1>(ret) - utf32_output; + if (std::get<0>(ret) != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} - // We first try a few fast paths. - // The obvious first test is ASCII, which actually consumes the full 16. - if (utf8_end_of_code_point_mask == 0xfff) { - // We process in chunks of 12 bytes - vst1q_u8(reinterpret_cast(latin1_output), in); - latin1_output += 12; // We wrote 12 18-bit characters. - return 12; // We consumed 12 bytes. +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::tuple ret = + icelake::convert_utf16_to_utf32(buf, len, utf32_output); + if (!std::get<2>(ret)) { + return 0; } - /// We do not have a fast path available, or the fast path is unimportant, so - /// we fallback. - const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][0]; + size_t saved_bytes = std::get<1>(ret) - utf32_output; + if (std::get<0>(ret) != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} - const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][1]; - // this indicates an invalid input: - if (idx >= 64) { - return consumed; +simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::tuple ret = + icelake::convert_utf16_to_utf32(buf, len, + utf32_output); + if (!std::get<2>(ret)) { + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); + scalar_res.count += (std::get<0>(ret) - buf); + return scalar_res; } - // Here we should have (idx < 64), if not, there is a bug in the validation or - // elsewhere. SIX (6) input code-code units this is a relatively easy scenario - // we process SIX (6) input code-code units. The max length in bytes of six - // code code units spanning between 1 and 2 bytes each is 12 bytes. Converts 6 - // 1-2 byte UTF-8 characters to 6 UTF-16 characters. This is a relatively easy - // scenario we process SIX (6) input code-code units. The max length in bytes - // of six code code units spanning between 1 and 2 bytes each is 12 bytes. - uint8x16_t sh = vld1q_u8(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[idx])); - // Shuffle - // 1 byte: 00000000 0bbbbbbb - // 2 byte: 110aaaaa 10bbbbbb - uint16x8_t perm = vreinterpretq_u16_u8(vqtbl1q_u8(in, sh)); - // Mask - // 1 byte: 00000000 0bbbbbbb - // 2 byte: 00000000 00bbbbbb - uint16x8_t ascii = vandq_u16(perm, vmovq_n_u16(0x7f)); // 6 or 7 bits - // 1 byte: 00000000 00000000 - // 2 byte: 000aaaaa 00000000 - uint16x8_t highbyte = vandq_u16(perm, vmovq_n_u16(0x1f00)); // 5 bits - // Combine with a shift right accumulate - // 1 byte: 00000000 0bbbbbbb - // 2 byte: 00000aaa aabbbbbb - uint16x8_t composed = vsraq_n_u16(ascii, highbyte, 2); - // writing 8 bytes even though we only care about the first 6 bytes. - uint8x8_t latin1_packed = vmovn_u16(composed); - vst1_u8(reinterpret_cast(latin1_output), latin1_packed); - latin1_output += 6; // We wrote 6 bytes. - return consumed; + size_t saved_bytes = std::get<1>(ret) - utf32_output; + if (std::get<0>(ret) != buf + len) { + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); + if (scalar_res.error) { + scalar_res.count += (std::get<0>(ret) - buf); + return scalar_res; + } else { + scalar_res.count += saved_bytes; + return scalar_res; + } + } + return simdutf::result(simdutf::SUCCESS, saved_bytes); } -/* end file src/arm64/arm_convert_utf8_to_latin1.cpp */ -/* begin file src/arm64/arm_convert_utf8_to_utf16.cpp */ -// Convert up to 16 bytes from utf8 to utf16 using a mask indicating the -// end of the code points. Only the least significant 12 bits of the mask -// are accessed. -// It returns how many bytes were consumed (up to 16, usually 12). -template -size_t convert_masked_utf8_to_utf16(const char *input, - uint64_t utf8_end_of_code_point_mask, - char16_t *&utf16_output) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - uint8x16_t in = vld1q_u8(reinterpret_cast(input)); - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xfff; - // - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. - // We first try a few fast paths. - // The obvious first test is ASCII, which actually consumes the full 16. - if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xffff) { - // We process in chunks of 16 bytes - // The routine in simd.h is reused. - simd8 temp{vreinterpretq_s8_u8(in)}; - temp.store_ascii_as_utf16(utf16_output); - utf16_output += 16; // We wrote 16 16-bit characters. - return 16; // We consumed 16 bytes. +simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::tuple ret = + icelake::convert_utf16_to_utf32(buf, len, utf32_output); + if (!std::get<2>(ret)) { + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); + scalar_res.count += (std::get<0>(ret) - buf); + return scalar_res; + } + size_t saved_bytes = std::get<1>(ret) - utf32_output; + if (std::get<0>(ret) != buf + len) { + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); + if (scalar_res.error) { + scalar_res.count += (std::get<0>(ret) - buf); + return scalar_res; + } else { + scalar_res.count += saved_bytes; + return scalar_res; + } } + return simdutf::result(simdutf::SUCCESS, saved_bytes); +} - // 3 byte sequences are the next most common, as seen in CJK, which has long - // sequences of these. - if (input_utf8_end_of_code_point_mask == 0x924) { - // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte - // UTF-16 code units. - uint16x4_t composed = convert_utf8_3_byte_to_utf16(in); - // Byte swap if necessary - if (!match_system(big_endian)) { - composed = vreinterpret_u16_u8(vrev16_u8(vreinterpret_u8_u16(composed))); +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::tuple ret = + icelake::convert_utf16_to_utf32(buf, len, + utf32_output); + if (!std::get<2>(ret)) { + return 0; + } + size_t saved_bytes = std::get<1>(ret) - utf32_output; + if (std::get<0>(ret) != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); + if (scalar_saved_bytes == 0) { + return 0; } - vst1_u16(reinterpret_cast(utf16_output), composed); - utf16_output += 4; // We wrote 4 16-bit characters. - return 12; // We consumed 12 bytes. + saved_bytes += scalar_saved_bytes; } + return saved_bytes; +} - // 2 byte sequences occur in short bursts in languages like Greek and Russian. - if ((utf8_end_of_code_point_mask & 0xFFF) == 0xaaa) { - // We want to take 6 2-byte UTF-8 code units and turn them into 6 2-byte - // UTF-16 code units. - uint16x8_t composed = convert_utf8_2_byte_to_utf16(in); - // Byte swap if necessary - if (!match_system(big_endian)) { - composed = - vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(composed))); +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::tuple ret = + icelake::convert_utf16_to_utf32(buf, len, utf32_output); + if (!std::get<2>(ret)) { + return 0; + } + size_t saved_bytes = std::get<1>(ret) - utf32_output; + if (std::get<0>(ret) != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); + if (scalar_saved_bytes == 0) { + return 0; } - vst1q_u16(reinterpret_cast(utf16_output), composed); + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} - utf16_output += 6; // We wrote 6 16-bit characters. - return 12; // We consumed 12 bytes. +void implementation::change_endianness_utf16(const char16_t *input, + size_t length, + char16_t *output) const noexcept { + size_t pos = 0; + const __m512i byteflip = _mm512_setr_epi64( + 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, + 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + while (pos + 32 <= length) { + __m512i utf16 = _mm512_loadu_si512((const __m512i *)(input + pos)); + utf16 = _mm512_shuffle_epi8(utf16, byteflip); + _mm512_storeu_si512(output + pos, utf16); + pos += 32; + } + if (pos < length) { + __mmask32 m((1U << (length - pos)) - 1); + __m512i utf16 = _mm512_maskz_loadu_epi16(m, (const __m512i *)(input + pos)); + utf16 = _mm512_shuffle_epi8(utf16, byteflip); + _mm512_mask_storeu_epi16(output + pos, m, utf16); } +} - /// We do not have a fast path available, or the fast path is unimportant, so - /// we fallback. - const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][0]; +simdutf_warn_unused size_t implementation::count_utf16le( + const char16_t *input, size_t length) const noexcept { + const char16_t *ptr = input; + size_t count{0}; - const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][1]; + if (length >= 32) { + const char16_t *end = input + length - 32; - if (idx < 64) { - // SIX (6) input code-code units - // Convert to UTF-16 - uint16x8_t composed = convert_utf8_1_to_2_byte_to_utf16(in, idx); - // Byte swap if necessary - if (!match_system(big_endian)) { - composed = - vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(composed))); - } - // Store - vst1q_u16(reinterpret_cast(utf16_output), composed); - utf16_output += 6; // We wrote 6 16-bit characters. - return consumed; - } else if (idx < 145) { - // FOUR (4) input code-code units - // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing. - uint8x16_t sh = vld1q_u8(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[idx])); - // XXX: depending on the system scalar instructions might be faster. - // 1 byte: 00000000 00000000 0ccccccc - // 2 byte: 00000000 110bbbbb 10cccccc - // 3 byte: 1110aaaa 10bbbbbb 10cccccc - uint32x4_t perm = vreinterpretq_u32_u8(vqtbl1q_u8(in, sh)); - // 1 byte: 00000000 0ccccccc - // 2 byte: xx0bbbbb x0cccccc - // 3 byte: xxbbbbbb x0cccccc - uint16x4_t lowperm = vmovn_u32(perm); - // Partially mask with bic (doesn't require a temporary register unlike and) - // The shift left insert below will clear the top bits. - // 1 byte: 00000000 00000000 - // 2 byte: xx0bbbbb 00000000 - // 3 byte: xxbbbbbb 00000000 - uint16x4_t middlebyte = vbic_u16(lowperm, vmov_n_u16(uint16_t(~0xFF00))); - // ASCII - // 1 byte: 00000000 0ccccccc - // 2+byte: 00000000 00cccccc - uint16x4_t ascii = vand_u16(lowperm, vmov_n_u16(0x7F)); - // Split into narrow vectors. - // 2 byte: 00000000 00000000 - // 3 byte: 00000000 xxxxaaaa - uint16x4_t highperm = vshrn_n_u32(perm, 16); - // Shift right accumulate the middle byte - // 1 byte: 00000000 0ccccccc - // 2 byte: 00xx0bbb bbcccccc - // 3 byte: 00xxbbbb bbcccccc - uint16x4_t middlelow = vsra_n_u16(ascii, middlebyte, 2); - // Shift left and insert the top 4 bits, overwriting the garbage - // 1 byte: 00000000 0ccccccc - // 2 byte: 00000bbb bbcccccc - // 3 byte: aaaabbbb bbcccccc - uint16x4_t composed = vsli_n_u16(middlelow, highperm, 12); - // Byte swap if necessary - if (!match_system(big_endian)) { - composed = vreinterpret_u16_u8(vrev16_u8(vreinterpret_u8_u16(composed))); + const __m512i low = _mm512_set1_epi16((uint16_t)0xdc00); + const __m512i high = _mm512_set1_epi16((uint16_t)0xdfff); + + while (ptr <= end) { + __m512i utf16 = _mm512_loadu_si512((const __m512i *)ptr); + ptr += 32; + uint64_t not_high_surrogate = + static_cast(_mm512_cmpgt_epu16_mask(utf16, high) | + _mm512_cmplt_epu16_mask(utf16, low)); + count += count_ones(not_high_surrogate); } - vst1_u16(reinterpret_cast(utf16_output), composed); + } - utf16_output += 4; // We wrote 4 16-bit codepoints - return consumed; - } else if (idx < 209) { - // THREE (3) input code-code units - if (input_utf8_end_of_code_point_mask == 0x888) { - // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte - // UTF-16 pairs. Generating surrogate pairs is a little tricky though, but - // it is easier when we can assume they are all pairs. This version does - // not use the LUT, but 4 byte sequences are less common and the overhead - // of the extra memory access is less important than the early branch - // overhead in shorter sequences. + return count + scalar::utf16::count_code_points( + ptr, length - (ptr - input)); +} - // Swap byte pairs - // 10dddddd 10cccccc|10bbbbbb 11110aaa - // 10cccccc 10dddddd|11110aaa 10bbbbbb - uint8x16_t swap = vrev16q_u8(in); - // Shift left 2 bits - // cccccc00 dddddd00 xxxxxxxx bbbbbb00 - uint32x4_t shift = vreinterpretq_u32_u8(vshlq_n_u8(swap, 2)); - // Create a magic number containing the low 2 bits of the trail surrogate - // and all the corrections needed to create the pair. UTF-8 4b prefix = - // -0x0000|0xF000 surrogate offset = -0x0000|0x0040 (0x10000 << 6) - // surrogate high = +0x0000|0xD800 - // surrogate low = +0xDC00|0x0000 - // ------------------------------- - // = +0xDC00|0xE7C0 - uint32x4_t magic = vmovq_n_u32(0xDC00E7C0); - // Generate unadjusted trail surrogate minus lowest 2 bits - // xxxxxxxx xxxxxxxx|11110aaa bbbbbb00 - uint32x4_t trail = - vbslq_u32(vmovq_n_u32(0x0000FF00), vreinterpretq_u32_u8(swap), shift); - // Insert low 2 bits of trail surrogate to magic number for later - // 11011100 00000000 11100111 110000cc - uint16x8_t magic_with_low_2 = - vreinterpretq_u16_u32(vsraq_n_u32(magic, shift, 30)); - // Generate lead surrogate - // xxxxcccc ccdddddd|xxxxxxxx xxxxxxxx - uint32x4_t lead = vreinterpretq_u32_u16( - vsliq_n_u16(vreinterpretq_u16_u8(swap), vreinterpretq_u16_u8(in), 6)); - // Mask out lead - // 000000cc ccdddddd|xxxxxxxx xxxxxxxx - lead = vbicq_u32(lead, vmovq_n_u32(uint32_t(~0x03FFFFFF))); - // Blend pairs - // 000000cc ccdddddd|11110aaa bbbbbb00 - uint16x8_t blend = vreinterpretq_u16_u32( - vbslq_u32(vmovq_n_u32(0x0000FFFF), trail, lead)); - // Add magic number to finish the result - // 110111CC CCDDDDDD|110110AA BBBBBBCC - uint16x8_t composed = vaddq_u16(blend, magic_with_low_2); - // Byte swap if necessary - if (!match_system(big_endian)) { - composed = - vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(composed))); - } - uint16_t buffer[8]; - vst1q_u16(reinterpret_cast(buffer), composed); - for (int k = 0; k < 6; k++) { - utf16_output[k] = buffer[k]; - } // the loop might compiler to a couple of instructions. - utf16_output += 6; // We wrote 3 32-bit surrogate pairs. - return 12; // We consumed 12 bytes. - } - // 3 1-4 byte sequences - uint8x16_t sh = vld1q_u8(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[idx])); +simdutf_warn_unused size_t implementation::count_utf16be( + const char16_t *input, size_t length) const noexcept { + const char16_t *ptr = input; + size_t count{0}; + if (length >= 32) { - // 1 byte: 00000000 00000000 00000000 0ddddddd - // 3 byte: 00000000 00000000 110ccccc 10dddddd - // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd - // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd - uint32x4_t perm = vreinterpretq_u32_u8(vqtbl1q_u8(in, sh)); - // added to fix issue https://github.com/simdutf/simdutf/issues/514 - // We only want to write 2 * 16-bit code units when that is actually what we - // have. Unfortunately, we cannot trust the input. So it is possible to get - // 0xff as an input byte and it should not result in a surrogate pair. We - // need to check for that. - uint32_t permbuffer[4]; - vst1q_u32(permbuffer, perm); - // Mask the low and middle bytes - // 00000000 00000000 00000000 0ddddddd - uint32x4_t ascii = vandq_u32(perm, vmovq_n_u32(0x7f)); - // Because the surrogates need more work, the high surrogate is computed - // first. - uint32x4_t middlehigh = vshlq_n_u32(perm, 2); - // 00000000 00000000 00cccccc 00000000 - uint32x4_t middlebyte = vandq_u32(perm, vmovq_n_u32(0x3F00)); - // Start assembling the sequence. Since the 4th byte is in the same position - // as it would be in a surrogate and there is no dependency, shift left - // instead of right. 3 byte: 00000000 10bbbbxx xxxxxxxx xxxxxxxx 4 byte: - // 11110aaa bbbbbbxx xxxxxxxx xxxxxxxx - uint32x4_t ab = vbslq_u32(vmovq_n_u32(0xFF000000), perm, middlehigh); - // Top 16 bits contains the high ten bits of the surrogate pair before - // correction 3 byte: 00000000 10bbbbcc|cccc0000 00000000 4 byte: 11110aaa - // bbbbbbcc|cccc0000 00000000 - high 10 bits correct w/o correction - uint32x4_t abc = - vbslq_u32(vmovq_n_u32(0xFFFC0000), ab, vshlq_n_u32(middlebyte, 4)); - // Combine the low 6 or 7 bits by a shift right accumulate - // 3 byte: 00000000 00000010|bbbbcccc ccdddddd - low 16 bits correct - // 4 byte: 00000011 110aaabb|bbbbcccc ccdddddd - low 10 bits correct w/o - // correction - uint32x4_t composed = vsraq_n_u32(ascii, abc, 6); - // After this is for surrogates - // Blend the low and high surrogates - // 4 byte: 11110aaa bbbbbbcc|bbbbcccc ccdddddd - uint32x4_t mixed = vbslq_u32(vmovq_n_u32(0xFFFF0000), abc, composed); - // Clear the upper 6 bits of the low surrogate. Don't clear the upper bits - // yet as 0x10000 was not subtracted from the codepoint yet. 4 byte: - // 11110aaa bbbbbbcc|000000cc ccdddddd - uint16x8_t masked_pair = vreinterpretq_u16_u32( - vbicq_u32(mixed, vmovq_n_u32(uint32_t(~0xFFFF03FF)))); - // Correct the remaining UTF-8 prefix, surrogate offset, and add the - // surrogate prefixes in one magic 16-bit addition. similar magic number but - // without the continue byte adjust and halfword swapped UTF-8 4b prefix = - // -0xF000|0x0000 surrogate offset = -0x0040|0x0000 (0x10000 << 6) - // surrogate high = +0xD800|0x0000 - // surrogate low = +0x0000|0xDC00 - // ----------------------------------- - // = +0xE7C0|0xDC00 - uint16x8_t magic = vreinterpretq_u16_u32(vmovq_n_u32(0xE7C0DC00)); - // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD - surrogate pair complete - uint32x4_t surrogates = - vreinterpretq_u32_u16(vaddq_u16(masked_pair, magic)); - // If the high bit is 1 (s32 less than zero), this needs a surrogate pair - uint32x4_t is_pair = vcltzq_s32(vreinterpretq_s32_u32(perm)); + const char16_t *end = input + length - 32; - // Select either the 4 byte surrogate pair or the 2 byte solo codepoint - // 3 byte: 0xxxxxxx xxxxxxxx|bbbbcccc ccdddddd - // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD - uint32x4_t selected = vbslq_u32(is_pair, surrogates, composed); - // Byte swap if necessary - if (!match_system(big_endian)) { - selected = - vreinterpretq_u32_u8(vrev16q_u8(vreinterpretq_u8_u32(selected))); - } - // Attempting to shuffle and store would be complex, just scalarize. - uint32_t buffer[4]; - vst1q_u32(buffer, selected); - // Test for the top bit of the surrogate mask. Remove due to issue 514 - // const uint32_t SURROGATE_MASK = match_system(big_endian) ? 0x80000000 : - // 0x00800000; - for (size_t i = 0; i < 3; i++) { - // Surrogate - // Used to be if (buffer[i] & SURROGATE_MASK) { - // See discussion above. - // patch for issue https://github.com/simdutf/simdutf/issues/514 - if ((permbuffer[i] & 0xf8000000) == 0xf0000000) { - utf16_output[0] = uint16_t(buffer[i] >> 16); - utf16_output[1] = uint16_t(buffer[i] & 0xFFFF); - utf16_output += 2; - } else { - utf16_output[0] = uint16_t(buffer[i] & 0xFFFF); - utf16_output++; - } + const __m512i low = _mm512_set1_epi16((uint16_t)0xdc00); + const __m512i high = _mm512_set1_epi16((uint16_t)0xdfff); + + const __m512i byteflip = _mm512_setr_epi64( + 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, + 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + while (ptr <= end) { + __m512i utf16 = + _mm512_shuffle_epi8(_mm512_loadu_si512((__m512i *)ptr), byteflip); + ptr += 32; + uint64_t not_high_surrogate = + static_cast(_mm512_cmpgt_epu16_mask(utf16, high) | + _mm512_cmplt_epu16_mask(utf16, low)); + count += count_ones(not_high_surrogate); } - return consumed; - } else { - // here we know that there is an error but we do not handle errors - return 12; - } -} -/* end file src/arm64/arm_convert_utf8_to_utf16.cpp */ -/* begin file src/arm64/arm_convert_utf8_to_utf32.cpp */ -// Convert up to 12 bytes from utf8 to utf32 using a mask indicating the -// end of the code points. Only the least significant 12 bits of the mask -// are accessed. -// It returns how many bytes were consumed (up to 12). -size_t convert_masked_utf8_to_utf32(const char *input, - uint64_t utf8_end_of_code_point_mask, - char32_t *&utf32_out) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - uint32_t *&utf32_output = reinterpret_cast(utf32_out); - uint8x16_t in = vld1q_u8(reinterpret_cast(input)); - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xFFF; - // - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. - // - // We first try a few fast paths. - if (utf8_end_of_code_point_mask == 0xfff) { - // We process in chunks of 12 bytes. - // use fast implementation in src/simdutf/arm64/simd.h - // Ideally the compiler can keep the tables in registers. - simd8 temp{vreinterpretq_s8_u8(in)}; - temp.store_ascii_as_utf32_tbl(utf32_out); - utf32_output += 12; // We wrote 12 32-bit characters. - return 12; // We consumed 12 bytes. - } - if (input_utf8_end_of_code_point_mask == 0x924) { - // We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte - // UTF-32 code units. Convert to UTF-16 - uint16x4_t composed_utf16 = convert_utf8_3_byte_to_utf16(in); - // Zero extend and store via ST2 with a zero. - uint16x4x2_t interleaver = {{composed_utf16, vmov_n_u16(0)}}; - vst2_u16(reinterpret_cast(utf32_output), interleaver); - utf32_output += 4; // We wrote 4 32-bit characters. - return 12; // We consumed 12 bytes. } - // 2 byte sequences occur in short bursts in languages like Greek and Russian. - if (input_utf8_end_of_code_point_mask == 0xaaa) { - // We want to take 6 2-byte UTF-8 code units and turn them into 6 4-byte - // UTF-32 code units. Convert to UTF-16 - uint16x8_t composed_utf16 = convert_utf8_2_byte_to_utf16(in); - // Zero extend and store via ST2 with a zero. - uint16x8x2_t interleaver = {{composed_utf16, vmovq_n_u16(0)}}; - vst2q_u16(reinterpret_cast(utf32_output), interleaver); - utf32_output += 6; // We wrote 6 32-bit characters. - return 12; // We consumed 12 bytes. - } - /// Either no fast path or an unimportant fast path. + return count + scalar::utf16::count_code_points( + ptr, length - (ptr - input)); +} - const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][0]; - const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][1]; +simdutf_warn_unused size_t +implementation::count_utf8(const char *input, size_t length) const noexcept { + const uint8_t *str = reinterpret_cast(input); + size_t answer = + length / sizeof(__m512i) * + sizeof(__m512i); // Number of 512-bit chunks that fits into the length. + size_t i = 0; + __m512i unrolled_popcount{0}; - if (idx < 64) { - // SIX (6) input code-code units - // Convert to UTF-16 - uint16x8_t composed_utf16 = convert_utf8_1_to_2_byte_to_utf16(in, idx); - // Zero extend and store with ST2 and zero - uint16x8x2_t interleaver = {{composed_utf16, vmovq_n_u16(0)}}; - vst2q_u16(reinterpret_cast(utf32_output), interleaver); - utf32_output += 6; // We wrote 6 32-bit characters. - return consumed; - } else if (idx < 145) { - // FOUR (4) input code-code units - // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing. - uint8x16_t sh = vld1q_u8(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[idx])); - // Shuffle - // 1 byte: 00000000 00000000 0ccccccc - // 2 byte: 00000000 110bbbbb 10cccccc - // 3 byte: 1110aaaa 10bbbbbb 10cccccc - uint32x4_t perm = vreinterpretq_u32_u8(vqtbl1q_u8(in, sh)); - // Split - // 00000000 00000000 0ccccccc - uint32x4_t ascii = vandq_u32(perm, vmovq_n_u32(0x7F)); // 6 or 7 bits - // Note: unmasked - // xxxxxxxx aaaaxxxx xxxxxxxx - uint32x4_t high = vshrq_n_u32(perm, 4); // 4 bits - // Use 16 bit bic instead of and. - // The top bits will be corrected later in the bsl - // 00000000 10bbbbbb 00000000 - uint32x4_t middle = vreinterpretq_u32_u16( - vbicq_u16(vreinterpretq_u16_u32(perm), - vmovq_n_u16(uint16_t(~0xff00)))); // 5 or 6 bits - // Combine low and middle with shift right accumulate - // 00000000 00xxbbbb bbcccccc - uint32x4_t lowmid = vsraq_n_u32(ascii, middle, 2); - // Insert top 4 bits from high byte with bitwise select - // 00000000 aaaabbbb bbcccccc - uint32x4_t composed = vbslq_u32(vmovq_n_u32(0x0000F000), high, lowmid); - vst1q_u32(utf32_output, composed); - utf32_output += 4; // We wrote 4 32-bit characters. - return consumed; - } else if (idx < 209) { - // THREE (3) input code-code units - if (input_utf8_end_of_code_point_mask == 0x888) { - // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte - // UTF-32 code units. This uses the same method as the fixed 3 byte - // version, reversing and shift left insert. However, there is no need for - // a shuffle mask now, just rev16 and rev32. - // - // This version does not use the LUT, but 4 byte sequences are less common - // and the overhead of the extra memory access is less important than the - // early branch overhead in shorter sequences, so it comes last. + const __m512i continuation = _mm512_set1_epi8(char(0b10111111)); - // Swap pairs of bytes - // 10dddddd|10cccccc|10bbbbbb|11110aaa - // 10cccccc 10dddddd|11110aaa 10bbbbbb - uint16x8_t swap1 = vreinterpretq_u16_u8(vrev16q_u8(in)); - // Shift left and insert - // xxxxcccc ccdddddd|xxxxxxxa aabbbbbb - uint16x8_t merge1 = vsliq_n_u16(swap1, vreinterpretq_u16_u8(in), 6); - // Swap 16-bit lanes - // xxxxcccc ccdddddd xxxxxxxa aabbbbbb - // xxxxxxxa aabbbbbb xxxxcccc ccdddddd - uint32x4_t swap2 = vreinterpretq_u32_u16(vrev32q_u16(merge1)); - // Shift insert again - // xxxxxxxx xxxaaabb bbbbcccc ccdddddd - uint32x4_t merge2 = vsliq_n_u32(swap2, vreinterpretq_u32_u16(merge1), 12); - // Clear the garbage - // 00000000 000aaabb bbbbcccc ccdddddd - uint32x4_t composed = vandq_u32(merge2, vmovq_n_u32(0x1FFFFF)); - // Store - vst1q_u32(utf32_output, composed); + while (i + sizeof(__m512i) <= length) { + size_t iterations = (length - i) / sizeof(__m512i); - utf32_output += 3; // We wrote 3 32-bit characters. - return 12; // We consumed 12 bytes. - } - // Unlike UTF-16, doing a fast codepath doesn't have nearly as much benefit - // due to surrogates no longer being involved. - uint8x16_t sh = vld1q_u8(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[idx])); - // 1 byte: 00000000 00000000 00000000 0ddddddd - // 2 byte: 00000000 00000000 110ccccc 10dddddd - // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd - // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd - uint32x4_t perm = vreinterpretq_u32_u8(vqtbl1q_u8(in, sh)); - // Ascii - uint32x4_t ascii = vandq_u32(perm, vmovq_n_u32(0x7F)); - uint32x4_t middle = vandq_u32(perm, vmovq_n_u32(0x3f00)); - // When converting the way we do, the 3 byte prefix will be interpreted as - // the 18th bit being set, since the code would interpret the lead byte - // (0b1110bbbb) as a continuation byte (0b10bbbbbb). To fix this, we can - // either xor or do an 8 bit add of the 6th bit shifted right by 1. Since - // NEON has shift right accumulate, we use that. - // 4 byte 3 byte - // 10bbbbbb 1110bbbb - // 00000000 01000000 6th bit - // 00000000 00100000 shift right - // 10bbbbbb 0000bbbb add - // 00bbbbbb 0000bbbb mask - uint8x16_t correction = - vreinterpretq_u8_u32(vandq_u32(perm, vmovq_n_u32(0x00400000))); - uint32x4_t corrected = vreinterpretq_u32_u8( - vsraq_n_u8(vreinterpretq_u8_u32(perm), correction, 1)); - // 00000000 00000000 0000cccc ccdddddd - uint32x4_t cd = vsraq_n_u32(ascii, middle, 2); - // Insert twice - // xxxxxxxx xxxaaabb bbbbxxxx xxxxxxxx - uint32x4_t ab = vbslq_u32(vmovq_n_u32(0x01C0000), vshrq_n_u32(corrected, 6), - vshrq_n_u32(corrected, 4)); - // 00000000 000aaabb bbbbcccc ccdddddd - uint32x4_t composed = vbslq_u32(vmovq_n_u32(0xFFE00FFF), cd, ab); - // Store - vst1q_u32(utf32_output, composed); - utf32_output += 3; // We wrote 3 32-bit characters. - return consumed; - } else { - // here we know that there is an error but we do not handle errors - return 12; - } -} -/* end file src/arm64/arm_convert_utf8_to_utf32.cpp */ + size_t max_i = i + iterations * sizeof(__m512i) - sizeof(__m512i); + for (; i + 8 * sizeof(__m512i) <= max_i; i += 8 * sizeof(__m512i)) { + __m512i input1 = _mm512_loadu_si512((const __m512i *)(str + i)); + __m512i input2 = + _mm512_loadu_si512((const __m512i *)(str + i + sizeof(__m512i))); + __m512i input3 = + _mm512_loadu_si512((const __m512i *)(str + i + 2 * sizeof(__m512i))); + __m512i input4 = + _mm512_loadu_si512((const __m512i *)(str + i + 3 * sizeof(__m512i))); + __m512i input5 = + _mm512_loadu_si512((const __m512i *)(str + i + 4 * sizeof(__m512i))); + __m512i input6 = + _mm512_loadu_si512((const __m512i *)(str + i + 5 * sizeof(__m512i))); + __m512i input7 = + _mm512_loadu_si512((const __m512i *)(str + i + 6 * sizeof(__m512i))); + __m512i input8 = + _mm512_loadu_si512((const __m512i *)(str + i + 7 * sizeof(__m512i))); -/* begin file src/arm64/arm_convert_utf16_to_latin1.cpp */ + __mmask64 mask1 = _mm512_cmple_epi8_mask(input1, continuation); + __mmask64 mask2 = _mm512_cmple_epi8_mask(input2, continuation); + __mmask64 mask3 = _mm512_cmple_epi8_mask(input3, continuation); + __mmask64 mask4 = _mm512_cmple_epi8_mask(input4, continuation); + __mmask64 mask5 = _mm512_cmple_epi8_mask(input5, continuation); + __mmask64 mask6 = _mm512_cmple_epi8_mask(input6, continuation); + __mmask64 mask7 = _mm512_cmple_epi8_mask(input7, continuation); + __mmask64 mask8 = _mm512_cmple_epi8_mask(input8, continuation); -template -std::pair -arm_convert_utf16_to_latin1(const char16_t *buf, size_t len, - char *latin1_output) { - const char16_t *end = buf + len; - while (end - buf >= 8) { - uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); - if (!match_system(big_endian)) { - in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); - } - if (vmaxvq_u16(in) <= 0xff) { - // 1. pack the bytes - uint8x8_t latin1_packed = vmovn_u16(in); - // 2. store (8 bytes) - vst1_u8(reinterpret_cast(latin1_output), latin1_packed); - // 3. adjust pointers - buf += 8; - latin1_output += 8; - } else { - return std::make_pair(nullptr, reinterpret_cast(latin1_output)); - } - } // while - return std::make_pair(buf, latin1_output); -} + __m512i mask_register = _mm512_set_epi64(mask8, mask7, mask6, mask5, + mask4, mask3, mask2, mask1); -template -std::pair -arm_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, - char *latin1_output) { - const char16_t *start = buf; - const char16_t *end = buf + len; - while (end - buf >= 8) { - uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); - if (!match_system(big_endian)) { - in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); + unrolled_popcount = _mm512_add_epi64(unrolled_popcount, + _mm512_popcnt_epi64(mask_register)); } - if (vmaxvq_u16(in) <= 0xff) { - // 1. pack the bytes - uint8x8_t latin1_packed = vmovn_u16(in); - // 2. store (8 bytes) - vst1_u8(reinterpret_cast(latin1_output), latin1_packed); - // 3. adjust pointers - buf += 8; - latin1_output += 8; - } else { - // Let us do a scalar fallback. - for (int k = 0; k < 8; k++) { - uint16_t word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; - if (word <= 0xff) { - *latin1_output++ = char(word); - } else { - return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), - latin1_output); - } - } + + for (; i <= max_i; i += sizeof(__m512i)) { + __m512i more_input = _mm512_loadu_si512((const __m512i *)(str + i)); + uint64_t continuation_bitmask = static_cast( + _mm512_cmple_epi8_mask(more_input, continuation)); + answer -= count_ones(continuation_bitmask); } - } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), - latin1_output); + } + + __m256i first_half = _mm512_extracti64x4_epi64(unrolled_popcount, 0); + __m256i second_half = _mm512_extracti64x4_epi64(unrolled_popcount, 1); + answer -= (size_t)_mm256_extract_epi64(first_half, 0) + + (size_t)_mm256_extract_epi64(first_half, 1) + + (size_t)_mm256_extract_epi64(first_half, 2) + + (size_t)_mm256_extract_epi64(first_half, 3) + + (size_t)_mm256_extract_epi64(second_half, 0) + + (size_t)_mm256_extract_epi64(second_half, 1) + + (size_t)_mm256_extract_epi64(second_half, 2) + + (size_t)_mm256_extract_epi64(second_half, 3); + + return answer + scalar::utf8::count_code_points( + reinterpret_cast(str + i), length - i); } -/* end file src/arm64/arm_convert_utf16_to_latin1.cpp */ -/* begin file src/arm64/arm_convert_utf16_to_utf32.cpp */ -/* - The vectorized algorithm works on single SSE register i.e., it - loads eight 16-bit code units. - We consider three cases: - 1. an input register contains no surrogates and each value - is in range 0x0000 .. 0x07ff. - 2. an input register contains no surrogates and values are - is in range 0x0000 .. 0xffff. - 3. an input register contains surrogates --- i.e. codepoints - can have 16 or 32 bits. +simdutf_warn_unused size_t implementation::latin1_length_from_utf8( + const char *buf, size_t len) const noexcept { + return count_utf8(buf, len); +} - Ad 1. +simdutf_warn_unused size_t +implementation::latin1_length_from_utf16(size_t length) const noexcept { + return scalar::utf16::latin1_length_from_utf16(length); +} - When values are less than 0x0800, it means that a 16-bit code unit - can be converted into: 1) single UTF8 byte (when it is an ASCII - char) or 2) two UTF8 bytes. +simdutf_warn_unused size_t +implementation::latin1_length_from_utf32(size_t length) const noexcept { + return scalar::utf32::latin1_length_from_utf32(length); +} - For this case we do only some shuffle to obtain these 2-byte - codes and finally compress the whole SSE register with a single - shuffle. +simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + const char16_t *ptr = input; + size_t count{0}; + if (length >= 32) { + const char16_t *end = input + length - 32; - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. + const __m512i v_007f = _mm512_set1_epi16((uint16_t)0x007f); + const __m512i v_07ff = _mm512_set1_epi16((uint16_t)0x07ff); + const __m512i v_dfff = _mm512_set1_epi16((uint16_t)0xdfff); + const __m512i v_d800 = _mm512_set1_epi16((uint16_t)0xd800); - Ad 2. + while (ptr <= end) { + __m512i utf16 = _mm512_loadu_si512((const __m512i *)ptr); + ptr += 32; + __mmask32 ascii_bitmask = _mm512_cmple_epu16_mask(utf16, v_007f); + __mmask32 two_bytes_bitmask = + _mm512_mask_cmple_epu16_mask(~ascii_bitmask, utf16, v_07ff); + __mmask32 not_one_two_bytes = ~(ascii_bitmask | two_bytes_bitmask); + __mmask32 surrogates_bitmask = + _mm512_mask_cmple_epu16_mask(not_one_two_bytes, utf16, v_dfff) & + _mm512_mask_cmpge_epu16_mask(not_one_two_bytes, utf16, v_d800); - When values fit in 16-bit code units, but are above 0x07ff, then - a single word may produce one, two or three UTF8 bytes. + size_t ascii_count = count_ones(ascii_bitmask); + size_t two_bytes_count = count_ones(two_bytes_bitmask); + size_t surrogate_bytes_count = count_ones(surrogates_bitmask); + size_t three_bytes_count = + 32 - ascii_count - two_bytes_count - surrogate_bytes_count; - We prepare data for all these three cases in two registers. - The first register contains lower two UTF8 bytes (used in all - cases), while the second one contains just the third byte for - the three-UTF8-bytes case. + count += ascii_count + 2 * two_bytes_count + 3 * three_bytes_count + + 2 * surrogate_bytes_count; + } + } - Finally these two registers are interleaved forming eight-element - array of 32-bit values. The array spans two SSE registers. - The bytes from the registers are compressed using two shuffles. + return count + scalar::utf16::utf8_length_from_utf16( + ptr, length - (ptr - input)); +} - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. +simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + const char16_t *ptr = input; + size_t count{0}; + if (length >= 32) { + const char16_t *end = input + length - 32; - To summarize: - - We need two 256-entry tables that have 8704 bytes in total. -*/ -/* - Returns a pair: the first unprocessed byte from buf and utf8_output - A scalar routing should carry on the conversion of the tail. -*/ -template -std::pair -arm_convert_utf16_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_out) { - uint32_t *utf32_output = reinterpret_cast(utf32_out); - const char16_t *end = buf + len; + const __m512i v_007f = _mm512_set1_epi16((uint16_t)0x007f); + const __m512i v_07ff = _mm512_set1_epi16((uint16_t)0x07ff); + const __m512i v_dfff = _mm512_set1_epi16((uint16_t)0xdfff); + const __m512i v_d800 = _mm512_set1_epi16((uint16_t)0xd800); - const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800); - const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); + const __m512i byteflip = _mm512_setr_epi64( + 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, + 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001, 0x0e0f0c0d0a0b0809); + while (ptr <= end) { + __m512i utf16 = _mm512_loadu_si512((const __m512i *)ptr); + utf16 = _mm512_shuffle_epi8(utf16, byteflip); + ptr += 32; + __mmask32 ascii_bitmask = _mm512_cmple_epu16_mask(utf16, v_007f); + __mmask32 two_bytes_bitmask = + _mm512_mask_cmple_epu16_mask(~ascii_bitmask, utf16, v_07ff); + __mmask32 not_one_two_bytes = ~(ascii_bitmask | two_bytes_bitmask); + __mmask32 surrogates_bitmask = + _mm512_mask_cmple_epu16_mask(not_one_two_bytes, utf16, v_dfff) & + _mm512_mask_cmpge_epu16_mask(not_one_two_bytes, utf16, v_d800); - while (end - buf >= 8) { - uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); - if (!match_system(big_endian)) { - in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); + size_t ascii_count = count_ones(ascii_bitmask); + size_t two_bytes_count = count_ones(two_bytes_bitmask); + size_t surrogate_bytes_count = count_ones(surrogates_bitmask); + size_t three_bytes_count = + 32 - ascii_count - two_bytes_count - surrogate_bytes_count; + count += ascii_count + 2 * two_bytes_count + 3 * three_bytes_count + + 2 * surrogate_bytes_count; } + } - const uint16x8_t surrogates_bytemask = - vceqq_u16(vandq_u16(in, v_f800), v_d800); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (vmaxvq_u16(surrogates_bytemask) == 0) { - // case: no surrogate pairs, extend all 16-bit code units to 32-bit code - // units - vst1q_u32(utf32_output, vmovl_u16(vget_low_u16(in))); - vst1q_u32(utf32_output + 4, vmovl_high_u16(in)); - utf32_output += 8; - buf += 8; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); + return count + scalar::utf16::utf8_length_from_utf16( + ptr, length - (ptr - input)); +} + +simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return implementation::count_utf16le(input, length); +} + +simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return implementation::count_utf16be(input, length); +} + +simdutf_warn_unused size_t +implementation::utf16_length_from_latin1(size_t length) const noexcept { + return scalar::latin1::utf16_length_from_latin1(length); +} + +simdutf_warn_unused size_t +implementation::utf32_length_from_latin1(size_t length) const noexcept { + return scalar::latin1::utf32_length_from_latin1(length); +} + +simdutf_warn_unused size_t implementation::utf8_length_from_latin1( + const char *input, size_t length) const noexcept { + const uint8_t *str = reinterpret_cast(input); + size_t answer = length / sizeof(__m512i) * sizeof(__m512i); + size_t i = 0; + if (answer >= 2048) { // long strings optimization + unsigned char v_0xFF = 0xff; + __m512i eight_64bits = _mm512_setzero_si512(); + while (i + sizeof(__m512i) <= length) { + __m512i runner = _mm512_setzero_si512(); + size_t iterations = (length - i) / sizeof(__m512i); + if (iterations > 255) { + iterations = 255; } - for (; k < forward; k++) { - uint16_t word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; - if ((word & 0xF800) != 0xD800) { - *utf32_output++ = char32_t(word); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k + 1]) - : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair(nullptr, - reinterpret_cast(utf32_output)); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - } + size_t max_i = i + iterations * sizeof(__m512i) - sizeof(__m512i); + for (; i + 4 * sizeof(__m512i) <= max_i; i += 4 * sizeof(__m512i)) { + // Load four __m512i vectors + __m512i input1 = _mm512_loadu_si512((const __m512i *)(str + i)); + __m512i input2 = + _mm512_loadu_si512((const __m512i *)(str + i + sizeof(__m512i))); + __m512i input3 = _mm512_loadu_si512( + (const __m512i *)(str + i + 2 * sizeof(__m512i))); + __m512i input4 = _mm512_loadu_si512( + (const __m512i *)(str + i + 3 * sizeof(__m512i))); + + // Generate four masks + __mmask64 mask1 = + _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input1); + __mmask64 mask2 = + _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input2); + __mmask64 mask3 = + _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input3); + __mmask64 mask4 = + _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input4); + // Apply the masks and subtract from the runner + __m512i not_ascii1 = + _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask1, v_0xFF); + __m512i not_ascii2 = + _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask2, v_0xFF); + __m512i not_ascii3 = + _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask3, v_0xFF); + __m512i not_ascii4 = + _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask4, v_0xFF); + + runner = _mm512_sub_epi8(runner, not_ascii1); + runner = _mm512_sub_epi8(runner, not_ascii2); + runner = _mm512_sub_epi8(runner, not_ascii3); + runner = _mm512_sub_epi8(runner, not_ascii4); } - buf += k; + + for (; i <= max_i; i += sizeof(__m512i)) { + __m512i more_input = _mm512_loadu_si512((const __m512i *)(str + i)); + + __mmask64 mask = + _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), more_input); + __m512i not_ascii = + _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask, v_0xFF); + runner = _mm512_sub_epi8(runner, not_ascii); + } + + eight_64bits = _mm512_add_epi64( + eight_64bits, _mm512_sad_epu8(runner, _mm512_setzero_si512())); } - } // while - return std::make_pair(buf, reinterpret_cast(utf32_output)); + + __m256i first_half = _mm512_extracti64x4_epi64(eight_64bits, 0); + __m256i second_half = _mm512_extracti64x4_epi64(eight_64bits, 1); + answer += (size_t)_mm256_extract_epi64(first_half, 0) + + (size_t)_mm256_extract_epi64(first_half, 1) + + (size_t)_mm256_extract_epi64(first_half, 2) + + (size_t)_mm256_extract_epi64(first_half, 3) + + (size_t)_mm256_extract_epi64(second_half, 0) + + (size_t)_mm256_extract_epi64(second_half, 1) + + (size_t)_mm256_extract_epi64(second_half, 2) + + (size_t)_mm256_extract_epi64(second_half, 3); + } else if (answer > 0) { + for (; i + sizeof(__m512i) <= length; i += sizeof(__m512i)) { + __m512i latin = _mm512_loadu_si512((const __m512i *)(str + i)); + uint64_t non_ascii = _mm512_movepi8_mask(latin); + answer += count_ones(non_ascii); + } + } + return answer + scalar::latin1::utf8_length_from_latin1( + reinterpret_cast(str + i), length - i); } -/* - Returns a pair: a result struct and utf8_output. - If there is an error, the count field of the result is the position of the - error. Otherwise, it is the position of the first unprocessed byte in buf - (even if finished). A scalar routing should carry on the conversion of the - tail if needed. -*/ -template -std::pair -arm_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, - char32_t *utf32_out) { - uint32_t *utf32_output = reinterpret_cast(utf32_out); - const char16_t *start = buf; - const char16_t *end = buf + len; +simdutf_warn_unused size_t implementation::utf16_length_from_utf8( + const char *input, size_t length) const noexcept { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! + for (; pos + 64 <= length; pos += 64) { + __m512i utf8 = _mm512_loadu_si512((const __m512i *)(input + pos)); + uint64_t utf8_continuation_mask = + _mm512_cmplt_epi8_mask(utf8, _mm512_set1_epi8(-65 + 1)); + // We count one word for anything that is not a continuation (so + // leading bytes). + count += 64 - count_ones(utf8_continuation_mask); + uint64_t utf8_4byte = + _mm512_cmpge_epu8_mask(utf8, _mm512_set1_epi8(int8_t(240))); + count += count_ones(utf8_4byte); + } + return count + + scalar::utf8::utf16_length_from_utf8(input + pos, length - pos); +} - const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800); - const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); +simdutf_warn_unused size_t implementation::utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + const char32_t *ptr = input; + size_t count{0}; - while ((end - buf) >= 8) { - uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); - if (!match_system(big_endian)) { - in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); - } + if (length >= 16) { + const char32_t *end = input + length - 16; - const uint16x8_t surrogates_bytemask = - vceqq_u16(vandq_u16(in, v_f800), v_d800); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (vmaxvq_u16(surrogates_bytemask) == 0) { - // case: no surrogate pairs, extend all 16-bit code units to 32-bit code - // units - vst1q_u32(utf32_output, vmovl_u16(vget_low_u16(in))); - vst1q_u32(utf32_output + 4, vmovl_high_u16(in)); - utf32_output += 8; - buf += 8; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; - if ((word & 0xF800) != 0xD800) { - *utf32_output++ = char32_t(word); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k + 1]) - : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k - 1), - reinterpret_cast(utf32_output)); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - } - } - buf += k; + const __m512i v_0000_007f = _mm512_set1_epi32((uint32_t)0x7f); + const __m512i v_0000_07ff = _mm512_set1_epi32((uint32_t)0x7ff); + const __m512i v_0000_ffff = _mm512_set1_epi32((uint32_t)0x0000ffff); + + while (ptr <= end) { + __m512i utf32 = _mm512_loadu_si512((const __m512i *)ptr); + ptr += 16; + __mmask16 ascii_bitmask = _mm512_cmple_epu32_mask(utf32, v_0000_007f); + __mmask16 two_bytes_bitmask = _mm512_mask_cmple_epu32_mask( + _knot_mask16(ascii_bitmask), utf32, v_0000_07ff); + __mmask16 three_bytes_bitmask = _mm512_mask_cmple_epu32_mask( + _knot_mask16(_mm512_kor(ascii_bitmask, two_bytes_bitmask)), utf32, + v_0000_ffff); + + size_t ascii_count = count_ones(ascii_bitmask); + size_t two_bytes_count = count_ones(two_bytes_bitmask); + size_t three_bytes_count = count_ones(three_bytes_bitmask); + size_t four_bytes_count = + 16 - ascii_count - two_bytes_count - three_bytes_count; + count += ascii_count + 2 * two_bytes_count + 3 * three_bytes_count + + 4 * four_bytes_count; } - } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), - reinterpret_cast(utf32_output)); + } + + return count + + scalar::utf32::utf8_length_from_utf32(ptr, length - (ptr - input)); } -/* end file src/arm64/arm_convert_utf16_to_utf32.cpp */ -/* begin file src/arm64/arm_convert_utf16_to_utf8.cpp */ -/* - The vectorized algorithm works on single SSE register i.e., it - loads eight 16-bit code units. - We consider three cases: - 1. an input register contains no surrogates and each value - is in range 0x0000 .. 0x07ff. - 2. an input register contains no surrogates and values are - is in range 0x0000 .. 0xffff. - 3. an input register contains surrogates --- i.e. codepoints - can have 16 or 32 bits. +simdutf_warn_unused size_t implementation::utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + const char32_t *ptr = input; + size_t count{0}; + + if (length >= 16) { + const char32_t *end = input + length - 16; + + const __m512i v_0000_ffff = _mm512_set1_epi32((uint32_t)0x0000ffff); - Ad 1. + while (ptr <= end) { + __m512i utf32 = _mm512_loadu_si512((const __m512i *)ptr); + ptr += 16; + __mmask16 surrogates_bitmask = + _mm512_cmpgt_epu32_mask(utf32, v_0000_ffff); - When values are less than 0x0800, it means that a 16-bit code unit - can be converted into: 1) single UTF8 byte (when it is an ASCII - char) or 2) two UTF8 bytes. + count += 16 + count_ones(surrogates_bitmask); + } + } - For this case we do only some shuffle to obtain these 2-byte - codes and finally compress the whole SSE register with a single - shuffle. + return count + + scalar::utf32::utf16_length_from_utf32(ptr, length - (ptr - input)); +} - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. +simdutf_warn_unused size_t implementation::utf32_length_from_utf8( + const char *input, size_t length) const noexcept { + return implementation::count_utf8(input, length); +} - Ad 2. +simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( + const char *input, size_t length) const noexcept { + return scalar::base64::maximal_binary_length_from_base64(input, length); +} - When values fit in 16-bit code units, but are above 0x07ff, then - a single word may produce one, two or three UTF8 bytes. +simdutf_warn_unused result implementation::base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return (options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); +} - We prepare data for all these three cases in two registers. - The first register contains lower two UTF8 bytes (used in all - cases), while the second one contains just the third byte for - the three-UTF8-bytes case. +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return (options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); +} - Finally these two registers are interleaved forming eight-element - array of 32-bit values. The array spans two SSE registers. - The bytes from the registers are compressed using two shuffles. +simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( + const char16_t *input, size_t length) const noexcept { + return scalar::base64::maximal_binary_length_from_base64(input, length); +} - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. +simdutf_warn_unused result implementation::base64_to_binary( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return (options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); +} +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return (options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); +} - To summarize: - - We need two 256-entry tables that have 8704 bytes in total. -*/ -/* - Returns a pair: the first unprocessed byte from buf and utf8_output - A scalar routing should carry on the conversion of the tail. -*/ -template -std::pair -arm_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { - uint8_t *utf8_output = reinterpret_cast(utf8_out); - const char16_t *end = buf + len; +simdutf_warn_unused size_t implementation::base64_length_from_binary( + size_t length, base64_options options) const noexcept { + return scalar::base64::base64_length_from_binary(length, options); +} - const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800); - const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); - const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080); - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); - if (!match_system(big_endian)) { - in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); - } - if (vmaxvq_u16(in) <= 0x7F) { // ASCII fast path!!!! - // It is common enough that we have sequences of 16 consecutive ASCII - // characters. - uint16x8_t nextin = - vld1q_u16(reinterpret_cast(buf) + 8); - if (!match_system(big_endian)) { - nextin = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(nextin))); - } - if (vmaxvq_u16(nextin) > 0x7F) { - // 1. pack the bytes - // obviously suboptimal. - uint8x8_t utf8_packed = vmovn_u16(in); - // 2. store (8 bytes) - vst1_u8(utf8_output, utf8_packed); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - in = nextin; - } else { - // 1. pack the bytes - // obviously suboptimal. - uint8x16_t utf8_packed = vmovn_high_u16(vmovn_u16(in), nextin); - // 2. store (16 bytes) - vst1q_u8(utf8_output, utf8_packed); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } - } +size_t implementation::binary_to_base64(const char *input, size_t length, + char *output, + base64_options options) const noexcept { + if (options & base64_url) { + return encode_base64(output, input, length, options); + } else { + return encode_base64(output, input, length, options); + } +} - if (vmaxvq_u16(in) <= 0x7FF) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00); - const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f); +} // namespace icelake +} // namespace simdutf - // t0 = [000a|aaaa|bbbb|bb00] - const uint16x8_t t0 = vshlq_n_u16(in, 2); - // t1 = [000a|aaaa|0000|0000] - const uint16x8_t t1 = vandq_u16(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const uint16x8_t t2 = vandq_u16(in, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const uint16x8_t t3 = vorrq_u16(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const uint16x8_t t4 = vorrq_u16(t3, v_c080); - // 2. merge ASCII and 2-byte codewords - const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); - const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f); - const uint8x16_t utf8_unpacked = - vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, in, t4)); - // 3. prepare bitmask for 8-bit lookup -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t mask = simdutf_make_uint16x8_t( - 0x0001, 0x0004, 0x0010, 0x0040, 0x0002, 0x0008, 0x0020, 0x0080); +/* begin file src/simdutf/icelake/end.h */ +#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE +// nothing needed. #else - const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040, - 0x0002, 0x0008, 0x0020, 0x0080}; +SIMDUTF_UNTARGET_REGION #endif - uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask)); - // 4. pack the bytes - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; - const uint8x16_t shuffle = vld1q_u8(row + 1); - const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle); - // 5. store bytes - vst1q_u8(utf8_output, utf8_packed); - // 6. adjust pointers - buf += 8; - utf8_output += row[0]; - continue; - } - const uint16x8_t surrogates_bytemask = - vceqq_u16(vandq_u16(in, v_f800), v_d800); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (vmaxvq_u16(surrogates_bytemask) == 0) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t dup_even = simdutf_make_uint16x8_t( - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); +#if SIMDUTF_GCC11ORMORE // workaround for + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 +SIMDUTF_POP_DISABLE_WARNINGS +#endif // end of workaround +/* end file src/simdutf/icelake/end.h */ +/* end file src/icelake/implementation.cpp */ +#endif +#if SIMDUTF_IMPLEMENTATION_HASWELL +/* begin file src/haswell/implementation.cpp */ + +/* begin file src/simdutf/haswell/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "haswell" +// #define SIMDUTF_IMPLEMENTATION haswell + +#if SIMDUTF_CAN_ALWAYS_RUN_HASWELL +// nothing needed. #else - const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606, - 0x0808, 0x0a0a, 0x0c0c, 0x0e0e}; +SIMDUTF_TARGET_HASWELL #endif - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. +#if SIMDUTF_GCC11ORMORE // workaround for + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 +// clang-format off +SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) +// clang-format on +#endif // end of workaround +/* end file src/simdutf/haswell/begin.h */ +namespace simdutf { +namespace haswell { +namespace { +#ifndef SIMDUTF_HASWELL_H + #error "haswell.h must be included" +#endif +using namespace simd; - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. +simdutf_really_inline bool is_ascii(const simd8x64 &input) { + return input.reduce_or().is_ascii(); +} - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. +simdutf_unused simdutf_really_inline simd8 +must_be_continuation(const simd8 prev1, const simd8 prev2, + const simd8 prev3) { + simd8 is_second_byte = + prev1.saturating_sub(0b11000000u - 1); // Only 11______ will be > 0 + simd8 is_third_byte = + prev2.saturating_sub(0b11100000u - 1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = + prev3.saturating_sub(0b11110000u - 1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction + // will be <= 64, so signed comparison is fine. + return simd8(is_second_byte | is_third_byte | is_fourth_byte) > + int8_t(0); +} - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) vmovq_n_u16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const uint16x8_t t0 = vreinterpretq_u16_u8( - vqtbl1q_u8(vreinterpretq_u8_u16(in), vreinterpretq_u8_u16(dup_even))); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const uint16x8_t t2 = vorrq_u16(t1, simdutf_vec(0b1000000000000000)); +simdutf_really_inline simd8 +must_be_2_3_continuation(const simd8 prev2, + const simd8 prev3) { + simd8 is_third_byte = + prev2.saturating_sub(0xe0u - 0x80); // Only 111_____ will be > 0x80 + simd8 is_fourth_byte = + prev3.saturating_sub(0xf0u - 0x80); // Only 1111____ will be > 0x80 + return simd8(is_third_byte | is_fourth_byte); +} - // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - const uint16x8_t s0 = vshrq_n_u16(in, 12); - // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - const uint16x8_t s1 = vandq_u16(in, simdutf_vec(0b0000111111000000)); - // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] - const uint16x8_t s1s = vshlq_n_u16(s1, 2); - // [00bb|bbbb|0000|aaaa] - const uint16x8_t s2 = vorrq_u16(s0, s1s); - // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000)); - const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF); - const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(in, v_07ff); - const uint16x8_t m0 = - vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask); - const uint16x8_t s4 = veorq_u16(s3, m0); -#undef simdutf_vec +/* begin file src/haswell/avx2_validate_utf16.cpp */ +/* + In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. - // 4. expand code units 16-bit => 32-bit - const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4)); - const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4)); + In a vectorized algorithm we want to examine the most significant + nibble in order to select a fast path. If none of highest nibbles + are 0xD (13), than we are sure that UTF-16 chunk in a vector + register is valid. - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); - const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f); -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t onemask = simdutf_make_uint16x8_t( - 0x0001, 0x0004, 0x0010, 0x0040, 0x0100, 0x0400, 0x1000, 0x4000); - const uint16x8_t twomask = simdutf_make_uint16x8_t( - 0x0002, 0x0008, 0x0020, 0x0080, 0x0200, 0x0800, 0x2000, 0x8000); -#else - const uint16x8_t onemask = {0x0001, 0x0004, 0x0010, 0x0040, - 0x0100, 0x0400, 0x1000, 0x4000}; - const uint16x8_t twomask = {0x0002, 0x0008, 0x0020, 0x0080, - 0x0200, 0x0800, 0x2000, 0x8000}; -#endif - const uint16x8_t combined = - vorrq_u16(vandq_u16(one_byte_bytemask, onemask), - vandq_u16(one_or_two_bytes_bytemask, twomask)); - const uint16_t mask = vaddvq_u16(combined); - // The following fast path may or may not be beneficial. - /*if(mask == 0) { - // We only have three-byte code units. Use fast path. - const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0}; - const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle); - const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle); - vst1q_u8(utf8_output, utf8_0); - utf8_output += 12; - vst1q_u8(utf8_output, utf8_1); - utf8_output += 12; - buf += 8; - continue; - }*/ - const uint8_t mask0 = uint8_t(mask); + Let us analyze what we need to check if the nibble is 0xD. The + value of the preceding nibble determines what we have: - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const uint8x16_t shuffle0 = vld1q_u8(row0 + 1); - const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0); + 0xd000 .. 0xd7ff - a valid word + 0xd800 .. 0xdbff - low surrogate + 0xdc00 .. 0xdfff - high surrogate - const uint8_t mask1 = static_cast(mask >> 8); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const uint8x16_t shuffle1 = vld1q_u8(row1 + 1); - const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1); + Other constraints we have to consider: + - there must not be two consecutive low surrogates (0xd800 .. 0xdbff) + - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) + - there must not be sole low surrogate nor high surrogate - vst1q_u8(utf8_output, utf8_0); - utf8_output += row0[0]; - vst1q_u8(utf8_output, utf8_1); - utf8_output += row1[0]; + We're going to build three bitmasks based on the 3rd nibble: + - V = valid word, + - L = low surrogate (0xd800 .. 0xdbff) + - H = high surrogate (0xdc00 .. 0xdfff) - buf += 8; - // surrogate pair(s) in a register + 0 1 2 3 4 5 6 7 <--- word index + [ V | L | H | L | H | V | V | L ] + 1 0 0 0 0 1 1 0 - V = valid masks + 0 1 0 1 0 0 0 1 - L = low surrogate + 0 0 1 0 1 0 0 0 - H high surrogate + + + 1 0 0 0 0 1 1 0 V = valid masks + 0 1 0 1 0 0 0 0 a = L & (H >> 1) + 0 0 1 0 1 0 0 0 b = a << 1 + 1 1 1 1 1 1 1 0 c = V | a | b + ^ + the last bit can be zero, we just consume 7 + code units and recheck this word in the next iteration +*/ + +/* Returns: + - pointer to the last unprocessed character (a scalar fallback should check + the rest); + - nullptr if an error was detected. +*/ +template +const char16_t *avx2_validate_utf16(const char16_t *input, size_t size) { + const char16_t *end = input + size; + + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); + + while (input + simd16::ELEMENTS * 2 < end) { + // 0. Load data: since the validation takes into account only higher + // byte of each word, we compress the two vectors into one which + // consists only the higher bytes. + auto in0 = simd16(input); + auto in1 = simd16(input + simd16::ELEMENTS); + + if (big_endian) { + in0 = in0.swap_bytes(); + in1 = in1.swap_bytes(); + } + + const auto t0 = in0.shr<8>(); + const auto t1 = in1.shr<8>(); + + const auto in = simd16::pack(t0, t1); + + // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). + const auto surrogates_wordmask = (in & v_f8) == v_d8; + const uint32_t surrogates_bitmask = surrogates_wordmask.to_bitmask(); + if (surrogates_bitmask == 0x0) { + input += simd16::ELEMENTS * 2; } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; - if ((word & 0xFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xF800) != 0xD800) { - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k + 1]) - : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair(nullptr, - reinterpret_cast(utf8_output)); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf8_output++ = char((value >> 18) | 0b11110000); - *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((value & 0b111111) | 0b10000000); - } + // 2. We have some surrogates that have to be distinguished: + // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) + // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) + // + // Fact: high surrogate has 11th bit set (3rd bit in the higher word) + + // V - non-surrogate code units + // V = not surrogates_wordmask + const uint32_t V = ~surrogates_bitmask; + + // H - word-mask for high surrogates: the six highest bits are 0b1101'11 + const auto vH = (in & v_fc) == v_dc; + const uint32_t H = vH.to_bitmask(); + + // L - word mask for low surrogates + // L = not H and surrogates_wordmask + const uint32_t L = ~H & surrogates_bitmask; + + const uint32_t a = + L & (H >> 1); // A low surrogate must be followed by high one. + // (A low surrogate placed in the 7th register's word + // is an exception we handle.) + const uint32_t b = + a << 1; // Just mark that the opposite fact is hold, + // thanks to that we have only two masks for valid case. + const uint32_t c = V | a | b; // Combine all the masks into the final one. + + if (c == 0xffffffff) { + // The whole input register contains valid UTF-16, i.e., + // either single code units or proper surrogate pairs. + input += simd16::ELEMENTS * 2; + } else if (c == 0x7fffffff) { + // The 31 lower code units of the input register contains valid UTF-16. + // The 31 word may be either a low or high surrogate. It the next + // iteration we 1) check if the low surrogate is followed by a high + // one, 2) reject sole high surrogate. + input += simd16::ELEMENTS * 2 - 1; + } else { + return nullptr; } - buf += k; } - } // while + } - return std::make_pair(buf, reinterpret_cast(utf8_output)); + return input; } -/* - Returns a pair: a result struct and utf8_output. - If there is an error, the count field of the result is the position of the - error. Otherwise, it is the position of the first unprocessed byte in buf - (even if finished). A scalar routing should carry on the conversion of the - tail if needed. -*/ template -std::pair -arm_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, - char *utf8_out) { - uint8_t *utf8_output = reinterpret_cast(utf8_out); - const char16_t *start = buf; - const char16_t *end = buf + len; +const result avx2_validate_utf16_with_errors(const char16_t *input, + size_t size) { + if (simdutf_unlikely(size == 0)) { + return result(error_code::SUCCESS, 0); + } + const char16_t *start = input; + const char16_t *end = input + size; - const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800); - const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); - const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080); - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); - if (!match_system(big_endian)) { - in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); - } - if (vmaxvq_u16(in) <= 0x7F) { // ASCII fast path!!!! - // It is common enough that we have sequences of 16 consecutive ASCII - // characters. - uint16x8_t nextin = - vld1q_u16(reinterpret_cast(buf) + 8); - if (!match_system(big_endian)) { - nextin = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(nextin))); - } - if (vmaxvq_u16(nextin) > 0x7F) { - // 1. pack the bytes - // obviously suboptimal. - uint8x8_t utf8_packed = vmovn_u16(in); - // 2. store (8 bytes) - vst1_u8(utf8_output, utf8_packed); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - in = nextin; - } else { - // 1. pack the bytes - // obviously suboptimal. - uint8x16_t utf8_packed = vmovn_high_u16(vmovn_u16(in), nextin); - // 2. store (16 bytes) - vst1q_u8(utf8_output, utf8_packed); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } + while (input + simd16::ELEMENTS * 2 < end) { + // 0. Load data: since the validation takes into account only higher + // byte of each word, we compress the two vectors into one which + // consists only the higher bytes. + auto in0 = simd16(input); + auto in1 = simd16(input + simd16::ELEMENTS); + + if (big_endian) { + in0 = in0.swap_bytes(); + in1 = in1.swap_bytes(); } - if (vmaxvq_u16(in) <= 0x7FF) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00); - const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f); + const auto t0 = in0.shr<8>(); + const auto t1 = in1.shr<8>(); - // t0 = [000a|aaaa|bbbb|bb00] - const uint16x8_t t0 = vshlq_n_u16(in, 2); - // t1 = [000a|aaaa|0000|0000] - const uint16x8_t t1 = vandq_u16(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const uint16x8_t t2 = vandq_u16(in, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const uint16x8_t t3 = vorrq_u16(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const uint16x8_t t4 = vorrq_u16(t3, v_c080); - // 2. merge ASCII and 2-byte codewords - const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); - const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f); - const uint8x16_t utf8_unpacked = - vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, in, t4)); - // 3. prepare bitmask for 8-bit lookup -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t mask = simdutf_make_uint16x8_t( - 0x0001, 0x0004, 0x0010, 0x0040, 0x0002, 0x0008, 0x0020, 0x0080); -#else - const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040, - 0x0002, 0x0008, 0x0020, 0x0080}; -#endif - uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask)); - // 4. pack the bytes - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; - const uint8x16_t shuffle = vld1q_u8(row + 1); - const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle); + const auto in = simd16::pack(t0, t1); - // 5. store bytes - vst1q_u8(utf8_output, utf8_packed); + // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). + const auto surrogates_wordmask = (in & v_f8) == v_d8; + const uint32_t surrogates_bitmask = surrogates_wordmask.to_bitmask(); + if (surrogates_bitmask == 0x0) { + input += simd16::ELEMENTS * 2; + } else { + // 2. We have some surrogates that have to be distinguished: + // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) + // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) + // + // Fact: high surrogate has 11th bit set (3rd bit in the higher word) - // 6. adjust pointers - buf += 8; - utf8_output += row[0]; - continue; - } - const uint16x8_t surrogates_bytemask = - vceqq_u16(vandq_u16(in, v_f800), v_d800); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (vmaxvq_u16(surrogates_bytemask) == 0) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t dup_even = simdutf_make_uint16x8_t( - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); -#else - const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606, - 0x0808, 0x0a0a, 0x0c0c, 0x0e0e}; -#endif - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes + // V - non-surrogate code units + // V = not surrogates_wordmask + const uint32_t V = ~surrogates_bitmask; - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. + // H - word-mask for high surrogates: the six highest bits are 0b1101'11 + const auto vH = (in & v_fc) == v_dc; + const uint32_t H = vH.to_bitmask(); - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. + // L - word mask for low surrogates + // L = not H and surrogates_wordmask + const uint32_t L = ~H & surrogates_bitmask; - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. + const uint32_t a = + L & (H >> 1); // A low surrogate must be followed by high one. + // (A low surrogate placed in the 7th register's word + // is an exception we handle.) + const uint32_t b = + a << 1; // Just mark that the opposite fact is hold, + // thanks to that we have only two masks for valid case. + const uint32_t c = V | a | b; // Combine all the masks into the final one. - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) vmovq_n_u16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const uint16x8_t t0 = vreinterpretq_u16_u8( - vqtbl1q_u8(vreinterpretq_u8_u16(in), vreinterpretq_u8_u16(dup_even))); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const uint16x8_t t2 = vorrq_u16(t1, simdutf_vec(0b1000000000000000)); + if (c == 0xffffffff) { + // The whole input register contains valid UTF-16, i.e., + // either single code units or proper surrogate pairs. + input += simd16::ELEMENTS * 2; + } else if (c == 0x7fffffff) { + // The 31 lower code units of the input register contains valid UTF-16. + // The 31 word may be either a low or high surrogate. It the next + // iteration we 1) check if the low surrogate is followed by a high + // one, 2) reject sole high surrogate. + input += simd16::ELEMENTS * 2 - 1; + } else { + return result(error_code::SURROGATE, input - start); + } + } + } - // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - const uint16x8_t s0 = vshrq_n_u16(in, 12); - // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - const uint16x8_t s1 = vandq_u16(in, simdutf_vec(0b0000111111000000)); - // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] - const uint16x8_t s1s = vshlq_n_u16(s1, 2); - // [00bb|bbbb|0000|aaaa] - const uint16x8_t s2 = vorrq_u16(s0, s1s); - // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000)); - const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF); - const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(in, v_07ff); - const uint16x8_t m0 = - vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask); - const uint16x8_t s4 = veorq_u16(s3, m0); -#undef simdutf_vec + return result(error_code::SUCCESS, input - start); +} +/* end file src/haswell/avx2_validate_utf16.cpp */ +/* begin file src/haswell/avx2_validate_utf32le.cpp */ +/* Returns: + - pointer to the last unprocessed character (a scalar fallback should check + the rest); + - nullptr if an error was detected. +*/ +const char32_t *avx2_validate_utf32le(const char32_t *input, size_t size) { + const char32_t *end = input + size; - // 4. expand code units 16-bit => 32-bit - const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4)); - const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4)); + const __m256i standardmax = _mm256_set1_epi32(0x10ffff); + const __m256i offset = _mm256_set1_epi32(0xffff2000); + const __m256i standardoffsetmax = _mm256_set1_epi32(0xfffff7ff); + __m256i currentmax = _mm256_setzero_si256(); + __m256i currentoffsetmax = _mm256_setzero_si256(); - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); - const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f); -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t onemask = simdutf_make_uint16x8_t( - 0x0001, 0x0004, 0x0010, 0x0040, 0x0100, 0x0400, 0x1000, 0x4000); - const uint16x8_t twomask = simdutf_make_uint16x8_t( - 0x0002, 0x0008, 0x0020, 0x0080, 0x0200, 0x0800, 0x2000, 0x8000); -#else - const uint16x8_t onemask = {0x0001, 0x0004, 0x0010, 0x0040, - 0x0100, 0x0400, 0x1000, 0x4000}; - const uint16x8_t twomask = {0x0002, 0x0008, 0x0020, 0x0080, - 0x0200, 0x0800, 0x2000, 0x8000}; -#endif - const uint16x8_t combined = - vorrq_u16(vandq_u16(one_byte_bytemask, onemask), - vandq_u16(one_or_two_bytes_bytemask, twomask)); - const uint16_t mask = vaddvq_u16(combined); - // The following fast path may or may not be beneficial. - /*if(mask == 0) { - // We only have three-byte code units. Use fast path. - const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0}; - const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle); - const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle); - vst1q_u8(utf8_output, utf8_0); - utf8_output += 12; - vst1q_u8(utf8_output, utf8_1); - utf8_output += 12; - buf += 8; - continue; - }*/ - const uint8_t mask0 = uint8_t(mask); + while (input + 8 < end) { + const __m256i in = _mm256_loadu_si256((__m256i *)input); + currentmax = _mm256_max_epu32(in, currentmax); + currentoffsetmax = + _mm256_max_epu32(_mm256_add_epi32(in, offset), currentoffsetmax); + input += 8; + } + __m256i is_zero = + _mm256_xor_si256(_mm256_max_epu32(currentmax, standardmax), standardmax); + if (_mm256_testz_si256(is_zero, is_zero) == 0) { + return nullptr; + } - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const uint8x16_t shuffle0 = vld1q_u8(row0 + 1); - const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0); + is_zero = _mm256_xor_si256( + _mm256_max_epu32(currentoffsetmax, standardoffsetmax), standardoffsetmax); + if (_mm256_testz_si256(is_zero, is_zero) == 0) { + return nullptr; + } - const uint8_t mask1 = static_cast(mask >> 8); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const uint8x16_t shuffle1 = vld1q_u8(row1 + 1); - const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1); + return input; +} - vst1q_u8(utf8_output, utf8_0); - utf8_output += row0[0]; - vst1q_u8(utf8_output, utf8_1); - utf8_output += row1[0]; +const result avx2_validate_utf32le_with_errors(const char32_t *input, + size_t size) { + const char32_t *start = input; + const char32_t *end = input + size; - buf += 8; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k]) - : buf[k]; - if ((word & 0xFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xF800) != 0xD800) { - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = !match_system(big_endian) - ? scalar::utf16::swap_bytes(buf[k + 1]) - : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k - 1), - reinterpret_cast(utf8_output)); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf8_output++ = char((value >> 18) | 0b11110000); - *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((value & 0b111111) | 0b10000000); - } - } - buf += k; + const __m256i standardmax = _mm256_set1_epi32(0x10ffff); + const __m256i offset = _mm256_set1_epi32(0xffff2000); + const __m256i standardoffsetmax = _mm256_set1_epi32(0xfffff7ff); + __m256i currentmax = _mm256_setzero_si256(); + __m256i currentoffsetmax = _mm256_setzero_si256(); + + while (input + 8 < end) { + const __m256i in = _mm256_loadu_si256((__m256i *)input); + currentmax = _mm256_max_epu32(in, currentmax); + currentoffsetmax = + _mm256_max_epu32(_mm256_add_epi32(in, offset), currentoffsetmax); + + __m256i is_zero = _mm256_xor_si256( + _mm256_max_epu32(currentmax, standardmax), standardmax); + if (_mm256_testz_si256(is_zero, is_zero) == 0) { + return result(error_code::TOO_LARGE, input - start); } - } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), - reinterpret_cast(utf8_output)); + is_zero = + _mm256_xor_si256(_mm256_max_epu32(currentoffsetmax, standardoffsetmax), + standardoffsetmax); + if (_mm256_testz_si256(is_zero, is_zero) == 0) { + return result(error_code::SURROGATE, input - start); + } + input += 8; + } + + return result(error_code::SUCCESS, input - start); } -/* end file src/arm64/arm_convert_utf16_to_utf8.cpp */ +/* end file src/haswell/avx2_validate_utf32le.cpp */ -/* begin file src/arm64/arm_base64.cpp */ -/** - * References and further reading: - * - * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the - * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. - * https://arxiv.org/abs/1910.05109 - * - * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 - * Instructions, ACM Transactions on the Web 12 (3), 2018. - * https://arxiv.org/abs/1704.00605 - * - * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. - * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, - * Request for Comments: 4648. - * - * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. - * http://www.alfredklomp.com/programming/sse-base64/. (2014). - * - * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD - * acceleration. https://github.com/aklomp/base64. (2014). - * - * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). - * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ - * - * Nick Kopp. 2013. Base64 Encoding on a GPU. - * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). - */ +/* begin file src/haswell/avx2_convert_latin1_to_utf8.cpp */ +std::pair +avx2_convert_latin1_to_utf8(const char *latin1_input, size_t len, + char *utf8_output) { + const char *end = latin1_input + len; + const __m256i v_0000 = _mm256_setzero_si256(); + const __m256i v_c080 = _mm256_set1_epi16((int16_t)0xc080); + const __m256i v_ff80 = _mm256_set1_epi16((int16_t)0xff80); + const size_t safety_margin = 12; -size_t encode_base64(char *dst, const char *src, size_t srclen, - base64_options options) { - // credit: Wojciech Muła - uint8_t *out = (uint8_t *)dst; - constexpr static uint8_t source_table[64] = { - 'A', 'Q', 'g', 'w', 'B', 'R', 'h', 'x', 'C', 'S', 'i', 'y', 'D', - 'T', 'j', 'z', 'E', 'U', 'k', '0', 'F', 'V', 'l', '1', 'G', 'W', - 'm', '2', 'H', 'X', 'n', '3', 'I', 'Y', 'o', '4', 'J', 'Z', 'p', - '5', 'K', 'a', 'q', '6', 'L', 'b', 'r', '7', 'M', 'c', 's', '8', - 'N', 'd', 't', '9', 'O', 'e', 'u', '+', 'P', 'f', 'v', '/', - }; - constexpr static uint8_t source_table_url[64] = { - 'A', 'Q', 'g', 'w', 'B', 'R', 'h', 'x', 'C', 'S', 'i', 'y', 'D', - 'T', 'j', 'z', 'E', 'U', 'k', '0', 'F', 'V', 'l', '1', 'G', 'W', - 'm', '2', 'H', 'X', 'n', '3', 'I', 'Y', 'o', '4', 'J', 'Z', 'p', - '5', 'K', 'a', 'q', '6', 'L', 'b', 'r', '7', 'M', 'c', 's', '8', - 'N', 'd', 't', '9', 'O', 'e', 'u', '-', 'P', 'f', 'v', '_', - }; - const uint8x16_t v3f = vdupq_n_u8(0x3f); -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - // When trying to load a uint8_t array, Visual Studio might - // error with: error C2664: '__n128x4 neon_ld4m_q8(const char *)': - // cannot convert argument 1 from 'const uint8_t [64]' to 'const char * - const uint8x16x4_t table = vld4q_u8( - (reinterpret_cast(options & base64_url) ? source_table_url - : source_table)); -#else - const uint8x16x4_t table = - vld4q_u8((options & base64_url) ? source_table_url : source_table); -#endif - size_t i = 0; - for (; i + 16 * 3 <= srclen; i += 16 * 3) { - const uint8x16x3_t in = vld3q_u8((const uint8_t *)src + i); - uint8x16x4_t result; - result.val[0] = vshrq_n_u8(in.val[0], 2); - result.val[1] = - vandq_u8(vsliq_n_u8(vshrq_n_u8(in.val[1], 4), in.val[0], 4), v3f); - result.val[2] = - vandq_u8(vsliq_n_u8(vshrq_n_u8(in.val[2], 6), in.val[1], 2), v3f); - result.val[3] = vandq_u8(in.val[2], v3f); - result.val[0] = vqtbl4q_u8(table, result.val[0]); - result.val[1] = vqtbl4q_u8(table, result.val[1]); - result.val[2] = vqtbl4q_u8(table, result.val[2]); - result.val[3] = vqtbl4q_u8(table, result.val[3]); - vst4q_u8(out, result); - out += 64; - } - out += scalar::base64::tail_encode_base64((char *)out, src + i, srclen - i, - options); + while (end - latin1_input >= std::ptrdiff_t(16 + safety_margin)) { + __m128i in8 = _mm_loadu_si128((__m128i *)latin1_input); + // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes + const __m128i v_80 = _mm_set1_epi8((char)0x80); + if (_mm_testz_si128(in8, v_80)) { // ASCII fast path!!!! + // 1. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, in8); + // 2. adjust pointers + latin1_input += 16; + utf8_output += 16; + continue; // we are done for this round! + } + // We proceed only with the first 16 bytes. + const __m256i in = _mm256_cvtepu8_epi16((in8)); + + // 1. prepare 2-byte values + // input 16-bit word : [0000|0000|aabb|bbbb] x 8 + // expected output : [1100|00aa|10bb|bbbb] x 8 + const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); + const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); + + // t0 = [0000|00aa|bbbb|bb00] + const __m256i t0 = _mm256_slli_epi16(in, 2); + // t1 = [0000|00aa|0000|0000] + const __m256i t1 = _mm256_and_si256(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const __m256i t2 = _mm256_and_si256(in, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const __m256i t3 = _mm256_or_si256(t1, t2); + // t4 = [1100|00aa|10bb|bbbb] + const __m256i t4 = _mm256_or_si256(t3, v_c080); - return size_t((char *)out - dst); -} + // 2. merge ASCII and 2-byte codewords -static inline void compress(uint8x16_t data, uint16_t mask, char *output) { - if (mask == 0) { - vst1q_u8((uint8_t *)output, data); - return; - } - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - uint64x2_t compactmasku64 = {tables::base64::thintable_epi8[mask1], - tables::base64::thintable_epi8[mask2]}; - uint8x16_t compactmask = vreinterpretq_u8_u64(compactmasku64); -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint8x16_t off = - simdutf_make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8); -#else - const uint8x16_t off = {0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8}; -#endif + // no bits set above 7th bit + const __m256i one_byte_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in, v_ff80), v_0000); + const uint32_t one_byte_bitmask = + static_cast(_mm256_movemask_epi8(one_byte_bytemask)); - compactmask = vaddq_u8(compactmask, off); - uint8x16_t pruned = vqtbl1q_u8(data, compactmask); + const __m256i utf8_unpacked = _mm256_blendv_epi8(t4, in, one_byte_bytemask); - int pop1 = tables::base64::BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - compactmask = vld1q_u8(tables::base64::pshufb_combine_table + pop1 * 8); - uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); - vst1q_u8((uint8_t *)output, answer); + // 3. prepare bitmask for 8-bit lookup + const uint32_t M0 = one_byte_bitmask & 0x55555555; + const uint32_t M1 = M0 >> 7; + const uint32_t M2 = (M1 | M0) & 0x00ff00ff; + // 4. pack the bytes + + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; + const uint8_t *row_2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> 16)] + [0]; + + const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); + const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); + + const __m256i utf8_packed = _mm256_shuffle_epi8( + utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); + // 5. store bytes + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_castsi256_si128(utf8_packed)); + utf8_output += row[0]; + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_extractf128_si256(utf8_packed, 1)); + utf8_output += row_2[0]; + + // 6. adjust pointers + latin1_input += 16; + continue; + + } // while + return std::make_pair(latin1_input, utf8_output); } +/* end file src/haswell/avx2_convert_latin1_to_utf8.cpp */ +/* begin file src/haswell/avx2_convert_latin1_to_utf16.cpp */ +template +std::pair +avx2_convert_latin1_to_utf16(const char *latin1_input, size_t len, + char16_t *utf16_output) { + size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 32 -struct block64 { - uint8x16_t chunks[4]; -}; + size_t i = 0; + for (; i < rounded_len; i += 16) { + // Load 16 bytes from the address (input + i) into a xmm register + __m128i xmm0 = + _mm_loadu_si128(reinterpret_cast(latin1_input + i)); -static_assert(sizeof(block64) == 64, "block64 is not 64 bytes"); -template uint64_t to_base64_mask(block64 *b, bool *error) { - uint8x16_t v0f = vdupq_n_u8(0xf); + // Zero extend each byte in xmm0 to word and put it in another xmm register + __m128i xmm1 = _mm_cvtepu8_epi16(xmm0); - uint8x16_t underscore0, underscore1, underscore2, underscore3; - if (base64_url) { - underscore0 = vceqq_u8(b->chunks[0], vdupq_n_u8(0x5f)); - underscore1 = vceqq_u8(b->chunks[1], vdupq_n_u8(0x5f)); - underscore2 = vceqq_u8(b->chunks[2], vdupq_n_u8(0x5f)); - underscore3 = vceqq_u8(b->chunks[3], vdupq_n_u8(0x5f)); - } else { - (void)underscore0; - (void)underscore1; - (void)underscore2; - (void)underscore3; - } + // Shift xmm0 to the right by 8 bytes + xmm0 = _mm_srli_si128(xmm0, 8); - uint8x16_t lo_nibbles0 = vandq_u8(b->chunks[0], v0f); - uint8x16_t lo_nibbles1 = vandq_u8(b->chunks[1], v0f); - uint8x16_t lo_nibbles2 = vandq_u8(b->chunks[2], v0f); - uint8x16_t lo_nibbles3 = vandq_u8(b->chunks[3], v0f); + // Zero extend each byte in the shifted xmm0 to word in xmm0 + xmm0 = _mm_cvtepu8_epi16(xmm0); - // Needed by the decoding step. - uint8x16_t hi_nibbles0 = vshrq_n_u8(b->chunks[0], 4); - uint8x16_t hi_nibbles1 = vshrq_n_u8(b->chunks[1], 4); - uint8x16_t hi_nibbles2 = vshrq_n_u8(b->chunks[2], 4); - uint8x16_t hi_nibbles3 = vshrq_n_u8(b->chunks[3], 4); - uint8x16_t lut_lo; -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - if (base64_url) { - lut_lo = - simdutf_make_uint8x16_t(0x3a, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, - 0x70, 0x61, 0xe1, 0xf4, 0xe5, 0xa5, 0xf4, 0xf4); - } else { - lut_lo = - simdutf_make_uint8x16_t(0x3a, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, - 0x70, 0x61, 0xe1, 0xb4, 0xe5, 0xe5, 0xf4, 0xb4); - } -#else - if (base64_url) { - lut_lo = uint8x16_t{0x3a, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, - 0x70, 0x61, 0xe1, 0xf4, 0xe5, 0xa5, 0xf4, 0xf4}; - } else { - lut_lo = uint8x16_t{0x3a, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, - 0x70, 0x61, 0xe1, 0xb4, 0xe5, 0xe5, 0xf4, 0xb4}; - } -#endif - uint8x16_t lo0 = vqtbl1q_u8(lut_lo, lo_nibbles0); - uint8x16_t lo1 = vqtbl1q_u8(lut_lo, lo_nibbles1); - uint8x16_t lo2 = vqtbl1q_u8(lut_lo, lo_nibbles2); - uint8x16_t lo3 = vqtbl1q_u8(lut_lo, lo_nibbles3); - uint8x16_t lut_hi; -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - if (base64_url) { - lut_hi = - simdutf_make_uint8x16_t(0x11, 0x20, 0x42, 0x80, 0x8, 0x4, 0x8, 0x4, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20); - } else { - lut_hi = - simdutf_make_uint8x16_t(0x11, 0x20, 0x42, 0x80, 0x8, 0x4, 0x8, 0x4, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20); - } -#else - if (base64_url) { - lut_hi = uint8x16_t{0x11, 0x20, 0x42, 0x80, 0x8, 0x4, 0x8, 0x4, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20}; - } else { - lut_hi = uint8x16_t{0x11, 0x20, 0x42, 0x80, 0x8, 0x4, 0x8, 0x4, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20}; - } -#endif - uint8x16_t hi0 = vqtbl1q_u8(lut_hi, hi_nibbles0); - uint8x16_t hi1 = vqtbl1q_u8(lut_hi, hi_nibbles1); - uint8x16_t hi2 = vqtbl1q_u8(lut_hi, hi_nibbles2); - uint8x16_t hi3 = vqtbl1q_u8(lut_hi, hi_nibbles3); + if (big_endian) { + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + xmm0 = _mm_shuffle_epi8(xmm0, swap); + xmm1 = _mm_shuffle_epi8(xmm1, swap); + } - if (base64_url) { - hi0 = vbicq_u8(hi0, underscore0); - hi1 = vbicq_u8(hi1, underscore1); - hi2 = vbicq_u8(hi2, underscore2); - hi3 = vbicq_u8(hi3, underscore3); - } + // Store the contents of xmm1 into the address pointed by (output + i) + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf16_output + i), xmm1); - uint8_t checks = - vmaxvq_u8(vorrq_u8(vorrq_u8(vandq_u8(lo0, hi0), vandq_u8(lo1, hi1)), - vorrq_u8(vandq_u8(lo2, hi2), vandq_u8(lo3, hi3)))); -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint8x16_t bit_mask = - simdutf_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); -#else - const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; -#endif - uint64_t badcharmask = 0; - *error = checks > 0x3; - if (checks) { - // Add each of the elements next to each other, successively, to stuff each - // 8 byte mask into one. - uint8x16_t test0 = vtstq_u8(lo0, hi0); - uint8x16_t test1 = vtstq_u8(lo1, hi1); - uint8x16_t test2 = vtstq_u8(lo2, hi2); - uint8x16_t test3 = vtstq_u8(lo3, hi3); - uint8x16_t sum0 = - vpaddq_u8(vandq_u8(test0, bit_mask), vandq_u8(test1, bit_mask)); - uint8x16_t sum1 = - vpaddq_u8(vandq_u8(test2, bit_mask), vandq_u8(test3, bit_mask)); - sum0 = vpaddq_u8(sum0, sum1); - sum0 = vpaddq_u8(sum0, sum0); - badcharmask = vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); - } - // This is the transformation step that can be done while we are waiting for - // sum0 - uint8x16_t roll_lut; -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - if (base64_url) { - roll_lut = - simdutf_make_uint8x16_t(0xe0, 0x11, 0x13, 0x4, 0xbf, 0xbf, 0xb9, 0xb9, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0); - } else { - roll_lut = - simdutf_make_uint8x16_t(0x0, 0x10, 0x13, 0x4, 0xbf, 0xbf, 0xb9, 0xb9, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0); - } -#else - if (base64_url) { - roll_lut = uint8x16_t{0xe0, 0x11, 0x13, 0x4, 0xbf, 0xbf, 0xb9, 0xb9, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; - } else { - roll_lut = uint8x16_t{0x0, 0x10, 0x13, 0x4, 0xbf, 0xbf, 0xb9, 0xb9, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; - } -#endif - uint8x16_t vsecond_last = base64_url ? vdupq_n_u8(0x2d) : vdupq_n_u8(0x2f); - if (base64_url) { - hi_nibbles0 = vbicq_u8(hi_nibbles0, underscore0); - hi_nibbles1 = vbicq_u8(hi_nibbles1, underscore1); - hi_nibbles2 = vbicq_u8(hi_nibbles2, underscore2); - hi_nibbles3 = vbicq_u8(hi_nibbles3, underscore3); + // Store the contents of xmm0 into the address pointed by (output + i + 8) + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf16_output + i + 8), xmm0); } - uint8x16_t roll0 = vqtbl1q_u8( - roll_lut, vaddq_u8(vceqq_u8(b->chunks[0], vsecond_last), hi_nibbles0)); - uint8x16_t roll1 = vqtbl1q_u8( - roll_lut, vaddq_u8(vceqq_u8(b->chunks[1], vsecond_last), hi_nibbles1)); - uint8x16_t roll2 = vqtbl1q_u8( - roll_lut, vaddq_u8(vceqq_u8(b->chunks[2], vsecond_last), hi_nibbles2)); - uint8x16_t roll3 = vqtbl1q_u8( - roll_lut, vaddq_u8(vceqq_u8(b->chunks[3], vsecond_last), hi_nibbles3)); - b->chunks[0] = vaddq_u8(b->chunks[0], roll0); - b->chunks[1] = vaddq_u8(b->chunks[1], roll1); - b->chunks[2] = vaddq_u8(b->chunks[2], roll2); - b->chunks[3] = vaddq_u8(b->chunks[3], roll3); - return badcharmask; -} -void copy_block(block64 *b, char *output) { - vst1q_u8((uint8_t *)output, b->chunks[0]); - vst1q_u8((uint8_t *)output + 16, b->chunks[1]); - vst1q_u8((uint8_t *)output + 32, b->chunks[2]); - vst1q_u8((uint8_t *)output + 48, b->chunks[3]); + return std::make_pair(latin1_input + rounded_len, utf16_output + rounded_len); } +/* end file src/haswell/avx2_convert_latin1_to_utf16.cpp */ +/* begin file src/haswell/avx2_convert_latin1_to_utf32.cpp */ +std::pair +avx2_convert_latin1_to_utf32(const char *buf, size_t len, + char32_t *utf32_output) { + size_t rounded_len = ((len | 7) ^ 7); // Round down to nearest multiple of 8 -uint64_t compress_block(block64 *b, uint64_t mask, char *output) { - uint64_t popcounts = - vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); - uint64_t offsets = popcounts * 0x0101010101010101; - compress(b->chunks[0], uint16_t(mask), output); - compress(b->chunks[1], uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF]); - compress(b->chunks[2], uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF]); - compress(b->chunks[3], uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF]); - return offsets >> 56; -} + for (size_t i = 0; i < rounded_len; i += 8) { + // Load 8 Latin1 characters into a 64-bit register + __m128i in = _mm_loadl_epi64((__m128i *)&buf[i]); -// The caller of this function is responsible to ensure that there are 64 bytes -// available from reading at src. The data is read into a block64 structure. -void load_block(block64 *b, const char *src) { - b->chunks[0] = vld1q_u8(reinterpret_cast(src)); - b->chunks[1] = vld1q_u8(reinterpret_cast(src) + 16); - b->chunks[2] = vld1q_u8(reinterpret_cast(src) + 32); - b->chunks[3] = vld1q_u8(reinterpret_cast(src) + 48); -} + // Zero extend each set of 8 Latin1 characters to 8 32-bit integers using + // vpmovzxbd + __m256i out = _mm256_cvtepu8_epi32(in); -// The caller of this function is responsible to ensure that there are 32 bytes -// available from reading at data. It returns a 16-byte value, narrowing with -// saturation the 16-bit words. -inline uint8x16_t load_satured(const uint16_t *data) { - uint16x8_t in1 = vld1q_u16(data); - uint16x8_t in2 = vld1q_u16(data + 8); - return vqmovn_high_u16(vqmovn_u16(in1), in2); -} + // Store the results back to memory + _mm256_storeu_si256((__m256i *)&utf32_output[i], out); + } -// The caller of this function is responsible to ensure that there are 128 bytes -// available from reading at src. The data is read into a block64 structure. -void load_block(block64 *b, const char16_t *src) { - b->chunks[0] = load_satured(reinterpret_cast(src)); - b->chunks[1] = load_satured(reinterpret_cast(src) + 16); - b->chunks[2] = load_satured(reinterpret_cast(src) + 32); - b->chunks[3] = load_satured(reinterpret_cast(src) + 48); + // return pointers pointing to where we left off + return std::make_pair(buf + rounded_len, utf32_output + rounded_len); } +/* end file src/haswell/avx2_convert_latin1_to_utf32.cpp */ -// decode 64 bytes and output 48 bytes -void base64_decode_block(char *out, const char *src) { - uint8x16x4_t str = vld4q_u8((uint8_t *)src); - uint8x16x3_t outvec; - outvec.val[0] = - vorrq_u8(vshlq_n_u8(str.val[0], 2), vshrq_n_u8(str.val[1], 4)); - outvec.val[1] = - vorrq_u8(vshlq_n_u8(str.val[1], 4), vshrq_n_u8(str.val[2], 2)); - outvec.val[2] = vorrq_u8(vshlq_n_u8(str.val[2], 6), str.val[3]); - vst3q_u8((uint8_t *)out, outvec); -} +/* begin file src/haswell/avx2_convert_utf8_to_utf16.cpp */ +// depends on "tables/utf8_to_utf16_tables.h" -template -full_result -compress_decode_base64(char *dst, const char_type *src, size_t srclen, - base64_options options, - last_chunk_handling_options last_chunk_options) { - const uint8_t *to_base64 = base64_url ? tables::base64::to_base64_url_value - : tables::base64::to_base64_value; - size_t equallocation = - srclen; // location of the first padding character if any - // skip trailing spaces - while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - size_t equalsigns = 0; - if (srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 1; - // skip trailing spaces - while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - if (srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 2; +// Convert up to 12 bytes from utf8 to utf16 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 12). +template +size_t convert_masked_utf8_to_utf16(const char *input, + uint64_t utf8_end_of_code_point_mask, + char16_t *&utf16_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // + // We first try a few fast paths. + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + const __m128i in = _mm_loadu_si128((__m128i *)input); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; + if (utf8_end_of_code_point_mask == 0xfff) { + // We process the data in chunks of 12 bytes. + __m256i ascii = _mm256_cvtepu8_epi16(in); + if (big_endian) { + const __m256i swap256 = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, + 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); + ascii = _mm256_shuffle_epi8(ascii, swap256); } + _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf16_output), ascii); + utf16_output += 12; // We wrote 12 16-bit characters. + return 12; // We consumed 12 bytes. } - if (srclen == 0) { - if (equalsigns > 0) { - return {INVALID_BASE64_CHARACTER, equallocation, 0}; + if (((utf8_end_of_code_point_mask & 0xffff) == 0xaaaa)) { + // We want to take 8 2-byte UTF-8 code units and turn them into 8 2-byte + // UTF-16 code units. There is probably a more efficient sequence, but the + // following might do. + const __m128i sh = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); + __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); + if (big_endian) + composed = _mm_shuffle_epi8(composed, swap); + _mm_storeu_si128((__m128i *)utf16_output, composed); + utf16_output += 8; // We wrote 16 bytes, 8 code points. + return 16; + } + if (input_utf8_end_of_code_point_mask == 0x924) { + // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte + // UTF-16 code units. There is probably a more efficient sequence, but the + // following might do. + const __m128i sh = + _mm_setr_epi8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = + _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits + const __m128i middlebyte = + _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits + const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); + const __m128i highbyte = + _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits + const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); + const __m128i composed = + _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); + __m128i composed_repacked = _mm_packus_epi32(composed, composed); + if (big_endian) + composed_repacked = _mm_shuffle_epi8(composed_repacked, swap); + _mm_storeu_si128((__m128i *)utf16_output, composed_repacked); + utf16_output += 4; + return 12; + } + + const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][1]; + if (idx < 64) { + // SIX (6) input code-code units + // this is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. On + // processors where pdep/pext is fast, we might be able to use a small + // lookup table. + const __m128i sh = _mm_loadu_si128( + (const __m128i *)simdutf::tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); + __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); + if (big_endian) + composed = _mm_shuffle_epi8(composed, swap); + _mm_storeu_si128((__m128i *)utf16_output, composed); + utf16_output += 6; // We wrote 12 bytes, 6 code points. There is a potential + // overflow of 4 bytes. + } else if (idx < 145) { + // FOUR (4) input code-code units + const __m128i sh = _mm_loadu_si128( + (const __m128i *)simdutf::tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = + _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits + const __m128i middlebyte = + _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits + const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); + const __m128i highbyte = + _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits + const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); + const __m128i composed = + _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); + __m128i composed_repacked = _mm_packus_epi32(composed, composed); + if (big_endian) + composed_repacked = _mm_shuffle_epi8(composed_repacked, swap); + _mm_storeu_si128((__m128i *)utf16_output, composed_repacked); + utf16_output += 4; // Here we overflow by 8 bytes. + } else if (idx < 209) { + // TWO (2) input code-code units + ////////////// + // There might be garbage inputs where a leading byte mascarades as a + // four-byte leading byte (by being followed by 3 continuation byte), but is + // not greater than 0xf0. This could trigger a buffer overflow if we only + // counted leading bytes of the form 0xf0 as generating surrogate pairs, + // without further UTF-8 validation. Thus we must be careful to ensure that + // only leading bytes at least as large as 0xf0 generate surrogate pairs. We + // do as at the cost of an extra mask. + ///////////// + const __m128i sh = _mm_loadu_si128( + (const __m128i *)simdutf::tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi32(0x7f)); + const __m128i middlebyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); + const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); + __m128i middlehighbyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f0000)); + // correct for spurious high bit + const __m128i correct = + _mm_srli_epi32(_mm_and_si128(perm, _mm_set1_epi32(0x400000)), 1); + middlehighbyte = _mm_xor_si128(correct, middlehighbyte); + const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4); + // We deliberately carry the leading four bits in highbyte if they are + // present, we remove them later when computing hightenbits. + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0xff000000)); + const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6); + // When we need to generate a surrogate pair (leading byte > 0xF0), then + // the corresponding 32-bit value in 'composed' will be greater than + // > (0xff00000>>6) or > 0x3c00000. This can be used later to identify the + // location of the surrogate pairs. + const __m128i composed = + _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), + _mm_or_si128(highbyte_shifted, middlehighbyte_shifted)); + const __m128i composedminus = + _mm_sub_epi32(composed, _mm_set1_epi32(0x10000)); + const __m128i lowtenbits = + _mm_and_si128(composedminus, _mm_set1_epi32(0x3ff)); + // Notice the 0x3ff mask: + const __m128i hightenbits = + _mm_and_si128(_mm_srli_epi32(composedminus, 10), _mm_set1_epi32(0x3ff)); + const __m128i lowtenbitsadd = + _mm_add_epi32(lowtenbits, _mm_set1_epi32(0xDC00)); + const __m128i hightenbitsadd = + _mm_add_epi32(hightenbits, _mm_set1_epi32(0xD800)); + const __m128i lowtenbitsaddshifted = _mm_slli_epi32(lowtenbitsadd, 16); + __m128i surrogates = _mm_or_si128(hightenbitsadd, lowtenbitsaddshifted); + uint32_t basic_buffer[4]; + uint32_t basic_buffer_swap[4]; + if (big_endian) { + _mm_storeu_si128((__m128i *)basic_buffer_swap, + _mm_shuffle_epi8(composed, swap)); + surrogates = _mm_shuffle_epi8(surrogates, swap); } - return {SUCCESS, 0, 0}; - } - const char_type *const srcinit = src; - const char *const dstinit = dst; - const char_type *const srcend = src + srclen; - - constexpr size_t block_size = 10; - char buffer[block_size * 64]; - char *bufferptr = buffer; - if (srclen >= 64) { - const char_type *const srcend64 = src + srclen - 64; - while (src <= srcend64) { - block64 b; - load_block(&b, src); - src += 64; - bool error = false; - uint64_t badcharmask = to_base64_mask(&b, &error); - if (badcharmask) { - if (error) { - src -= 64; - while (src < srcend && scalar::base64::is_eight_byte(*src) && - to_base64[uint8_t(*src)] <= 64) { - src++; - } - if (src < srcend) { - // should never happen - } - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - } - - if (badcharmask != 0) { - // optimization opportunity: check for simple masks like those made of - // continuous 1s followed by continuous 0s. And masks containing a - // single bad character. - bufferptr += compress_block(&b, badcharmask, bufferptr); + _mm_storeu_si128((__m128i *)basic_buffer, composed); + uint32_t surrogate_buffer[4]; + _mm_storeu_si128((__m128i *)surrogate_buffer, surrogates); + for (size_t i = 0; i < 3; i++) { + if (basic_buffer[i] > 0x3c00000) { + utf16_output[0] = uint16_t(surrogate_buffer[i] & 0xffff); + utf16_output[1] = uint16_t(surrogate_buffer[i] >> 16); + utf16_output += 2; } else { - // optimization opportunity: if bufferptr == buffer and mask == 0, we - // can avoid the call to compress_block and decode directly. - copy_block(&b, bufferptr); - bufferptr += 64; - } - if (bufferptr >= (block_size - 1) * 64 + buffer) { - for (size_t i = 0; i < (block_size - 1); i++) { - base64_decode_block(dst, buffer + i * 64); - dst += 48; - } - std::memcpy(buffer, buffer + (block_size - 1) * 64, - 64); // 64 might be too much - bufferptr -= (block_size - 1) * 64; - } - } - } - char *buffer_start = buffer; - // Optimization note: if this is almost full, then it is worth our - // time, otherwise, we should just decode directly. - int last_block = (int)((bufferptr - buffer_start) % 64); - if (last_block != 0 && srcend - src + last_block >= 64) { - while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { - uint8_t val = to_base64[uint8_t(*src)]; - *bufferptr = char(val); - if (!scalar::base64::is_eight_byte(*src) || val > 64) { - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; + utf16_output[0] = big_endian ? uint16_t(basic_buffer_swap[i]) + : uint16_t(basic_buffer[i]); + utf16_output++; } - bufferptr += (val <= 63); - src++; } + } else { + // here we know that there is an error but we do not handle errors } + return consumed; +} +/* end file src/haswell/avx2_convert_utf8_to_utf16.cpp */ +/* begin file src/haswell/avx2_convert_utf8_to_utf32.cpp */ +// depends on "tables/utf8_to_utf16_tables.h" - for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { - base64_decode_block(dst, buffer_start); - dst += 48; +// Convert up to 12 bytes from utf8 to utf32 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 12). +size_t convert_masked_utf8_to_utf32(const char *input, + uint64_t utf8_end_of_code_point_mask, + char32_t *&utf32_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // + // We first try a few fast paths. + const __m128i in = _mm_loadu_si128((__m128i *)input); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; + if (utf8_end_of_code_point_mask == 0xfff) { + // We process the data in chunks of 12 bytes. + _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output), + _mm256_cvtepu8_epi32(in)); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output + 8), + _mm256_cvtepu8_epi32(_mm_srli_si128(in, 8))); + utf32_output += 12; // We wrote 12 32-bit characters. + return 12; // We consumed 12 bytes. } - if ((bufferptr - buffer_start) % 64 != 0) { - while (buffer_start + 4 < bufferptr) { - uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + - (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + - (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + - (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) - << 8; - triple = scalar::utf32::swap_bytes(triple); - std::memcpy(dst, &triple, 4); - - dst += 3; - buffer_start += 4; - } - if (buffer_start + 4 <= bufferptr) { - uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + - (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + - (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + - (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) - << 8; - triple = scalar::utf32::swap_bytes(triple); - std::memcpy(dst, &triple, 3); - - dst += 3; - buffer_start += 4; - } - // we may have 1, 2 or 3 bytes left and we need to decode them so let us - // backtrack - int leftover = int(bufferptr - buffer_start); - while (leftover > 0) { - while (to_base64[uint8_t(*(src - 1))] == 64) { - src--; - } - src--; - leftover--; - } + if (((utf8_end_of_code_point_mask & 0xffff) == 0xaaaa)) { + // We want to take 8 2-byte UTF-8 code units and turn them into 8 4-byte + // UTF-32 code units. There is probably a more efficient sequence, but the + // following might do. + const __m128i sh = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); + const __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); + _mm256_storeu_si256((__m256i *)utf32_output, + _mm256_cvtepu16_epi32(composed)); + utf32_output += 8; // We wrote 16 bytes, 8 code points. + return 16; } - if (src < srcend + equalsigns) { - full_result r = scalar::base64::base64_tail_decode( - dst, src, srcend - src, equalsigns, options, last_chunk_options); - if (r.error == error_code::INVALID_BASE64_CHARACTER || - r.error == error_code::BASE64_EXTRA_BITS) { - r.input_count += size_t(src - srcinit); - return r; - } else { - r.output_count += size_t(dst - dstinit); - } - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0) { - // additional checks - if ((r.output_count % 3 == 0) || - ((r.output_count % 3) + 1 + equalsigns != 4)) { - r.error = error_code::INVALID_BASE64_CHARACTER; - r.input_count = equallocation; - } - } - return r; + if (input_utf8_end_of_code_point_mask == 0x924) { + // We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte + // UTF-32 code units. There is probably a more efficient sequence, but the + // following might do. + const __m128i sh = + _mm_setr_epi8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = + _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits + const __m128i middlebyte = + _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits + const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); + const __m128i highbyte = + _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits + const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); + const __m128i composed = + _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); + _mm_storeu_si128((__m128i *)utf32_output, composed); + utf32_output += 4; + return 12; } - if (equalsigns > 0) { - if ((size_t(dst - dstinit) % 3 == 0) || - ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; - } + /// We do not have a fast path available, so we fallback. + + const uint8_t idx = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; + if (idx < 64) { + // SIX (6) input code-code units + // this is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. On + // processors where pdep/pext is fast, we might be able to use a small + // lookup table. + const __m128i sh = + _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); + const __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); + _mm256_storeu_si256((__m256i *)utf32_output, + _mm256_cvtepu16_epi32(composed)); + utf32_output += 6; // We wrote 24 bytes, 6 code points. There is a potential + // overflow of 32 - 24 = 8 bytes. + } else if (idx < 145) { + // FOUR (4) input code-code units + const __m128i sh = + _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = + _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits + const __m128i middlebyte = + _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits + const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); + const __m128i highbyte = + _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits + const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); + const __m128i composed = + _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); + _mm_storeu_si128((__m128i *)utf32_output, composed); + utf32_output += 4; + } else if (idx < 209) { + // TWO (2) input code-code units + const __m128i sh = + _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi32(0x7f)); + const __m128i middlebyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); + const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); + __m128i middlehighbyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f0000)); + // correct for spurious high bit + const __m128i correct = + _mm_srli_epi32(_mm_and_si128(perm, _mm_set1_epi32(0x400000)), 1); + middlehighbyte = _mm_xor_si128(correct, middlehighbyte); + const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4); + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0x07000000)); + const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6); + const __m128i composed = + _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), + _mm_or_si128(highbyte_shifted, middlehighbyte_shifted)); + _mm_storeu_si128((__m128i *)utf32_output, composed); + utf32_output += + 3; // We wrote 3 * 4 bytes, there is a potential overflow of 4 bytes. + } else { + // here we know that there is an error but we do not handle errors } - return {SUCCESS, srclen, size_t(dst - dstinit)}; + return consumed; } -/* end file src/arm64/arm_base64.cpp */ -/* begin file src/arm64/arm_convert_utf32_to_latin1.cpp */ -std::pair -arm_convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) { - const char32_t *end = buf + len; - while (end - buf >= 8) { - uint32x4_t in1 = vld1q_u32(reinterpret_cast(buf)); - uint32x4_t in2 = vld1q_u32(reinterpret_cast(buf + 4)); +/* end file src/haswell/avx2_convert_utf8_to_utf32.cpp */ - uint16x8_t utf16_packed = vcombine_u16(vqmovn_u32(in1), vqmovn_u32(in2)); - if (vmaxvq_u16(utf16_packed) <= 0xff) { - // 1. pack the bytes - uint8x8_t latin1_packed = vmovn_u16(utf16_packed); - // 2. store (8 bytes) - vst1_u8(reinterpret_cast(latin1_output), latin1_packed); - // 3. adjust pointers - buf += 8; - latin1_output += 8; +/* begin file src/haswell/avx2_convert_utf16_to_latin1.cpp */ +template +std::pair +avx2_convert_utf16_to_latin1(const char16_t *buf, size_t len, + char *latin1_output) { + const char16_t *end = buf + len; + while (end - buf >= 16) { + // Load 16 UTF-16 characters into 256-bit AVX2 register + __m256i in = _mm256_loadu_si256(reinterpret_cast(buf)); + + if (!match_system(big_endian)) { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, + 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); + in = _mm256_shuffle_epi8(in, swap); + } + + __m256i high_byte_mask = _mm256_set1_epi16((int16_t)0xFF00); + if (_mm256_testz_si256(in, high_byte_mask)) { + // Pack 16-bit characters into 8-bit and store in latin1_output + __m128i lo = _mm256_extractf128_si256(in, 0); + __m128i hi = _mm256_extractf128_si256(in, 1); + __m128i latin1_packed_lo = _mm_packus_epi16(lo, lo); + __m128i latin1_packed_hi = _mm_packus_epi16(hi, hi); + _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output), + latin1_packed_lo); + _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output + 8), + latin1_packed_hi); + // Adjust pointers for next iteration + buf += 16; + latin1_output += 16; } else { return std::make_pair(nullptr, reinterpret_cast(latin1_output)); } @@ -15943,658 +29074,335 @@ arm_convert_utf32_to_latin1(const char32_t *buf, size_t len, return std::make_pair(buf, latin1_output); } -std::pair -arm_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) { - const char32_t *start = buf; - const char32_t *end = buf + len; - - while (end - buf >= 8) { - uint32x4_t in1 = vld1q_u32(reinterpret_cast(buf)); - uint32x4_t in2 = vld1q_u32(reinterpret_cast(buf + 4)); - - uint16x8_t utf16_packed = vcombine_u16(vqmovn_u32(in1), vqmovn_u32(in2)); - - if (vmaxvq_u16(utf16_packed) <= 0xff) { - // 1. pack the bytes - uint8x8_t latin1_packed = vmovn_u16(utf16_packed); - // 2. store (8 bytes) - vst1_u8(reinterpret_cast(latin1_output), latin1_packed); - // 3. adjust pointers - buf += 8; - latin1_output += 8; - } else { - // Let us do a scalar fallback. - for (int k = 0; k < 8; k++) { - uint32_t word = buf[k]; - if (word <= 0xff) { - *latin1_output++ = char(word); - } else { - return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), - latin1_output); - } - } - } - } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), - latin1_output); -} -/* end file src/arm64/arm_convert_utf32_to_latin1.cpp */ -/* begin file src/arm64/arm_convert_utf32_to_utf16.cpp */ template -std::pair -arm_convert_utf32_to_utf16(const char32_t *buf, size_t len, - char16_t *utf16_out) { - uint16_t *utf16_output = reinterpret_cast(utf16_out); - const char32_t *end = buf + len; - - uint16x4_t forbidden_bytemask = vmov_n_u16(0x0); - - while (end - buf >= 4) { - uint32x4_t in = vld1q_u32(reinterpret_cast(buf)); - - // Check if no bits set above 16th - if (vmaxvq_u32(in) <= 0xFFFF) { - uint16x4_t utf16_packed = vmovn_u32(in); - - const uint16x4_t v_d800 = vmov_n_u16((uint16_t)0xd800); - const uint16x4_t v_dfff = vmov_n_u16((uint16_t)0xdfff); - forbidden_bytemask = vorr_u16(vand_u16(vcle_u16(utf16_packed, v_dfff), - vcge_u16(utf16_packed, v_d800)), - forbidden_bytemask); +std::pair +avx2_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, + char *latin1_output) { + const char16_t *start = buf; + const char16_t *end = buf + len; + while (end - buf >= 16) { + __m256i in = _mm256_loadu_si256(reinterpret_cast(buf)); - if (!match_system(big_endian)) { - utf16_packed = - vreinterpret_u16_u8(vrev16_u8(vreinterpret_u8_u16(utf16_packed))); - } - vst1_u16(utf16_output, utf16_packed); - utf16_output += 4; - buf += 4; - } else { - size_t forward = 3; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(nullptr, - reinterpret_cast(utf16_output)); - } - *utf16_output++ = !match_system(big_endian) - ? char16_t(word >> 8 | word << 8) - : char16_t(word); - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return std::make_pair(nullptr, - reinterpret_cast(utf16_output)); - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (!match_system(big_endian)) { - high_surrogate = - uint16_t(high_surrogate >> 8 | high_surrogate << 8); - low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - } - } - buf += k; + if (!match_system(big_endian)) { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, + 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); + in = _mm256_shuffle_epi8(in, swap); } - } - - // check for invalid input - if (vmaxv_u16(forbidden_bytemask) != 0) { - return std::make_pair(nullptr, reinterpret_cast(utf16_output)); - } - - return std::make_pair(buf, reinterpret_cast(utf16_output)); -} - -template -std::pair -arm_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, - char16_t *utf16_out) { - uint16_t *utf16_output = reinterpret_cast(utf16_out); - const char32_t *start = buf; - const char32_t *end = buf + len; - - while (end - buf >= 4) { - uint32x4_t in = vld1q_u32(reinterpret_cast(buf)); - // Check if no bits set above 16th - if (vmaxvq_u32(in) <= 0xFFFF) { - uint16x4_t utf16_packed = vmovn_u32(in); - - const uint16x4_t v_d800 = vmov_n_u16((uint16_t)0xd800); - const uint16x4_t v_dfff = vmov_n_u16((uint16_t)0xdfff); - const uint16x4_t forbidden_bytemask = vand_u16( - vcle_u16(utf16_packed, v_dfff), vcge_u16(utf16_packed, v_d800)); - if (vmaxv_u16(forbidden_bytemask) != 0) { - return std::make_pair(result(error_code::SURROGATE, buf - start), - reinterpret_cast(utf16_output)); - } - - if (!match_system(big_endian)) { - utf16_packed = - vreinterpret_u16_u8(vrev16_u8(vreinterpret_u8_u16(utf16_packed))); - } - vst1_u16(utf16_output, utf16_packed); - utf16_output += 4; - buf += 4; - } else { - size_t forward = 3; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k), - reinterpret_cast(utf16_output)); - } - *utf16_output++ = !match_system(big_endian) - ? char16_t(word >> 8 | word << 8) - : char16_t(word); + __m256i high_byte_mask = _mm256_set1_epi16((int16_t)0xFF00); + if (_mm256_testz_si256(in, high_byte_mask)) { + __m128i lo = _mm256_extractf128_si256(in, 0); + __m128i hi = _mm256_extractf128_si256(in, 1); + __m128i latin1_packed_lo = _mm_packus_epi16(lo, lo); + __m128i latin1_packed_hi = _mm_packus_epi16(hi, hi); + _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output), + latin1_packed_lo); + _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output + 8), + latin1_packed_hi); + buf += 16; + latin1_output += 16; + } else { + // Fallback to scalar code for handling errors + for (int k = 0; k < 16; k++) { + uint16_t word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k]) + : buf[k]; + if (word <= 0xff) { + *latin1_output++ = char(word); } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return std::make_pair( - result(error_code::TOO_LARGE, buf - start + k), - reinterpret_cast(utf16_output)); - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (!match_system(big_endian)) { - high_surrogate = - uint16_t(high_surrogate >> 8 | high_surrogate << 8); - low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); + return std::make_pair( + result{error_code::TOO_LARGE, (size_t)(buf - start + k)}, + latin1_output); } } - buf += k; + buf += 16; } - } - - return std::make_pair(result(error_code::SUCCESS, buf - start), - reinterpret_cast(utf16_output)); + } // while + return std::make_pair(result{error_code::SUCCESS, (size_t)(buf - start)}, + latin1_output); } -/* end file src/arm64/arm_convert_utf32_to_utf16.cpp */ -/* begin file src/arm64/arm_convert_utf32_to_utf8.cpp */ -std::pair -arm_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_out) { - uint8_t *utf8_output = reinterpret_cast(utf8_out); - const char32_t *end = buf + len; - - const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080); - - uint16x8_t forbidden_bytemask = vmovq_n_u16(0x0); - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (buf + 16 + safety_margin < end) { - uint32x4_t in = vld1q_u32(reinterpret_cast(buf)); - uint32x4_t nextin = vld1q_u32(reinterpret_cast(buf + 4)); - - // Check if no bits set above 16th - if (vmaxvq_u32(vorrq_u32(in, nextin)) <= 0xFFFF) { - // Pack UTF-32 to UTF-16 safely (without surrogate pairs) - // Apply UTF-16 => UTF-8 routine (arm_convert_utf16_to_utf8.cpp) - uint16x8_t utf16_packed = vcombine_u16(vmovn_u32(in), vmovn_u32(nextin)); - if (vmaxvq_u16(utf16_packed) <= 0x7F) { // ASCII fast path!!!! - // 1. pack the bytes - // obviously suboptimal. - uint8x8_t utf8_packed = vmovn_u16(utf16_packed); - // 2. store (8 bytes) - vst1_u8(utf8_output, utf8_packed); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - continue; // we are done for this round! - } - - if (vmaxvq_u16(utf16_packed) <= 0x7FF) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00); - const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f); +/* end file src/haswell/avx2_convert_utf16_to_latin1.cpp */ +/* begin file src/haswell/avx2_convert_utf16_to_utf8.cpp */ +/* + The vectorized algorithm works on single SSE register i.e., it + loads eight 16-bit code units. - // t0 = [000a|aaaa|bbbb|bb00] - const uint16x8_t t0 = vshlq_n_u16(utf16_packed, 2); - // t1 = [000a|aaaa|0000|0000] - const uint16x8_t t1 = vandq_u16(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const uint16x8_t t2 = vandq_u16(utf16_packed, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const uint16x8_t t3 = vorrq_u16(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const uint16x8_t t4 = vorrq_u16(t3, v_c080); - // 2. merge ASCII and 2-byte codewords - const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); - const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f); - const uint8x16_t utf8_unpacked = vreinterpretq_u8_u16( - vbslq_u16(one_byte_bytemask, utf16_packed, t4)); - // 3. prepare bitmask for 8-bit lookup -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t mask = simdutf_make_uint16x8_t( - 0x0001, 0x0004, 0x0010, 0x0040, 0x0002, 0x0008, 0x0020, 0x0080); -#else - const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040, - 0x0002, 0x0008, 0x0020, 0x0080}; -#endif - uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask)); - // 4. pack the bytes - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; - const uint8x16_t shuffle = vld1q_u8(row + 1); - const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle); + We consider three cases: + 1. an input register contains no surrogates and each value + is in range 0x0000 .. 0x07ff. + 2. an input register contains no surrogates and values are + is in range 0x0000 .. 0xffff. + 3. an input register contains surrogates --- i.e. codepoints + can have 16 or 32 bits. - // 5. store bytes - vst1q_u8(utf8_output, utf8_packed); + Ad 1. - // 6. adjust pointers - buf += 8; - utf8_output += row[0]; - continue; - } else { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); - const uint16x8_t v_dfff = vmovq_n_u16((uint16_t)0xdfff); - forbidden_bytemask = - vorrq_u16(vandq_u16(vcleq_u16(utf16_packed, v_dfff), - vcgeq_u16(utf16_packed, v_d800)), - forbidden_bytemask); + When values are less than 0x0800, it means that a 16-bit code unit + can be converted into: 1) single UTF8 byte (when it is an ASCII + char) or 2) two UTF8 bytes. -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t dup_even = simdutf_make_uint16x8_t( - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); -#else - const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606, - 0x0808, 0x0a0a, 0x0c0c, 0x0e0e}; -#endif - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - - two UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes + For this case we do only some shuffle to obtain these 2-byte + codes and finally compress the whole SSE register with a single + shuffle. - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. + Ad 2. - We precompute byte 1 for case #3 and -- **conditionally** -- - precompute either byte 1 for case #2 or byte 2 for case #3. Note that - they differ by exactly one bit. + When values fit in 16-bit code units, but are above 0x07ff, then + a single word may produce one, two or three UTF8 bytes. - Finally from these two code units we build proper UTF-8 sequence, - taking into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) vmovq_n_u16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const uint16x8_t t0 = - vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(utf16_packed), - vreinterpretq_u8_u16(dup_even))); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const uint16x8_t t2 = vorrq_u16(t1, simdutf_vec(0b1000000000000000)); + We prepare data for all these three cases in two registers. + The first register contains lower two UTF8 bytes (used in all + cases), while the second one contains just the third byte for + the three-UTF8-bytes case. - // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - const uint16x8_t s0 = vshrq_n_u16(utf16_packed, 12); - // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - const uint16x8_t s1 = - vandq_u16(utf16_packed, simdutf_vec(0b0000111111000000)); - // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] - const uint16x8_t s1s = vshlq_n_u16(s1, 2); - // [00bb|bbbb|0000|aaaa] - const uint16x8_t s2 = vorrq_u16(s0, s1s); - // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000)); - const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF); - const uint16x8_t one_or_two_bytes_bytemask = - vcleq_u16(utf16_packed, v_07ff); - const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), - one_or_two_bytes_bytemask); - const uint16x8_t s4 = veorq_u16(s3, m0); -#undef simdutf_vec + Finally these two registers are interleaved forming eight-element + array of 32-bit values. The array spans two SSE registers. + The bytes from the registers are compressed using two shuffles. - // 4. expand code units 16-bit => 32-bit - const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4)); - const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4)); + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); - const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f); -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t onemask = simdutf_make_uint16x8_t( - 0x0001, 0x0004, 0x0010, 0x0040, 0x0100, 0x0400, 0x1000, 0x4000); - const uint16x8_t twomask = simdutf_make_uint16x8_t( - 0x0002, 0x0008, 0x0020, 0x0080, 0x0200, 0x0800, 0x2000, 0x8000); -#else - const uint16x8_t onemask = {0x0001, 0x0004, 0x0010, 0x0040, - 0x0100, 0x0400, 0x1000, 0x4000}; - const uint16x8_t twomask = {0x0002, 0x0008, 0x0020, 0x0080, - 0x0200, 0x0800, 0x2000, 0x8000}; -#endif - const uint16x8_t combined = - vorrq_u16(vandq_u16(one_byte_bytemask, onemask), - vandq_u16(one_or_two_bytes_bytemask, twomask)); - const uint16_t mask = vaddvq_u16(combined); - // The following fast path may or may not be beneficial. - /*if(mask == 0) { - // We only have three-byte code units. Use fast path. - const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0}; - const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle); - const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle); - vst1q_u8(utf8_output, utf8_0); - utf8_output += 12; - vst1q_u8(utf8_output, utf8_1); - utf8_output += 12; - buf += 8; - continue; - }*/ - const uint8_t mask0 = uint8_t(mask); - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const uint8x16_t shuffle0 = vld1q_u8(row0 + 1); - const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0); - const uint8_t mask1 = static_cast(mask >> 8); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const uint8x16_t shuffle1 = vld1q_u8(row1 + 1); - const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1); + To summarize: + - We need two 256-entry tables that have 8704 bytes in total. +*/ - vst1q_u8(utf8_output, utf8_0); - utf8_output += row0[0]; - vst1q_u8(utf8_output, utf8_1); - utf8_output += row1[0]; +/* + Returns a pair: the first unprocessed byte from buf and utf8_output + A scalar routing should carry on the conversion of the tail. +*/ +template +std::pair +avx2_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_output) { + const char16_t *end = buf + len; + const __m256i v_0000 = _mm256_setzero_si256(); + const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800); + const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800); + const __m256i v_c080 = _mm256_set1_epi16((int16_t)0xc080); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 - buf += 8; - } - // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> - // will produce four UTF-8 bytes. - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFFFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(nullptr, - reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - if (word > 0x10FFFF) { - return std::make_pair(nullptr, - reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } - } - buf += k; + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + __m256i in = _mm256_loadu_si256((__m256i *)buf); + if (big_endian) { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, + 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); + in = _mm256_shuffle_epi8(in, swap); } - } // while + // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes + const __m256i v_ff80 = _mm256_set1_epi16((int16_t)0xff80); + if (_mm256_testz_si256(in, v_ff80)) { // ASCII fast path!!!! + // 1. pack the bytes + const __m128i utf8_packed = _mm_packus_epi16( + _mm256_castsi256_si128(in), _mm256_extractf128_si256(in, 1)); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + // no bits set above 7th bit + const __m256i one_byte_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in, v_ff80), v_0000); + const uint32_t one_byte_bitmask = + static_cast(_mm256_movemask_epi8(one_byte_bytemask)); - // check for invalid input - if (vmaxvq_u16(forbidden_bytemask) != 0) { - return std::make_pair(nullptr, reinterpret_cast(utf8_output)); - } - return std::make_pair(buf, reinterpret_cast(utf8_output)); -} + // no bits set above 11th bit + const __m256i one_or_two_bytes_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_0000); + const uint32_t one_or_two_bytes_bitmask = + static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); + if (one_or_two_bytes_bitmask == 0xffffffff) { -std::pair -arm_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, - char *utf8_out) { - uint8_t *utf8_output = reinterpret_cast(utf8_out); - const char32_t *start = buf; - const char32_t *end = buf + len; + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); + const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); - const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080); - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 + // t0 = [000a|aaaa|bbbb|bb00] + const __m256i t0 = _mm256_slli_epi16(in, 2); + // t1 = [000a|aaaa|0000|0000] + const __m256i t1 = _mm256_and_si256(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const __m256i t2 = _mm256_and_si256(in, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const __m256i t3 = _mm256_or_si256(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m256i t4 = _mm256_or_si256(t3, v_c080); - while (buf + 16 + safety_margin < end) { - uint32x4_t in = vld1q_u32(reinterpret_cast(buf)); - uint32x4_t nextin = vld1q_u32(reinterpret_cast(buf + 4)); + // 2. merge ASCII and 2-byte codewords + const __m256i utf8_unpacked = + _mm256_blendv_epi8(t4, in, one_byte_bytemask); - // Check if no bits set above 16th - if (vmaxvq_u32(vorrq_u32(in, nextin)) <= 0xFFFF) { - // Pack UTF-32 to UTF-16 safely (without surrogate pairs) - // Apply UTF-16 => UTF-8 routine (arm_convert_utf16_to_utf8.cpp) - uint16x8_t utf16_packed = vcombine_u16(vmovn_u32(in), vmovn_u32(nextin)); - if (vmaxvq_u16(utf16_packed) <= 0x7F) { // ASCII fast path!!!! - // 1. pack the bytes - // obviously suboptimal. - uint8x8_t utf8_packed = vmovn_u16(utf16_packed); - // 2. store (8 bytes) - vst1_u8(utf8_output, utf8_packed); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - continue; // we are done for this round! - } + // 3. prepare bitmask for 8-bit lookup + const uint32_t M0 = one_byte_bitmask & 0x55555555; + const uint32_t M1 = M0 >> 7; + const uint32_t M2 = (M1 | M0) & 0x00ff00ff; + // 4. pack the bytes - if (vmaxvq_u16(utf16_packed) <= 0x7FF) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00); - const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f); + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; + const uint8_t *row_2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> + 16)][0]; - // t0 = [000a|aaaa|bbbb|bb00] - const uint16x8_t t0 = vshlq_n_u16(utf16_packed, 2); - // t1 = [000a|aaaa|0000|0000] - const uint16x8_t t1 = vandq_u16(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const uint16x8_t t2 = vandq_u16(utf16_packed, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const uint16x8_t t3 = vorrq_u16(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const uint16x8_t t4 = vorrq_u16(t3, v_c080); - // 2. merge ASCII and 2-byte codewords - const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); - const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f); - const uint8x16_t utf8_unpacked = vreinterpretq_u8_u16( - vbslq_u16(one_byte_bytemask, utf16_packed, t4)); - // 3. prepare bitmask for 8-bit lookup -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t mask = simdutf_make_uint16x8_t( - 0x0001, 0x0004, 0x0010, 0x0040, 0x0002, 0x0008, 0x0020, 0x0080); -#else - const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040, - 0x0002, 0x0008, 0x0020, 0x0080}; -#endif - uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask)); - // 4. pack the bytes - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; - const uint8x16_t shuffle = vld1q_u8(row + 1); - const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle); + const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); + const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); - // 5. store bytes - vst1q_u8(utf8_output, utf8_packed); + const __m256i utf8_packed = _mm256_shuffle_epi8( + utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); + // 5. store bytes + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_castsi256_si128(utf8_packed)); + utf8_output += row[0]; + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_extractf128_si256(utf8_packed, 1)); + utf8_output += row_2[0]; - // 6. adjust pointers - buf += 8; - utf8_output += row[0]; - continue; - } else { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + // 6. adjust pointers + buf += 16; + continue; + } + // 1. Check if there are any surrogate word in the input chunk. + // We have also deal with situation when there is a surrogate word + // at the end of a chunk. + const __m256i surrogates_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_d800); - // check for invalid input - const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); - const uint16x8_t v_dfff = vmovq_n_u16((uint16_t)0xdfff); - const uint16x8_t forbidden_bytemask = vandq_u16( - vcleq_u16(utf16_packed, v_dfff), vcgeq_u16(utf16_packed, v_d800)); - if (vmaxvq_u16(forbidden_bytemask) != 0) { - return std::make_pair(result(error_code::SURROGATE, buf - start), - reinterpret_cast(utf8_output)); - } + // bitmask = 0x0000 if there are no surrogates + // = 0xc000 if the last word is a surrogate + const uint32_t surrogates_bitmask = + static_cast(_mm256_movemask_epi8(surrogates_bytemask)); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (surrogates_bitmask == 0x00000000) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + const __m256i dup_even = _mm256_setr_epi16( + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t dup_even = simdutf_make_uint16x8_t( - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); -#else - const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606, - 0x0808, 0x0a0a, 0x0c0c, 0x0e0e}; -#endif - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - - two UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. - We precompute byte 1 for case #3 and -- **conditionally** -- - precompute either byte 1 for case #2 or byte 2 for case #3. Note that - they differ by exactly one bit. + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. - Finally from these two code units we build proper UTF-8 sequence, - taking into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) vmovq_n_u16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const uint16x8_t t0 = - vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(utf16_packed), - vreinterpretq_u8_u16(dup_even))); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const uint16x8_t t2 = vorrq_u16(t1, simdutf_vec(0b1000000000000000)); + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const __m256i t0 = _mm256_shuffle_epi8(in, dup_even); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); - // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - const uint16x8_t s0 = vshrq_n_u16(utf16_packed, 12); - // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - const uint16x8_t s1 = - vandq_u16(utf16_packed, simdutf_vec(0b0000111111000000)); - // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] - const uint16x8_t s1s = vshlq_n_u16(s1, 2); - // [00bb|bbbb|0000|aaaa] - const uint16x8_t s2 = vorrq_u16(s0, s1s); - // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000)); - const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF); - const uint16x8_t one_or_two_bytes_bytemask = - vcleq_u16(utf16_packed, v_07ff); - const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), - one_or_two_bytes_bytemask); - const uint16x8_t s4 = veorq_u16(s3, m0); + // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] + const __m256i s0 = _mm256_srli_epi16(in, 4); + // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] + const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); + // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] + const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); + // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); + const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, + simdutf_vec(0b0100000000000000)); + const __m256i s4 = _mm256_xor_si256(s3, m0); #undef simdutf_vec - // 4. expand code units 16-bit => 32-bit - const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4)); - const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4)); + // 4. expand code units 16-bit => 32-bit + const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); + const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); - const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f); -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t onemask = simdutf_make_uint16x8_t( - 0x0001, 0x0004, 0x0010, 0x0040, 0x0100, 0x0400, 0x1000, 0x4000); - const uint16x8_t twomask = simdutf_make_uint16x8_t( - 0x0002, 0x0008, 0x0020, 0x0080, 0x0200, 0x0800, 0x2000, 0x8000); -#else - const uint16x8_t onemask = {0x0001, 0x0004, 0x0010, 0x0040, - 0x0100, 0x0400, 0x1000, 0x4000}; - const uint16x8_t twomask = {0x0002, 0x0008, 0x0020, 0x0080, - 0x0200, 0x0800, 0x2000, 0x8000}; -#endif - const uint16x8_t combined = - vorrq_u16(vandq_u16(one_byte_bytemask, onemask), - vandq_u16(one_or_two_bytes_bytemask, twomask)); - const uint16_t mask = vaddvq_u16(combined); - // The following fast path may or may not be beneficial. - /*if(mask == 0) { - // We only have three-byte code units. Use fast path. - const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0}; - const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle); - const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle); - vst1q_u8(utf8_output, utf8_0); - utf8_output += 12; - vst1q_u8(utf8_output, utf8_1); - utf8_output += 12; - buf += 8; - continue; - }*/ - const uint8_t mask0 = uint8_t(mask); + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint32_t mask = (one_byte_bitmask & 0x55555555) | + (one_or_two_bytes_bitmask & 0xaaaaaaaa); + // Due to the wider registers, the following path is less likely to be + // useful. + /*if(mask == 0) { + // We only have three-byte code units. Use fast path. + const __m256i shuffle = + _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, + 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = + _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = + _mm256_shuffle_epi8(out1, shuffle); + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; + continue; + }*/ + const uint8_t mask0 = uint8_t(mask); + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); + const __m128i utf8_0 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const uint8x16_t shuffle0 = vld1q_u8(row0 + 1); - const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0); + const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); + const __m128i utf8_1 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); - const uint8_t mask1 = static_cast(mask >> 8); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const uint8x16_t shuffle1 = vld1q_u8(row1 + 1); - const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1); + const uint8_t mask2 = static_cast(mask >> 16); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; + const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); + const __m128i utf8_2 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); - vst1q_u8(utf8_output, utf8_0); - utf8_output += row0[0]; - vst1q_u8(utf8_output, utf8_1); - utf8_output += row1[0]; + const uint8_t mask3 = static_cast(mask >> 24); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; + const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); + const __m128i utf8_3 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); - buf += 8; - } - // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> - // will produce four UTF-8 bytes. + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); + utf8_output += row0[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + utf8_output += row1[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_2); + utf8_output += row2[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_3); + utf8_output += row3[0]; + buf += 16; + // surrogate pair(s) in a register } else { // Let us do a scalar fallback. // It may seem wasteful to use scalar code, but being efficient with SIMD @@ -16605,8685 +29413,8411 @@ arm_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFFFF80) == 0) { + uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k]; + if ((word & 0xFF80) == 0) { *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { + } else if ((word & 0xF800) == 0) { *utf8_output++ = char((word >> 6) | 0b11000000); *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k), - reinterpret_cast(utf8_output)); - } + } else if ((word & 0xF800) != 0xD800) { *utf8_output++ = char((word >> 12) | 0b11100000); *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); *utf8_output++ = char((word & 0b111111) | 0b10000000); } else { - if (word > 0x10FFFF) { - return std::make_pair( - result(error_code::TOO_LARGE, buf - start + k), - reinterpret_cast(utf8_output)); + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + big_endian ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1]; + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(nullptr, utf8_output); } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); } } buf += k; } } // while - - return std::make_pair(result(error_code::SUCCESS, buf - start), - reinterpret_cast(utf8_output)); -} -/* end file src/arm64/arm_convert_utf32_to_utf8.cpp */ - -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* begin file src/generic/buf_block_reader.h */ -namespace simdutf { -namespace arm64 { -namespace { - -// Walks through a buffer in block-sized increments, loading the last part with -// spaces -template struct buf_block_reader { -public: - simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdutf_really_inline size_t block_index(); - simdutf_really_inline bool has_full_block() const; - simdutf_really_inline const uint8_t *full_block() const; - /** - * Get the last block, padded with spaces. - * - * There will always be a last block, with at least 1 byte, unless len == 0 - * (in which case this function fills the buffer with spaces and returns 0. In - * particular, if len == STEP_SIZE there will be 0 full_blocks and 1 remainder - * block with STEP_SIZE bytes and no spaces for padding. - * - * @return the number of effective characters in the last block. - */ - simdutf_really_inline size_t get_remainder(uint8_t *dst) const; - simdutf_really_inline void advance(); - -private: - const uint8_t *buf; - const size_t len; - const size_t lenminusstep; - size_t idx; -}; - -// Routines to print masks and text for debugging bitmask operations -simdutf_unused static char *format_input_text_64(const uint8_t *text) { - static char *buf = - reinterpret_cast(malloc(sizeof(simd8x64) + 1)); - for (size_t i = 0; i < sizeof(simd8x64); i++) { - buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -// Routines to print masks and text for debugging bitmask operations -simdutf_unused static char *format_input_text(const simd8x64 &in) { - static char *buf = - reinterpret_cast(malloc(sizeof(simd8x64) + 1)); - in.store(reinterpret_cast(buf)); - for (size_t i = 0; i < sizeof(simd8x64); i++) { - if (buf[i] < ' ') { - buf[i] = '_'; - } - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -simdutf_unused static char *format_mask(uint64_t mask) { - static char *buf = reinterpret_cast(malloc(64 + 1)); - for (size_t i = 0; i < 64; i++) { - buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; - } - buf[64] = '\0'; - return buf; -} - -template -simdutf_really_inline -buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) - : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, - idx{0} {} - -template -simdutf_really_inline size_t buf_block_reader::block_index() { - return idx; -} - -template -simdutf_really_inline bool buf_block_reader::has_full_block() const { - return idx < lenminusstep; -} - -template -simdutf_really_inline const uint8_t * -buf_block_reader::full_block() const { - return &buf[idx]; -} - -template -simdutf_really_inline size_t -buf_block_reader::get_remainder(uint8_t *dst) const { - if (len == idx) { - return 0; - } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, - STEP_SIZE); // std::memset STEP_SIZE because it is more efficient - // to write out 8 or 16 bytes at once. - std::memcpy(dst, buf + idx, len - idx); - return len - idx; -} - -template -simdutf_really_inline void buf_block_reader::advance() { - idx += STEP_SIZE; -} - -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* end file src/generic/buf_block_reader.h */ -/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ -namespace simdutf { -namespace arm64 { -namespace { -namespace utf8_validation { - -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} - -// -// Return nonzero if there are incomplete multibyte characters at the end of the -// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. -// -simdutf_really_inline simd8 is_incomplete(const simd8 input) { - // If the previous input's last 3 bytes match this, they're too short (they - // ended at EOF): - // ... 1111____ 111_____ 11______ - static const uint8_t max_array[32] = {255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 0b11110000u - 1, - 0b11100000u - 1, - 0b11000000u - 1}; - const simd8 max_value( - &max_array[sizeof(max_array) - sizeof(simd8)]); - return input.gt_bits(max_value); + return std::make_pair(buf, utf8_output); } -struct utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - // The last input we received - simd8 prev_input_block; - // Whether the last input we received was incomplete (used for ASCII fast - // path) - simd8 prev_incomplete; - - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } +/* + Returns a pair: a result struct and utf8_output. + If there is an error, the count field of the result is the position of the + error. Otherwise, it is the position of the first unprocessed byte in buf + (even if finished). A scalar routing should carry on the conversion of the + tail if needed. +*/ +template +std::pair +avx2_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, + char *utf8_output) { + const char16_t *start = buf; + const char16_t *end = buf + len; - // The only problem that can happen at EOF is that a multibyte character is - // too short or a byte value too large in the last bytes: check_special_cases - // only checks for bytes too large in the first of two bytes. - simdutf_really_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an - // ASCII block can't possibly finish them. - this->error |= this->prev_incomplete; - } + const __m256i v_0000 = _mm256_setzero_si256(); + const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800); + const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800); + const __m256i v_c080 = _mm256_set1_epi16((int16_t)0xc080); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 - simdutf_really_inline void check_next_input(const simd8x64 &input) { - if (simdutf_likely(is_ascii(input))) { - this->error |= this->prev_incomplete; - } else { - // you might think that a for-loop would work, but under Visual Studio, it - // is not good enough. - static_assert((simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = - is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + __m256i in = _mm256_loadu_si256((__m256i *)buf); + if (big_endian) { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, + 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); + in = _mm256_shuffle_epi8(in, swap); } - } + // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes + const __m256i v_ff80 = _mm256_set1_epi16((int16_t)0xff80); + if (_mm256_testz_si256(in, v_ff80)) { // ASCII fast path!!!! + // 1. pack the bytes + const __m128i utf8_packed = _mm_packus_epi16( + _mm256_castsi256_si128(in), _mm256_extractf128_si256(in, 1)); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + // no bits set above 7th bit + const __m256i one_byte_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in, v_ff80), v_0000); + const uint32_t one_byte_bitmask = + static_cast(_mm256_movemask_epi8(one_byte_bytemask)); - // do not forget to call check_eof! - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } + // no bits set above 11th bit + const __m256i one_or_two_bytes_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_0000); + const uint32_t one_or_two_bytes_bitmask = + static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); + if (one_or_two_bytes_bitmask == 0xffffffff) { -}; // struct utf8_checker -} // namespace utf8_validation + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); + const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); -using utf8_validation::utf8_checker; + // t0 = [000a|aaaa|bbbb|bb00] + const __m256i t0 = _mm256_slli_epi16(in, 2); + // t1 = [000a|aaaa|0000|0000] + const __m256i t1 = _mm256_and_si256(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const __m256i t2 = _mm256_and_si256(in, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const __m256i t3 = _mm256_or_si256(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m256i t4 = _mm256_or_si256(t3, v_c080); -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ -/* begin file src/generic/utf8_validation/utf8_validator.h */ -namespace simdutf { -namespace arm64 { -namespace { -namespace utf8_validation { + // 2. merge ASCII and 2-byte codewords + const __m256i utf8_unpacked = + _mm256_blendv_epi8(t4, in, one_byte_bytemask); -/** - * Validates that the string is actual UTF-8. - */ -template -bool generic_validate_utf8(const uint8_t *input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - return !c.errors(); -} + // 3. prepare bitmask for 8-bit lookup + const uint32_t M0 = one_byte_bitmask & 0x55555555; + const uint32_t M1 = M0 >> 7; + const uint32_t M2 = (M1 | M0) & 0x00ff00ff; + // 4. pack the bytes -bool generic_validate_utf8(const char *input, size_t length) { - return generic_validate_utf8( - reinterpret_cast(input), length); -} + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; + const uint8_t *row_2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> + 16)][0]; -/** - * Validates that the string is actual UTF-8 and stops on errors. - */ -template -result generic_validate_utf8_with_errors(const uint8_t *input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - size_t count{0}; - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - if (c.errors()) { - if (count != 0) { - count--; - } // Sometimes the error is only detected in the next chunk - result res = scalar::utf8::rewind_and_validate_with_errors( - reinterpret_cast(input), - reinterpret_cast(input + count), length - count); - res.count += count; - return res; + const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); + const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); + + const __m256i utf8_packed = _mm256_shuffle_epi8( + utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); + // 5. store bytes + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_castsi256_si128(utf8_packed)); + utf8_output += row[0]; + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_extractf128_si256(utf8_packed, 1)); + utf8_output += row_2[0]; + + // 6. adjust pointers + buf += 16; + continue; } - reader.advance(); - count += 64; - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - if (c.errors()) { - if (count != 0) { - count--; - } // Sometimes the error is only detected in the next chunk - result res = scalar::utf8::rewind_and_validate_with_errors( - reinterpret_cast(input), - reinterpret_cast(input) + count, length - count); - res.count += count; - return res; - } else { - return result(error_code::SUCCESS, length); - } -} + // 1. Check if there are any surrogate word in the input chunk. + // We have also deal with situation when there is a surrogate word + // at the end of a chunk. + const __m256i surrogates_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_d800); -result generic_validate_utf8_with_errors(const char *input, size_t length) { - return generic_validate_utf8_with_errors( - reinterpret_cast(input), length); -} + // bitmask = 0x0000 if there are no surrogates + // = 0xc000 if the last word is a surrogate + const uint32_t surrogates_bitmask = + static_cast(_mm256_movemask_epi8(surrogates_bytemask)); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (surrogates_bitmask == 0x00000000) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + const __m256i dup_even = _mm256_setr_epi16( + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); -template -bool generic_validate_ascii(const uint8_t *input, size_t length) { - buf_block_reader<64> reader(input, length); - uint8_t blocks[64]{}; - simd::simd8x64 running_or(blocks); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - running_or |= in; - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - running_or |= in; - return running_or.is_ascii(); -} + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes -bool generic_validate_ascii(const char *input, size_t length) { - return generic_validate_ascii( - reinterpret_cast(input), length); -} + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. -template -result generic_validate_ascii_with_errors(const uint8_t *input, size_t length) { - buf_block_reader<64> reader(input, length); - size_t count{0}; - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - if (!in.is_ascii()) { - result res = scalar::ascii::validate_with_errors( - reinterpret_cast(input + count), length - count); - return result(res.error, count + res.count); - } - reader.advance(); + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. - count += 64; - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - if (!in.is_ascii()) { - result res = scalar::ascii::validate_with_errors( - reinterpret_cast(input + count), length - count); - return result(res.error, count + res.count); - } else { - return result(error_code::SUCCESS, length); - } -} + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. -result generic_validate_ascii_with_errors(const char *input, size_t length) { - return generic_validate_ascii_with_errors( - reinterpret_cast(input), length); -} + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const __m256i t0 = _mm256_shuffle_epi8(in, dup_even); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); -} // namespace utf8_validation -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* end file src/generic/utf8_validation/utf8_validator.h */ -// transcoding from UTF-8 to UTF-16 -/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ + // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] + const __m256i s0 = _mm256_srli_epi16(in, 4); + // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] + const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); + // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] + const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); + // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); + const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, + simdutf_vec(0b0100000000000000)); + const __m256i s4 = _mm256_xor_si256(s3, m0); +#undef simdutf_vec -namespace simdutf { -namespace arm64 { -namespace { -namespace utf8_to_utf16 { -using namespace simd; + // 4. expand code units 16-bit => 32-bit + const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); + const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint32_t mask = (one_byte_bitmask & 0x55555555) | + (one_or_two_bytes_bitmask & 0xaaaaaaaa); + // Due to the wider registers, the following path is less likely to be + // useful. + /*if(mask == 0) { + // We only have three-byte code units. Use fast path. + const __m256i shuffle = + _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, + 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = + _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = + _mm256_shuffle_epi8(out1, shuffle); + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; + continue; + }*/ + const uint8_t mask0 = uint8_t(mask); + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); + const __m128i utf8_0 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, + const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); + const __m128i utf8_1 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); + + const uint8_t mask2 = static_cast(mask >> 16); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; + const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); + const __m128i utf8_2 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); + + const uint8_t mask3 = static_cast(mask >> 24); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; + const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); + const __m128i utf8_3 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); + + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); + utf8_output += row0[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + utf8_output += row1[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_2); + utf8_output += row2[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_3); + utf8_output += row3[0]; + buf += 16; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k]; + if ((word & 0xFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xF800) != 0xD800) { + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + big_endian ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1]; + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k - 1), + utf8_output); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); +} +/* end file src/haswell/avx2_convert_utf16_to_utf8.cpp */ +/* begin file src/haswell/avx2_convert_utf16_to_utf32.cpp */ +/* + The vectorized algorithm works on single SSE register i.e., it + loads eight 16-bit code units. + + We consider three cases: + 1. an input register contains no surrogates and each value + is in range 0x0000 .. 0x07ff. + 2. an input register contains no surrogates and values are + in range 0x0000 .. 0xffff. + 3. an input register contains surrogates --- i.e. codepoints + can have 16 or 32 bits. - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, + Ad 1. - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, + When values are less than 0x0800, it means that a 16-bit code unit + can be converted into: 1) single UTF8 byte (when it is an ASCII + char) or 2) two UTF8 bytes. - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + For this case we do only some shuffle to obtain these 2-byte + codes and finally compress the whole SSE register with a single + shuffle. - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; + Ad 2. - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } + When values fit in 16-bit code units, but are above 0x07ff, then + a single word may produce one, two or three UTF8 bytes. - template - simdutf_really_inline size_t convert(const char *in, size_t size, - char16_t *utf16_output) { - size_t pos = 0; - char16_t *start{utf16_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (utf8_continuation_mask & 1) { - return 0; // error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf16( - in + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = scalar::utf8_to_utf16::convert( - in + pos, size - pos, utf16_output); - if (howmany == 0) { - return 0; - } - utf16_output += howmany; - } - return utf16_output - start; - } + We prepare data for all these three cases in two registers. + The first register contains lower two UTF8 bytes (used in all + cases), while the second one contains just the third byte for + the three-UTF8-bytes case. - template - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char16_t *utf16_output) { - size_t pos = 0; - char16_t *start{utf16_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); + Finally these two registers are interleaved forming eight-element + array of 32-bit values. The array spans two SSE registers. + The bytes from the registers are compressed using two shuffles. + + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. + + + To summarize: + - We need two 256-entry tables that have 8704 bytes in total. +*/ + +/* + Returns a pair: the first unprocessed byte from buf and utf32_output + A scalar routing should carry on the conversion of the tail. +*/ +template +std::pair +avx2_convert_utf16_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_output) { + const char16_t *end = buf + len; + const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800); + const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800); + + while (end - buf >= 16) { + __m256i in = _mm256_loadu_si256((__m256i *)buf); + if (big_endian) { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, + 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); + in = _mm256_shuffle_epi8(in, swap); } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (errors() || (utf8_continuation_mask & 1)) { - // rewind_and_convert_with_errors will seek a potential error from - // in+pos onward, with the ability to go back up to pos bytes, and - // read size-pos bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - res.count += pos; - return res; - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf16( - in + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. + + // 1. Check if there are any surrogate word in the input chunk. + // We have also deal with situation when there is a surrogate word + // at the end of a chunk. + const __m256i surrogates_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_d800); + + // bitmask = 0x0000 if there are no surrogates + // = 0xc000 if the last word is a surrogate + const uint32_t surrogates_bitmask = + static_cast(_mm256_movemask_epi8(surrogates_bytemask)); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (surrogates_bitmask == 0x00000000) { + // case: we extend all sixteen 16-bit code units to sixteen 32-bit code + // units + _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output), + _mm256_cvtepu16_epi32(_mm256_castsi256_si128(in))); + _mm256_storeu_si256( + reinterpret_cast<__m256i *>(utf32_output + 8), + _mm256_cvtepu16_epi32(_mm256_extractf128_si256(in, 1))); + utf32_output += 16; + buf += 16; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); } - } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - res.count += pos; - return res; - } - if (pos < size) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - utf16_output += res.count; + for (; k < forward; k++) { + uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k]; + if ((word & 0xF800) != 0xD800) { + // No surrogate pair + *utf32_output++ = char32_t(word); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + big_endian ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1]; + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(nullptr, utf32_output); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + } } + buf += k; } - return result(error_code::SUCCESS, utf16_output - start); - } - - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } + } // while + return std::make_pair(buf, utf32_output); +} -}; // struct utf8_checker -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ -/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ +/* + Returns a pair: a result struct and utf8_output. + If there is an error, the count field of the result is the position of the + error. Otherwise, it is the position of the first unprocessed byte in buf + (even if finished). A scalar routing should carry on the conversion of the + tail if needed. +*/ +template +std::pair +avx2_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, + char32_t *utf32_output) { + const char16_t *start = buf; + const char16_t *end = buf + len; + const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800); + const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800); -namespace simdutf { -namespace arm64 { -namespace { -namespace utf8_to_utf16 { + while (end - buf >= 16) { + __m256i in = _mm256_loadu_si256((__m256i *)buf); + if (big_endian) { + const __m256i swap = _mm256_setr_epi8( + 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, + 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); + in = _mm256_shuffle_epi8(in, swap); + } -using namespace simd; + // 1. Check if there are any surrogate word in the input chunk. + // We have also deal with situation when there is a surrogate word + // at the end of a chunk. + const __m256i surrogates_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_d800); -template -simdutf_warn_unused size_t convert_valid(const char *input, size_t size, - char16_t *utf16_output) noexcept { - // The implementation is not specific to haswell and should be moved to the - // generic directory. - size_t pos = 0; - char16_t *start{utf16_output}; - const size_t safety_margin = 16; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - // this loop could be unrolled further. For example, we could process the - // mask far more than 64 bytes. - simd8x64 in(reinterpret_cast(input + pos)); - if (in.is_ascii()) { - in.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; + // bitmask = 0x0000 if there are no surrogates + // = 0xc000 if the last word is a surrogate + const uint32_t surrogates_bitmask = + static_cast(_mm256_movemask_epi8(surrogates_bytemask)); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (surrogates_bitmask == 0x00000000) { + // case: we extend all sixteen 16-bit code units to sixteen 32-bit code + // units + _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output), + _mm256_cvtepu16_epi32(_mm256_castsi256_si128(in))); + _mm256_storeu_si256( + reinterpret_cast<__m256i *>(utf32_output + 8), + _mm256_cvtepu16_epi32(_mm256_extractf128_si256(in, 1))); + utf32_output += 16; + buf += 16; + // surrogate pair(s) in a register } else { - // Slow path. We hope that the compiler will recognize that this is a slow - // path. Anything that is not a continuation mask is a 'leading byte', - // that is, the start of a new code point. - uint64_t utf8_continuation_mask = in.lt(-65 + 1); - // -65 is 0b10111111 in two-complement's, so largest possible continuation - // byte - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - // The *start* of code points is not so useful, rather, we want the *end* - // of code points. - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times when using solely - // the slow/regular path, and at least four times if there are fast paths. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - // - // Thus we may allow convert_masked_utf8_to_utf16 to process - // more bytes at a time under a fast-path mode where 16 bytes - // are consumed at once (e.g., when encountering ASCII). - size_t consumed = convert_masked_utf8_to_utf16( - input + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. + for (; k < forward; k++) { + uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k]; + if ((word & 0xF800) != 0xD800) { + // No surrogate pair + *utf32_output++ = char32_t(word); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = + big_endian ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1]; + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k - 1), + utf32_output); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + } + } + buf += k; } - } - utf16_output += scalar::utf8_to_utf16::convert_valid( - input + pos, size - pos, utf16_output); - return utf16_output - start; -} - -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ -// transcoding from UTF-8 to UTF-32 -/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), utf32_output); +} +/* end file src/haswell/avx2_convert_utf16_to_utf32.cpp */ -namespace simdutf { -namespace arm64 { -namespace { -namespace utf8_to_utf32 { -using namespace simd; +/* begin file src/haswell/avx2_convert_utf32_to_latin1.cpp */ +std::pair +avx2_convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) { + const size_t rounded_len = + len & ~0x1F; // Round down to nearest multiple of 32 -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + __m256i high_bytes_mask = _mm256_set1_epi32(0xFFFFFF00); - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, + __m256i shufmask = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 12, 8, 4, 0, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 12, 8, 4, 0); - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, + for (size_t i = 0; i < rounded_len; i += 16) { + __m256i in1 = _mm256_loadu_si256((__m256i *)buf); + __m256i in2 = _mm256_loadu_si256((__m256i *)(buf + 8)); - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, + __m256i check_combined = _mm256_or_si256(in1, in2); - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + if (!_mm256_testz_si256(check_combined, high_bytes_mask)) { + return std::make_pair(nullptr, latin1_output); + } - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} + // Turn UTF32 bytes into latin 1 bytes + __m256i shuffled1 = _mm256_shuffle_epi8(in1, shufmask); + __m256i shuffled2 = _mm256_shuffle_epi8(in2, shufmask); -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; + // move Latin1 bytes to their correct spot + __m256i idx1 = _mm256_set_epi32(-1, -1, -1, -1, -1, -1, 4, 0); + __m256i idx2 = _mm256_set_epi32(-1, -1, -1, -1, 4, 0, -1, -1); + __m256i reshuffled1 = _mm256_permutevar8x32_epi32(shuffled1, idx1); + __m256i reshuffled2 = _mm256_permutevar8x32_epi32(shuffled2, idx2); - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } + __m256i result = _mm256_or_si256(reshuffled1, reshuffled2); + _mm_storeu_si128((__m128i *)latin1_output, _mm256_castsi256_si128(result)); - simdutf_really_inline size_t convert(const char *in, size_t size, - char32_t *utf32_output) { - size_t pos = 0; - char32_t *start{utf32_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 words when calling convert_masked_utf8_to_utf32. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 16 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the fourth - // last leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (utf8_continuation_mask & 1) { - return 0; // we have an error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf32( - in + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = - scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output); - if (howmany == 0) { - return 0; - } - utf32_output += howmany; - } - return utf32_output - start; + latin1_output += 16; + buf += 16; } - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char32_t *utf32_output) { - size_t pos = 0; - char32_t *start{utf32_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the fourth - // last leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (errors() || (utf8_continuation_mask & 1)) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - res.count += pos; - return res; - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf32( - in + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - res.count += pos; - return res; - } - if (pos < size) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - utf32_output += res.count; - } - } - return result(error_code::SUCCESS, utf32_output - start); - } + return std::make_pair(buf, latin1_output); +} +std::pair +avx2_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) { + const size_t rounded_len = + len & ~0x1F; // Round down to nearest multiple of 32 - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } + __m256i high_bytes_mask = _mm256_set1_epi32(0xFFFFFF00); + __m256i shufmask = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 12, 8, 4, 0, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 12, 8, 4, 0); -}; // struct utf8_checker -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ -/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ + const char32_t *start = buf; -namespace simdutf { -namespace arm64 { -namespace { -namespace utf8_to_utf32 { + for (size_t i = 0; i < rounded_len; i += 16) { + __m256i in1 = _mm256_loadu_si256((__m256i *)buf); + __m256i in2 = _mm256_loadu_si256((__m256i *)(buf + 8)); -using namespace simd; + __m256i check_combined = _mm256_or_si256(in1, in2); -simdutf_warn_unused size_t convert_valid(const char *input, size_t size, - char32_t *utf32_output) noexcept { - size_t pos = 0; - char32_t *start{utf32_output}; - const size_t safety_margin = 16; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 in(reinterpret_cast(input + pos)); - if (in.is_ascii()) { - in.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // -65 is 0b10111111 in two-complement's, so largest possible continuation - // byte - uint64_t utf8_continuation_mask = in.lt(-65 + 1); - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - size_t max_starting_point = (pos + 64) - 12; - while (pos < max_starting_point) { - size_t consumed = convert_masked_utf8_to_utf32( - input + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; + if (!_mm256_testz_si256(check_combined, high_bytes_mask)) { + // Fallback to scalar code for handling errors + for (int k = 0; k < 8; k++) { + char32_t codepoint = buf[k]; + if (codepoint <= 0xFF) { + *latin1_output++ = static_cast(codepoint); + } else { + return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), + latin1_output); + } } - } - } - utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, - utf32_output); - return utf32_output - start; -} + buf += 8; + } else { + __m256i shuffled1 = _mm256_shuffle_epi8(in1, shufmask); + __m256i shuffled2 = _mm256_shuffle_epi8(in2, shufmask); -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ -// other functions -/* begin file src/generic/utf16.h */ -namespace simdutf { -namespace arm64 { -namespace { -namespace utf16 { + __m256i idx1 = _mm256_set_epi32(-1, -1, -1, -1, -1, -1, 4, 0); + __m256i idx2 = _mm256_set_epi32(-1, -1, -1, -1, 4, 0, -1, -1); + __m256i reshuffled1 = _mm256_permutevar8x32_epi32(shuffled1, idx1); + __m256i reshuffled2 = _mm256_permutevar8x32_epi32(shuffled2, idx2); -template -simdutf_really_inline size_t count_code_points(const char16_t *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - for (; pos < size / 32 * 32; pos += 32) { - simd16x32 input(reinterpret_cast(in + pos)); - if (!match_system(big_endian)) { - input.swap_bytes(); + __m256i result = _mm256_or_si256(reshuffled1, reshuffled2); + _mm_storeu_si128((__m128i *)latin1_output, + _mm256_castsi256_si128(result)); + + latin1_output += 16; + buf += 16; } - uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF); - count += count_ones(not_pair) / 2; } - return count + - scalar::utf16::count_code_points(in + pos, size - pos); + + return std::make_pair(result(error_code::SUCCESS, buf - start), + latin1_output); } +/* end file src/haswell/avx2_convert_utf32_to_latin1.cpp */ +/* begin file src/haswell/avx2_convert_utf32_to_utf8.cpp */ +std::pair +avx2_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_output) { + const char32_t *end = buf + len; + const __m256i v_0000 = _mm256_setzero_si256(); + const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); + const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80); + const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800); + const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080); + const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff); + __m256i running_max = _mm256_setzero_si256(); + __m256i forbidden_bytemask = _mm256_setzero_si256(); -template -simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - // This algorithm could no doubt be improved! - for (; pos < size / 32 * 32; pos += 32) { - simd16x32 input(reinterpret_cast(in + pos)); - if (!match_system(big_endian)) { - input.swap_bytes(); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + __m256i in = _mm256_loadu_si256((__m256i *)buf); + __m256i nextin = _mm256_loadu_si256((__m256i *)buf + 1); + running_max = _mm256_max_epu32(_mm256_max_epu32(in, running_max), nextin); + + // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned + // saturation + __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff), + _mm256_and_si256(nextin, v_7fffffff)); + in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000); + + // Try to apply UTF-16 => UTF-8 routine on 256 bits + // (haswell/avx2_convert_utf16_to_utf8.cpp) + + if (_mm256_testz_si256(in_16, v_ff80)) { // ASCII fast path!!!! + // 1. pack the bytes + const __m128i utf8_packed = _mm_packus_epi16( + _mm256_castsi256_si128(in_16), _mm256_extractf128_si256(in_16, 1)); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! } - uint64_t ascii_mask = input.lteq(0x7F); - uint64_t twobyte_mask = input.lteq(0x7FF); - uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF); + // no bits set above 7th bit + const __m256i one_byte_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000); + const uint32_t one_byte_bitmask = + static_cast(_mm256_movemask_epi8(one_byte_bytemask)); + + // no bits set above 11th bit + const __m256i one_or_two_bytes_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000); + const uint32_t one_or_two_bytes_bitmask = + static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); + if (one_or_two_bytes_bitmask == 0xffffffff) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); + const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); - size_t ascii_count = count_ones(ascii_mask) / 2; - size_t twobyte_count = count_ones(twobyte_mask & ~ascii_mask) / 2; - size_t threebyte_count = count_ones(not_pair_mask & ~twobyte_mask) / 2; - size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2; - count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + - ascii_count; - } - return count + scalar::utf16::utf8_length_from_utf16(in + pos, - size - pos); -} + // t0 = [000a|aaaa|bbbb|bb00] + const __m256i t0 = _mm256_slli_epi16(in_16, 2); + // t1 = [000a|aaaa|0000|0000] + const __m256i t1 = _mm256_and_si256(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const __m256i t2 = _mm256_and_si256(in_16, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const __m256i t3 = _mm256_or_si256(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m256i t4 = _mm256_or_si256(t3, v_c080); -template -simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, - size_t size) { - return count_code_points(in, size); -} + // 2. merge ASCII and 2-byte codewords + const __m256i utf8_unpacked = + _mm256_blendv_epi8(t4, in_16, one_byte_bytemask); -simdutf_really_inline void -change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { - size_t pos = 0; + // 3. prepare bitmask for 8-bit lookup + const uint32_t M0 = one_byte_bitmask & 0x55555555; + const uint32_t M1 = M0 >> 7; + const uint32_t M2 = (M1 | M0) & 0x00ff00ff; + // 4. pack the bytes - while (pos < size / 32 * 32) { - simd16x32 input(reinterpret_cast(in + pos)); - input.swap_bytes(); - input.store(reinterpret_cast(output)); - pos += 32; - output += 32; - } + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; + const uint8_t *row_2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> + 16)][0]; - scalar::utf16::change_endianness_utf16(in + pos, size - pos, output); -} + const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); + const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); -} // namespace utf16 -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* end file src/generic/utf16.h */ -/* begin file src/generic/utf8.h */ + const __m256i utf8_packed = _mm256_shuffle_epi8( + utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); + // 5. store bytes + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_castsi256_si128(utf8_packed)); + utf8_output += row[0]; + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_extractf128_si256(utf8_packed, 1)); + utf8_output += row_2[0]; -namespace simdutf { -namespace arm64 { -namespace { -namespace utf8 { + // 6. adjust pointers + buf += 16; + continue; + } + // Must check for overflow in packing + const __m256i saturation_bytemask = _mm256_cmpeq_epi32( + _mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000); + const uint32_t saturation_bitmask = + static_cast(_mm256_movemask_epi8(saturation_bytemask)); + if (saturation_bitmask == 0xffffffff) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800); + forbidden_bytemask = _mm256_or_si256( + forbidden_bytemask, + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800)); -using namespace simd; + const __m256i dup_even = _mm256_setr_epi16( + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); -simdutf_really_inline size_t count_code_points(const char *in, size_t size) { - size_t pos = 0; - size_t count = 0; - for (; pos + 64 <= size; pos += 64) { - simd8x64 input(reinterpret_cast(in + pos)); - uint64_t utf8_continuation_mask = input.gt(-65); - count += count_ones(utf8_continuation_mask); - } - return count + scalar::utf8::count_code_points(in + pos, size - pos); -} + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes -simdutf_really_inline size_t utf16_length_from_utf8(const char *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - // This algorithm could no doubt be improved! - for (; pos + 64 <= size; pos += 64) { - simd8x64 input(reinterpret_cast(in + pos)); - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - // We count one word for anything that is not a continuation (so - // leading bytes). - count += 64 - count_ones(utf8_continuation_mask); - int64_t utf8_4byte = input.gteq_unsigned(240); - count += count_ones(utf8_4byte); - } - return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); -} -} // namespace utf8 -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* end file src/generic/utf8.h */ -// transcoding from UTF-8 to Latin 1 -/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. -namespace simdutf { -namespace arm64 { -namespace { -namespace utf8_to_latin1 { -using namespace simd; + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // For UTF-8 to Latin 1, we can allow any ASCII character, and any - // continuation byte, but the non-ASCII leading bytes must be 0b11000011 or - // 0b11000010 and nothing else. - // - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - constexpr const uint8_t FORBIDDEN = 0xff; + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - FORBIDDEN, - // 1110____ ________ - FORBIDDEN, - // 1111____ ________ - FORBIDDEN); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); - // ____0100 ________ - FORBIDDEN, - // ____0101 ________ - FORBIDDEN, - // ____011_ ________ - FORBIDDEN, FORBIDDEN, + // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] + const __m256i s0 = _mm256_srli_epi16(in_16, 4); + // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] + const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); + // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] + const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); + // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); + const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, + simdutf_vec(0b0100000000000000)); + const __m256i s4 = _mm256_xor_si256(s3, m0); +#undef simdutf_vec - // ____1___ ________ - FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, - // ____1101 ________ - FORBIDDEN, FORBIDDEN, FORBIDDEN); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, + // 4. expand code units 16-bit => 32-bit + const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); + const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint32_t mask = (one_byte_bitmask & 0x55555555) | + (one_or_two_bytes_bitmask & 0xaaaaaaaa); + // Due to the wider registers, the following path is less likely to be + // useful. + /*if(mask == 0) { + // We only have three-byte code units. Use fast path. + const __m256i shuffle = + _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, + 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = + _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = + _mm256_shuffle_epi8(out1, shuffle); + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; + continue; + }*/ + const uint8_t mask0 = uint8_t(mask); + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); + const __m128i utf8_0 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} + const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); + const __m128i utf8_1 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; + const uint8_t mask2 = static_cast(mask >> 16); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; + const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); + const __m128i utf8_2 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - this->error |= check_special_cases(input, prev1); - } + const uint8_t mask3 = static_cast(mask >> 24); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; + const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); + const __m128i utf8_3 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); - simdutf_really_inline size_t convert(const char *in, size_t size, - char *latin1_output) { - size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 16 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 16; margin--) { - leading_byte += (int8_t(in[margin - 1]) > - -65); // twos complement of -65 is 1011 1111 ... - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = - input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in - // this case, we also have ASCII to account for. - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = - scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output); - if (howmany == 0) { - return 0; + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); + utf8_output += row0[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + utf8_output += row1[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_2); + utf8_output += row2[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_3); + utf8_output += row3[0]; + buf += 16; + } else { + // case: at least one 32-bit word is larger than 0xFFFF <=> it will + // produce four UTF-8 bytes. Let us do a scalar fallback. It may seem + // wasteful to use scalar code, but being efficient with SIMD may require + // large, non-trivial tables? + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); } - latin1_output += howmany; - } - return latin1_output - start; - } - - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char *latin1_output) { - size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from - // in+pos onward, with the ability to go back up to pos bytes, and - // read size-pos bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); - res.count += pos; - return res; - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { // 1-byte (ASCII) + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { // 2-byte + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { // 3-byte + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, utf8_output); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { // 4-byte + if (word > 0x10FFFF) { + return std::make_pair(nullptr, utf8_output); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); - res.count += pos; - return res; - } - if (pos < size) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - latin1_output += res.count; } + buf += k; } - return result(error_code::SUCCESS, latin1_output - start); - } + } // while - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); + // check for invalid input + const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff); + if (static_cast(_mm256_movemask_epi8(_mm256_cmpeq_epi32( + _mm256_max_epu32(running_max, v_10ffff), v_10ffff))) != 0xffffffff) { + return std::make_pair(nullptr, utf8_output); } -}; // struct utf8_checker -} // namespace utf8_to_latin1 -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ -/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ - -namespace simdutf { -namespace arm64 { -namespace { -namespace utf8_to_latin1 { -using namespace simd; - -simdutf_really_inline size_t convert_valid(const char *in, size_t size, - char *latin1_output) { - size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last - // 16 bytes, and if the data is valid, then it is entirely safe because 16 - // UTF-8 bytes generate much more than 8 bytes. However, you cannot generally - // assume that you have valid UTF-8 input, so we are going to go back from the - // end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > - -65); // twos complement of -65 is 1011 1111 ... - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, it - // is not good enough. - uint64_t utf8_continuation_mask = - input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in - // this case, we also have ASCII to account for. - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (pos < size) { - size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, - latin1_output); - latin1_output += howmany; + if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != 0) { + return std::make_pair(nullptr, utf8_output); } - return latin1_output - start; + + return std::make_pair(buf, utf8_output); } -} // namespace utf8_to_latin1 -} // namespace -} // namespace arm64 -} // namespace simdutf - // namespace simdutf -/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +std::pair +avx2_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, + char *utf8_output) { + const char32_t *end = buf + len; + const char32_t *start = buf; -// placeholder scalars + const __m256i v_0000 = _mm256_setzero_si256(); + const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); + const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80); + const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800); + const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080); + const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff); + const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff); -// -// Implementation-specific overrides -// -namespace simdutf { -namespace arm64 { + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 -simdutf_warn_unused int -implementation::detect_encodings(const char *input, - size_t length) const noexcept { - // If there is a BOM, then we trust it. - auto bom_encoding = simdutf::BOM::check_bom(input, length); - if (bom_encoding != encoding_type::unspecified) { - return bom_encoding; - } - // todo: reimplement as a one-pass algorithm. - int out = 0; - if (validate_utf8(input, length)) { - out |= encoding_type::UTF8; - } - if ((length % 2) == 0) { - if (validate_utf16le(reinterpret_cast(input), - length / 2)) { - out |= encoding_type::UTF16_LE; - } - } - if ((length % 4) == 0) { - if (validate_utf32(reinterpret_cast(input), length / 4)) { - out |= encoding_type::UTF32_LE; + while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { + __m256i in = _mm256_loadu_si256((__m256i *)buf); + __m256i nextin = _mm256_loadu_si256((__m256i *)buf + 1); + // Check for too large input + const __m256i max_input = + _mm256_max_epu32(_mm256_max_epu32(in, nextin), v_10ffff); + if (static_cast(_mm256_movemask_epi8( + _mm256_cmpeq_epi32(max_input, v_10ffff))) != 0xffffffff) { + return std::make_pair(result(error_code::TOO_LARGE, buf - start), + utf8_output); } - } - return out; -} -simdutf_warn_unused bool -implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return arm64::utf8_validation::generic_validate_utf8(buf, len); -} + // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned + // saturation + __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff), + _mm256_and_si256(nextin, v_7fffffff)); + in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000); -simdutf_warn_unused result implementation::validate_utf8_with_errors( - const char *buf, size_t len) const noexcept { - return arm64::utf8_validation::generic_validate_utf8_with_errors(buf, len); -} + // Try to apply UTF-16 => UTF-8 routine on 256 bits + // (haswell/avx2_convert_utf16_to_utf8.cpp) -simdutf_warn_unused bool -implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return arm64::utf8_validation::generic_validate_ascii(buf, len); -} + if (_mm256_testz_si256(in_16, v_ff80)) { // ASCII fast path!!!! + // 1. pack the bytes + const __m128i utf8_packed = _mm_packus_epi16( + _mm256_castsi256_si128(in_16), _mm256_extractf128_si256(in_16, 1)); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } + // no bits set above 7th bit + const __m256i one_byte_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000); + const uint32_t one_byte_bitmask = + static_cast(_mm256_movemask_epi8(one_byte_bytemask)); -simdutf_warn_unused result implementation::validate_ascii_with_errors( - const char *buf, size_t len) const noexcept { - return arm64::utf8_validation::generic_validate_ascii_with_errors(buf, len); -} + // no bits set above 11th bit + const __m256i one_or_two_bytes_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000); + const uint32_t one_or_two_bytes_bitmask = + static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); + if (one_or_two_bytes_bitmask == 0xffffffff) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); + const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); -simdutf_warn_unused bool -implementation::validate_utf16le(const char16_t *buf, - size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - // empty input is valid. protected the implementation from nullptr. - return true; - } - const char16_t *tail = arm_validate_utf16(buf, len); - if (tail) { - return scalar::utf16::validate(tail, - len - (tail - buf)); - } else { - return false; - } -} + // t0 = [000a|aaaa|bbbb|bb00] + const __m256i t0 = _mm256_slli_epi16(in_16, 2); + // t1 = [000a|aaaa|0000|0000] + const __m256i t1 = _mm256_and_si256(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const __m256i t2 = _mm256_and_si256(in_16, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const __m256i t3 = _mm256_or_si256(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m256i t4 = _mm256_or_si256(t3, v_c080); -simdutf_warn_unused bool -implementation::validate_utf16be(const char16_t *buf, - size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - // empty input is valid. protected the implementation from nullptr. - return true; - } - const char16_t *tail = arm_validate_utf16(buf, len); - if (tail) { - return scalar::utf16::validate(tail, len - (tail - buf)); - } else { - return false; - } -} + // 2. merge ASCII and 2-byte codewords + const __m256i utf8_unpacked = + _mm256_blendv_epi8(t4, in_16, one_byte_bytemask); -simdutf_warn_unused result implementation::validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - return result(error_code::SUCCESS, 0); - } - result res = arm_validate_utf16_with_errors(buf, len); - if (res.count != len) { - result scalar_res = scalar::utf16::validate_with_errors( - buf + res.count, len - res.count); - return result(scalar_res.error, res.count + scalar_res.count); - } else { - return res; - } -} + // 3. prepare bitmask for 8-bit lookup + const uint32_t M0 = one_byte_bitmask & 0x55555555; + const uint32_t M1 = M0 >> 7; + const uint32_t M2 = (M1 | M0) & 0x00ff00ff; + // 4. pack the bytes -simdutf_warn_unused result implementation::validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - return result(error_code::SUCCESS, 0); - } - result res = arm_validate_utf16_with_errors(buf, len); - if (res.count != len) { - result scalar_res = scalar::utf16::validate_with_errors( - buf + res.count, len - res.count); - return result(scalar_res.error, res.count + scalar_res.count); - } else { - return res; - } -} + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; + const uint8_t *row_2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> + 16)][0]; -simdutf_warn_unused bool -implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - // empty input is valid. protected the implementation from nullptr. - return true; - } - const char32_t *tail = arm_validate_utf32le(buf, len); - if (tail) { - return scalar::utf32::validate(tail, len - (tail - buf)); - } else { - return false; - } -} + const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); + const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); -simdutf_warn_unused result implementation::validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - return result(error_code::SUCCESS, 0); - } - result res = arm_validate_utf32le_with_errors(buf, len); - if (res.count != len) { - result scalar_res = - scalar::utf32::validate_with_errors(buf + res.count, len - res.count); - return result(scalar_res.error, res.count + scalar_res.count); - } else { - return res; - } -} + const __m256i utf8_packed = _mm256_shuffle_epi8( + utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); + // 5. store bytes + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_castsi256_si128(utf8_packed)); + utf8_output += row[0]; + _mm_storeu_si128((__m128i *)utf8_output, + _mm256_extractf128_si256(utf8_packed, 1)); + utf8_output += row_2[0]; -simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - arm_convert_latin1_to_utf8(buf, len, utf8_output); - size_t converted_chars = ret.second - utf8_output; + // 6. adjust pointers + buf += 16; + continue; + } + // Must check for overflow in packing + const __m256i saturation_bytemask = _mm256_cmpeq_epi32( + _mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000); + const uint32_t saturation_bitmask = + static_cast(_mm256_movemask_epi8(saturation_bytemask)); + if (saturation_bitmask == 0xffffffff) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - if (ret.first != buf + len) { - const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - converted_chars += scalar_converted_chars; - } - return converted_chars; -} + // Check for illegal surrogate code units + const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800); + const __m256i forbidden_bytemask = + _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800); + if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != + 0x0) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + utf8_output); + } -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - arm_convert_latin1_to_utf16(buf, len, utf16_output); - size_t converted_chars = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_converted_chars = - scalar::latin1_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - converted_chars += scalar_converted_chars; - } - return converted_chars; -} + const __m256i dup_even = _mm256_setr_epi16( + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, + 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - arm_convert_latin1_to_utf16(buf, len, utf16_output); - size_t converted_chars = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_converted_chars = - scalar::latin1_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - converted_chars += scalar_converted_chars; - } - return converted_chars; -} + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes -simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - std::pair ret = - arm_convert_latin1_to_utf32(buf, len, utf32_output); - size_t converted_chars = ret.second - utf32_output; - if (ret.first != buf + len) { - const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert( - ret.first, len - (ret.first - buf), ret.second); - converted_chars += scalar_converted_chars; - } - return converted_chars; -} + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. -simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept { - utf8_to_latin1::validating_transcoder converter; - return converter.convert(buf, len, latin1_output); -} + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. -simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_output) const noexcept { - utf8_to_latin1::validating_transcoder converter; - return converter.convert_with_errors(buf, len, latin1_output); -} + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept { - return arm64::utf8_to_latin1::convert_valid(buf, len, latin1_output); -} + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ +#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert(buf, len, utf16_output); -} + // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] + const __m256i s0 = _mm256_srli_epi16(in_16, 4); + // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] + const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); + // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] + const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); + // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); + const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, + simdutf_vec(0b0100000000000000)); + const __m256i s4 = _mm256_xor_si256(s3, m0); +#undef simdutf_vec -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert(buf, len, utf16_output); -} + // 4. expand code units 16-bit => 32-bit + const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); + const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); -simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert_with_errors(buf, len, - utf16_output); -} + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + const uint32_t mask = (one_byte_bitmask & 0x55555555) | + (one_or_two_bytes_bitmask & 0xaaaaaaaa); + // Due to the wider registers, the following path is less likely to be + // useful. + /*if(mask == 0) { + // We only have three-byte code units. Use fast path. + const __m256i shuffle = + _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, + 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = + _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = + _mm256_shuffle_epi8(out1, shuffle); + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); + utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; + _mm_storeu_si128((__m128i*)utf8_output, + _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; + continue; + }*/ + const uint8_t mask0 = uint8_t(mask); + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); + const __m128i utf8_0 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); -simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert_with_errors(buf, len, utf16_output); -} + const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); + const __m128i utf8_1 = + _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( - const char *input, size_t size, char16_t *utf16_output) const noexcept { - return utf8_to_utf16::convert_valid(input, size, - utf16_output); -} + const uint8_t mask2 = static_cast(mask >> 16); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; + const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); + const __m128i utf8_2 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( - const char *input, size_t size, char16_t *utf16_output) const noexcept { - return utf8_to_utf16::convert_valid(input, size, - utf16_output); -} + const uint8_t mask3 = static_cast(mask >> 24); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; + const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); + const __m128i utf8_3 = + _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); -simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - utf8_to_utf32::validating_transcoder converter; - return converter.convert(buf, len, utf32_output); -} + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); + utf8_output += row0[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + utf8_output += row1[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_2); + utf8_output += row2[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_3); + utf8_output += row3[0]; + buf += 16; + } else { + // case: at least one 32-bit word is larger than 0xFFFF <=> it will + // produce four UTF-8 bytes. Let us do a scalar fallback. It may seem + // wasteful to use scalar code, but being efficient with SIMD may require + // large, non-trivial tables? + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { // 1-byte (ASCII) + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { // 2-byte + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { // 3-byte + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k), utf8_output); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { // 4-byte + if (word > 0x10FFFF) { + return std::make_pair( + result(error_code::TOO_LARGE, buf - start + k), utf8_output); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while -simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - utf8_to_utf32::validating_transcoder converter; - return converter.convert_with_errors(buf, len, utf32_output); + return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); } +/* end file src/haswell/avx2_convert_utf32_to_utf8.cpp */ +/* begin file src/haswell/avx2_convert_utf32_to_utf16.cpp */ +template +std::pair +avx2_convert_utf32_to_utf16(const char32_t *buf, size_t len, + char16_t *utf16_output) { + const char32_t *end = buf + len; -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( - const char *input, size_t size, char32_t *utf32_output) const noexcept { - return utf8_to_utf32::convert_valid(input, size, utf32_output); -} + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + __m256i forbidden_bytemask = _mm256_setzero_si256(); -simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - arm_convert_utf16_to_latin1(buf, len, latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; + while (end - buf >= std::ptrdiff_t(8 + safety_margin)) { + __m256i in = _mm256_loadu_si256((__m256i *)buf); - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_latin1::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} + const __m256i v_00000000 = _mm256_setzero_si256(); + const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000); -simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - arm_convert_utf16_to_latin1(buf, len, latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; + // no bits set above 16th bit <=> can pack to UTF16 without surrogate pairs + const __m256i saturation_bytemask = + _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000); + const uint32_t saturation_bitmask = + static_cast(_mm256_movemask_epi8(saturation_bytemask)); - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_latin1::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} + if (saturation_bitmask == 0xffffffff) { + const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800); + const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800); + forbidden_bytemask = _mm256_or_si256( + forbidden_bytemask, + _mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800)); -simdutf_warn_unused result -implementation::convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - arm_convert_utf16_to_latin1_with_errors( - buf, len, latin1_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_latin1::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; + __m128i utf16_packed = _mm_packus_epi32(_mm256_castsi256_si128(in), + _mm256_extractf128_si256(in, 1)); + if (big_endian) { + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + utf16_packed = _mm_shuffle_epi8(utf16_packed, swap); + } + _mm_storeu_si128((__m128i *)utf16_output, utf16_packed); + utf16_output += 8; + buf += 8; } else { - ret.second += scalar_res.count; + size_t forward = 7; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, utf16_output); + } + *utf16_output++ = + big_endian + ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return std::make_pair(nullptr, utf16_output); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if (big_endian) { + high_surrogate = + uint16_t((high_surrogate >> 8) | (high_surrogate << 8)); + low_surrogate = + uint16_t((low_surrogate >> 8) | (low_surrogate << 8)); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + } + buf += k; } } - ret.first.count = - ret.second - - latin1_output; // Set count to the number of 8-bit code units written - return ret.first; -} -simdutf_warn_unused result -implementation::convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - arm_convert_utf16_to_latin1_with_errors(buf, len, - latin1_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_latin1::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } + // check for invalid input + if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != 0) { + return std::make_pair(nullptr, utf16_output); } - ret.first.count = - ret.second - - latin1_output; // Set count to the number of 8-bit code units written - return ret.first; -} -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: implement a custom function. - return convert_utf16be_to_latin1(buf, len, latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: implement a custom function. - return convert_utf16le_to_latin1(buf, len, latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - arm_convert_utf16_to_utf8(buf, len, utf8_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf8_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; + return std::make_pair(buf, utf16_output); } -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - arm_convert_utf16_to_utf8(buf, len, utf8_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf8_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} +template +std::pair +avx2_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, + char16_t *utf16_output) { + const char32_t *start = buf; + const char32_t *end = buf + len; -simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - arm_convert_utf16_to_utf8_with_errors(buf, len, - utf8_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf8::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf8_output; // Set count to the number of 8-bit code units written - return ret.first; -} + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 -simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - arm_convert_utf16_to_utf8_with_errors(buf, len, - utf8_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf8::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf8_output; // Set count to the number of 8-bit code units written - return ret.first; -} + while (end - buf >= std::ptrdiff_t(8 + safety_margin)) { + __m256i in = _mm256_loadu_si256((__m256i *)buf); -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf16le_to_utf8(buf, len, utf8_output); -} + const __m256i v_00000000 = _mm256_setzero_si256(); + const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000); -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf16be_to_utf8(buf, len, utf8_output); -} + // no bits set above 16th bit <=> can pack to UTF16 without surrogate pairs + const __m256i saturation_bytemask = + _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000); + const uint32_t saturation_bitmask = + static_cast(_mm256_movemask_epi8(saturation_bytemask)); -simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - if (simdutf_unlikely(len == 0)) { - return 0; - } - std::pair ret = - arm_convert_utf32_to_utf8(buf, len, utf8_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf8_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} + if (saturation_bitmask == 0xffffffff) { + const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800); + const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800); + const __m256i forbidden_bytemask = + _mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800); + if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != + 0x0) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + utf16_output); + } -simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - if (simdutf_unlikely(len == 0)) { - return result(error_code::SUCCESS, 0); - } - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - arm_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); - if (ret.first.count != len) { - result scalar_res = scalar::utf32_to_utf8::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; + __m128i utf16_packed = _mm_packus_epi32(_mm256_castsi256_si128(in), + _mm256_extractf128_si256(in, 1)); + if (big_endian) { + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + utf16_packed = _mm_shuffle_epi8(utf16_packed, swap); + } + _mm_storeu_si128((__m128i *)utf16_output, utf16_packed); + utf16_output += 8; + buf += 8; } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf8_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::pair ret = - arm_convert_utf16_to_utf32(buf, len, utf32_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf32_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf32::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; + size_t forward = 7; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k), utf16_output); + } + *utf16_output++ = + big_endian + ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return std::make_pair( + result(error_code::TOO_LARGE, buf - start + k), utf16_output); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if (big_endian) { + high_surrogate = + uint16_t((high_surrogate >> 8) | (high_surrogate << 8)); + low_surrogate = + uint16_t((low_surrogate >> 8) | (low_surrogate << 8)); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + } + buf += k; } - saved_bytes += scalar_saved_bytes; } - return saved_bytes; + + return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output); } +/* end file src/haswell/avx2_convert_utf32_to_utf16.cpp */ -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::pair ret = - arm_convert_utf16_to_utf32(buf, len, utf32_output); - if (ret.first == nullptr) { - return 0; +/* begin file src/haswell/avx2_convert_utf8_to_latin1.cpp */ +// depends on "tables/utf8_to_utf16_tables.h" + +// Convert up to 12 bytes from utf8 to latin1 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 12). +size_t convert_masked_utf8_to_latin1(const char *input, + uint64_t utf8_end_of_code_point_mask, + char *&latin1_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // + const __m128i in = _mm_loadu_si128((__m128i *)input); + + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & + 0xfff; // we are only processing 12 bytes in case it is not all ASCII + + if (utf8_end_of_code_point_mask == 0xfff) { + // We process the data in chunks of 12 bytes. + _mm_storeu_si128(reinterpret_cast<__m128i *>(latin1_output), in); + latin1_output += 12; // We wrote 12 characters. + return 12; // We consumed 1 bytes. } - size_t saved_bytes = ret.second - utf32_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf32::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; + /// We do not have a fast path available, so we fallback. + const uint8_t idx = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; + // this indicates an invalid input: + if (idx >= 64) { + return consumed; } - return saved_bytes; + // Here we should have (idx < 64), if not, there is a bug in the validation or + // elsewhere. SIX (6) input code-code units this is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. On + // processors where pdep/pext is fast, we might be able to use a small lookup + // table. + const __m128i sh = + _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); + __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); + const __m128i latin1_packed = _mm_packus_epi16(composed, composed); + // writing 8 bytes even though we only care about the first 6 bytes. + // performance note: it would be faster to use _mm_storeu_si128, we should + // investigate. + _mm_storel_epi64((__m128i *)latin1_output, latin1_packed); + latin1_output += 6; // We wrote 6 bytes. + return consumed; } +/* end file src/haswell/avx2_convert_utf8_to_latin1.cpp */ -simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - arm_convert_utf16_to_utf32_with_errors(buf, len, - utf32_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf32::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf32_output; // Set count to the number of 8-bit code units written - return ret.first; -} +/* begin file src/haswell/avx2_base64.cpp */ +/** + * References and further reading: + * + * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the + * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. + * https://arxiv.org/abs/1910.05109 + * + * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 + * Instructions, ACM Transactions on the Web 12 (3), 2018. + * https://arxiv.org/abs/1704.00605 + * + * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. + * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, + * Request for Comments: 4648. + * + * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. + * http://www.alfredklomp.com/programming/sse-base64/. (2014). + * + * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD + * acceleration. https://github.com/aklomp/base64. (2014). + * + * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). + * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ + * + * Nick Kopp. 2013. Base64 Encoding on a GPU. + * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). + */ -simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - arm_convert_utf16_to_utf32_with_errors(buf, len, - utf32_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf32::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } +template +simdutf_really_inline __m256i lookup_pshufb_improved(const __m256i input) { + // credit: Wojciech Muła + __m256i result = _mm256_subs_epu8(input, _mm256_set1_epi8(51)); + const __m256i less = _mm256_cmpgt_epi8(_mm256_set1_epi8(26), input); + result = + _mm256_or_si256(result, _mm256_and_si256(less, _mm256_set1_epi8(13))); + __m256i shift_LUT; + if (base64_url) { + shift_LUT = _mm256_setr_epi8( + 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '0' - 52, '0' - 52, '0' - 52, '-' - 62, '_' - 63, 'A', 0, 0, + + 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '0' - 52, '0' - 52, '0' - 52, '-' - 62, '_' - 63, 'A', 0, 0); + } else { + shift_LUT = _mm256_setr_epi8( + 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '0' - 52, '0' - 52, '0' - 52, '+' - 62, '/' - 63, 'A', 0, 0, + + 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '0' - 52, '0' - 52, '0' - 52, '+' - 62, '/' - 63, 'A', 0, 0); } - ret.first.count = - ret.second - - utf32_output; // Set count to the number of 8-bit code units written - return ret.first; + + result = _mm256_shuffle_epi8(shift_LUT, result); + return _mm256_add_epi8(result, input); } -simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - arm_convert_utf32_to_latin1(buf, len, latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; +template +size_t encode_base64(char *dst, const char *src, size_t srclen, + base64_options options) { + // credit: Wojciech Muła + const uint8_t *input = (const uint8_t *)src; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; + uint8_t *out = (uint8_t *)dst; + const __m256i shuf = + _mm256_set_epi8(10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1, + + 10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1); + size_t i = 0; + for (; i + 100 <= srclen; i += 96) { + const __m128i lo0 = _mm_loadu_si128( + reinterpret_cast(input + i + 4 * 3 * 0)); + const __m128i hi0 = _mm_loadu_si128( + reinterpret_cast(input + i + 4 * 3 * 1)); + const __m128i lo1 = _mm_loadu_si128( + reinterpret_cast(input + i + 4 * 3 * 2)); + const __m128i hi1 = _mm_loadu_si128( + reinterpret_cast(input + i + 4 * 3 * 3)); + const __m128i lo2 = _mm_loadu_si128( + reinterpret_cast(input + i + 4 * 3 * 4)); + const __m128i hi2 = _mm_loadu_si128( + reinterpret_cast(input + i + 4 * 3 * 5)); + const __m128i lo3 = _mm_loadu_si128( + reinterpret_cast(input + i + 4 * 3 * 6)); + const __m128i hi3 = _mm_loadu_si128( + reinterpret_cast(input + i + 4 * 3 * 7)); + + __m256i in0 = _mm256_shuffle_epi8(_mm256_set_m128i(hi0, lo0), shuf); + __m256i in1 = _mm256_shuffle_epi8(_mm256_set_m128i(hi1, lo1), shuf); + __m256i in2 = _mm256_shuffle_epi8(_mm256_set_m128i(hi2, lo2), shuf); + __m256i in3 = _mm256_shuffle_epi8(_mm256_set_m128i(hi3, lo3), shuf); + + const __m256i t0_0 = _mm256_and_si256(in0, _mm256_set1_epi32(0x0fc0fc00)); + const __m256i t0_1 = _mm256_and_si256(in1, _mm256_set1_epi32(0x0fc0fc00)); + const __m256i t0_2 = _mm256_and_si256(in2, _mm256_set1_epi32(0x0fc0fc00)); + const __m256i t0_3 = _mm256_and_si256(in3, _mm256_set1_epi32(0x0fc0fc00)); + + const __m256i t1_0 = + _mm256_mulhi_epu16(t0_0, _mm256_set1_epi32(0x04000040)); + const __m256i t1_1 = + _mm256_mulhi_epu16(t0_1, _mm256_set1_epi32(0x04000040)); + const __m256i t1_2 = + _mm256_mulhi_epu16(t0_2, _mm256_set1_epi32(0x04000040)); + const __m256i t1_3 = + _mm256_mulhi_epu16(t0_3, _mm256_set1_epi32(0x04000040)); + + const __m256i t2_0 = _mm256_and_si256(in0, _mm256_set1_epi32(0x003f03f0)); + const __m256i t2_1 = _mm256_and_si256(in1, _mm256_set1_epi32(0x003f03f0)); + const __m256i t2_2 = _mm256_and_si256(in2, _mm256_set1_epi32(0x003f03f0)); + const __m256i t2_3 = _mm256_and_si256(in3, _mm256_set1_epi32(0x003f03f0)); + + const __m256i t3_0 = + _mm256_mullo_epi16(t2_0, _mm256_set1_epi32(0x01000010)); + const __m256i t3_1 = + _mm256_mullo_epi16(t2_1, _mm256_set1_epi32(0x01000010)); + const __m256i t3_2 = + _mm256_mullo_epi16(t2_2, _mm256_set1_epi32(0x01000010)); + const __m256i t3_3 = + _mm256_mullo_epi16(t2_3, _mm256_set1_epi32(0x01000010)); + + const __m256i input0 = _mm256_or_si256(t1_0, t3_0); + const __m256i input1 = _mm256_or_si256(t1_1, t3_1); + const __m256i input2 = _mm256_or_si256(t1_2, t3_2); + const __m256i input3 = _mm256_or_si256(t1_3, t3_3); + + _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), + lookup_pshufb_improved(input0)); + out += 32; + + _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), + lookup_pshufb_improved(input1)); + out += 32; + + _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), + lookup_pshufb_improved(input2)); + out += 32; + _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), + lookup_pshufb_improved(input3)); + out += 32; } - return saved_bytes; -} + for (; i + 28 <= srclen; i += 24) { + // lo = [xxxx|DDDC|CCBB|BAAA] + // hi = [xxxx|HHHG|GGFF|FEEE] + const __m128i lo = + _mm_loadu_si128(reinterpret_cast(input + i)); + const __m128i hi = + _mm_loadu_si128(reinterpret_cast(input + i + 4 * 3)); -simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - arm_convert_utf32_to_latin1_with_errors(buf, len, latin1_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = scalar::utf32_to_latin1::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } + // bytes from groups A, B and C are needed in separate 32-bit lanes + // in = [0HHH|0GGG|0FFF|0EEE[0DDD|0CCC|0BBB|0AAA] + __m256i in = _mm256_shuffle_epi8(_mm256_set_m128i(hi, lo), shuf); + + // this part is well commented in encode.sse.cpp + + const __m256i t0 = _mm256_and_si256(in, _mm256_set1_epi32(0x0fc0fc00)); + const __m256i t1 = _mm256_mulhi_epu16(t0, _mm256_set1_epi32(0x04000040)); + const __m256i t2 = _mm256_and_si256(in, _mm256_set1_epi32(0x003f03f0)); + const __m256i t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010)); + const __m256i indices = _mm256_or_si256(t1, t3); + + _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), + lookup_pshufb_improved(indices)); + out += 32; } - ret.first.count = - ret.second - - latin1_output; // Set count to the number of 8-bit code units written - return ret.first; + return i / 3 * 4 + scalar::base64::tail_encode_base64((char *)out, src + i, + srclen - i, options); } -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - arm_convert_utf32_to_latin1(buf, len, latin1_output); - if (ret.first == nullptr) { - return 0; +static inline void compress(__m128i data, uint16_t mask, char *output) { + if (mask == 0) { + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), data); + return; } - size_t saved_bytes = ret.second - latin1_output; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert_valid( - ret.first, len - (ret.first - buf), ret.second); - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} + __m128i shufmask = _mm_set_epi64x(tables::base64::thintable_epi8[mask2], + tables::base64::thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(data, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = tables::base64::BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = _mm_loadu_si128(reinterpret_cast( + tables::base64::pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - // optimization opportunity: implement a custom function. - return convert_utf32_to_utf8(buf, len, utf8_output); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); } -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - arm_convert_utf32_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf32_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; +static inline void compress(__m256i data, uint32_t mask, char *output) { + if (mask == 0) { + _mm256_storeu_si256(reinterpret_cast<__m256i *>(output), data); + return; } - return saved_bytes; + compress(_mm256_castsi256_si128(data), uint16_t(mask), output); + compress(_mm256_extracti128_si256(data, 1), uint16_t(mask >> 16), + output + _mm_popcnt_u32(~mask & 0xFFFF)); } -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - arm_convert_utf32_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf32_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; +struct block64 { + __m256i chunks[2]; +}; + +template +static inline uint32_t to_base64_mask(__m256i *src, uint32_t *error) { + const __m256i ascii_space_tbl = + _mm256_setr_epi8(0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9, 0xa, + 0x0, 0xc, 0xd, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x9, 0xa, 0x0, 0xc, 0xd, 0x0, 0x0); + // credit: aqrit + __m256i delta_asso; + if (base64_url) { + delta_asso = + _mm256_setr_epi8(0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, 0x0, 0x0, + 0x0, 0x0, 0xF, 0x0, 0xF, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, + 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0xF, 0x0, 0xF); + } else { + delta_asso = _mm256_setr_epi8( + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x0F, 0x00, 0x0F, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F); } - return saved_bytes; -} -simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - arm_convert_utf32_to_utf16_with_errors(buf, len, - utf16_output); - if (ret.first.count != len) { - result scalar_res = - scalar::utf32_to_utf16::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } + __m256i delta_values; + if (base64_url) { + delta_values = _mm256_setr_epi8( + 0x0, 0x0, 0x0, 0x13, 0x4, uint8_t(0xBF), uint8_t(0xBF), uint8_t(0xB9), + uint8_t(0xB9), 0x0, 0x11, uint8_t(0xC3), uint8_t(0xBF), uint8_t(0xE0), + uint8_t(0xB9), uint8_t(0xB9), 0x0, 0x0, 0x0, 0x13, 0x4, uint8_t(0xBF), + uint8_t(0xBF), uint8_t(0xB9), uint8_t(0xB9), 0x0, 0x11, uint8_t(0xC3), + uint8_t(0xBF), uint8_t(0xE0), uint8_t(0xB9), uint8_t(0xB9)); + } else { + delta_values = _mm256_setr_epi8( + int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), int8_t(0x04), + int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9), int8_t(0x00), + int8_t(0x10), int8_t(0xC3), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9), + int8_t(0x00), int8_t(0x10), int8_t(0xC3), int8_t(0xBF), int8_t(0xBF), + int8_t(0xB9), int8_t(0xB9)); } - ret.first.count = - ret.second - - utf16_output; // Set count to the number of 8-bit code units written - return ret.first; -} + __m256i check_asso; -simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - arm_convert_utf32_to_utf16_with_errors(buf, len, - utf16_output); - if (ret.first.count != len) { - result scalar_res = - scalar::utf32_to_utf16::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } + if (base64_url) { + check_asso = + _mm256_setr_epi8(0xD, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x3, + 0x7, 0xB, 0xE, 0xB, 0x6, 0xD, 0x1, 0x1, 0x1, 0x1, 0x1, + 0x1, 0x1, 0x1, 0x1, 0x3, 0x7, 0xB, 0xE, 0xB, 0x6); + } else { + + check_asso = _mm256_setr_epi8( + 0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, + 0x0B, 0x0B, 0x0B, 0x0F, 0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F); } - ret.first.count = - ret.second - - utf16_output; // Set count to the number of 8-bit code units written - return ret.first; + __m256i check_values; + if (base64_url) { + check_values = _mm256_setr_epi8( + uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), + uint8_t(0xCF), uint8_t(0xBF), uint8_t(0xB6), uint8_t(0xA6), + uint8_t(0xB5), uint8_t(0xA1), 0x0, uint8_t(0x80), 0x0, uint8_t(0x80), + 0x0, uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), + uint8_t(0x80), uint8_t(0xCF), uint8_t(0xBF), uint8_t(0xB6), + uint8_t(0xA6), uint8_t(0xB5), uint8_t(0xA1), 0x0, uint8_t(0x80), 0x0, + uint8_t(0x80), 0x0, uint8_t(0x80)); + } else { + check_values = _mm256_setr_epi8( + int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0xCF), + int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), int8_t(0xB5), int8_t(0x86), + int8_t(0xD1), int8_t(0x80), int8_t(0xB1), int8_t(0x80), int8_t(0x91), + int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), int8_t(0xB5), + int8_t(0x86), int8_t(0xD1), int8_t(0x80), int8_t(0xB1), int8_t(0x80), + int8_t(0x91), int8_t(0x80)); + } + const __m256i shifted = _mm256_srli_epi32(*src, 3); + const __m256i delta_hash = + _mm256_avg_epu8(_mm256_shuffle_epi8(delta_asso, *src), shifted); + const __m256i check_hash = + _mm256_avg_epu8(_mm256_shuffle_epi8(check_asso, *src), shifted); + const __m256i out = + _mm256_adds_epi8(_mm256_shuffle_epi8(delta_values, delta_hash), *src); + const __m256i chk = + _mm256_adds_epi8(_mm256_shuffle_epi8(check_values, check_hash), *src); + const int mask = _mm256_movemask_epi8(chk); + if (mask) { + __m256i ascii_space = + _mm256_cmpeq_epi8(_mm256_shuffle_epi8(ascii_space_tbl, *src), *src); + *error = (mask ^ _mm256_movemask_epi8(ascii_space)); + } + *src = out; + return (uint32_t)mask; } -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return convert_utf32_to_utf16le(buf, len, utf16_output); +template +static inline uint64_t to_base64_mask(block64 *b, uint64_t *error) { + uint32_t err0 = 0; + uint32_t err1 = 0; + uint64_t m0 = to_base64_mask(&b->chunks[0], &err0); + uint64_t m1 = to_base64_mask(&b->chunks[1], &err1); + *error = err0 | ((uint64_t)err1 << 32); + return m0 | (m1 << 32); } -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return convert_utf32_to_utf16be(buf, len, utf16_output); +static inline void copy_block(block64 *b, char *output) { + _mm256_storeu_si256(reinterpret_cast<__m256i *>(output), b->chunks[0]); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(output + 32), b->chunks[1]); } -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return convert_utf16le_to_utf32(buf, len, utf32_output); +static inline uint64_t compress_block(block64 *b, uint64_t mask, char *output) { + uint64_t nmask = ~mask; + compress(b->chunks[0], uint32_t(mask), output); + compress(b->chunks[1], uint32_t(mask >> 32), + output + _mm_popcnt_u64(nmask & 0xFFFFFFFF)); + return _mm_popcnt_u64(nmask); } -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return convert_utf16be_to_utf32(buf, len, utf32_output); +// The caller of this function is responsible to ensure that there are 64 bytes +// available from reading at src. The data is read into a block64 structure. +static inline void load_block(block64 *b, const char *src) { + b->chunks[0] = _mm256_loadu_si256(reinterpret_cast(src)); + b->chunks[1] = + _mm256_loadu_si256(reinterpret_cast(src + 32)); } -void implementation::change_endianness_utf16(const char16_t *input, - size_t length, - char16_t *output) const noexcept { - utf16::change_endianness_utf16(input, length, output); +// The caller of this function is responsible to ensure that there are 128 bytes +// available from reading at src. The data is read into a block64 structure. +static inline void load_block(block64 *b, const char16_t *src) { + __m256i m1 = _mm256_loadu_si256(reinterpret_cast(src)); + __m256i m2 = _mm256_loadu_si256(reinterpret_cast(src + 16)); + __m256i m3 = _mm256_loadu_si256(reinterpret_cast(src + 32)); + __m256i m4 = _mm256_loadu_si256(reinterpret_cast(src + 48)); + __m256i m1p = _mm256_permute2x128_si256(m1, m2, 0x20); + __m256i m2p = _mm256_permute2x128_si256(m1, m2, 0x31); + __m256i m3p = _mm256_permute2x128_si256(m3, m4, 0x20); + __m256i m4p = _mm256_permute2x128_si256(m3, m4, 0x31); + b->chunks[0] = _mm256_packus_epi16(m1p, m2p); + b->chunks[1] = _mm256_packus_epi16(m3p, m4p); } -simdutf_warn_unused size_t implementation::count_utf16le( - const char16_t *input, size_t length) const noexcept { - return utf16::count_code_points(input, length); -} +static inline void base64_decode(char *out, __m256i str) { + // credit: aqrit + const __m256i pack_shuffle = + _mm256_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1, + 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1); + const __m256i t0 = _mm256_maddubs_epi16(str, _mm256_set1_epi32(0x01400140)); + const __m256i t1 = _mm256_madd_epi16(t0, _mm256_set1_epi32(0x00011000)); + const __m256i t2 = _mm256_shuffle_epi8(t1, pack_shuffle); -simdutf_warn_unused size_t implementation::count_utf16be( - const char16_t *input, size_t length) const noexcept { - return utf16::count_code_points(input, length); + // Store the output: + _mm_storeu_si128((__m128i *)out, _mm256_castsi256_si128(t2)); + _mm_storeu_si128((__m128i *)(out + 12), _mm256_extracti128_si256(t2, 1)); } - -simdutf_warn_unused size_t -implementation::count_utf8(const char *input, size_t length) const noexcept { - return utf8::count_code_points(input, length); +// decode 64 bytes and output 48 bytes +static inline void base64_decode_block(char *out, const char *src) { + base64_decode(out, + _mm256_loadu_si256(reinterpret_cast(src))); + base64_decode(out + 24, _mm256_loadu_si256( + reinterpret_cast(src + 32))); } - -simdutf_warn_unused size_t implementation::latin1_length_from_utf8( - const char *buf, size_t len) const noexcept { - return count_utf8(buf, len); +static inline void base64_decode_block_safe(char *out, const char *src) { + base64_decode(out, + _mm256_loadu_si256(reinterpret_cast(src))); + char buffer[32]; // We enforce safety with a buffer. + base64_decode( + buffer, _mm256_loadu_si256(reinterpret_cast(src + 32))); + std::memcpy(out + 24, buffer, 24); } - -simdutf_warn_unused size_t -implementation::latin1_length_from_utf16(size_t length) const noexcept { - return scalar::utf16::latin1_length_from_utf16(length); +static inline void base64_decode_block(char *out, block64 *b) { + base64_decode(out, b->chunks[0]); + base64_decode(out + 24, b->chunks[1]); } - -simdutf_warn_unused size_t -implementation::latin1_length_from_utf32(size_t length) const noexcept { - return scalar::utf32::latin1_length_from_utf32(length); +static inline void base64_decode_block_safe(char *out, block64 *b) { + base64_decode(out, b->chunks[0]); + char buffer[32]; // We enforce safety with a buffer. + base64_decode(buffer, b->chunks[1]); + std::memcpy(out + 24, buffer, 24); } -simdutf_warn_unused size_t implementation::utf8_length_from_latin1( - const char *input, size_t length) const noexcept { - // See - // https://lemire.me/blog/2023/05/15/computing-the-utf-8-size-of-a-latin-1-string-quickly-arm-neon-edition/ - // credit to Pete Cawley - const uint8_t *data = reinterpret_cast(input); - uint64_t result = 0; - const int lanes = sizeof(uint8x16_t); - uint8_t rem = length % lanes; - const uint8_t *simd_end = data + (length / lanes) * lanes; - const uint8x16_t threshold = vdupq_n_u8(0x80); - for (; data < simd_end; data += lanes) { - // load 16 bytes - uint8x16_t input_vec = vld1q_u8(data); - // compare to threshold (0x80) - uint8x16_t withhighbit = vcgeq_u8(input_vec, threshold); - // vertical addition - result -= vaddvq_s8(vreinterpretq_s8_u8(withhighbit)); +template +full_result +compress_decode_base64(char *dst, const chartype *src, size_t srclen, + base64_options options, + last_chunk_handling_options last_chunk_options) { + const uint8_t *to_base64 = base64_url ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value; + size_t equallocation = + srclen; // location of the first padding character if any + // skip trailing spaces + while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && + to_base64[uint8_t(src[srclen - 1])] == 64) { + srclen--; } - return result + (length / lanes) * lanes + - scalar::latin1::utf8_length_from_latin1((const char *)simd_end, rem); -} + size_t equalsigns = 0; + if (srclen > 0 && src[srclen - 1] == '=') { + equallocation = srclen - 1; + srclen--; + equalsigns = 1; + // skip trailing spaces + while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && + to_base64[uint8_t(src[srclen - 1])] == 64) { + srclen--; + } + if (srclen > 0 && src[srclen - 1] == '=') { + equallocation = srclen - 1; + srclen--; + equalsigns = 2; + } + } + if (srclen == 0) { + if (equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, 0, 0}; + } + char *end_of_safe_64byte_zone = + (srclen + 3) / 4 * 3 >= 63 ? dst + (srclen + 3) / 4 * 3 - 63 : dst; -simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return utf16::utf8_length_from_utf16(input, length); -} + const chartype *const srcinit = src; + const char *const dstinit = dst; + const chartype *const srcend = src + srclen; -simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return utf16::utf8_length_from_utf16(input, length); -} + constexpr size_t block_size = 6; + static_assert(block_size >= 2, "block_size must be at least two"); + char buffer[block_size * 64]; + char *bufferptr = buffer; + if (srclen >= 64) { + const chartype *const srcend64 = src + srclen - 64; + while (src <= srcend64) { + block64 b; + load_block(&b, src); + src += 64; + uint64_t error = 0; + uint64_t badcharmask = to_base64_mask(&b, &error); + if (error) { + src -= 64; + size_t error_offset = _tzcnt_u64(error); + return {error_code::INVALID_BASE64_CHARACTER, + size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; + } + if (badcharmask != 0) { + // optimization opportunity: check for simple masks like those made of + // continuous 1s followed by continuous 0s. And masks containing a + // single bad character. + bufferptr += compress_block(&b, badcharmask, bufferptr); + } else if (bufferptr != buffer) { + copy_block(&b, bufferptr); + bufferptr += 64; + } else { + if (dst >= end_of_safe_64byte_zone) { + base64_decode_block_safe(dst, &b); + } else { + base64_decode_block(dst, &b); + } + dst += 48; + } + if (bufferptr >= (block_size - 1) * 64 + buffer) { + for (size_t i = 0; i < (block_size - 2); i++) { + base64_decode_block(dst, buffer + i * 64); + dst += 48; + } + if (dst >= end_of_safe_64byte_zone) { + base64_decode_block_safe(dst, buffer + (block_size - 2) * 64); + } else { + base64_decode_block(dst, buffer + (block_size - 2) * 64); + } + dst += 48; + std::memcpy(buffer, buffer + (block_size - 1) * 64, + 64); // 64 might be too much + bufferptr -= (block_size - 1) * 64; + } + } + } -simdutf_warn_unused size_t -implementation::utf16_length_from_latin1(size_t length) const noexcept { - return scalar::latin1::utf16_length_from_latin1(length); -} + char *buffer_start = buffer; + // Optimization note: if this is almost full, then it is worth our + // time, otherwise, we should just decode directly. + int last_block = (int)((bufferptr - buffer_start) % 64); + if (last_block != 0 && srcend - src + last_block >= 64) { -simdutf_warn_unused size_t -implementation::utf32_length_from_latin1(size_t length) const noexcept { - return scalar::latin1::utf32_length_from_latin1(length); -} + while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { + uint8_t val = to_base64[uint8_t(*src)]; + *bufferptr = char(val); + if (!scalar::base64::is_eight_byte(*src) || val > 64) { + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + bufferptr += (val <= 63); + src++; + } + } -simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return utf16::utf32_length_from_utf16(input, length); -} + for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { + if (dst >= end_of_safe_64byte_zone) { + base64_decode_block_safe(dst, buffer_start); + } else { + base64_decode_block(dst, buffer_start); + } + dst += 48; + } + if ((bufferptr - buffer_start) % 64 != 0) { + while (buffer_start + 4 < bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; + triple = scalar::utf32::swap_bytes(triple); + std::memcpy(dst, &triple, 4); -simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return utf16::utf32_length_from_utf16(input, length); -} + dst += 3; + buffer_start += 4; + } + if (buffer_start + 4 <= bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; + triple = scalar::utf32::swap_bytes(triple); + std::memcpy(dst, &triple, 3); -simdutf_warn_unused size_t implementation::utf16_length_from_utf8( - const char *input, size_t length) const noexcept { - return utf8::utf16_length_from_utf8(input, length); + dst += 3; + buffer_start += 4; + } + // we may have 1, 2 or 3 bytes left and we need to decode them so let us + // backtrack + int leftover = int(bufferptr - buffer_start); + while (leftover > 0) { + while (to_base64[uint8_t(*(src - 1))] == 64) { + src--; + } + src--; + leftover--; + } + } + if (src < srcend + equalsigns) { + full_result r = scalar::base64::base64_tail_decode( + dst, src, srcend - src, equalsigns, options, last_chunk_options); + r.input_count += size_t(src - srcinit); + if (r.error == error_code::INVALID_BASE64_CHARACTER || + r.error == error_code::BASE64_EXTRA_BITS) { + return r; + } else { + r.output_count += size_t(dst - dstinit); + } + if (last_chunk_options != stop_before_partial && + r.error == error_code::SUCCESS && equalsigns > 0) { + // additional checks + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + equalsigns != 4)) { + r.error = error_code::INVALID_BASE64_CHARACTER; + r.input_count = equallocation; + } + } + return r; + } + if (equalsigns > 0) { + if ((size_t(dst - dstinit) % 3 == 0) || + ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; + } + } + return {SUCCESS, srclen, size_t(dst - dstinit)}; } +/* end file src/haswell/avx2_base64.cpp */ -simdutf_warn_unused size_t implementation::utf8_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - const uint32x4_t v_7f = vmovq_n_u32((uint32_t)0x7f); - const uint32x4_t v_7ff = vmovq_n_u32((uint32_t)0x7ff); - const uint32x4_t v_ffff = vmovq_n_u32((uint32_t)0xffff); - const uint32x4_t v_1 = vmovq_n_u32((uint32_t)0x1); - size_t pos = 0; - size_t count = 0; - for (; pos + 4 <= length; pos += 4) { - uint32x4_t in = vld1q_u32(reinterpret_cast(input + pos)); - const uint32x4_t ascii_bytes_bytemask = vcleq_u32(in, v_7f); - const uint32x4_t one_two_bytes_bytemask = vcleq_u32(in, v_7ff); - const uint32x4_t two_bytes_bytemask = - veorq_u32(one_two_bytes_bytemask, ascii_bytes_bytemask); - const uint32x4_t three_bytes_bytemask = - veorq_u32(vcleq_u32(in, v_ffff), one_two_bytes_bytemask); +} // unnamed namespace +} // namespace haswell +} // namespace simdutf - const uint16x8_t reduced_ascii_bytes_bytemask = - vreinterpretq_u16_u32(vandq_u32(ascii_bytes_bytemask, v_1)); - const uint16x8_t reduced_two_bytes_bytemask = - vreinterpretq_u16_u32(vandq_u32(two_bytes_bytemask, v_1)); - const uint16x8_t reduced_three_bytes_bytemask = - vreinterpretq_u16_u32(vandq_u32(three_bytes_bytemask, v_1)); +/* begin file src/generic/buf_block_reader.h */ +namespace simdutf { +namespace haswell { +namespace { - const uint16x8_t compressed_bytemask0 = - vpaddq_u16(reduced_ascii_bytes_bytemask, reduced_two_bytes_bytemask); - const uint16x8_t compressed_bytemask1 = - vpaddq_u16(reduced_three_bytes_bytemask, reduced_three_bytes_bytemask); +// Walks through a buffer in block-sized increments, loading the last part with +// spaces +template struct buf_block_reader { +public: + simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdutf_really_inline size_t block_index(); + simdutf_really_inline bool has_full_block() const; + simdutf_really_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 + * (in which case this function fills the buffer with spaces and returns 0. In + * particular, if len == STEP_SIZE there will be 0 full_blocks and 1 remainder + * block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdutf_really_inline size_t get_remainder(uint8_t *dst) const; + simdutf_really_inline void advance(); - size_t ascii_count = count_ones( - vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask0), 0)); - size_t two_bytes_count = count_ones( - vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask0), 1)); - size_t three_bytes_count = count_ones( - vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask1), 0)); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; - count += 16 - 3 * ascii_count - 2 * two_bytes_count - three_bytes_count; +// Routines to print masks and text for debugging bitmask operations +simdutf_unused static char *format_input_text_64(const uint8_t *text) { + static char *buf = + reinterpret_cast(malloc(sizeof(simd8x64) + 1)); + for (size_t i = 0; i < sizeof(simd8x64); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); } - return count + - scalar::utf32::utf8_length_from_utf32(input + pos, length - pos); + buf[sizeof(simd8x64)] = '\0'; + return buf; } -simdutf_warn_unused size_t implementation::utf16_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - const uint32x4_t v_ffff = vmovq_n_u32((uint32_t)0xffff); - const uint32x4_t v_1 = vmovq_n_u32((uint32_t)0x1); - size_t pos = 0; - size_t count = 0; - for (; pos + 4 <= length; pos += 4) { - uint32x4_t in = vld1q_u32(reinterpret_cast(input + pos)); - const uint32x4_t surrogate_bytemask = vcgtq_u32(in, v_ffff); - const uint16x8_t reduced_bytemask = - vreinterpretq_u16_u32(vandq_u32(surrogate_bytemask, v_1)); - const uint16x8_t compressed_bytemask = - vpaddq_u16(reduced_bytemask, reduced_bytemask); - size_t surrogate_count = count_ones( - vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask), 0)); - count += 4 + surrogate_count; +// Routines to print masks and text for debugging bitmask operations +simdutf_unused static char *format_input_text(const simd8x64 &in) { + static char *buf = + reinterpret_cast(malloc(sizeof(simd8x64) + 1)); + in.store(reinterpret_cast(buf)); + for (size_t i = 0; i < sizeof(simd8x64); i++) { + if (buf[i] < ' ') { + buf[i] = '_'; + } } - return count + - scalar::utf32::utf16_length_from_utf32(input + pos, length - pos); -} - -simdutf_warn_unused size_t implementation::utf32_length_from_utf8( - const char *input, size_t length) const noexcept { - return utf8::count_code_points(input, length); -} - -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); + buf[sizeof(simd8x64)] = '\0'; + return buf; } -simdutf_warn_unused result implementation::base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - return (options & base64_url) - ? compress_decode_base64(output, input, length, options, - last_chunk_options) - : compress_decode_base64(output, input, length, options, - last_chunk_options); +simdutf_unused static char *format_mask(uint64_t mask) { + static char *buf = reinterpret_cast(malloc(64 + 1)); + for (size_t i = 0; i < 64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; } -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - return (options & base64_url) - ? compress_decode_base64(output, input, length, options, - last_chunk_options) - : compress_decode_base64(output, input, length, options, - last_chunk_options); -} +template +simdutf_really_inline +buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) + : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, + idx{0} {} -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); +template +simdutf_really_inline size_t buf_block_reader::block_index() { + return idx; } -simdutf_warn_unused result implementation::base64_to_binary( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - return (options & base64_url) - ? compress_decode_base64(output, input, length, options, - last_chunk_options) - : compress_decode_base64(output, input, length, options, - last_chunk_options); +template +simdutf_really_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; } -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - return (options & base64_url) - ? compress_decode_base64(output, input, length, options, - last_chunk_options) - : compress_decode_base64(output, input, length, options, - last_chunk_options); +template +simdutf_really_inline const uint8_t * +buf_block_reader::full_block() const { + return &buf[idx]; } -simdutf_warn_unused size_t implementation::base64_length_from_binary( - size_t length, base64_options options) const noexcept { - return scalar::base64::base64_length_from_binary(length, options); +template +simdutf_really_inline size_t +buf_block_reader::get_remainder(uint8_t *dst) const { + if (len == idx) { + return 0; + } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, + STEP_SIZE); // std::memset STEP_SIZE because it is more efficient + // to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; } -size_t implementation::binary_to_base64(const char *input, size_t length, - char *output, - base64_options options) const noexcept { - return encode_base64(output, input, length, options); +template +simdutf_really_inline void buf_block_reader::advance() { + idx += STEP_SIZE; } -} // namespace arm64 +} // unnamed namespace +} // namespace haswell } // namespace simdutf +/* end file src/generic/buf_block_reader.h */ +/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace utf8_validation { -/* begin file src/simdutf/arm64/end.h */ -/* end file src/simdutf/arm64/end.h */ -/* end file src/arm64/implementation.cpp */ -#endif -#if SIMDUTF_IMPLEMENTATION_FALLBACK -/* begin file src/fallback/implementation.cpp */ -/* begin file src/simdutf/fallback/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "fallback" -// #define SIMDUTF_IMPLEMENTATION fallback -/* end file src/simdutf/fallback/begin.h */ - - - - - +using namespace simd; +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, -#include -#include + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, -namespace simdutf { -namespace fallback { + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, -simdutf_warn_unused int -implementation::detect_encodings(const char *input, - size_t length) const noexcept { - // If there is a BOM, then we trust it. - auto bom_encoding = simdutf::BOM::check_bom(input, length); - if (bom_encoding != encoding_type::unspecified) { - return bom_encoding; - } - // todo: reimplement as a one-pass algorithm. - int out = 0; - if (validate_utf8(input, length)) { - out |= encoding_type::UTF8; - } - if ((length % 2) == 0) { - if (validate_utf16le(reinterpret_cast(input), - length / 2)) { - out |= encoding_type::UTF16_LE; - } - } - if ((length % 4) == 0) { - if (validate_utf32(reinterpret_cast(input), length / 4)) { - out |= encoding_type::UTF32_LE; - } - } - return out; -} + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, -simdutf_warn_unused bool -implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return scalar::utf8::validate(buf, len); + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); } - -simdutf_warn_unused result implementation::validate_utf8_with_errors( - const char *buf, size_t len) const noexcept { - return scalar::utf8::validate_with_errors(buf, len); +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; } -simdutf_warn_unused bool -implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return scalar::ascii::validate(buf, len); +// +// Return nonzero if there are incomplete multibyte characters at the end of the +// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. +// +simdutf_really_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they + // ended at EOF): + // ... 1111____ 111_____ 11______ + static const uint8_t max_array[32] = {255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 0b11110000u - 1, + 0b11100000u - 1, + 0b11000000u - 1}; + const simd8 max_value( + &max_array[sizeof(max_array) - sizeof(simd8)]); + return input.gt_bits(max_value); } -simdutf_warn_unused result implementation::validate_ascii_with_errors( - const char *buf, size_t len) const noexcept { - return scalar::ascii::validate_with_errors(buf, len); -} +struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast + // path) + simd8 prev_incomplete; -simdutf_warn_unused bool -implementation::validate_utf16le(const char16_t *buf, - size_t len) const noexcept { - return scalar::utf16::validate(buf, len); -} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } -simdutf_warn_unused bool -implementation::validate_utf16be(const char16_t *buf, - size_t len) const noexcept { - return scalar::utf16::validate(buf, len); -} + // The only problem that can happen at EOF is that a multibyte character is + // too short or a byte value too large in the last bytes: check_special_cases + // only checks for bytes too large in the first of two bytes. + simdutf_really_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an + // ASCII block can't possibly finish them. + this->error |= this->prev_incomplete; + } -simdutf_warn_unused result implementation::validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept { - return scalar::utf16::validate_with_errors(buf, len); -} + simdutf_really_inline void check_next_input(const simd8x64 &input) { + if (simdutf_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it + // is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = + is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; + } + } -simdutf_warn_unused result implementation::validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept { - return scalar::utf16::validate_with_errors(buf, len); -} + // do not forget to call check_eof! + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } -simdutf_warn_unused bool -implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { - return scalar::utf32::validate(buf, len); -} +}; // struct utf8_checker +} // namespace utf8_validation -simdutf_warn_unused result implementation::validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept { - return scalar::utf32::validate_with_errors(buf, len); -} +using utf8_validation::utf8_checker; -simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept { - return scalar::latin1_to_utf8::convert(buf, len, utf8_output); -} +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ +/* begin file src/generic/utf8_validation/utf8_validator.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace utf8_validation { -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::latin1_to_utf16::convert(buf, len, - utf16_output); +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t *input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return !c.errors(); } -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::latin1_to_utf16::convert(buf, len, - utf16_output); +bool generic_validate_utf8(const char *input, size_t length) { + return generic_validate_utf8( + reinterpret_cast(input), length); } -simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::latin1_to_utf32::convert(buf, len, utf32_output); +/** + * Validates that the string is actual UTF-8 and stops on errors. + */ +template +result generic_validate_utf8_with_errors(const uint8_t *input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + size_t count{0}; + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + if (c.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(input), + reinterpret_cast(input + count), length - count); + res.count += count; + return res; + } + reader.advance(); + count += 64; + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + if (c.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(input), + reinterpret_cast(input) + count, length - count); + res.count += count; + return res; + } else { + return result(error_code::SUCCESS, length); + } } -simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf8_to_latin1::convert(buf, len, latin1_output); +result generic_validate_utf8_with_errors(const char *input, size_t length) { + return generic_validate_utf8_with_errors( + reinterpret_cast(input), length); } -simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf8_to_latin1::convert_with_errors(buf, len, latin1_output); +template +bool generic_validate_ascii(const uint8_t *input, size_t length) { + buf_block_reader<64> reader(input, length); + uint8_t blocks[64]{}; + simd::simd8x64 running_or(blocks); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + running_or |= in; + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + running_or |= in; + return running_or.is_ascii(); } -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf8_to_latin1::convert_valid(buf, len, latin1_output); +bool generic_validate_ascii(const char *input, size_t length) { + return generic_validate_ascii( + reinterpret_cast(input), length); } -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf8_to_utf16::convert(buf, len, - utf16_output); -} +template +result generic_validate_ascii_with_errors(const uint8_t *input, size_t length) { + buf_block_reader<64> reader(input, length); + size_t count{0}; + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + if (!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); + } + reader.advance(); -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf8_to_utf16::convert(buf, len, - utf16_output); + count += 64; + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + if (!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); + } else { + return result(error_code::SUCCESS, length); + } } -simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf8_to_utf16::convert_with_errors( - buf, len, utf16_output); +result generic_validate_ascii_with_errors(const char *input, size_t length) { + return generic_validate_ascii_with_errors( + reinterpret_cast(input), length); } -simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf8_to_utf16::convert_with_errors( - buf, len, utf16_output); -} +} // namespace utf8_validation +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_validator.h */ +// transcoding from UTF-8 to UTF-16 +/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf8_to_utf16::convert_valid(buf, len, - utf16_output); -} +namespace simdutf { +namespace haswell { +namespace { +namespace utf8_to_utf16 { -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf8_to_utf16::convert_valid(buf, len, - utf16_output); -} +using namespace simd; -simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf8_to_utf32::convert(buf, len, utf32_output); +template +simdutf_warn_unused size_t convert_valid(const char *input, size_t size, + char16_t *utf16_output) noexcept { + // The implementation is not specific to haswell and should be moved to the + // generic directory. + size_t pos = 0; + char16_t *start{utf16_output}; + const size_t safety_margin = 16; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + // this loop could be unrolled further. For example, we could process the + // mask far more than 64 bytes. + simd8x64 in(reinterpret_cast(input + pos)); + if (in.is_ascii()) { + in.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // Slow path. We hope that the compiler will recognize that this is a slow + // path. Anything that is not a continuation mask is a 'leading byte', + // that is, the start of a new code point. + uint64_t utf8_continuation_mask = in.lt(-65 + 1); + // -65 is 0b10111111 in two-complement's, so largest possible continuation + // byte + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + // The *start* of code points is not so useful, rather, we want the *end* + // of code points. + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times when using solely + // the slow/regular path, and at least four times if there are fast paths. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + // + // Thus we may allow convert_masked_utf8_to_utf16 to process + // more bytes at a time under a fast-path mode where 16 bytes + // are consumed at once (e.g., when encountering ASCII). + size_t consumed = convert_masked_utf8_to_utf16( + input + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + utf16_output += scalar::utf8_to_utf16::convert_valid( + input + pos, size - pos, utf16_output); + return utf16_output - start; } -simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf8_to_utf32::convert_with_errors(buf, len, utf32_output); -} +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ +/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( - const char *input, size_t size, char32_t *utf32_output) const noexcept { - return scalar::utf8_to_utf32::convert_valid(input, size, utf32_output); -} +namespace simdutf { +namespace haswell { +namespace { +namespace utf8_to_utf16 { +using namespace simd; -simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf16_to_latin1::convert(buf, len, - latin1_output); -} +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ -simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf16_to_latin1::convert(buf, len, - latin1_output); -} + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, -simdutf_warn_unused result -implementation::convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf16_to_latin1::convert_with_errors( - buf, len, latin1_output); -} + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, -simdutf_warn_unused result -implementation::convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf16_to_latin1::convert_with_errors( - buf, len, latin1_output); -} + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf16_to_latin1::convert_valid( - buf, len, latin1_output); -} + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf16_to_latin1::convert_valid(buf, len, - latin1_output); + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); } - -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert(buf, len, - utf8_output); +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; } -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert(buf, len, utf8_output); -} +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; -simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert_with_errors( - buf, len, utf8_output); -} + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } -simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert_with_errors( - buf, len, utf8_output); -} + template + simdutf_really_inline size_t convert(const char *in, size_t size, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (utf8_continuation_mask & 1) { + return 0; // error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = scalar::utf8_to_utf16::convert( + in + pos, size - pos, utf16_output); + if (howmany == 0) { + return 0; + } + utf16_output += howmany; + } + return utf16_output - start; + } -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert_valid(buf, len, - utf8_output); -} + template + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (errors() || (utf8_continuation_mask & 1)) { + // rewind_and_convert_with_errors will seek a potential error from + // in+pos onward, with the ability to go back up to pos bytes, and + // read size-pos bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + res.count += pos; + return res; + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + res.count += pos; + return res; + } + if (pos < size) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + utf16_output += res.count; + } + } + return result(error_code::SUCCESS, utf16_output - start); + } -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert_valid(buf, len, - utf8_output); -} + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } -simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf32_to_latin1::convert(buf, len, latin1_output); -} +}; // struct utf8_checker +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ +// transcoding from UTF-8 to UTF-32 +/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ -simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf32_to_latin1::convert_with_errors(buf, len, latin1_output); -} +namespace simdutf { +namespace haswell { +namespace { +namespace utf8_to_utf32 { -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf32_to_latin1::convert_valid(buf, len, latin1_output); -} +using namespace simd; -simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf32_to_utf8::convert(buf, len, utf8_output); +simdutf_warn_unused size_t convert_valid(const char *input, size_t size, + char32_t *utf32_output) noexcept { + size_t pos = 0; + char32_t *start{utf32_output}; + const size_t safety_margin = 16; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 in(reinterpret_cast(input + pos)); + if (in.is_ascii()) { + in.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // -65 is 0b10111111 in two-complement's, so largest possible continuation + // byte + uint64_t utf8_continuation_mask = in.lt(-65 + 1); + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + size_t max_starting_point = (pos + 64) - 12; + while (pos < max_starting_point) { + size_t consumed = convert_masked_utf8_to_utf32( + input + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + } + } + utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, + utf32_output); + return utf32_output - start; } -simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf32_to_utf8::convert_with_errors(buf, len, utf8_output); -} +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf32_to_utf8::convert_valid(buf, len, utf8_output); -} +namespace simdutf { +namespace haswell { +namespace { +namespace utf8_to_utf32 { +using namespace simd; -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert(buf, len, - utf16_output); -} +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert(buf, len, - utf16_output); -} + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, -simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert_with_errors( - buf, len, utf16_output); -} + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, -simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert_with_errors( - buf, len, utf16_output); -} + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert_valid( - buf, len, utf16_output); -} + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert_valid(buf, len, - utf16_output); + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); } - -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert(buf, len, - utf32_output); +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; } -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert(buf, len, - utf32_output); -} +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; -simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert_with_errors( - buf, len, utf32_output); -} + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } -simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert_with_errors( - buf, len, utf32_output); -} + simdutf_really_inline size_t convert(const char *in, size_t size, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 words when calling convert_masked_utf8_to_utf32. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 16 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the fourth + // last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (utf8_continuation_mask & 1) { + return 0; // we have an error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = + scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output); + if (howmany == 0) { + return 0; + } + utf32_output += howmany; + } + return utf32_output - start; + } -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert_valid( - buf, len, utf32_output); -} + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the fourth + // last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (errors() || (utf8_continuation_mask & 1)) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + res.count += pos; + return res; + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + res.count += pos; + return res; + } + if (pos < size) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + utf32_output += res.count; + } + } + return result(error_code::SUCCESS, utf32_output - start); + } -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert_valid(buf, len, - utf32_output); -} + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } -void implementation::change_endianness_utf16(const char16_t *input, - size_t length, - char16_t *output) const noexcept { - scalar::utf16::change_endianness_utf16(input, length, output); -} +}; // struct utf8_checker +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ +// other functions +/* begin file src/generic/utf8.h */ -simdutf_warn_unused size_t implementation::count_utf16le( - const char16_t *input, size_t length) const noexcept { - return scalar::utf16::count_code_points(input, length); -} +namespace simdutf { +namespace haswell { +namespace { +namespace utf8 { -simdutf_warn_unused size_t implementation::count_utf16be( - const char16_t *input, size_t length) const noexcept { - return scalar::utf16::count_code_points(input, length); -} +using namespace simd; -simdutf_warn_unused size_t -implementation::count_utf8(const char *input, size_t length) const noexcept { - return scalar::utf8::count_code_points(input, length); +simdutf_really_inline size_t count_code_points(const char *in, size_t size) { + size_t pos = 0; + size_t count = 0; + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t utf8_continuation_mask = input.gt(-65); + count += count_ones(utf8_continuation_mask); + } + return count + scalar::utf8::count_code_points(in + pos, size - pos); } -simdutf_warn_unused size_t implementation::latin1_length_from_utf8( - const char *buf, size_t len) const noexcept { - return scalar::utf8::count_code_points(buf, len); +simdutf_really_inline size_t utf16_length_from_utf8(const char *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + // We count one word for anything that is not a continuation (so + // leading bytes). + count += 64 - count_ones(utf8_continuation_mask); + int64_t utf8_4byte = input.gteq_unsigned(240); + count += count_ones(utf8_4byte); + } + return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); } +} // namespace utf8 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf8.h */ +/* begin file src/generic/utf16.h */ +namespace simdutf { +namespace haswell { +namespace { +namespace utf16 { -simdutf_warn_unused size_t -implementation::latin1_length_from_utf16(size_t length) const noexcept { - return scalar::utf16::latin1_length_from_utf16(length); +template +simdutf_really_inline size_t count_code_points(const char16_t *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input(reinterpret_cast(in + pos)); + if (!match_system(big_endian)) { + input.swap_bytes(); + } + uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF); + count += count_ones(not_pair) / 2; + } + return count + + scalar::utf16::count_code_points(in + pos, size - pos); } -simdutf_warn_unused size_t -implementation::latin1_length_from_utf32(size_t length) const noexcept { - return length; -} +template +simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! + for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input(reinterpret_cast(in + pos)); + if (!match_system(big_endian)) { + input.swap_bytes(); + } + uint64_t ascii_mask = input.lteq(0x7F); + uint64_t twobyte_mask = input.lteq(0x7FF); + uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF); -simdutf_warn_unused size_t implementation::utf8_length_from_latin1( - const char *input, size_t length) const noexcept { - size_t answer = length; - size_t i = 0; - auto pop = [](uint64_t v) { - return (size_t)(((v >> 7) & UINT64_C(0x0101010101010101)) * - UINT64_C(0x0101010101010101) >> - 56); - }; - for (; i + 32 <= length; i += 32) { - uint64_t v; - memcpy(&v, input + i, 8); - answer += pop(v); - memcpy(&v, input + i + 8, sizeof(v)); - answer += pop(v); - memcpy(&v, input + i + 16, sizeof(v)); - answer += pop(v); - memcpy(&v, input + i + 24, sizeof(v)); - answer += pop(v); - } - for (; i + 8 <= length; i += 8) { - uint64_t v; - memcpy(&v, input + i, sizeof(v)); - answer += pop(v); - } - for (; i + 1 <= length; i += 1) { - answer += static_cast(input[i]) >> 7; + size_t ascii_count = count_ones(ascii_mask) / 2; + size_t twobyte_count = count_ones(twobyte_mask & ~ascii_mask) / 2; + size_t threebyte_count = count_ones(not_pair_mask & ~twobyte_mask) / 2; + size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2; + count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + + ascii_count; } - return answer; + return count + scalar::utf16::utf8_length_from_utf16(in + pos, + size - pos); } -simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return scalar::utf16::utf8_length_from_utf16(input, - length); +template +simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, + size_t size) { + return count_code_points(in, size); } -simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return scalar::utf16::utf8_length_from_utf16(input, length); -} +simdutf_really_inline void +change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { + size_t pos = 0; -simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return scalar::utf16::utf32_length_from_utf16(input, - length); -} + while (pos < size / 32 * 32) { + simd16x32 input(reinterpret_cast(in + pos)); + input.swap_bytes(); + input.store(reinterpret_cast(output)); + pos += 32; + output += 32; + } -simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return scalar::utf16::utf32_length_from_utf16(input, length); + scalar::utf16::change_endianness_utf16(in + pos, size - pos, output); } -simdutf_warn_unused size_t -implementation::utf16_length_from_latin1(size_t length) const noexcept { - return scalar::latin1::utf16_length_from_latin1(length); -} +} // namespace utf16 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf16.h */ -simdutf_warn_unused size_t implementation::utf16_length_from_utf8( - const char *input, size_t length) const noexcept { - return scalar::utf8::utf16_length_from_utf8(input, length); -} +// transcoding from UTF-8 to Latin 1 +/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ -simdutf_warn_unused size_t implementation::utf8_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - return scalar::utf32::utf8_length_from_utf32(input, length); -} +namespace simdutf { +namespace haswell { +namespace { +namespace utf8_to_latin1 { +using namespace simd; -simdutf_warn_unused size_t implementation::utf16_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - return scalar::utf32::utf16_length_from_utf32(input, length); -} +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // For UTF-8 to Latin 1, we can allow any ASCII character, and any + // continuation byte, but the non-ASCII leading bytes must be 0b11000011 or + // 0b11000010 and nothing else. + // + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + constexpr const uint8_t FORBIDDEN = 0xff; -simdutf_warn_unused size_t -implementation::utf32_length_from_latin1(size_t length) const noexcept { - return scalar::latin1::utf32_length_from_latin1(length); -} + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + FORBIDDEN, + // 1110____ ________ + FORBIDDEN, + // 1111____ ________ + FORBIDDEN); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, -simdutf_warn_unused size_t implementation::utf32_length_from_utf8( - const char *input, size_t length) const noexcept { - return scalar::utf8::count_code_points(input, length); -} + // ____0100 ________ + FORBIDDEN, + // ____0101 ________ + FORBIDDEN, + // ____011_ ________ + FORBIDDEN, FORBIDDEN, -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); + // ____1___ ________ + FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, + // ____1101 ________ + FORBIDDEN, FORBIDDEN, FORBIDDEN); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); } -simdutf_warn_unused result implementation::base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + this->error |= check_special_cases(input, prev1); } - size_t equallocation = - length; // location of the first padding character if any - size_t equalsigns = 0; - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - length -= 1; - equalsigns++; - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; + + simdutf_really_inline size_t convert(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 16 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 16; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... } - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - equalsigns++; - length -= 1; + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = + scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output); + if (howmany == 0) { + return 0; + } + latin1_output += howmany; } + return latin1_output - start; } - if (length == 0) { - if (equalsigns > 0) { - return {INVALID_BASE64_CHARACTER, equallocation}; + + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); } - return {SUCCESS, 0}; + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from + // in+pos onward, with the ability to go back up to pos bytes, and + // read size-pos bytes forward. + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + res.count += pos; + return res; + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + res.count += pos; + return res; + } + if (pos < size) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + latin1_output += res.count; + } + } + return result(error_code::SUCCESS, latin1_output - start); } - result r = scalar::base64::base64_tail_decode( - output, input, length, equalsigns, options, last_chunk_options); - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0) { - // additional checks - if ((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation}; + + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } + +}; // struct utf8_checker +} // namespace utf8_to_latin1 +} // unnamed namespace +} // namespace haswell +} // namespace simdutf +/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ +/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ + +namespace simdutf { +namespace haswell { +namespace { +namespace utf8_to_latin1 { +using namespace simd; + +simdutf_really_inline size_t convert_valid(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last + // 16 bytes, and if the data is valid, then it is entirely safe because 16 + // UTF-8 bytes generate much more than 8 bytes. However, you cannot generally + // assume that you have valid UTF-8 input, so we are going to go back from the + // end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, it + // is not good enough. + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. } } - return r; + if (pos < size) { + size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, + latin1_output); + latin1_output += howmany; + } + return latin1_output - start; } -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; +} // namespace utf8_to_latin1 +} // namespace +} // namespace haswell +} // namespace simdutf + // namespace simdutf +/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ + +namespace simdutf { +namespace haswell { + +simdutf_warn_unused int +implementation::detect_encodings(const char *input, + size_t length) const noexcept { + // If there is a BOM, then we trust it. + auto bom_encoding = simdutf::BOM::check_bom(input, length); + if (bom_encoding != encoding_type::unspecified) { + return bom_encoding; } - size_t equallocation = - length; // location of the first padding character if any - size_t equalsigns = 0; - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - length -= 1; - equalsigns++; - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - equalsigns++; - length -= 1; - } + int out = 0; + if (validate_utf8(input, length)) { + out |= encoding_type::UTF8; } - if (length == 0) { - if (equalsigns > 0) { - return {INVALID_BASE64_CHARACTER, equallocation, 0}; + if ((length % 2) == 0) { + if (validate_utf16le(reinterpret_cast(input), + length / 2)) { + out |= encoding_type::UTF16_LE; } - return {SUCCESS, 0, 0}; } - full_result r = scalar::base64::base64_tail_decode( - output, input, length, equalsigns, options, last_chunk_options); - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0) { - // additional checks - if ((r.output_count % 3 == 0) || - ((r.output_count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; + if ((length % 4) == 0) { + if (validate_utf32(reinterpret_cast(input), length / 4)) { + out |= encoding_type::UTF32_LE; } } - return r; + return out; } -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); +simdutf_warn_unused bool +implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return haswell::utf8_validation::generic_validate_utf8(buf, len); } -simdutf_warn_unused result implementation::base64_to_binary( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - size_t equallocation = - length; // location of the first padding character if any - size_t equalsigns = 0; - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - length -= 1; - equalsigns++; - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - equalsigns++; - length -= 1; - } - } - if (length == 0) { - if (equalsigns > 0) { - return {INVALID_BASE64_CHARACTER, equallocation}; - } - return {SUCCESS, 0}; - } - result r = scalar::base64::base64_tail_decode( - output, input, length, equalsigns, options, last_chunk_options); - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0) { - // additional checks - if ((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation}; - } - } - return r; +simdutf_warn_unused result implementation::validate_utf8_with_errors( + const char *buf, size_t len) const noexcept { + return haswell::utf8_validation::generic_validate_utf8_with_errors(buf, len); } -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; +simdutf_warn_unused bool +implementation::validate_ascii(const char *buf, size_t len) const noexcept { + return haswell::utf8_validation::generic_validate_ascii(buf, len); +} + +simdutf_warn_unused result implementation::validate_ascii_with_errors( + const char *buf, size_t len) const noexcept { + return haswell::utf8_validation::generic_validate_ascii_with_errors(buf, len); +} + +simdutf_warn_unused bool +implementation::validate_utf16le(const char16_t *buf, + size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid UTF-16. protect the implementation from + // handling nullptr + return true; } - size_t equallocation = - length; // location of the first padding character if any - size_t equalsigns = 0; - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - length -= 1; - equalsigns++; - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - equalsigns++; - length -= 1; - } + const char16_t *tail = avx2_validate_utf16(buf, len); + if (tail) { + return scalar::utf16::validate(tail, + len - (tail - buf)); + } else { + return false; } - if (length == 0) { - if (equalsigns > 0) { - return {INVALID_BASE64_CHARACTER, equallocation, 0}; - } - return {SUCCESS, 0, 0}; +} + +simdutf_warn_unused bool +implementation::validate_utf16be(const char16_t *buf, + size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid UTF-16. protect the implementation from + // handling nullptr + return true; } - full_result r = scalar::base64::base64_tail_decode( - output, input, length, equalsigns, options, last_chunk_options); - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0) { - // additional checks - if ((r.output_count % 3 == 0) || - ((r.output_count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; - } + const char16_t *tail = avx2_validate_utf16(buf, len); + if (tail) { + return scalar::utf16::validate(tail, len - (tail - buf)); + } else { + return false; } - return r; } -simdutf_warn_unused size_t implementation::base64_length_from_binary( - size_t length, base64_options options) const noexcept { - return scalar::base64::base64_length_from_binary(length, options); +simdutf_warn_unused result implementation::validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept { + result res = avx2_validate_utf16_with_errors(buf, len); + if (res.count != len) { + result scalar_res = scalar::utf16::validate_with_errors( + buf + res.count, len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } } -size_t implementation::binary_to_base64(const char *input, size_t length, - char *output, - base64_options options) const noexcept { - return scalar::base64::tail_encode_base64(output, input, length, options); +simdutf_warn_unused result implementation::validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept { + result res = avx2_validate_utf16_with_errors(buf, len); + if (res.count != len) { + result scalar_res = scalar::utf16::validate_with_errors( + buf + res.count, len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } } -} // namespace fallback -} // namespace simdutf - -/* begin file src/simdutf/fallback/end.h */ -/* end file src/simdutf/fallback/end.h */ -/* end file src/fallback/implementation.cpp */ -#endif -#if SIMDUTF_IMPLEMENTATION_ICELAKE -/* begin file src/icelake/implementation.cpp */ +simdutf_warn_unused bool +implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid UTF-32. protect the implementation from + // handling nullptr + return true; + } + const char32_t *tail = avx2_validate_utf32le(buf, len); + if (tail) { + return scalar::utf32::validate(tail, len - (tail - buf)); + } else { + return false; + } +} -/* begin file src/simdutf/icelake/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "icelake" -// #define SIMDUTF_IMPLEMENTATION icelake - -#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE -// nothing needed. -#else -SIMDUTF_TARGET_ICELAKE -#endif +simdutf_warn_unused result implementation::validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid UTF-32. protect the implementation from + // handling nullptr + return result(error_code::SUCCESS, 0); + } + result res = avx2_validate_utf32le_with_errors(buf, len); + if (res.count != len) { + result scalar_res = + scalar::utf32::validate_with_errors(buf + res.count, len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } +} -#if SIMDUTF_GCC11ORMORE // workaround for - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 -// clang-format off -SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) -// clang-format on -#endif // end of workaround -/* end file src/simdutf/icelake/begin.h */ -namespace simdutf { -namespace icelake { -namespace { -#ifndef SIMDUTF_ICELAKE_H - #error "icelake.h must be included" -#endif -/* begin file src/icelake/icelake_utf8_common.inl.cpp */ -// Common procedures for both validating and non-validating conversions from -// UTF-8. -enum block_processing_mode { SIMDUTF_FULL, SIMDUTF_TAIL }; +simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + avx2_convert_latin1_to_utf8(buf, len, utf8_output); + size_t converted_chars = ret.second - utf8_output; -using utf8_to_utf16_result = std::pair; -using utf8_to_utf32_result = std::pair; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } -/* - process_block_utf8_to_utf16 converts up to 64 bytes from 'in' from UTF-8 - to UTF-16. When tail = SIMDUTF_FULL, then the full input buffer (64 bytes) - might be used. When tail = SIMDUTF_TAIL, we take into account 'gap' which - indicates how many input bytes are relevant. + return converted_chars; +} - Returns true when the result is correct, otherwise it returns false. +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + avx2_convert_latin1_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t converted_chars = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = + scalar::latin1_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_converted_chars == 0) { + return 0; + } + converted_chars += scalar_converted_chars; + } + return converted_chars; +} - The provided in and out pointers are advanced according to how many input - bytes have been processed, upon success. -*/ -template -simdutf_really_inline bool -process_block_utf8_to_utf16(const char *&in, char16_t *&out, size_t gap) { - // constants - __m512i mask_identity = _mm512_set_epi8( - 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, - 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, - 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, - 8, 7, 6, 5, 4, 3, 2, 1, 0); - __m512i mask_c0c0c0c0 = _mm512_set1_epi32(0xc0c0c0c0); - __m512i mask_80808080 = _mm512_set1_epi32(0x80808080); - __m512i mask_f0f0f0f0 = _mm512_set1_epi32(0xf0f0f0f0); - __m512i mask_dfdfdfdf_tail = _mm512_set_epi64( - 0xffffdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, - 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, - 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf); - __m512i mask_c2c2c2c2 = _mm512_set1_epi32(0xc2c2c2c2); - __m512i mask_ffffffff = _mm512_set1_epi32(0xffffffff); - __m512i mask_d7c0d7c0 = _mm512_set1_epi32(0xd7c0d7c0); - __m512i mask_dc00dc00 = _mm512_set1_epi32(0xdc00dc00); - __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - // Note that 'tail' is a compile-time constant ! - __mmask64 b = - (tail == SIMDUTF_FULL) ? 0xFFFFFFFFFFFFFFFF : (uint64_t(1) << gap) - 1; - __m512i input = (tail == SIMDUTF_FULL) ? _mm512_loadu_si512(in) - : _mm512_maskz_loadu_epi8(b, in); - __mmask64 m1 = (tail == SIMDUTF_FULL) - ? _mm512_cmplt_epu8_mask(input, mask_80808080) - : _mm512_mask_cmplt_epu8_mask(b, input, mask_80808080); - if (_ktestc_mask64_u8(m1, - b)) { // NOT(m1) AND b -- if all zeroes, then all ASCII - // alternatively, we could do 'if (m1 == b) { ' - if (tail == SIMDUTF_FULL) { - in += 64; // consumed 64 bytes - // we convert a full 64-byte block, writing 128 bytes. - __m512i input1 = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(input)); - if (big_endian) { - input1 = _mm512_shuffle_epi8(input1, byteflip); - } - _mm512_storeu_si512(out, input1); - out += 32; - __m512i input2 = - _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(input, 1)); - if (big_endian) { - input2 = _mm512_shuffle_epi8(input2, byteflip); - } - _mm512_storeu_si512(out, input2); - out += 32; - return true; // we are done - } else { - in += gap; - if (gap <= 32) { - __m512i input1 = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(input)); - if (big_endian) { - input1 = _mm512_shuffle_epi8(input1, byteflip); - } - _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << (gap)) - 1), - input1); - out += gap; - } else { - __m512i input1 = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(input)); - if (big_endian) { - input1 = _mm512_shuffle_epi8(input1, byteflip); - } - _mm512_storeu_si512(out, input1); - out += 32; - __m512i input2 = - _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(input, 1)); - if (big_endian) { - input2 = _mm512_shuffle_epi8(input2, byteflip); - } - _mm512_mask_storeu_epi16( - out, __mmask32((uint32_t(1) << (gap - 32)) - 1), input2); - out += gap - 32; - } - return true; // we are done +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + avx2_convert_latin1_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t converted_chars = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = + scalar::latin1_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_converted_chars == 0) { + return 0; } + converted_chars += scalar_converted_chars; } - // classify characters further - __mmask64 m234 = _mm512_cmp_epu8_mask( - mask_c0c0c0c0, input, - _MM_CMPINT_LE); // 0xc0 <= input, 2, 3, or 4 leading byte - __mmask64 m34 = - _mm512_cmp_epu8_mask(mask_dfdfdfdf_tail, input, - _MM_CMPINT_LT); // 0xdf < input, 3 or 4 leading byte + return converted_chars; +} - __mmask64 milltwobytes = _mm512_mask_cmp_epu8_mask( - m234, input, mask_c2c2c2c2, - _MM_CMPINT_LT); // 0xc0 <= input < 0xc2 (illegal two byte sequence) - // Overlong 2-byte sequence - if (_ktestz_mask64_u8(milltwobytes, milltwobytes) == 0) { - // Overlong 2-byte sequence - return false; +simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + avx2_convert_latin1_to_utf32(buf, len, utf32_output); + if (ret.first == nullptr) { + return 0; } - if (_ktestz_mask64_u8(m34, m34) == 0) { - // We have a 3-byte sequence and/or a 2-byte sequence, or possibly even a - // 4-byte sequence! - __mmask64 m4 = _mm512_cmp_epu8_mask( - input, mask_f0f0f0f0, - _MM_CMPINT_NLT); // 0xf0 <= zmm0 (4 byte start bytes) + size_t converted_chars = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_converted_chars == 0) { + return 0; + } + converted_chars += scalar_converted_chars; + } + return converted_chars; +} - __mmask64 mask_not_ascii = (tail == SIMDUTF_FULL) - ? _knot_mask64(m1) - : _kand_mask64(_knot_mask64(m1), b); +simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + utf8_to_latin1::validating_transcoder converter; + return converter.convert(buf, len, latin1_output); +} - __mmask64 mp1 = _kshiftli_mask64(m234, 1); - __mmask64 mp2 = _kshiftli_mask64(m34, 2); - // We could do it as follows... - // if (_kortestz_mask64_u8(m4,m4)) { // compute the bitwise OR of the 64-bit - // masks a and b and return 1 if all zeroes but GCC generates better code - // when we do: - if (m4 == 0) { // compute the bitwise OR of the 64-bit masks a and b and - // return 1 if all zeroes - // Fast path with 1,2,3 bytes - __mmask64 mc = _kor_mask64(mp1, mp2); // expected continuation bytes - __mmask64 m1234 = _kor_mask64(m1, m234); - // mismatched continuation bytes: - if (tail == SIMDUTF_FULL) { - __mmask64 xnormcm1234 = _kxnor_mask64( - mc, - m1234); // XNOR of mc and m1234 should be all zero if they differ - // the presence of a 1 bit indicates that they overlap. - // _kortestz_mask64_u8: compute the bitwise OR of 64-bit masksand return - // 1 if all zeroes. - if (!_kortestz_mask64_u8(xnormcm1234, xnormcm1234)) { - return false; - } - } else { - __mmask64 bxorm1234 = _kxor_mask64(b, m1234); - if (mc != bxorm1234) { - return false; - } - } - // mend: identifying the last bytes of each sequence to be decoded - __mmask64 mend = _kshiftri_mask64(m1234, 1); - if (tail != SIMDUTF_FULL) { - mend = _kor_mask64(mend, (uint64_t(1) << (gap - 1))); - } +simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_output) const noexcept { + utf8_to_latin1::validating_transcoder converter; + return converter.convert_with_errors(buf, len, latin1_output); +} - __m512i last_and_third = _mm512_maskz_compress_epi8(mend, mask_identity); - __m512i last_and_thirdu16 = - _mm512_cvtepu8_epi16(_mm512_castsi512_si256(last_and_third)); +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( + const char *input, size_t size, char *latin1_output) const noexcept { + return utf8_to_latin1::convert_valid(input, size, latin1_output); +} - __m512i nonasciitags = _mm512_maskz_mov_epi8( - mask_not_ascii, mask_c0c0c0c0); // ASCII: 00000000 other: 11000000 - __m512i clearedbytes = _mm512_andnot_si512( - nonasciitags, input); // high two bits cleared where not ASCII - __m512i lastbytes = _mm512_maskz_permutexvar_epi8( - 0x5555555555555555, last_and_thirdu16, - clearedbytes); // the last byte of each character +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert(buf, len, utf16_output); +} - __mmask64 mask_before_non_ascii = _kshiftri_mask64( - mask_not_ascii, 1); // bytes that precede non-ASCII bytes - __m512i indexofsecondlastbytes = _mm512_add_epi16( - mask_ffffffff, last_and_thirdu16); // indices of the second last bytes - __m512i beforeasciibytes = - _mm512_maskz_mov_epi8(mask_before_non_ascii, clearedbytes); - __m512i secondlastbytes = _mm512_maskz_permutexvar_epi8( - 0x5555555555555555, indexofsecondlastbytes, - beforeasciibytes); // the second last bytes (of two, three byte seq, - // surrogates) - secondlastbytes = - _mm512_slli_epi16(secondlastbytes, 6); // shifted into position +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert(buf, len, utf16_output); +} - __m512i indexofthirdlastbytes = _mm512_add_epi16( - mask_ffffffff, - indexofsecondlastbytes); // indices of the second last bytes - __m512i thirdlastbyte = - _mm512_maskz_mov_epi8(m34, - clearedbytes); // only those that are the third - // last byte of a sequence - __m512i thirdlastbytes = _mm512_maskz_permutexvar_epi8( - 0x5555555555555555, indexofthirdlastbytes, - thirdlastbyte); // the third last bytes (of three byte sequences, hi - // surrogate) - thirdlastbytes = - _mm512_slli_epi16(thirdlastbytes, 12); // shifted into position - __m512i Wout = _mm512_ternarylogic_epi32(lastbytes, secondlastbytes, - thirdlastbytes, 254); - // the elements of Wout excluding the last element if it happens to be a - // high surrogate: +simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert_with_errors(buf, len, + utf16_output); +} - __mmask64 mprocessed = - (tail == SIMDUTF_FULL) - ? _pdep_u64(0xFFFFFFFF, mend) - : _pdep_u64( - 0xFFFFFFFF, - _kand_mask64( - mend, b)); // we adjust mend at the end of the output. +simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert_with_errors(buf, len, utf16_output); +} - // Encodings out of range... - { - // the location of 3-byte sequence start bytes in the input - __mmask64 m3 = m34 & (b ^ m4); - // code units in Wout corresponding to 3-byte sequences. - __mmask32 M3 = __mmask32(_pext_u64(m3 << 2, mend)); - __m512i mask_08000800 = _mm512_set1_epi32(0x08000800); - __mmask32 Msmall800 = - _mm512_mask_cmplt_epu16_mask(M3, Wout, mask_08000800); - __m512i mask_d800d800 = _mm512_set1_epi32(0xd800d800); - __m512i Moutminusd800 = _mm512_sub_epi16(Wout, mask_d800d800); - __mmask32 M3s = - _mm512_mask_cmplt_epu16_mask(M3, Moutminusd800, mask_08000800); - if (_kor_mask32(Msmall800, M3s)) { - return false; - } - } - int64_t nout = _mm_popcnt_u64(mprocessed); - in += 64 - _lzcnt_u64(mprocessed); - if (big_endian) { - Wout = _mm512_shuffle_epi8(Wout, byteflip); - } - _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), Wout); - out += nout; - return true; // ok - } - // - // We have a 4-byte sequence, this is the general case. - // Slow! - __mmask64 mp3 = _kshiftli_mask64(m4, 3); - __mmask64 mc = - _kor_mask64(_kor_mask64(mp1, mp2), mp3); // expected continuation bytes - __mmask64 m1234 = _kor_mask64(m1, m234); +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( + const char *input, size_t size, char16_t *utf16_output) const noexcept { + return utf8_to_utf16::convert_valid(input, size, + utf16_output); +} - // mend: identifying the last bytes of each sequence to be decoded - __mmask64 mend = - _kor_mask64(_kshiftri_mask64(_kor_mask64(mp3, m1234), 1), mp3); - if (tail != SIMDUTF_FULL) { - mend = _kor_mask64(mend, __mmask64(uint64_t(1) << (gap - 1))); - } - __m512i last_and_third = _mm512_maskz_compress_epi8(mend, mask_identity); - __m512i last_and_thirdu16 = - _mm512_cvtepu8_epi16(_mm512_castsi512_si256(last_and_third)); +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( + const char *input, size_t size, char16_t *utf16_output) const noexcept { + return utf8_to_utf16::convert_valid(input, size, + utf16_output); +} - __m512i nonasciitags = _mm512_maskz_mov_epi8( - mask_not_ascii, mask_c0c0c0c0); // ASCII: 00000000 other: 11000000 - __m512i clearedbytes = _mm512_andnot_si512( - nonasciitags, input); // high two bits cleared where not ASCII - __m512i lastbytes = _mm512_maskz_permutexvar_epi8( - 0x5555555555555555, last_and_thirdu16, - clearedbytes); // the last byte of each character +simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + utf8_to_utf32::validating_transcoder converter; + return converter.convert(buf, len, utf32_output); +} - __mmask64 mask_before_non_ascii = _kshiftri_mask64( - mask_not_ascii, 1); // bytes that precede non-ASCII bytes - __m512i indexofsecondlastbytes = _mm512_add_epi16( - mask_ffffffff, last_and_thirdu16); // indices of the second last bytes - __m512i beforeasciibytes = - _mm512_maskz_mov_epi8(mask_before_non_ascii, clearedbytes); - __m512i secondlastbytes = _mm512_maskz_permutexvar_epi8( - 0x5555555555555555, indexofsecondlastbytes, - beforeasciibytes); // the second last bytes (of two, three byte seq, - // surrogates) - secondlastbytes = - _mm512_slli_epi16(secondlastbytes, 6); // shifted into position +simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + utf8_to_utf32::validating_transcoder converter; + return converter.convert_with_errors(buf, len, utf32_output); +} - __m512i indexofthirdlastbytes = _mm512_add_epi16( - mask_ffffffff, - indexofsecondlastbytes); // indices of the second last bytes - __m512i thirdlastbyte = _mm512_maskz_mov_epi8( - m34, - clearedbytes); // only those that are the third last byte of a sequence - __m512i thirdlastbytes = _mm512_maskz_permutexvar_epi8( - 0x5555555555555555, indexofthirdlastbytes, - thirdlastbyte); // the third last bytes (of three byte sequences, hi - // surrogate) - thirdlastbytes = - _mm512_slli_epi16(thirdlastbytes, 12); // shifted into position - __m512i thirdsecondandlastbytes = _mm512_ternarylogic_epi32( - lastbytes, secondlastbytes, thirdlastbytes, 254); - uint64_t Mlo_uint64 = _pext_u64(mp3, mend); - __mmask32 Mlo = __mmask32(Mlo_uint64); - __mmask32 Mhi = __mmask32(Mlo_uint64 >> 1); - __m512i lo_surr_mask = _mm512_maskz_mov_epi16( - Mlo, - mask_dc00dc00); // lo surr: 1101110000000000, other: 0000000000000000 - __m512i shifted4_thirdsecondandlastbytes = - _mm512_srli_epi16(thirdsecondandlastbytes, - 4); // hi surr: 00000WVUTSRQPNML vuts = WVUTS - 1 - __m512i tagged_lo_surrogates = _mm512_or_si512( - thirdsecondandlastbytes, - lo_surr_mask); // lo surr: 110111KJHGFEDCBA, other: unchanged - __m512i Wout = _mm512_mask_add_epi16( - tagged_lo_surrogates, Mhi, shifted4_thirdsecondandlastbytes, - mask_d7c0d7c0); // hi sur: 110110vutsRQPNML, other: unchanged - // the elements of Wout excluding the last element if it happens to be a - // high surrogate: - __mmask32 Mout = ~(Mhi & 0x80000000); - __mmask64 mprocessed = - (tail == SIMDUTF_FULL) - ? _pdep_u64(Mout, mend) - : _pdep_u64( - Mout, - _kand_mask64(mend, - b)); // we adjust mend at the end of the output. +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( + const char *input, size_t size, char32_t *utf32_output) const noexcept { + return utf8_to_utf32::convert_valid(input, size, utf32_output); +} - // mismatched continuation bytes: - if (tail == SIMDUTF_FULL) { - __mmask64 xnormcm1234 = _kxnor_mask64( - mc, m1234); // XNOR of mc and m1234 should be all zero if they differ - // the presence of a 1 bit indicates that they overlap. - // _kortestz_mask64_u8: compute the bitwise OR of 64-bit masksand return 1 - // if all zeroes. - if (!_kortestz_mask64_u8(xnormcm1234, xnormcm1234)) { - return false; - } - } else { - __mmask64 bxorm1234 = _kxor_mask64(b, m1234); - if (mc != bxorm1234) { - return false; - } - } - // Encodings out of range... - { - // the location of 3-byte sequence start bytes in the input - __mmask64 m3 = m34 & (b ^ m4); - // code units in Wout corresponding to 3-byte sequences. - __mmask32 M3 = __mmask32(_pext_u64(m3 << 2, mend)); - __m512i mask_08000800 = _mm512_set1_epi32(0x08000800); - __mmask32 Msmall800 = - _mm512_mask_cmplt_epu16_mask(M3, Wout, mask_08000800); - __m512i mask_d800d800 = _mm512_set1_epi32(0xd800d800); - __m512i Moutminusd800 = _mm512_sub_epi16(Wout, mask_d800d800); - __mmask32 M3s = - _mm512_mask_cmplt_epu16_mask(M3, Moutminusd800, mask_08000800); - __m512i mask_04000400 = _mm512_set1_epi32(0x04000400); - __mmask32 M4s = - _mm512_mask_cmpge_epu16_mask(Mhi, Moutminusd800, mask_04000400); - if (!_kortestz_mask32_u8(M4s, _kor_mask32(Msmall800, M3s))) { - return false; - } +simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + haswell::avx2_convert_utf16_to_latin1(buf, len, + latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; } - in += 64 - _lzcnt_u64(mprocessed); - int64_t nout = _mm_popcnt_u64(mprocessed); - if (big_endian) { - Wout = _mm512_shuffle_epi8(Wout, byteflip); + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + haswell::avx2_convert_utf16_to_latin1(buf, len, + latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; } - _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), Wout); - out += nout; - return true; // ok + saved_bytes += scalar_saved_bytes; } - // Fast path 2: all ASCII or 2 byte - __mmask64 continuation_or_ascii = (tail == SIMDUTF_FULL) - ? _knot_mask64(m234) - : _kand_mask64(_knot_mask64(m234), b); - // on top of -0xc0 we subtract -2 which we get back later of the - // continuation byte tags - __m512i leading2byte = _mm512_maskz_sub_epi8(m234, input, mask_c2c2c2c2); - __mmask64 leading = tail == (tail == SIMDUTF_FULL) - ? _kor_mask64(m1, m234) - : _kand_mask64(_kor_mask64(m1, m234), - b); // first bytes of each sequence - if (tail == SIMDUTF_FULL) { - __mmask64 xnor234leading = - _kxnor_mask64(_kshiftli_mask64(m234, 1), leading); - if (!_kortestz_mask64_u8(xnor234leading, xnor234leading)) { - return false; + return saved_bytes; +} + +simdutf_warn_unused result +implementation::convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + avx2_convert_utf16_to_latin1_with_errors( + buf, len, latin1_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; } - } else { - __mmask64 bxorleading = _kxor_mask64(b, leading); - if (_kshiftli_mask64(m234, 1) != bxorleading) { - return false; + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused result +implementation::convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + avx2_convert_utf16_to_latin1_with_errors(buf, len, + latin1_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; } } - // - if (tail == SIMDUTF_FULL) { - // In the two-byte/ASCII scenario, we are easily latency bound, so we want - // to increment the input buffer as quickly as possible. - // We process 32 bytes unless the byte at index 32 is a continuation byte, - // in which case we include it as well for a total of 33 bytes. - // Note that if x is an ASCII byte, then the following is false: - // int8_t(x) <= int8_t(0xc0) under two's complement. - in += 32; - if (int8_t(*in) <= int8_t(0xc0)) - in++; - // The alternative is to do - // in += 64 - _lzcnt_u64(_pdep_u64(0xFFFFFFFF, continuation_or_ascii)); - // but it requires loading the input, doing the mask computation, and - // converting back the mask to a general register. It just takes too long, - // leaving the processor likely to be idle. - } else { - in += 64 - _lzcnt_u64(_pdep_u64(0xFFFFFFFF, continuation_or_ascii)); + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: implement a custom function + return convert_utf16be_to_latin1(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: implement a custom function + return convert_utf16le_to_latin1(buf, len, latin1_output); +} + +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + haswell::avx2_convert_utf16_to_utf8(buf, len, + utf8_output); + if (ret.first == nullptr) { + return 0; } - __m512i lead = _mm512_maskz_compress_epi8( - leading, leading2byte); // will contain zero for ascii, and the data - lead = _mm512_cvtepu8_epi16( - _mm512_castsi512_si256(lead)); // ... zero extended into code units - __m512i follow = _mm512_maskz_compress_epi8( - continuation_or_ascii, input); // the last bytes of each sequence - follow = _mm512_cvtepu8_epi16( - _mm512_castsi512_si256(follow)); // ... zero extended into code units - lead = _mm512_slli_epi16(lead, 6); // shifted into position - __m512i final = _mm512_add_epi16(follow, lead); // combining lead and follow + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} - if (big_endian) { - final = _mm512_shuffle_epi8(final, byteflip); +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + haswell::avx2_convert_utf16_to_utf8(buf, len, + utf8_output); + if (ret.first == nullptr) { + return 0; } - if (tail == SIMDUTF_FULL) { - // Next part is UTF-16 specific and can be generalized to UTF-32. - int nout = _mm_popcnt_u32(uint32_t(leading)); - _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), final); - out += nout; // UTF-8 to UTF-16 is only expansionary in this case. - } else { - int nout = int(_mm_popcnt_u64(_pdep_u64(0xFFFFFFFF, leading))); - _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), final); - out += nout; // UTF-8 to UTF-16 is only expansionary in this case. + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; } + return saved_bytes; +} - return true; // we are fine. +simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + haswell::avx2_convert_utf16_to_utf8_with_errors( + buf, len, utf8_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; } -/* - utf32_to_utf16_masked converts `count` lower UTF-32 code units - from input `utf32` into UTF-16. It differs from utf32_to_utf16 - in that it 'masks' the writes. +simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + haswell::avx2_convert_utf16_to_utf8_with_errors( + buf, len, utf8_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; +} - Returns how many 16-bit code units were stored. +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf16le_to_utf8(buf, len, utf8_output); +} - byteflip is used for flipping 16-bit code units, and it should be - __m512i byteflip = _mm512_setr_epi64( - 0x0607040502030001, - 0x0e0f0c0d0a0b0809, - 0x0607040502030001, - 0x0e0f0c0d0a0b0809, - 0x0607040502030001, - 0x0e0f0c0d0a0b0809, - 0x0607040502030001, - 0x0e0f0c0d0a0b0809 - ); - We pass it to the (always inlined) function to encourage the compiler to - keep the value in a (constant) register. -*/ -template -simdutf_really_inline size_t utf32_to_utf16_masked(const __m512i byteflip, - __m512i utf32, - unsigned int count, - char16_t *output) { +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf16be_to_utf8(buf, len, utf8_output); +} - const __mmask16 valid = uint16_t((1 << count) - 1); - // 1. check if we have any surrogate pairs - const __m512i v_0000_ffff = _mm512_set1_epi32(0x0000ffff); - const __mmask16 sp_mask = - _mm512_mask_cmpgt_epu32_mask(valid, utf32, v_0000_ffff); +simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + avx2_convert_utf32_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} - if (sp_mask == 0) { - if (big_endian) { - _mm256_mask_storeu_epi16( - (__m256i *)output, valid, - _mm256_shuffle_epi8(_mm512_cvtepi32_epi16(utf32), - _mm512_castsi512_si256(byteflip))); +simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + avx2_convert_utf32_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} +simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + avx2_convert_utf32_to_latin1_with_errors(buf, len, latin1_output); + if (ret.first.count != len) { + result scalar_res = scalar::utf32_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; } else { - _mm256_mask_storeu_epi16((__m256i *)output, valid, - _mm512_cvtepi32_epi16(utf32)); + ret.second += scalar_res.count; } - return count; } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} - { - // build surrogate pair code units in 32-bit lanes - - // t0 = 8 x [000000000000aaaa|aaaaaabbbbbbbbbb] - const __m512i v_0001_0000 = _mm512_set1_epi32(0x00010000); - const __m512i t0 = _mm512_sub_epi32(utf32, v_0001_0000); - - // t1 = 8 x [000000aaaaaaaaaa|bbbbbbbbbb000000] - const __m512i t1 = _mm512_slli_epi32(t0, 6); - - // t2 = 8 x [000000aaaaaaaaaa|aaaaaabbbbbbbbbb] -- copy hi word from t1 - // to t0 - // 0xe4 = (t1 and v_ffff_0000) or (t0 and not v_ffff_0000) - const __m512i v_ffff_0000 = _mm512_set1_epi32(0xffff0000); - const __m512i t2 = _mm512_ternarylogic_epi32(t1, t0, v_ffff_0000, 0xe4); +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + return convert_utf32_to_latin1(buf, len, latin1_output); +} - // t2 = 8 x [110110aaaaaaaaaa|110111bbbbbbbbbb] -- copy hi word from t1 - // to t0 - // 0xba = (t2 and not v_fc00_fc000) or v_d800_dc00 - const __m512i v_fc00_fc00 = _mm512_set1_epi32(0xfc00fc00); - const __m512i v_d800_dc00 = _mm512_set1_epi32(0xd800dc00); - const __m512i t3 = - _mm512_ternarylogic_epi32(t2, v_fc00_fc00, v_d800_dc00, 0xba); - const __m512i t4 = _mm512_mask_blend_epi32(sp_mask, utf32, t3); - __m512i t5 = _mm512_ror_epi32(t4, 16); - // Here we want to trim all of the upper 16-bit code units from the 2-byte - // characters represented as 4-byte values. We can compute it from - // sp_mask or the following... It can be more optimized! - const __mmask32 nonzero = _kor_mask32( - 0xaaaaaaaa, _mm512_cmpneq_epi16_mask(t5, _mm512_setzero_si512())); - const __mmask32 nonzero_masked = - _kand_mask32(nonzero, __mmask32((uint64_t(1) << (2 * count)) - 1)); - if (big_endian) { - t5 = _mm512_shuffle_epi8(t5, byteflip); +simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + haswell::avx2_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); + if (ret.first.count != len) { + result scalar_res = scalar::utf32_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; } - // we deliberately avoid _mm512_mask_compressstoreu_epi16 for portability - // (zen4) - __m512i compressed = _mm512_maskz_compress_epi16(nonzero_masked, t5); - _mm512_mask_storeu_epi16( - output, - (1 << (count + static_cast(count_ones(sp_mask)))) - 1, - compressed); - //_mm512_mask_compressstoreu_epi16(output, nonzero_masked, t5); } - - return count + static_cast(count_ones(sp_mask)); + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; } -/* - utf32_to_utf16 converts `count` lower UTF-32 code units - from input `utf32` into UTF-16. It may overflow. +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + haswell::avx2_convert_utf16_to_utf32(buf, len, + utf32_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} - Returns how many 16-bit code units were stored. +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + haswell::avx2_convert_utf16_to_utf32(buf, len, + utf32_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} - byteflip is used for flipping 16-bit code units, and it should be - __m512i byteflip = _mm512_setr_epi64( - 0x0607040502030001, - 0x0e0f0c0d0a0b0809, - 0x0607040502030001, - 0x0e0f0c0d0a0b0809, - 0x0607040502030001, - 0x0e0f0c0d0a0b0809, - 0x0607040502030001, - 0x0e0f0c0d0a0b0809 - ); - We pass it to the (always inlined) function to encourage the compiler to - keep the value in a (constant) register. -*/ -template -simdutf_really_inline size_t utf32_to_utf16(const __m512i byteflip, - __m512i utf32, unsigned int count, - char16_t *output) { - // check if we have any surrogate pairs - const __m512i v_0000_ffff = _mm512_set1_epi32(0x0000ffff); - const __mmask16 sp_mask = _mm512_cmpgt_epu32_mask(utf32, v_0000_ffff); +simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + haswell::avx2_convert_utf16_to_utf32_with_errors( + buf, len, utf32_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf32_output; // Set count to the number of 8-bit code units written + return ret.first; +} - if (sp_mask == 0) { - // technically, it should be _mm256_storeu_epi16 - if (big_endian) { - _mm256_storeu_si256( - (__m256i *)output, - _mm256_shuffle_epi8(_mm512_cvtepi32_epi16(utf32), - _mm512_castsi512_si256(byteflip))); +simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + haswell::avx2_convert_utf16_to_utf32_with_errors( + buf, len, utf32_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; } else { - _mm256_storeu_si256((__m256i *)output, _mm512_cvtepi32_epi16(utf32)); + ret.second += scalar_res.count; } - return count; } + ret.first.count = + ret.second - + utf32_output; // Set count to the number of 8-bit code units written + return ret.first; +} - { - // build surrogate pair code units in 32-bit lanes +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf32_to_utf8(buf, len, utf8_output); +} - // t0 = 8 x [000000000000aaaa|aaaaaabbbbbbbbbb] - const __m512i v_0001_0000 = _mm512_set1_epi32(0x00010000); - const __m512i t0 = _mm512_sub_epi32(utf32, v_0001_0000); +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + avx2_convert_utf32_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf32_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} - // t1 = 8 x [000000aaaaaaaaaa|bbbbbbbbbb000000] - const __m512i t1 = _mm512_slli_epi32(t0, 6); +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + avx2_convert_utf32_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf32_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} - // t2 = 8 x [000000aaaaaaaaaa|aaaaaabbbbbbbbbb] -- copy hi word from t1 - // to t0 - // 0xe4 = (t1 and v_ffff_0000) or (t0 and not v_ffff_0000) - const __m512i v_ffff_0000 = _mm512_set1_epi32(0xffff0000); - const __m512i t2 = _mm512_ternarylogic_epi32(t1, t0, v_ffff_0000, 0xe4); +simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + haswell::avx2_convert_utf32_to_utf16_with_errors( + buf, len, utf16_output); + if (ret.first.count != len) { + result scalar_res = + scalar::utf32_to_utf16::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf16_output; // Set count to the number of 8-bit code units written + return ret.first; +} - // t2 = 8 x [110110aaaaaaaaaa|110111bbbbbbbbbb] -- copy hi word from t1 - // to t0 - // 0xba = (t2 and not v_fc00_fc000) or v_d800_dc00 - const __m512i v_fc00_fc00 = _mm512_set1_epi32(0xfc00fc00); - const __m512i v_d800_dc00 = _mm512_set1_epi32(0xd800dc00); - const __m512i t3 = - _mm512_ternarylogic_epi32(t2, v_fc00_fc00, v_d800_dc00, 0xba); - const __m512i t4 = _mm512_mask_blend_epi32(sp_mask, utf32, t3); - __m512i t5 = _mm512_ror_epi32(t4, 16); - const __mmask32 nonzero = _kor_mask32( - 0xaaaaaaaa, _mm512_cmpneq_epi16_mask(t5, _mm512_setzero_si512())); - if (big_endian) { - t5 = _mm512_shuffle_epi8(t5, byteflip); +simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + haswell::avx2_convert_utf32_to_utf16_with_errors( + buf, len, utf16_output); + if (ret.first.count != len) { + result scalar_res = + scalar::utf32_to_utf16::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; } - // we deliberately avoid _mm512_mask_compressstoreu_epi16 for portability - // (zen4) - __m512i compressed = _mm512_maskz_compress_epi16(nonzero, t5); - _mm512_mask_storeu_epi16( - output, - (1 << (count + static_cast(count_ones(sp_mask)))) - 1, - compressed); - //_mm512_mask_compressstoreu_epi16(output, nonzero, t5); } + ret.first.count = + ret.second - + utf16_output; // Set count to the number of 8-bit code units written + return ret.first; +} - return count + static_cast(count_ones(sp_mask)); +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return convert_utf32_to_utf16le(buf, len, utf16_output); } -/** - * Store the last N bytes of previous followed by 512-N bytes from input. - */ -template __m512i prev(__m512i input, __m512i previous) { - static_assert(N <= 32, "N must be no larger than 32"); - const __m512i movemask = - _mm512_setr_epi32(28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); - const __m512i rotated = _mm512_permutex2var_epi32(input, movemask, previous); -#if SIMDUTF_GCC8 || SIMDUTF_GCC9 - constexpr int shift = 16 - N; // workaround for GCC8,9 - return _mm512_alignr_epi8(input, rotated, shift); -#else - return _mm512_alignr_epi8(input, rotated, 16 - N); -#endif // SIMDUTF_GCC8 || SIMDUTF_GCC9 +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return convert_utf32_to_utf16be(buf, len, utf16_output); } -template -__m512i shuffle_epi128(__m512i v) { - static_assert((idx0 >= 0 && idx0 <= 3), "idx0 must be in range 0..3"); - static_assert((idx1 >= 0 && idx1 <= 3), "idx1 must be in range 0..3"); - static_assert((idx2 >= 0 && idx2 <= 3), "idx2 must be in range 0..3"); - static_assert((idx3 >= 0 && idx3 <= 3), "idx3 must be in range 0..3"); +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_utf16le_to_utf32(buf, len, utf32_output); +} - constexpr unsigned shuffle = idx0 | (idx1 << 2) | (idx2 << 4) | (idx3 << 6); - return _mm512_shuffle_i32x4(v, v, shuffle); +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_utf16be_to_utf32(buf, len, utf32_output); } -template constexpr __m512i broadcast_epi128(__m512i v) { - return shuffle_epi128(v); +void implementation::change_endianness_utf16(const char16_t *input, + size_t length, + char16_t *output) const noexcept { + utf16::change_endianness_utf16(input, length, output); } -/** - * Current unused. - */ -template __m512i rotate_by_N_epi8(const __m512i input) { +simdutf_warn_unused size_t implementation::count_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::count_code_points(input, length); +} - // lanes order: 1, 2, 3, 0 => 0b00_11_10_01 - const __m512i permuted = _mm512_shuffle_i32x4(input, input, 0x39); +simdutf_warn_unused size_t implementation::count_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::count_code_points(input, length); +} - return _mm512_alignr_epi8(permuted, input, N); +simdutf_warn_unused size_t +implementation::count_utf8(const char *input, size_t length) const noexcept { + return utf8::count_code_points(input, length); } -/* - expanded_utf8_to_utf32 converts expanded UTF-8 characters (`utf8`) - stored at separate 32-bit lanes. +simdutf_warn_unused size_t implementation::latin1_length_from_utf8( + const char *buf, size_t len) const noexcept { + return count_utf8(buf, len); +} - For each lane we have also a character class (`char_class), given in form - 0x8080800N, where N is 4 highest bits from the leading byte; 0x80 resets - corresponding bytes during pshufb. -*/ -simdutf_really_inline __m512i expanded_utf8_to_utf32(__m512i char_class, - __m512i utf8) { - /* - Input: - - utf8: bytes stored at separate 32-bit code units - - valid: which code units have valid UTF-8 characters +simdutf_warn_unused size_t +implementation::latin1_length_from_utf16(size_t length) const noexcept { + return scalar::utf16::latin1_length_from_utf16(length); +} - Bit layout of single word. We show 4 cases for each possible - UTF-8 character encoding. The `?` denotes bits we must not - assume their value. +simdutf_warn_unused size_t +implementation::latin1_length_from_utf32(size_t length) const noexcept { + return scalar::utf32::latin1_length_from_utf32(length); +} - |10dd.dddd|10cc.cccc|10bb.bbbb|1111.0aaa| 4-byte char - |????.????|10cc.cccc|10bb.bbbb|1110.aaaa| 3-byte char - |????.????|????.????|10bb.bbbb|110a.aaaa| 2-byte char - |????.????|????.????|????.????|0aaa.aaaa| ASCII char - byte 3 byte 2 byte 1 byte 0 - */ +simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::utf8_length_from_utf16(input, length); +} - /* 1. Reset control bits of continuation bytes and the MSB - of the leading byte; this makes all bytes unsigned (and - does not alter ASCII char). +simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::utf8_length_from_utf16(input, length); +} - |00dd.dddd|00cc.cccc|00bb.bbbb|0111.0aaa| 4-byte char - |00??.????|00cc.cccc|00bb.bbbb|0110.aaaa| 3-byte char - |00??.????|00??.????|00bb.bbbb|010a.aaaa| 2-byte char - |00??.????|00??.????|00??.????|0aaa.aaaa| ASCII char - ^^ ^^ ^^ ^ - */ - __m512i values; - const __m512i v_3f3f_3f7f = _mm512_set1_epi32(0x3f3f3f7f); - values = _mm512_and_si512(utf8, v_3f3f_3f7f); +simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::utf32_length_from_utf16(input, length); +} - /* 2. Swap and join fields A-B and C-D +simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::utf32_length_from_utf16(input, length); +} - |0000.cccc|ccdd.dddd|0001.110a|aabb.bbbb| 4-byte char - |0000.cccc|cc??.????|0001.10aa|aabb.bbbb| 3-byte char - |0000.????|????.????|0001.0aaa|aabb.bbbb| 2-byte char - |0000.????|????.????|000a.aaaa|aa??.????| ASCII char */ - const __m512i v_0140_0140 = _mm512_set1_epi32(0x01400140); - values = _mm512_maddubs_epi16(values, v_0140_0140); +simdutf_warn_unused size_t +implementation::utf16_length_from_latin1(size_t length) const noexcept { + return scalar::latin1::utf16_length_from_latin1(length); +} - /* 3. Swap and join fields AB & CD +simdutf_warn_unused size_t implementation::utf16_length_from_utf8( + const char *input, size_t length) const noexcept { + return utf8::utf16_length_from_utf8(input, length); +} - |0000.0001|110a.aabb|bbbb.cccc|ccdd.dddd| 4-byte char - |0000.0001|10aa.aabb|bbbb.cccc|cc??.????| 3-byte char - |0000.0001|0aaa.aabb|bbbb.????|????.????| 2-byte char - |0000.000a|aaaa.aa??|????.????|????.????| ASCII char */ - const __m512i v_0001_1000 = _mm512_set1_epi32(0x00011000); - values = _mm512_madd_epi16(values, v_0001_1000); +simdutf_warn_unused size_t +implementation::utf32_length_from_latin1(size_t length) const noexcept { + return scalar::latin1::utf32_length_from_latin1(length); +} - /* 4. Shift left the values by variable amounts to reset highest UTF-8 bits - |aaab.bbbb|bccc.cccd|dddd.d000|0000.0000| 4-byte char -- by 11 - |aaaa.bbbb|bbcc.cccc|????.??00|0000.0000| 3-byte char -- by 10 - |aaaa.abbb|bbb?.????|????.???0|0000.0000| 2-byte char -- by 9 - |aaaa.aaa?|????.????|????.????|?000.0000| ASCII char -- by 7 */ - { - /** pshufb +simdutf_warn_unused size_t implementation::utf8_length_from_latin1( + const char *input, size_t len) const noexcept { + const uint8_t *data = reinterpret_cast(input); + size_t answer = len / sizeof(__m256i) * sizeof(__m256i); + size_t i = 0; + if (answer >= 2048) { // long strings optimization + __m256i four_64bits = _mm256_setzero_si256(); + while (i + sizeof(__m256i) <= len) { + __m256i runner = _mm256_setzero_si256(); + // We can do up to 255 loops without overflow. + size_t iterations = (len - i) / sizeof(__m256i); + if (iterations > 255) { + iterations = 255; + } + size_t max_i = i + iterations * sizeof(__m256i) - sizeof(__m256i); + for (; i + 4 * sizeof(__m256i) <= max_i; i += 4 * sizeof(__m256i)) { + __m256i input1 = _mm256_loadu_si256((const __m256i *)(data + i)); + __m256i input2 = + _mm256_loadu_si256((const __m256i *)(data + i + sizeof(__m256i))); + __m256i input3 = _mm256_loadu_si256( + (const __m256i *)(data + i + 2 * sizeof(__m256i))); + __m256i input4 = _mm256_loadu_si256( + (const __m256i *)(data + i + 3 * sizeof(__m256i))); + __m256i input12 = + _mm256_add_epi8(_mm256_cmpgt_epi8(_mm256_setzero_si256(), input1), + _mm256_cmpgt_epi8(_mm256_setzero_si256(), input2)); + __m256i input23 = + _mm256_add_epi8(_mm256_cmpgt_epi8(_mm256_setzero_si256(), input3), + _mm256_cmpgt_epi8(_mm256_setzero_si256(), input4)); + __m256i input1234 = _mm256_add_epi8(input12, input23); + runner = _mm256_sub_epi8(runner, input1234); + } + for (; i <= max_i; i += sizeof(__m256i)) { + __m256i input_256_chunk = + _mm256_loadu_si256((const __m256i *)(data + i)); + runner = _mm256_sub_epi8( + runner, _mm256_cmpgt_epi8(_mm256_setzero_si256(), input_256_chunk)); + } + four_64bits = _mm256_add_epi64( + four_64bits, _mm256_sad_epu8(runner, _mm256_setzero_si256())); + } + answer += _mm256_extract_epi64(four_64bits, 0) + + _mm256_extract_epi64(four_64bits, 1) + + _mm256_extract_epi64(four_64bits, 2) + + _mm256_extract_epi64(four_64bits, 3); + } else if (answer > 0) { + for (; i + sizeof(__m256i) <= len; i += sizeof(__m256i)) { + __m256i latin = _mm256_loadu_si256((const __m256i *)(data + i)); + uint32_t non_ascii = _mm256_movemask_epi8(latin); + answer += count_ones(non_ascii); + } + } + return answer + scalar::latin1::utf8_length_from_latin1( + reinterpret_cast(data + i), len - i); +} - continuation = 0 - ascii = 7 - _2_bytes = 9 - _3_bytes = 10 - _4_bytes = 11 +simdutf_warn_unused size_t implementation::utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + const __m256i v_00000000 = _mm256_setzero_si256(); + const __m256i v_ffffff80 = _mm256_set1_epi32((uint32_t)0xffffff80); + const __m256i v_fffff800 = _mm256_set1_epi32((uint32_t)0xfffff800); + const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); + size_t pos = 0; + size_t count = 0; + for (; pos + 8 <= length; pos += 8) { + __m256i in = _mm256_loadu_si256((__m256i *)(input + pos)); + const __m256i ascii_bytes_bytemask = + _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffffff80), v_00000000); + const __m256i one_two_bytes_bytemask = + _mm256_cmpeq_epi32(_mm256_and_si256(in, v_fffff800), v_00000000); + const __m256i two_bytes_bytemask = + _mm256_xor_si256(one_two_bytes_bytemask, ascii_bytes_bytemask); + const __m256i one_two_three_bytes_bytemask = + _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000); + const __m256i three_bytes_bytemask = + _mm256_xor_si256(one_two_three_bytes_bytemask, one_two_bytes_bytemask); + const uint32_t ascii_bytes_bitmask = + static_cast(_mm256_movemask_epi8(ascii_bytes_bytemask)); + const uint32_t two_bytes_bitmask = + static_cast(_mm256_movemask_epi8(two_bytes_bytemask)); + const uint32_t three_bytes_bitmask = + static_cast(_mm256_movemask_epi8(three_bytes_bytemask)); - shift_left_v3 = 4 * [ - ascii, # 0000 - ascii, # 0001 - ascii, # 0010 - ascii, # 0011 - ascii, # 0100 - ascii, # 0101 - ascii, # 0110 - ascii, # 0111 - continuation, # 1000 - continuation, # 1001 - continuation, # 1010 - continuation, # 1011 - _2_bytes, # 1100 - _2_bytes, # 1101 - _3_bytes, # 1110 - _4_bytes, # 1111 - ] */ - const __m512i shift_left_v3 = _mm512_setr_epi64( - 0x0707070707070707, 0x0b0a090900000000, 0x0707070707070707, - 0x0b0a090900000000, 0x0707070707070707, 0x0b0a090900000000, - 0x0707070707070707, 0x0b0a090900000000); + size_t ascii_count = count_ones(ascii_bytes_bitmask) / 4; + size_t two_bytes_count = count_ones(two_bytes_bitmask) / 4; + size_t three_bytes_count = count_ones(three_bytes_bitmask) / 4; + count += 32 - 3 * ascii_count - 2 * two_bytes_count - three_bytes_count; + } + return count + + scalar::utf32::utf8_length_from_utf32(input + pos, length - pos); +} - const __m512i shift = _mm512_shuffle_epi8(shift_left_v3, char_class); - values = _mm512_sllv_epi32(values, shift); +simdutf_warn_unused size_t implementation::utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + const __m256i v_00000000 = _mm256_setzero_si256(); + const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); + size_t pos = 0; + size_t count = 0; + for (; pos + 8 <= length; pos += 8) { + __m256i in = _mm256_loadu_si256((__m256i *)(input + pos)); + const __m256i surrogate_bytemask = + _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000); + const uint32_t surrogate_bitmask = + static_cast(_mm256_movemask_epi8(surrogate_bytemask)); + size_t surrogate_count = (32 - count_ones(surrogate_bitmask)) / 4; + count += 8 + surrogate_count; } + return count + + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos); +} - /* 5. Shift right the values by variable amounts to reset lowest bits - |0000.0000|000a.aabb|bbbb.cccc|ccdd.dddd| 4-byte char -- by 11 - |0000.0000|0000.0000|aaaa.bbbb|bbcc.cccc| 3-byte char -- by 16 - |0000.0000|0000.0000|0000.0aaa|aabb.bbbb| 2-byte char -- by 21 - |0000.0000|0000.0000|0000.0000|0aaa.aaaa| ASCII char -- by 25 */ - { - // 4 * [25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 21, 21, 16, 11] - const __m512i shift_right = _mm512_setr_epi64( - 0x1919191919191919, 0x0b10151500000000, 0x1919191919191919, - 0x0b10151500000000, 0x1919191919191919, 0x0b10151500000000, - 0x1919191919191919, 0x0b10151500000000); +simdutf_warn_unused size_t implementation::utf32_length_from_utf8( + const char *input, size_t length) const noexcept { + return utf8::count_code_points(input, length); +} - const __m512i shift = _mm512_shuffle_epi8(shift_right, char_class); - values = _mm512_srlv_epi32(values, shift); - } +simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( + const char *input, size_t length) const noexcept { + return scalar::base64::maximal_binary_length_from_base64(input, length); +} - return values; +simdutf_warn_unused result implementation::base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return (options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); } -simdutf_really_inline __m512i expand_and_identify(__m512i lane0, __m512i lane1, - int &count) { - const __m512i merged = _mm512_mask_mov_epi32(lane0, 0x1000, lane1); - const __m512i expand_ver2 = _mm512_setr_epi64( - 0x0403020103020100, 0x0605040305040302, 0x0807060507060504, - 0x0a09080709080706, 0x0c0b0a090b0a0908, 0x0e0d0c0b0d0c0b0a, - 0x000f0e0d0f0e0d0c, 0x0201000f01000f0e); - const __m512i input = _mm512_shuffle_epi8(merged, expand_ver2); - const __m512i v_0000_00c0 = _mm512_set1_epi32(0xc0); - const __m512i t0 = _mm512_and_si512(input, v_0000_00c0); - const __m512i v_0000_0080 = _mm512_set1_epi32(0x80); - const __mmask16 leading_bytes = _mm512_cmpneq_epu32_mask(t0, v_0000_0080); - count = static_cast(count_ones(leading_bytes)); - return _mm512_mask_compress_epi32(_mm512_setzero_si512(), leading_bytes, - input); +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return (options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); } -simdutf_really_inline __m512i expand_utf8_to_utf32(__m512i input) { - __m512i char_class = _mm512_srli_epi32(input, 4); - /* char_class = ((input >> 4) & 0x0f) | 0x80808000 */ - const __m512i v_0000_000f = _mm512_set1_epi32(0x0f); - const __m512i v_8080_8000 = _mm512_set1_epi32(0x80808000); - char_class = - _mm512_ternarylogic_epi32(char_class, v_0000_000f, v_8080_8000, 0xea); - return expanded_utf8_to_utf32(char_class, input); +simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( + const char16_t *input, size_t length) const noexcept { + return scalar::base64::maximal_binary_length_from_base64(input, length); } -/* end file src/icelake/icelake_utf8_common.inl.cpp */ -/* begin file src/icelake/icelake_macros.inl.cpp */ -/* - This upcoming macro (SIMDUTF_ICELAKE_TRANSCODE16) takes 16 + 4 bytes (of a - UTF-8 string) and loads all possible 4-byte substring into an AVX512 - register. +simdutf_warn_unused result implementation::base64_to_binary( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return (options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); +} - For example if we have bytes abcdefgh... we create following 32-bit lanes +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return (options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); +} - [abcd|bcde|cdef|defg|efgh|...] - ^ ^ - byte 0 of reg byte 63 of reg -*/ -/** pshufb - # lane{0,1,2} have got bytes: [ 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, - 11, 12, 13, 14, 15] # lane3 has got bytes: [ 16, 17, 18, 19, 4, 5, - 6, 8, 9, 10, 11, 12, 13, 14, 15] +simdutf_warn_unused size_t implementation::base64_length_from_binary( + size_t length, base64_options options) const noexcept { + return scalar::base64::base64_length_from_binary(length, options); +} - expand_ver2 = [ - # lane 0: - 0, 1, 2, 3, - 1, 2, 3, 4, - 2, 3, 4, 5, - 3, 4, 5, 6, +size_t implementation::binary_to_base64(const char *input, size_t length, + char *output, + base64_options options) const noexcept { + if (options & base64_url) { + return encode_base64(output, input, length, options); + } else { + return encode_base64(output, input, length, options); + } +} +} // namespace haswell +} // namespace simdutf - # lane 1: - 4, 5, 6, 7, - 5, 6, 7, 8, - 6, 7, 8, 9, - 7, 8, 9, 10, +/* begin file src/simdutf/haswell/end.h */ +#if SIMDUTF_CAN_ALWAYS_RUN_HASWELL +// nothing needed. +#else +SIMDUTF_UNTARGET_REGION +#endif - # lane 2: - 8, 9, 10, 11, - 9, 10, 11, 12, - 10, 11, 12, 13, - 11, 12, 13, 14, - # lane 3 order: 13, 14, 15, 16 14, 15, 16, 17, 15, 16, 17, 18, 16, - 17, 18, 19 12, 13, 14, 15, 13, 14, 15, 0, 14, 15, 0, 1, 15, 0, 1, 2, - ] -*/ +#if SIMDUTF_GCC11ORMORE // workaround for + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 +SIMDUTF_POP_DISABLE_WARNINGS +#endif // end of workaround +/* end file src/simdutf/haswell/end.h */ +/* end file src/haswell/implementation.cpp */ +#endif +#if SIMDUTF_IMPLEMENTATION_PPC64 +/* begin file src/ppc64/implementation.cpp */ -#define SIMDUTF_ICELAKE_TRANSCODE16(LANE0, LANE1, MASKED) \ - { \ - const __m512i merged = _mm512_mask_mov_epi32(LANE0, 0x1000, LANE1); \ - const __m512i expand_ver2 = _mm512_setr_epi64( \ - 0x0403020103020100, 0x0605040305040302, 0x0807060507060504, \ - 0x0a09080709080706, 0x0c0b0a090b0a0908, 0x0e0d0c0b0d0c0b0a, \ - 0x000f0e0d0f0e0d0c, 0x0201000f01000f0e); \ - const __m512i input = _mm512_shuffle_epi8(merged, expand_ver2); \ - \ - __mmask16 leading_bytes; \ - const __m512i v_0000_00c0 = _mm512_set1_epi32(0xc0); \ - const __m512i t0 = _mm512_and_si512(input, v_0000_00c0); \ - const __m512i v_0000_0080 = _mm512_set1_epi32(0x80); \ - leading_bytes = _mm512_cmpneq_epu32_mask(t0, v_0000_0080); \ - \ - __m512i char_class; \ - char_class = _mm512_srli_epi32(input, 4); \ - /* char_class = ((input >> 4) & 0x0f) | 0x80808000 */ \ - const __m512i v_0000_000f = _mm512_set1_epi32(0x0f); \ - const __m512i v_8080_8000 = _mm512_set1_epi32(0x80808000); \ - char_class = \ - _mm512_ternarylogic_epi32(char_class, v_0000_000f, v_8080_8000, 0xea); \ - \ - const int valid_count = static_cast(count_ones(leading_bytes)); \ - const __m512i utf32 = expanded_utf8_to_utf32(char_class, input); \ - \ - const __m512i out = _mm512_mask_compress_epi32(_mm512_setzero_si512(), \ - leading_bytes, utf32); \ - \ - if (UTF32) { \ - if (MASKED) { \ - const __mmask16 valid = uint16_t((1 << valid_count) - 1); \ - _mm512_mask_storeu_epi32((__m512i *)output, valid, out); \ - } else { \ - _mm512_storeu_si512((__m512i *)output, out); \ - } \ - output += valid_count; \ - } else { \ - if (MASKED) { \ - output += utf32_to_utf16_masked( \ - byteflip, out, valid_count, reinterpret_cast(output)); \ - } else { \ - output += utf32_to_utf16( \ - byteflip, out, valid_count, reinterpret_cast(output)); \ - } \ - } \ - } -#define SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(INPUT, VALID_COUNT, MASKED) \ - { \ - if (UTF32) { \ - if (MASKED) { \ - const __mmask16 valid_mask = uint16_t((1 << VALID_COUNT) - 1); \ - _mm512_mask_storeu_epi32((__m512i *)output, valid_mask, INPUT); \ - } else { \ - _mm512_storeu_si512((__m512i *)output, INPUT); \ - } \ - output += VALID_COUNT; \ - } else { \ - if (MASKED) { \ - output += utf32_to_utf16_masked( \ - byteflip, INPUT, VALID_COUNT, \ - reinterpret_cast(output)); \ - } else { \ - output += \ - utf32_to_utf16(byteflip, INPUT, VALID_COUNT, \ - reinterpret_cast(output)); \ - } \ - } \ - } -#define SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) \ - if (UTF32) { \ - const __m128i t0 = _mm512_castsi512_si128(utf8); \ - const __m128i t1 = _mm512_extracti32x4_epi32(utf8, 1); \ - const __m128i t2 = _mm512_extracti32x4_epi32(utf8, 2); \ - const __m128i t3 = _mm512_extracti32x4_epi32(utf8, 3); \ - _mm512_storeu_si512((__m512i *)(output + 0 * 16), \ - _mm512_cvtepu8_epi32(t0)); \ - _mm512_storeu_si512((__m512i *)(output + 1 * 16), \ - _mm512_cvtepu8_epi32(t1)); \ - _mm512_storeu_si512((__m512i *)(output + 2 * 16), \ - _mm512_cvtepu8_epi32(t2)); \ - _mm512_storeu_si512((__m512i *)(output + 3 * 16), \ - _mm512_cvtepu8_epi32(t3)); \ - } else { \ - const __m256i h0 = _mm512_castsi512_si256(utf8); \ - const __m256i h1 = _mm512_extracti64x4_epi64(utf8, 1); \ - if (big_endian) { \ - _mm512_storeu_si512( \ - (__m512i *)(output + 0 * 16), \ - _mm512_shuffle_epi8(_mm512_cvtepu8_epi16(h0), byteflip)); \ - _mm512_storeu_si512( \ - (__m512i *)(output + 2 * 16), \ - _mm512_shuffle_epi8(_mm512_cvtepu8_epi16(h1), byteflip)); \ - } else { \ - _mm512_storeu_si512((__m512i *)(output + 0 * 16), \ - _mm512_cvtepu8_epi16(h0)); \ - _mm512_storeu_si512((__m512i *)(output + 2 * 16), \ - _mm512_cvtepu8_epi16(h1)); \ - } \ - } -/* end file src/icelake/icelake_macros.inl.cpp */ -/* begin file src/icelake/icelake_from_valid_utf8.inl.cpp */ -// file included directly -// File contains conversion procedure from VALID UTF-8 strings. -/* - valid_utf8_to_fixed_length converts a valid UTF-8 string into UTF-32. +/* begin file src/simdutf/ppc64/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "ppc64" +// #define SIMDUTF_IMPLEMENTATION ppc64 +/* end file src/simdutf/ppc64/begin.h */ +namespace simdutf { +namespace ppc64 { +namespace { +#ifndef SIMDUTF_PPC64_H + #error "ppc64.h must be included" +#endif +using namespace simd; - The `OUTPUT` template type decides what to do with UTF-32: store - it directly or convert into UTF-16 (with AVX512). +simdutf_really_inline bool is_ascii(const simd8x64 &input) { + // careful: 0x80 is not ascii. + return input.reduce_or().saturating_sub(0b01111111u).bits_not_set_anywhere(); +} - Input: - - str - valid UTF-8 string - - len - string length - - out_buffer - output buffer +simdutf_unused simdutf_really_inline simd8 +must_be_continuation(const simd8 prev1, const simd8 prev2, + const simd8 prev3) { + simd8 is_second_byte = + prev1.saturating_sub(0b11000000u - 1); // Only 11______ will be > 0 + simd8 is_third_byte = + prev2.saturating_sub(0b11100000u - 1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = + prev3.saturating_sub(0b11110000u - 1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction + // will be <= 64, so signed comparison is fine. + return simd8(is_second_byte | is_third_byte | is_fourth_byte) > + int8_t(0); +} - Result: - - pair.first - the first unprocessed input byte - - pair.second - the first unprocessed output word -*/ -template -std::pair -valid_utf8_to_fixed_length(const char *str, size_t len, OUTPUT *dwords) { - constexpr bool UTF32 = std::is_same::value; - constexpr bool UTF16 = std::is_same::value; - static_assert( - UTF32 or UTF16, - "output type has to be uint32_t (for UTF-32) or char16_t (for UTF-16)"); - static_assert(!(UTF32 and big_endian), - "we do not currently support big-endian UTF-32"); +simdutf_really_inline simd8 +must_be_2_3_continuation(const simd8 prev2, + const simd8 prev3) { + simd8 is_third_byte = + prev2.saturating_sub(0xe0u - 0x80); // Only 111_____ will be >= 0x80 + simd8 is_fourth_byte = + prev3.saturating_sub(0xf0u - 0x80); // Only 1111____ will be >= 0x80 + // Caller requires a bool (all 1's). All values resulting from the subtraction + // will be <= 64, so signed comparison is fine. + return simd8(is_third_byte | is_fourth_byte); +} - __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - const char *ptr = str; - const char *end = ptr + len; +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf - OUTPUT *output = dwords; - /** - * In the main loop, we consume 64 bytes per iteration, - * but we access 64 + 4 bytes. - * We check for ptr + 64 + 64 <= end because - * we want to be do maskless writes without overruns. - */ - while (end - ptr >= 64 + 4) { - const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); - const __m512i v_80 = _mm512_set1_epi8(char(0x80)); - const __mmask64 ascii = _mm512_test_epi8_mask(utf8, v_80); - if (ascii == 0) { - SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) - output += 64; - ptr += 64; - continue; - } +/* begin file src/generic/buf_block_reader.h */ +namespace simdutf { +namespace ppc64 { +namespace { - const __m512i lane0 = broadcast_epi128<0>(utf8); - const __m512i lane1 = broadcast_epi128<1>(utf8); - int valid_count0; - __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); - const __m512i lane2 = broadcast_epi128<2>(utf8); - int valid_count1; - __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); - if (valid_count0 + valid_count1 <= 16) { - vec0 = _mm512_mask_expand_epi32( - vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); - valid_count0 += valid_count1; - vec0 = expand_utf8_to_utf32(vec0); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - } else { - vec0 = expand_utf8_to_utf32(vec0); - vec1 = expand_utf8_to_utf32(vec1); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) - } - const __m512i lane3 = broadcast_epi128<3>(utf8); - int valid_count2; - __m512i vec2 = expand_and_identify(lane2, lane3, valid_count2); - uint32_t tmp1; - ::memcpy(&tmp1, ptr + 64, sizeof(tmp1)); - const __m512i lane4 = _mm512_set1_epi32(tmp1); - int valid_count3; - __m512i vec3 = expand_and_identify(lane3, lane4, valid_count3); - if (valid_count2 + valid_count3 <= 16) { - vec2 = _mm512_mask_expand_epi32( - vec2, __mmask16(((1 << valid_count3) - 1) << valid_count2), vec3); - valid_count2 += valid_count3; - vec2 = expand_utf8_to_utf32(vec2); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) - } else { - vec2 = expand_utf8_to_utf32(vec2); - vec3 = expand_utf8_to_utf32(vec3); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec3, valid_count3, true) - } - ptr += 4 * 16; - } +// Walks through a buffer in block-sized increments, loading the last part with +// spaces +template struct buf_block_reader { +public: + simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdutf_really_inline size_t block_index(); + simdutf_really_inline bool has_full_block() const; + simdutf_really_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 + * (in which case this function fills the buffer with spaces and returns 0. In + * particular, if len == STEP_SIZE there will be 0 full_blocks and 1 remainder + * block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdutf_really_inline size_t get_remainder(uint8_t *dst) const; + simdutf_really_inline void advance(); - if (end - ptr >= 64) { - const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); - const __m512i v_80 = _mm512_set1_epi8(char(0x80)); - const __mmask64 ascii = _mm512_test_epi8_mask(utf8, v_80); - if (ascii == 0) { - SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) - output += 64; - ptr += 64; - } else { - const __m512i lane0 = broadcast_epi128<0>(utf8); - const __m512i lane1 = broadcast_epi128<1>(utf8); - int valid_count0; - __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); - const __m512i lane2 = broadcast_epi128<2>(utf8); - int valid_count1; - __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); - if (valid_count0 + valid_count1 <= 16) { - vec0 = _mm512_mask_expand_epi32( - vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); - valid_count0 += valid_count1; - vec0 = expand_utf8_to_utf32(vec0); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - } else { - vec0 = expand_utf8_to_utf32(vec0); - vec1 = expand_utf8_to_utf32(vec1); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) - } +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; - const __m512i lane3 = broadcast_epi128<3>(utf8); - SIMDUTF_ICELAKE_TRANSCODE16(lane2, lane3, true) +// Routines to print masks and text for debugging bitmask operations +simdutf_unused static char *format_input_text_64(const uint8_t *text) { + static char *buf = + reinterpret_cast(malloc(sizeof(simd8x64) + 1)); + for (size_t i = 0; i < sizeof(simd8x64); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} - ptr += 3 * 16; +// Routines to print masks and text for debugging bitmask operations +simdutf_unused static char *format_input_text(const simd8x64 &in) { + static char *buf = + reinterpret_cast(malloc(sizeof(simd8x64) + 1)); + in.store(reinterpret_cast(buf)); + for (size_t i = 0; i < sizeof(simd8x64); i++) { + if (buf[i] < ' ') { + buf[i] = '_'; } } - return {ptr, output}; + buf[sizeof(simd8x64)] = '\0'; + return buf; } -using utf8_to_utf16_result = std::pair; -/* end file src/icelake/icelake_from_valid_utf8.inl.cpp */ -/* begin file src/icelake/icelake_utf8_validation.inl.cpp */ -// file included directly +simdutf_unused static char *format_mask(uint64_t mask) { + static char *buf = reinterpret_cast(malloc(64 + 1)); + for (size_t i = 0; i < 64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} -simdutf_really_inline __m512i check_special_cases(__m512i input, - const __m512i prev1) { - __m512i mask1 = _mm512_setr_epi64(0x0202020202020202, 0x4915012180808080, - 0x0202020202020202, 0x4915012180808080, - 0x0202020202020202, 0x4915012180808080, - 0x0202020202020202, 0x4915012180808080); - const __m512i v_0f = _mm512_set1_epi8(0x0f); - __m512i index1 = _mm512_and_si512(_mm512_srli_epi16(prev1, 4), v_0f); +template +simdutf_really_inline +buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) + : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, + idx{0} {} - __m512i byte_1_high = _mm512_shuffle_epi8(mask1, index1); - __m512i mask2 = _mm512_setr_epi64(0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb, - 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb, - 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb, - 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb); - __m512i index2 = _mm512_and_si512(prev1, v_0f); +template +simdutf_really_inline size_t buf_block_reader::block_index() { + return idx; +} - __m512i byte_1_low = _mm512_shuffle_epi8(mask2, index2); - __m512i mask3 = - _mm512_setr_epi64(0x101010101010101, 0x1010101babaaee6, 0x101010101010101, - 0x1010101babaaee6, 0x101010101010101, 0x1010101babaaee6, - 0x101010101010101, 0x1010101babaaee6); - __m512i index3 = _mm512_and_si512(_mm512_srli_epi16(input, 4), v_0f); - __m512i byte_2_high = _mm512_shuffle_epi8(mask3, index3); - return _mm512_ternarylogic_epi64(byte_1_high, byte_1_low, byte_2_high, 128); +template +simdutf_really_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; } -simdutf_really_inline __m512i check_multibyte_lengths(const __m512i input, - const __m512i prev_input, - const __m512i sc) { - __m512i prev2 = prev<2>(input, prev_input); - __m512i prev3 = prev<3>(input, prev_input); - __m512i is_third_byte = _mm512_subs_epu8( - prev2, _mm512_set1_epi8(0b11100000u - 1)); // Only 111_____ will be > 0 - __m512i is_fourth_byte = _mm512_subs_epu8( - prev3, _mm512_set1_epi8(0b11110000u - 1)); // Only 1111____ will be > 0 - __m512i is_third_or_fourth_byte = - _mm512_or_si512(is_third_byte, is_fourth_byte); - const __m512i v_7f = _mm512_set1_epi8(char(0x7f)); - is_third_or_fourth_byte = _mm512_adds_epu8(v_7f, is_third_or_fourth_byte); - // We want to compute (is_third_or_fourth_byte AND v80) XOR sc. - const __m512i v_80 = _mm512_set1_epi8(char(0x80)); - return _mm512_ternarylogic_epi32(is_third_or_fourth_byte, v_80, sc, - 0b1101010); - //__m512i is_third_or_fourth_byte_mask = - //_mm512_and_si512(is_third_or_fourth_byte, v_80); return - // _mm512_xor_si512(is_third_or_fourth_byte_mask, sc); +template +simdutf_really_inline const uint8_t * +buf_block_reader::full_block() const { + return &buf[idx]; } -// -// Return nonzero if there are incomplete multibyte characters at the end of the -// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. -// -simdutf_really_inline __m512i is_incomplete(const __m512i input) { - // If the previous input's last 3 bytes match this, they're too short (they - // ended at EOF): - // ... 1111____ 111_____ 11______ - __m512i max_value = _mm512_setr_epi64(0xffffffffffffffff, 0xffffffffffffffff, - 0xffffffffffffffff, 0xffffffffffffffff, - 0xffffffffffffffff, 0xffffffffffffffff, - 0xffffffffffffffff, 0xbfdfefffffffffff); - return _mm512_subs_epu8(input, max_value); + +template +simdutf_really_inline size_t +buf_block_reader::get_remainder(uint8_t *dst) const { + if (len == idx) { + return 0; + } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, + STEP_SIZE); // std::memset STEP_SIZE because it is more efficient + // to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; } -struct avx512_utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - __m512i error{}; +template +simdutf_really_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} - // The last input we received - __m512i prev_input_block{}; - // Whether the last input we received was incomplete (used for ASCII fast - // path) - __m512i prev_incomplete{}; +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/buf_block_reader.h */ +/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf8_validation { - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const __m512i input, - const __m512i prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - __m512i prev1 = prev<1>(input, prev_input); - __m512i sc = check_special_cases(input, prev1); - this->error = _mm512_or_si512( - check_multibyte_lengths(input, prev_input, sc), this->error); - } +using namespace simd; - // The only problem that can happen at EOF is that a multibyte character is - // too short or a byte value too large in the last bytes: check_special_cases - // only checks for bytes too large in the first of two bytes. - simdutf_really_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an - // ASCII block can't possibly finish them. - this->error = _mm512_or_si512(this->error, this->prev_incomplete); - } +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - // returns true if ASCII. - simdutf_really_inline bool check_next_input(const __m512i input) { - const __m512i v_80 = _mm512_set1_epi8(char(0x80)); - const __mmask64 ascii = _mm512_test_epi8_mask(input, v_80); - if (ascii == 0) { - this->error = _mm512_or_si512(this->error, this->prev_incomplete); - return true; - } else { - this->check_utf8_bytes(input, this->prev_input_block); - this->prev_incomplete = is_incomplete(input); - this->prev_input_block = input; - return false; - } - } - // do not forget to call check_eof! - simdutf_really_inline bool errors() const { - return _mm512_test_epi8_mask(this->error, this->error) != 0; - } -}; // struct avx512_utf8_checker -/* end file src/icelake/icelake_utf8_validation.inl.cpp */ -/* begin file src/icelake/icelake_from_utf8.inl.cpp */ -// file included directly + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, -// File contains conversion procedure from possibly invalid UTF-8 strings. + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, -/** - * Attempts to convert up to len 1-byte code units from in (in UTF-8 format) to - * out. - * Returns the position of the input and output after the processing is - * completed. Upon error, the output is set to null. - */ + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, -template -utf8_to_utf16_result -fast_avx512_convert_utf8_to_utf16(const char *in, size_t len, char16_t *out) { - const char *const final_in = in + len; - bool result = true; - while (result) { - if (final_in - in >= 64) { - result = process_block_utf8_to_utf16( - in, out, final_in - in); - } else if (in < final_in) { - result = process_block_utf8_to_utf16( - in, out, final_in - in); - } else { - break; - } - } - if (!result) { - out = nullptr; - } - return std::make_pair(in, out); -} + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, -template -simdutf::result fast_avx512_convert_utf8_to_utf16_with_errors(const char *in, - size_t len, - char16_t *out) { - const char *const init_in = in; - const char16_t *const init_out = out; - const char *const final_in = in + len; - bool result = true; - while (result) { - if (final_in - in >= 64) { - result = process_block_utf8_to_utf16( - in, out, final_in - in); - } else if (in < final_in) { - result = process_block_utf8_to_utf16( - in, out, final_in - in); - } else { - break; - } - } - if (!result) { - size_t pos = size_t(in - init_in); - if (pos < len && (init_in[pos] & 0xc0) == 0x80 && pos >= 64) { - // We must check whether we are the fourth continuation byte - bool c1 = (init_in[pos - 1] & 0xc0) == 0x80; - bool c2 = (init_in[pos - 2] & 0xc0) == 0x80; - bool c3 = (init_in[pos - 3] & 0xc0) == 0x80; - if (c1 && c2 && c3) { - return {simdutf::TOO_LONG, pos}; - } - } - // rewind_and_convert_with_errors will seek a potential error from in - // onward, with the ability to go back up to in - init_in bytes, and read - // final_in - in bytes forward. - simdutf::result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - in - init_in, in, final_in - in, out); - res.count += (in - init_in); - return res; - } else { - return simdutf::result(error_code::SUCCESS, out - init_out); - } + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; } -template -// todo: replace with the utf-8 to utf-16 routine adapted to utf-32. This code -// is legacy. -std::pair -validating_utf8_to_fixed_length(const char *str, size_t len, OUTPUT *dwords) { - constexpr bool UTF32 = std::is_same::value; - constexpr bool UTF16 = std::is_same::value; - static_assert( - UTF32 or UTF16, - "output type has to be uint32_t (for UTF-32) or char16_t (for UTF-16)"); - static_assert(!(UTF32 and big_endian), - "we do not currently support big-endian UTF-32"); - - const char *ptr = str; - const char *end = ptr + len; - __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - OUTPUT *output = dwords; - avx512_utf8_checker checker{}; - /** - * In the main loop, we consume 64 bytes per iteration, - * but we access 64 + 4 bytes. - * We use masked writes to avoid overruns, see - * https://github.com/simdutf/simdutf/issues/471 - */ - while (end - ptr >= 64 + 4) { - const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); - if (checker.check_next_input(utf8)) { - SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) - output += 64; - ptr += 64; - continue; - } - const __m512i lane0 = broadcast_epi128<0>(utf8); - const __m512i lane1 = broadcast_epi128<1>(utf8); - int valid_count0; - __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); - const __m512i lane2 = broadcast_epi128<2>(utf8); - int valid_count1; - __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); - if (valid_count0 + valid_count1 <= 16) { - vec0 = _mm512_mask_expand_epi32( - vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); - valid_count0 += valid_count1; - vec0 = expand_utf8_to_utf32(vec0); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - } else { - vec0 = expand_utf8_to_utf32(vec0); - vec1 = expand_utf8_to_utf32(vec1); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) - } - const __m512i lane3 = broadcast_epi128<3>(utf8); - int valid_count2; - __m512i vec2 = expand_and_identify(lane2, lane3, valid_count2); - uint32_t tmp1; - ::memcpy(&tmp1, ptr + 64, sizeof(tmp1)); - const __m512i lane4 = _mm512_set1_epi32(tmp1); - int valid_count3; - __m512i vec3 = expand_and_identify(lane3, lane4, valid_count3); - if (valid_count2 + valid_count3 <= 16) { - vec2 = _mm512_mask_expand_epi32( - vec2, __mmask16(((1 << valid_count3) - 1) << valid_count2), vec3); - valid_count2 += valid_count3; - vec2 = expand_utf8_to_utf32(vec2); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) - } else { - vec2 = expand_utf8_to_utf32(vec2); - vec3 = expand_utf8_to_utf32(vec3); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec3, valid_count3, true) - } - ptr += 4 * 16; - } - const char *validatedptr = ptr; // validated up to ptr - - // For the final pass, we validate 64 bytes, but we only transcode - // 3*16 bytes, so we may end up double-validating 16 bytes. - if (end - ptr >= 64) { - const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); - if (checker.check_next_input(utf8)) { - SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) - output += 64; - ptr += 64; - } else { - const __m512i lane0 = broadcast_epi128<0>(utf8); - const __m512i lane1 = broadcast_epi128<1>(utf8); - int valid_count0; - __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); - const __m512i lane2 = broadcast_epi128<2>(utf8); - int valid_count1; - __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); - if (valid_count0 + valid_count1 <= 16) { - vec0 = _mm512_mask_expand_epi32( - vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); - valid_count0 += valid_count1; - vec0 = expand_utf8_to_utf32(vec0); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - } else { - vec0 = expand_utf8_to_utf32(vec0); - vec1 = expand_utf8_to_utf32(vec1); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) - } +// +// Return nonzero if there are incomplete multibyte characters at the end of the +// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. +// +simdutf_really_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they + // ended at EOF): + // ... 1111____ 111_____ 11______ + static const uint8_t max_array[32] = {255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 0b11110000u - 1, + 0b11100000u - 1, + 0b11000000u - 1}; + const simd8 max_value( + &max_array[sizeof(max_array) - sizeof(simd8)]); + return input.gt_bits(max_value); +} - const __m512i lane3 = broadcast_epi128<3>(utf8); - SIMDUTF_ICELAKE_TRANSCODE16(lane2, lane3, true) +struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast + // path) + simd8 prev_incomplete; - ptr += 3 * 16; - } - validatedptr += 4 * 16; - } - if (end != validatedptr) { - const __m512i utf8 = - _mm512_maskz_loadu_epi8(~UINT64_C(0) >> (64 - (end - validatedptr)), - (const __m512i *)validatedptr); - checker.check_next_input(utf8); - } - checker.check_eof(); - if (checker.errors()) { - return {ptr, nullptr}; // We found an error. + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); } - return {ptr, output}; -} - -// Like validating_utf8_to_fixed_length but returns as soon as an error is -// identified todo: replace with the utf-8 to utf-16 routine adapted to utf-32. -// This code is legacy. -template -std::tuple -validating_utf8_to_fixed_length_with_constant_checks(const char *str, - size_t len, - OUTPUT *dwords) { - constexpr bool UTF32 = std::is_same::value; - constexpr bool UTF16 = std::is_same::value; - static_assert( - UTF32 or UTF16, - "output type has to be uint32_t (for UTF-32) or char16_t (for UTF-16)"); - static_assert(!(UTF32 and big_endian), - "we do not currently support big-endian UTF-32"); - const char *ptr = str; - const char *end = ptr + len; - __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - OUTPUT *output = dwords; - avx512_utf8_checker checker{}; - /** - * In the main loop, we consume 64 bytes per iteration, - * but we access 64 + 4 bytes. - */ - while (end - ptr >= 4 + 64) { - const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); - bool ascii = checker.check_next_input(utf8); - if (checker.errors()) { - return {ptr, output, false}; // We found an error. - } - if (ascii) { - SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) - output += 64; - ptr += 64; - continue; - } - const __m512i lane0 = broadcast_epi128<0>(utf8); - const __m512i lane1 = broadcast_epi128<1>(utf8); - int valid_count0; - __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); - const __m512i lane2 = broadcast_epi128<2>(utf8); - int valid_count1; - __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); - if (valid_count0 + valid_count1 <= 16) { - vec0 = _mm512_mask_expand_epi32( - vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); - valid_count0 += valid_count1; - vec0 = expand_utf8_to_utf32(vec0); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - } else { - vec0 = expand_utf8_to_utf32(vec0); - vec1 = expand_utf8_to_utf32(vec1); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) - } - const __m512i lane3 = broadcast_epi128<3>(utf8); - int valid_count2; - __m512i vec2 = expand_and_identify(lane2, lane3, valid_count2); - uint32_t tmp1; - ::memcpy(&tmp1, ptr + 64, sizeof(tmp1)); - const __m512i lane4 = _mm512_set1_epi32(tmp1); - int valid_count3; - __m512i vec3 = expand_and_identify(lane3, lane4, valid_count3); - if (valid_count2 + valid_count3 <= 16) { - vec2 = _mm512_mask_expand_epi32( - vec2, __mmask16(((1 << valid_count3) - 1) << valid_count2), vec3); - valid_count2 += valid_count3; - vec2 = expand_utf8_to_utf32(vec2); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) - } else { - vec2 = expand_utf8_to_utf32(vec2); - vec3 = expand_utf8_to_utf32(vec3); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec3, valid_count3, true) - } - ptr += 4 * 16; + // The only problem that can happen at EOF is that a multibyte character is + // too short or a byte value too large in the last bytes: check_special_cases + // only checks for bytes too large in the first of two bytes. + simdutf_really_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an + // ASCII block can't possibly finish them. + this->error |= this->prev_incomplete; } - const char *validatedptr = ptr; // validated up to ptr - // For the final pass, we validate 64 bytes, but we only transcode - // 3*16 bytes, so we may end up double-validating 16 bytes. - if (end - ptr >= 64) { - const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); - bool ascii = checker.check_next_input(utf8); - if (checker.errors()) { - return {ptr, output, false}; // We found an error. - } - if (ascii) { - SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) - output += 64; - ptr += 64; + simdutf_really_inline void check_next_input(const simd8x64 &input) { + if (simdutf_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; } else { - const __m512i lane0 = broadcast_epi128<0>(utf8); - const __m512i lane1 = broadcast_epi128<1>(utf8); - int valid_count0; - __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); - const __m512i lane2 = broadcast_epi128<2>(utf8); - int valid_count1; - __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); - if (valid_count0 + valid_count1 <= 16) { - vec0 = _mm512_mask_expand_epi32( - vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); - valid_count0 += valid_count1; - vec0 = expand_utf8_to_utf32(vec0); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - } else { - vec0 = expand_utf8_to_utf32(vec0); - vec1 = expand_utf8_to_utf32(vec1); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) + // you might think that a for-loop would work, but under Visual Studio, it + // is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); } - - const __m512i lane3 = broadcast_epi128<3>(utf8); - SIMDUTF_ICELAKE_TRANSCODE16(lane2, lane3, true) - - ptr += 3 * 16; + this->prev_incomplete = + is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; } - validatedptr += 4 * 16; - } - if (end != validatedptr) { - const __m512i utf8 = - _mm512_maskz_loadu_epi8(~UINT64_C(0) >> (64 - (end - validatedptr)), - (const __m512i *)validatedptr); - checker.check_next_input(utf8); - } - checker.check_eof(); - if (checker.errors()) { - return {ptr, output, false}; // We found an error. } - return {ptr, output, true}; -} -/* end file src/icelake/icelake_from_utf8.inl.cpp */ -/* begin file src/icelake/icelake_convert_utf8_to_latin1.inl.cpp */ -// file included directly - -// File contains conversion procedure from possibly invalid UTF-8 strings. -template -simdutf_really_inline size_t process_block_from_utf8_to_latin1( - const char *buf, size_t len, char *latin_output, __m512i minus64, - __m512i one, __mmask64 *next_leading_ptr, __mmask64 *next_bit6_ptr) { - __mmask64 load_mask = - is_remaining ? _bzhi_u64(~0ULL, (unsigned int)len) : ~0ULL; - __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)buf); - __mmask64 nonascii = _mm512_movepi8_mask(input); - if (nonascii == 0) { - if (*next_leading_ptr) { // If we ended with a leading byte, it is an error. - return 0; // Indicates error - } - is_remaining - ? _mm512_mask_storeu_epi8((__m512i *)latin_output, load_mask, input) - : _mm512_storeu_si512((__m512i *)latin_output, input); - return len; + // do not forget to call check_eof! + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); } - const __mmask64 leading = _mm512_cmpge_epu8_mask(input, minus64); - - __m512i highbits = _mm512_xor_si512(input, _mm512_set1_epi8(-62)); - __mmask64 invalid_leading_bytes = - _mm512_mask_cmpgt_epu8_mask(leading, highbits, one); +}; // struct utf8_checker +} // namespace utf8_validation - if (invalid_leading_bytes) { - return 0; // Indicates error - } +using utf8_validation::utf8_checker; - __mmask64 leading_shift = (leading << 1) | *next_leading_ptr; +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ +/* begin file src/generic/utf8_validation/utf8_validator.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf8_validation { - if ((nonascii ^ leading) != leading_shift) { - return 0; // Indicates error +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t *input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return !c.errors(); +} - const __mmask64 bit6 = _mm512_cmpeq_epi8_mask(highbits, one); - input = - _mm512_mask_sub_epi8(input, (bit6 << 1) | *next_bit6_ptr, input, minus64); +bool generic_validate_utf8(const char *input, size_t length) { + return generic_validate_utf8( + reinterpret_cast(input), length); +} - __mmask64 retain = ~leading & load_mask; - __m512i output = _mm512_maskz_compress_epi8(retain, input); - int64_t written_out = count_ones(retain); - if (written_out == 0) { - return 0; // Indicates error +/** + * Validates that the string is actual UTF-8 and stops on errors. + */ +template +result generic_validate_utf8_with_errors(const uint8_t *input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + size_t count{0}; + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + if (c.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(input), + reinterpret_cast(input + count), length - count); + res.count += count; + return res; + } + reader.advance(); + count += 64; } - *next_bit6_ptr = bit6 >> 63; - *next_leading_ptr = leading >> 63; - - __mmask64 store_mask = ~UINT64_C(0) >> (64 - written_out); + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + if (c.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(input), + reinterpret_cast(input) + count, length - count); + res.count += count; + return res; + } else { + return result(error_code::SUCCESS, length); + } +} - _mm512_mask_storeu_epi8((__m512i *)latin_output, store_mask, output); +result generic_validate_utf8_with_errors(const char *input, size_t length) { + return generic_validate_utf8_with_errors( + reinterpret_cast(input), length); +} - return written_out; +template +bool generic_validate_ascii(const uint8_t *input, size_t length) { + buf_block_reader<64> reader(input, length); + uint8_t blocks[64]{}; + simd::simd8x64 running_or(blocks); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + running_or |= in; + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + running_or |= in; + return running_or.is_ascii(); } -size_t utf8_to_latin1_avx512(const char *&inbuf, size_t len, - char *&inlatin_output) { - const char *buf = inbuf; - char *latin_output = inlatin_output; - char *start = latin_output; - size_t pos = 0; - __m512i minus64 = _mm512_set1_epi8(-64); // 11111111111 ... 1100 0000 - __m512i one = _mm512_set1_epi8(1); - __mmask64 next_leading = 0; - __mmask64 next_bit6 = 0; +bool generic_validate_ascii(const char *input, size_t length) { + return generic_validate_ascii( + reinterpret_cast(input), length); +} - while (pos + 64 <= len) { - size_t written = process_block_from_utf8_to_latin1( - buf + pos, 64, latin_output, minus64, one, &next_leading, &next_bit6); - if (written == 0) { - inlatin_output = latin_output; - inbuf = buf + pos - next_leading; - return 0; // Indicates error at pos or after, or just before pos (too - // short error) +template +result generic_validate_ascii_with_errors(const uint8_t *input, size_t length) { + buf_block_reader<64> reader(input, length); + size_t count{0}; + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + if (!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); } - latin_output += written; - pos += 64; - } + reader.advance(); - if (pos < len) { - size_t remaining = len - pos; - size_t written = process_block_from_utf8_to_latin1( - buf + pos, remaining, latin_output, minus64, one, &next_leading, - &next_bit6); - if (written == 0) { - inbuf = buf + pos - next_leading; - inlatin_output = latin_output; - return 0; // Indicates error at pos or after, or just before pos (too - // short error) - } - latin_output += written; + count += 64; } - if (next_leading) { - inbuf = buf + len - next_leading; - inlatin_output = latin_output; - return 0; // Indicates error at end of buffer + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + if (!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); + } else { + return result(error_code::SUCCESS, length); } - inlatin_output = latin_output; - inbuf += len; - return size_t(latin_output - start); } -/* end file src/icelake/icelake_convert_utf8_to_latin1.inl.cpp */ -/* begin file src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp */ -// file included directly -// File contains conversion procedure from valid UTF-8 strings. +result generic_validate_ascii_with_errors(const char *input, size_t length) { + return generic_validate_ascii_with_errors( + reinterpret_cast(input), length); +} -template -simdutf_really_inline size_t process_valid_block_from_utf8_to_latin1( - const char *buf, size_t len, char *latin_output, __m512i minus64, - __m512i one, __mmask64 *next_leading_ptr, __mmask64 *next_bit6_ptr) { - __mmask64 load_mask = - is_remaining ? _bzhi_u64(~0ULL, (unsigned int)len) : ~0ULL; - __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)buf); - __mmask64 nonascii = _mm512_movepi8_mask(input); +} // namespace utf8_validation +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_validator.h */ +// transcoding from UTF-8 to UTF-16 +/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ - if (nonascii == 0) { - is_remaining - ? _mm512_mask_storeu_epi8((__m512i *)latin_output, load_mask, input) - : _mm512_storeu_si512((__m512i *)latin_output, input); - return len; - } +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf8_to_utf16 { +using namespace simd; - __mmask64 leading = _mm512_cmpge_epu8_mask(input, minus64); +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, - __m512i highbits = _mm512_xor_si512(input, _mm512_set1_epi8(-62)); + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, - *next_leading_ptr = leading >> 63; + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, - __mmask64 bit6 = _mm512_cmpeq_epi8_mask(highbits, one); - input = - _mm512_mask_sub_epi8(input, (bit6 << 1) | *next_bit6_ptr, input, minus64); - *next_bit6_ptr = bit6 >> 63; + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - __mmask64 retain = ~leading & load_mask; - __m512i output = _mm512_maskz_compress_epi8(retain, input); - int64_t written_out = count_ones(retain); - if (written_out == 0) { - return 0; // Indicates error - } - __mmask64 store_mask = ~UINT64_C(0) >> (64 - written_out); - // Optimization opportunity: sometimes, masked writes are not needed. - _mm512_mask_storeu_epi8((__m512i *)latin_output, store_mask, output); - return written_out; + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; } -size_t valid_utf8_to_latin1_avx512(const char *buf, size_t len, - char *latin_output) { - char *start = latin_output; - size_t pos = 0; - __m512i minus64 = _mm512_set1_epi8(-64); // 11111111111 ... 1100 0000 - __m512i one = _mm512_set1_epi8(1); - __mmask64 next_leading = 0; - __mmask64 next_bit6 = 0; - - while (pos + 64 <= len) { - size_t written = process_valid_block_from_utf8_to_latin1( - buf + pos, 64, latin_output, minus64, one, &next_leading, &next_bit6); - latin_output += written; - pos += 64; - } - - if (pos < len) { - size_t remaining = len - pos; - size_t written = process_valid_block_from_utf8_to_latin1( - buf + pos, remaining, latin_output, minus64, one, &next_leading, - &next_bit6); - latin_output += written; - } +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; - return (size_t)(latin_output - start); -} -/* end file src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp */ -/* begin file src/icelake/icelake_convert_utf16_to_latin1.inl.cpp */ -// file included directly -template -size_t icelake_convert_utf16_to_latin1(const char16_t *buf, size_t len, - char *latin1_output) { - const char16_t *end = buf + len; - __m512i v_0xFF = _mm512_set1_epi16(0xff); - __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - __m512i shufmask = _mm512_set_epi8( - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, - 36, 34, 32, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); - while (end - buf >= 32) { - __m512i in = _mm512_loadu_si512((__m512i *)buf); - if (big_endian) { - in = _mm512_shuffle_epi8(in, byteflip); - } - if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) { - return 0; - } - _mm256_storeu_si256( - (__m256i *)latin1_output, - _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in))); - latin1_output += 32; - buf += 32; - } - if (buf < end) { - uint32_t mask(uint32_t(1 << (end - buf)) - 1); - __m512i in = _mm512_maskz_loadu_epi16(mask, buf); - if (big_endian) { - in = _mm512_shuffle_epi8(in, byteflip); - } - if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) { - return 0; - } - _mm256_mask_storeu_epi8( - latin1_output, mask, - _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in))); + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); } - return len; -} -template -std::pair -icelake_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, - char *latin1_output) { - const char16_t *end = buf + len; - const char16_t *start = buf; - __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - __m512i v_0xFF = _mm512_set1_epi16(0xff); - __m512i shufmask = _mm512_set_epi8( - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, - 36, 34, 32, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); - while (end - buf >= 32) { - __m512i in = _mm512_loadu_si512((__m512i *)buf); - if (big_endian) { - in = _mm512_shuffle_epi8(in, byteflip); + template + simdutf_really_inline size_t convert(const char *in, size_t size, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); } - if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) { - uint16_t word; - while ((word = (big_endian ? scalar::utf16::swap_bytes(uint16_t(*buf)) - : uint16_t(*buf))) <= 0xff) { - *latin1_output++ = uint8_t(word); - buf++; + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (utf8_continuation_mask & 1) { + return 0; // error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. } - return std::make_pair(result(error_code::TOO_LARGE, buf - start), - latin1_output); } - _mm256_storeu_si256( - (__m256i *)latin1_output, - _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in))); - latin1_output += 32; - buf += 32; - } - if (buf < end) { - uint32_t mask(uint32_t(1 << (end - buf)) - 1); - __m512i in = _mm512_maskz_loadu_epi16(mask, buf); - if (big_endian) { - in = _mm512_shuffle_epi8(in, byteflip); + if (errors()) { + return 0; } - if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) { - - uint16_t word; - while ((word = (big_endian ? scalar::utf16::swap_bytes(uint16_t(*buf)) - : uint16_t(*buf))) <= 0xff) { - *latin1_output++ = uint8_t(word); - buf++; + if (pos < size) { + size_t howmany = scalar::utf8_to_utf16::convert( + in + pos, size - pos, utf16_output); + if (howmany == 0) { + return 0; } - return std::make_pair(result(error_code::TOO_LARGE, buf - start), - latin1_output); + utf16_output += howmany; } - _mm256_mask_storeu_epi8( - latin1_output, mask, - _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in))); + return utf16_output - start; } - return std::make_pair(result(error_code::SUCCESS, len), latin1_output); -} -/* end file src/icelake/icelake_convert_utf16_to_latin1.inl.cpp */ -/* begin file src/icelake/icelake_convert_utf16_to_utf8.inl.cpp */ -// file included directly - -/** - * This function converts the input (inbuf, inlen), assumed to be valid - * UTF16 (little endian) into UTF-8 (to outbuf). The number of code units - * written is written to 'outlen' and the function reports the number of input - * word consumed. - */ -template -size_t utf16_to_utf8_avx512i(const char16_t *inbuf, size_t inlen, - unsigned char *outbuf, size_t *outlen) { - __m512i in; - __mmask32 inmask = _cvtu32_mask32(0x7fffffff); - __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - const char16_t *const inbuf_orig = inbuf; - const unsigned char *const outbuf_orig = outbuf; - int adjust = 0; - int carry = 0; - while (inlen >= 32) { - in = _mm512_loadu_si512(inbuf); - if (big_endian) { - in = _mm512_shuffle_epi8(in, byteflip); + template + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); } - inlen -= 31; - lastiteration: - inbuf += 31; - - failiteration: - const __mmask32 is234byte = _mm512_mask_cmp_epu16_mask( - inmask, in, _mm512_set1_epi16(0x0080), _MM_CMPINT_NLT); - - if (_ktestz_mask32_u8(inmask, is234byte)) { - // fast path for ASCII only - _mm512_mask_cvtepi16_storeu_epi8(outbuf, inmask, in); - outbuf += 31; - carry = 0; - - if (inlen < 32) { - goto tail; + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; } else { - continue; + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (errors() || (utf8_continuation_mask & 1)) { + // rewind_and_convert_with_errors will seek a potential error from + // in+pos onward, with the ability to go back up to pos bytes, and + // read size-pos bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + res.count += pos; + return res; + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. } } - - const __mmask32 is12byte = - _mm512_cmp_epu16_mask(in, _mm512_set1_epi16(0x0800), _MM_CMPINT_LT); - - if (_ktestc_mask32_u8(is12byte, inmask)) { - // fast path for 1 and 2 byte only - - const __m512i twobytes = _mm512_ternarylogic_epi32( - _mm512_slli_epi16(in, 8), _mm512_srli_epi16(in, 6), - _mm512_set1_epi16(0x3f3f), 0xa8); // (A|B)&C - in = _mm512_mask_add_epi16(in, is234byte, twobytes, - _mm512_set1_epi16(int16_t(0x80c0))); - const __m512i cmpmask = - _mm512_mask_blend_epi16(inmask, _mm512_set1_epi16(int16_t(0xffff)), - _mm512_set1_epi16(0x0800)); - const __mmask64 smoosh = - _mm512_cmp_epu8_mask(in, cmpmask, _MM_CMPINT_NLT); - const __m512i out = _mm512_maskz_compress_epi8(smoosh, in); - _mm512_mask_storeu_epi8(outbuf, - _cvtu64_mask64(_pext_u64(_cvtmask64_u64(smoosh), - _cvtmask64_u64(smoosh))), - out); - outbuf += 31 + _mm_popcnt_u32(_cvtmask32_u32(is234byte)); - carry = 0; - - if (inlen < 32) { - goto tail; - } else { - continue; + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + res.count += pos; + return res; + } + if (pos < size) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + utf16_output += res.count; } } - __m512i lo = _mm512_cvtepu16_epi32(_mm512_castsi512_si256(in)); - __m512i hi = _mm512_cvtepu16_epi32(_mm512_extracti32x8_epi32(in, 1)); - - __m512i taglo = _mm512_set1_epi32(0x8080e000); - __m512i taghi = taglo; - - const __m512i fc00masked = - _mm512_and_epi32(in, _mm512_set1_epi16(int16_t(0xfc00))); - const __mmask32 hisurr = _mm512_mask_cmp_epu16_mask( - inmask, fc00masked, _mm512_set1_epi16(int16_t(0xd800)), _MM_CMPINT_EQ); - const __mmask32 losurr = _mm512_cmp_epu16_mask( - fc00masked, _mm512_set1_epi16(int16_t(0xdc00)), _MM_CMPINT_EQ); - - int carryout = 0; - if (!_kortestz_mask32_u8(hisurr, losurr)) { - // handle surrogates + return result(error_code::SUCCESS, utf16_output - start); + } - __m512i los = _mm512_alignr_epi32(hi, lo, 1); - __m512i his = _mm512_alignr_epi32(lo, hi, 1); + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } - const __mmask32 hisurrhi = _kshiftri_mask32(hisurr, 16); - taglo = _mm512_mask_mov_epi32(taglo, __mmask16(hisurr), - _mm512_set1_epi32(0x808080f0)); - taghi = _mm512_mask_mov_epi32(taghi, __mmask16(hisurrhi), - _mm512_set1_epi32(0x808080f0)); +}; // struct utf8_checker +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ +/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ - lo = _mm512_mask_slli_epi32(lo, __mmask16(hisurr), lo, 10); - hi = _mm512_mask_slli_epi32(hi, __mmask16(hisurrhi), hi, 10); - los = _mm512_add_epi32(los, _mm512_set1_epi32(0xfca02400)); - his = _mm512_add_epi32(his, _mm512_set1_epi32(0xfca02400)); - lo = _mm512_mask_add_epi32(lo, __mmask16(hisurr), lo, los); - hi = _mm512_mask_add_epi32(hi, __mmask16(hisurrhi), hi, his); +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf8_to_utf16 { - carryout = _cvtu32_mask32(_kshiftri_mask32(hisurr, 30)); +using namespace simd; - const uint32_t h = _cvtmask32_u32(hisurr); - const uint32_t l = _cvtmask32_u32(losurr); - // check for mismatched surrogates - if ((h + h + carry) ^ l) { - const uint32_t lonohi = l & ~(h + h + carry); - const uint32_t hinolo = h & ~(l >> 1); - inlen = _tzcnt_u32(hinolo | lonohi); - inmask = __mmask32(0x7fffffff & ((1U << inlen) - 1)); - in = _mm512_maskz_mov_epi16(inmask, in); - adjust = (int)inlen - 31; - inlen = 0; - goto failiteration; +template +simdutf_warn_unused size_t convert_valid(const char *input, size_t size, + char16_t *utf16_output) noexcept { + // The implementation is not specific to haswell and should be moved to the + // generic directory. + size_t pos = 0; + char16_t *start{utf16_output}; + const size_t safety_margin = 16; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + // this loop could be unrolled further. For example, we could process the + // mask far more than 64 bytes. + simd8x64 in(reinterpret_cast(input + pos)); + if (in.is_ascii()) { + in.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // Slow path. We hope that the compiler will recognize that this is a slow + // path. Anything that is not a continuation mask is a 'leading byte', + // that is, the start of a new code point. + uint64_t utf8_continuation_mask = in.lt(-65 + 1); + // -65 is 0b10111111 in two-complement's, so largest possible continuation + // byte + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + // The *start* of code points is not so useful, rather, we want the *end* + // of code points. + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times when using solely + // the slow/regular path, and at least four times if there are fast paths. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + // + // Thus we may allow convert_masked_utf8_to_utf16 to process + // more bytes at a time under a fast-path mode where 16 bytes + // are consumed at once (e.g., when encountering ASCII). + size_t consumed = convert_masked_utf8_to_utf16( + input + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. } + } + utf16_output += scalar::utf8_to_utf16::convert_valid( + input + pos, size - pos, utf16_output); + return utf16_output - start; +} - hi = _mm512_maskz_mov_epi32(_cvtu32_mask16(0x7fff), hi); - carry = carryout; - - __m512i mslo = - _mm512_multishift_epi64_epi8(_mm512_set1_epi64(0x20262c3200060c12), lo); - - __m512i mshi = - _mm512_multishift_epi64_epi8(_mm512_set1_epi64(0x20262c3200060c12), hi); +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ +// transcoding from UTF-8 to UTF-32 +/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ - const __mmask32 outmask = __mmask32(_kandn_mask64(losurr, inmask)); - const __mmask64 outmhi = _kshiftri_mask64(outmask, 16); +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf8_to_utf32 { +using namespace simd; - const __mmask32 is1byte = __mmask32(_knot_mask64(is234byte)); - const __mmask64 is1bhi = _kshiftri_mask64(is1byte, 16); - const __mmask64 is12bhi = _kshiftri_mask64(is12byte, 16); +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - taglo = _mm512_mask_mov_epi32(taglo, __mmask16(is12byte), - _mm512_set1_epi32(0x80c00000)); - taghi = _mm512_mask_mov_epi32(taghi, __mmask16(is12bhi), - _mm512_set1_epi32(0x80c00000)); - __m512i magiclo = _mm512_mask_blend_epi32(__mmask16(outmask), - _mm512_set1_epi32(0xffffffff), - _mm512_set1_epi32(0x00010101)); - __m512i magichi = _mm512_mask_blend_epi32(__mmask16(outmhi), - _mm512_set1_epi32(0xffffffff), - _mm512_set1_epi32(0x00010101)); + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, - magiclo = _mm512_mask_blend_epi32(__mmask16(outmask), - _mm512_set1_epi32(0xffffffff), - _mm512_set1_epi32(0x00010101)); - magichi = _mm512_mask_blend_epi32(__mmask16(outmhi), - _mm512_set1_epi32(0xffffffff), - _mm512_set1_epi32(0x00010101)); + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, - mslo = _mm512_ternarylogic_epi32(mslo, _mm512_set1_epi32(0x3f3f3f3f), taglo, - 0xea); // A&B|C - mshi = _mm512_ternarylogic_epi32(mshi, _mm512_set1_epi32(0x3f3f3f3f), taghi, - 0xea); - mslo = _mm512_mask_slli_epi32(mslo, __mmask16(is1byte), lo, 24); + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, - mshi = _mm512_mask_slli_epi32(mshi, __mmask16(is1bhi), hi, 24); + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - const __mmask64 wantlo = - _mm512_cmp_epu8_mask(mslo, magiclo, _MM_CMPINT_NLT); - const __mmask64 wanthi = - _mm512_cmp_epu8_mask(mshi, magichi, _MM_CMPINT_NLT); - const __m512i outlo = _mm512_maskz_compress_epi8(wantlo, mslo); - const __m512i outhi = _mm512_maskz_compress_epi8(wanthi, mshi); - const uint64_t wantlo_uint64 = _cvtmask64_u64(wantlo); - const uint64_t wanthi_uint64 = _cvtmask64_u64(wanthi); + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} - uint64_t advlo = _mm_popcnt_u64(wantlo_uint64); - uint64_t advhi = _mm_popcnt_u64(wanthi_uint64); +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; - _mm512_mask_storeu_epi8( - outbuf, _cvtu64_mask64(_pext_u64(wantlo_uint64, wantlo_uint64)), outlo); - _mm512_mask_storeu_epi8( - outbuf + advlo, _cvtu64_mask64(_pext_u64(wanthi_uint64, wanthi_uint64)), - outhi); - outbuf += advlo + advhi; + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); } - outbuf += -adjust; -tail: - if (inlen != 0) { - // We must have inlen < 31. - inmask = _cvtu32_mask32((1U << inlen) - 1); - in = _mm512_maskz_loadu_epi16(inmask, inbuf); - if (big_endian) { - in = _mm512_shuffle_epi8(in, byteflip); + simdutf_really_inline size_t convert(const char *in, size_t size, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 words when calling convert_masked_utf8_to_utf32. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 16 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); } - adjust = (int)inlen - 31; - inlen = 0; - goto lastiteration; + // If the input is long enough, then we have that margin-1 is the fourth + // last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (utf8_continuation_mask & 1) { + return 0; // we have an error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = + scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output); + if (howmany == 0) { + return 0; + } + utf32_output += howmany; + } + return utf32_output - start; } - *outlen = (outbuf - outbuf_orig) + adjust; - return ((inbuf - inbuf_orig) + adjust); -} -/* end file src/icelake/icelake_convert_utf16_to_utf8.inl.cpp */ -/* begin file src/icelake/icelake_convert_utf16_to_utf32.inl.cpp */ -// file included directly -/* - Returns a pair: the first unprocessed byte from buf and utf32_output - A scalar routing should carry on the conversion of the tail. -*/ -template -std::tuple -convert_utf16_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_output) { - const char16_t *end = buf + len; - const __m512i v_fc00 = _mm512_set1_epi16((uint16_t)0xfc00); - const __m512i v_d800 = _mm512_set1_epi16((uint16_t)0xd800); - const __m512i v_dc00 = _mm512_set1_epi16((uint16_t)0xdc00); - __mmask32 carry{0}; - const __m512i byteflip = _mm512_setr_epi64( - 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, - 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - while (std::distance(buf, end) >= 32) { - // Always safe because buf + 32 <= end so that end - buf >= 32 bytes: - __m512i in = _mm512_loadu_si512((__m512i *)buf); - if (big_endian) { - in = _mm512_shuffle_epi8(in, byteflip); + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); } - - // H - bitmask for high surrogates - const __mmask32 H = - _mm512_cmpeq_epi16_mask(_mm512_and_si512(in, v_fc00), v_d800); - // H - bitmask for low surrogates - const __mmask32 L = - _mm512_cmpeq_epi16_mask(_mm512_and_si512(in, v_fc00), v_dc00); - - if ((H | L)) { - // surrogate pair(s) in a register - const __mmask32 V = - (L ^ - (carry | (H << 1))); // A high surrogate must be followed by low one - // and a low one must be preceded by a high one. - // If valid, V should be equal to 0 - - if (V == 0) { - // valid case - /* - Input surrogate pair: - |1101.11aa.aaaa.aaaa|1101.10bb.bbbb.bbbb| - low surrogate high surrogate - */ - /* 1. Expand all code units to 32-bit code units - in - |0000.0000.0000.0000.1101.11aa.aaaa.aaaa|0000.0000.0000.0000.1101.10bb.bbbb.bbbb| - */ - const __m512i first = _mm512_cvtepu16_epi32(_mm512_castsi512_si256(in)); - const __m512i second = - _mm512_cvtepu16_epi32(_mm512_extracti32x8_epi32(in, 1)); - - /* 2. Shift by one 16-bit word to align low surrogates with high - surrogates in - |0000.0000.0000.0000.1101.11aa.aaaa.aaaa|0000.0000.0000.0000.1101.10bb.bbbb.bbbb| - shifted - |????.????.????.????.????.????.????.????|0000.0000.0000.0000.1101.11aa.aaaa.aaaa| - */ - const __m512i shifted_first = _mm512_alignr_epi32(second, first, 1); - const __m512i shifted_second = - _mm512_alignr_epi32(_mm512_setzero_si512(), second, 1); - - /* 3. Align all high surrogates in first and second by shifting to the - left by 10 bits - |0000.0000.0000.0000.1101.11aa.aaaa.aaaa|0000.0011.0110.bbbb.bbbb.bb00.0000.0000| - */ - const __m512i aligned_first = - _mm512_mask_slli_epi32(first, (__mmask16)H, first, 10); - const __m512i aligned_second = - _mm512_mask_slli_epi32(second, (__mmask16)(H >> 16), second, 10); - - /* 4. Remove surrogate prefixes and add offset 0x10000 by adding in, - shifted and constant in - |0000.0000.0000.0000.1101.11aa.aaaa.aaaa|0000.0011.0110.bbbb.bbbb.bb00.0000.0000| - shifted - |????.????.????.????.????.????.????.????|0000.0000.0000.0000.1101.11aa.aaaa.aaaa| - constant|1111.1100.1010.0000.0010.0100.0000.0000|1111.1100.1010.0000.0010.0100.0000.0000| - */ - const __m512i constant = _mm512_set1_epi32((uint32_t)0xfca02400); - const __m512i added_first = _mm512_mask_add_epi32( - aligned_first, (__mmask16)H, aligned_first, shifted_first); - const __m512i utf32_first = _mm512_mask_add_epi32( - added_first, (__mmask16)H, added_first, constant); - - const __m512i added_second = - _mm512_mask_add_epi32(aligned_second, (__mmask16)(H >> 16), - aligned_second, shifted_second); - const __m512i utf32_second = _mm512_mask_add_epi32( - added_second, (__mmask16)(H >> 16), added_second, constant); - - // 5. Store all valid UTF-32 code units (low surrogate positions and - // 32nd word are invalid) - const __mmask32 valid = ~L & 0x7fffffff; - // We deliberately do a _mm512_maskz_compress_epi32 followed by - // storeu_epi32 to ease performance portability to Zen 4. - const __m512i compressed_first = - _mm512_maskz_compress_epi32((__mmask16)(valid), utf32_first); - const size_t howmany1 = count_ones((uint16_t)(valid)); - _mm512_storeu_si512((__m512i *)utf32_output, compressed_first); - utf32_output += howmany1; - const __m512i compressed_second = - _mm512_maskz_compress_epi32((__mmask16)(valid >> 16), utf32_second); - const size_t howmany2 = count_ones((uint16_t)(valid >> 16)); - // The following could be unsafe in some cases? - //_mm512_storeu_epi32((__m512i *) utf32_output, compressed_second); - _mm512_mask_storeu_epi32((__m512i *)utf32_output, - __mmask16((1 << howmany2) - 1), - compressed_second); - utf32_output += howmany2; - // Only process 31 code units, but keep track if the 31st word is a high - // surrogate as a carry - buf += 31; - carry = (H >> 30) & 0x1; + // If the input is long enough, then we have that margin-1 is the fourth + // last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; } else { - // invalid case - return std::make_tuple(buf + carry, utf32_output, false); + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (errors() || (utf8_continuation_mask & 1)) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + res.count += pos; + return res; + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. } - } else { - // no surrogates - // extend all thirty-two 16-bit code units to thirty-two 32-bit code units - _mm512_storeu_si512((__m512i *)(utf32_output), - _mm512_cvtepu16_epi32(_mm512_castsi512_si256(in))); - _mm512_storeu_si512( - (__m512i *)(utf32_output) + 1, - _mm512_cvtepu16_epi32(_mm512_extracti32x8_epi32(in, 1))); - utf32_output += 32; - buf += 32; - carry = 0; - } - } // while - return std::make_tuple(buf + carry, utf32_output, true); -} -/* end file src/icelake/icelake_convert_utf16_to_utf32.inl.cpp */ -/* begin file src/icelake/icelake_convert_utf32_to_latin1.inl.cpp */ -// file included directly -size_t icelake_convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) { - const char32_t *end = buf + len; - __m512i v_0xFF = _mm512_set1_epi32(0xff); - __m512i shufmask = _mm512_set_epi8( - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, - 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4, 0); - while (end - buf >= 16) { - __m512i in = _mm512_loadu_si512((__m512i *)buf); - if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) { - return 0; - } - _mm_storeu_si128( - (__m128i *)latin1_output, - _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in))); - latin1_output += 16; - buf += 16; - } - if (buf < end) { - uint16_t mask = uint16_t((1 << (end - buf)) - 1); - __m512i in = _mm512_maskz_loadu_epi32(mask, buf); - if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) { - return 0; } - _mm_mask_storeu_epi8( - latin1_output, mask, - _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in))); - } - return len; -} - -std::pair -icelake_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) { - const char32_t *end = buf + len; - const char32_t *start = buf; - __m512i v_0xFF = _mm512_set1_epi32(0xff); - __m512i shufmask = _mm512_set_epi8( - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, - 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4, 0); - while (end - buf >= 16) { - __m512i in = _mm512_loadu_si512((__m512i *)buf); - if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) { - while (uint32_t(*buf) <= 0xff) { - *latin1_output++ = uint8_t(*buf++); - } - return std::make_pair(result(error_code::TOO_LARGE, buf - start), - latin1_output); + if (errors()) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + res.count += pos; + return res; } - _mm_storeu_si128( - (__m128i *)latin1_output, - _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in))); - latin1_output += 16; - buf += 16; - } - if (buf < end) { - uint16_t mask = uint16_t((1 << (end - buf)) - 1); - __m512i in = _mm512_maskz_loadu_epi32(mask, buf); - if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) { - while (uint32_t(*buf) <= 0xff) { - *latin1_output++ = uint8_t(*buf++); + if (pos < size) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + utf32_output += res.count; } - return std::make_pair(result(error_code::TOO_LARGE, buf - start), - latin1_output); } - _mm_mask_storeu_epi8( - latin1_output, mask, - _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in))); + return result(error_code::SUCCESS, utf32_output - start); } - return std::make_pair(result(error_code::SUCCESS, len), latin1_output); -} -/* end file src/icelake/icelake_convert_utf32_to_latin1.inl.cpp */ -/* begin file src/icelake/icelake_convert_utf32_to_utf8.inl.cpp */ -// file included directly -// Todo: currently, this is just the haswell code, optimize for icelake kernel. -std::pair -avx512_convert_utf32_to_utf8(const char32_t *buf, size_t len, - char *utf8_output) { - const char32_t *end = buf + len; - const __m256i v_0000 = _mm256_setzero_si256(); - const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); - const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80); - const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800); - const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080); - const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff); - __m256i running_max = _mm256_setzero_si256(); - __m256i forbidden_bytemask = _mm256_setzero_si256(); - - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); - __m256i nextin = _mm256_loadu_si256((__m256i *)buf + 1); - running_max = _mm256_max_epu32(_mm256_max_epu32(in, running_max), nextin); +}; // struct utf8_checker +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ +/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ - // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned - // saturation - __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff), - _mm256_and_si256(nextin, v_7fffffff)); - in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000); +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf8_to_utf32 { - // Try to apply UTF-16 => UTF-8 routine on 256 bits - // (haswell/avx2_convert_utf16_to_utf8.cpp) +using namespace simd; - if (_mm256_testz_si256(in_16, v_ff80)) { // ASCII fast path!!!! - // 1. pack the bytes - const __m128i utf8_packed = _mm_packus_epi16( - _mm256_castsi256_si128(in_16), _mm256_extractf128_si256(in_16, 1)); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! +simdutf_warn_unused size_t convert_valid(const char *input, size_t size, + char32_t *utf32_output) noexcept { + size_t pos = 0; + char32_t *start{utf32_output}; + const size_t safety_margin = 16; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 in(reinterpret_cast(input + pos)); + if (in.is_ascii()) { + in.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // -65 is 0b10111111 in two-complement's, so largest possible continuation + // byte + uint64_t utf8_continuation_mask = in.lt(-65 + 1); + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + size_t max_starting_point = (pos + 64) - 12; + while (pos < max_starting_point) { + size_t consumed = convert_masked_utf8_to_utf32( + input + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } } - // no bits set above 7th bit - const __m256i one_byte_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000); - const uint32_t one_byte_bitmask = - static_cast(_mm256_movemask_epi8(one_byte_bytemask)); - - // no bits set above 11th bit - const __m256i one_or_two_bytes_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000); - const uint32_t one_or_two_bytes_bitmask = - static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); - if (one_or_two_bytes_bitmask == 0xffffffff) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); - const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); - - // t0 = [000a|aaaa|bbbb|bb00] - const __m256i t0 = _mm256_slli_epi16(in_16, 2); - // t1 = [000a|aaaa|0000|0000] - const __m256i t1 = _mm256_and_si256(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const __m256i t2 = _mm256_and_si256(in_16, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const __m256i t3 = _mm256_or_si256(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const __m256i t4 = _mm256_or_si256(t3, v_c080); - - // 2. merge ASCII and 2-byte codewords - const __m256i utf8_unpacked = - _mm256_blendv_epi8(t4, in_16, one_byte_bytemask); - - // 3. prepare bitmask for 8-bit lookup - const uint32_t M0 = one_byte_bitmask & 0x55555555; - const uint32_t M1 = M0 >> 7; - const uint32_t M2 = (M1 | M0) & 0x00ff00ff; - // 4. pack the bytes - - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; - const uint8_t *row_2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> - 16)][0]; - - const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); - const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); + } + utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, + utf32_output); + return utf32_output - start; +} - const __m256i utf8_packed = _mm256_shuffle_epi8( - utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); - // 5. store bytes - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_castsi256_si128(utf8_packed)); - utf8_output += row[0]; - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_extractf128_si256(utf8_packed, 1)); - utf8_output += row_2[0]; +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +// other functions +/* begin file src/generic/utf16.h */ +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf16 { - // 6. adjust pointers - buf += 16; - continue; +template +simdutf_really_inline size_t count_code_points(const char16_t *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input(reinterpret_cast(in + pos)); + if (!match_system(big_endian)) { + input.swap_bytes(); } - // Must check for overflow in packing - const __m256i saturation_bytemask = _mm256_cmpeq_epi32( - _mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000); - const uint32_t saturation_bitmask = - static_cast(_mm256_movemask_epi8(saturation_bytemask)); - if (saturation_bitmask == 0xffffffff) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800); - forbidden_bytemask = _mm256_or_si256( - forbidden_bytemask, - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800)); - - const __m256i dup_even = _mm256_setr_epi16( - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); - - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. + uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF); + count += count_ones(not_pair) / 2; + } + return count + + scalar::utf16::count_code_points(in + pos, size - pos); +} - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. +template +simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! + for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input(reinterpret_cast(in + pos)); + if (!match_system(big_endian)) { + input.swap_bytes(); + } + uint64_t ascii_mask = input.lteq(0x7F); + uint64_t twobyte_mask = input.lteq(0x7FF); + uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF); - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); + size_t ascii_count = count_ones(ascii_mask) / 2; + size_t twobyte_count = count_ones(twobyte_mask & ~ascii_mask) / 2; + size_t threebyte_count = count_ones(not_pair_mask & ~twobyte_mask) / 2; + size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2; + count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + + ascii_count; + } + return count + scalar::utf16::utf8_length_from_utf16(in + pos, + size - pos); +} - // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] - const __m256i s0 = _mm256_srli_epi16(in_16, 4); - // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] - const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); - // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] - const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); - // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); - const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, - simdutf_vec(0b0100000000000000)); - const __m256i s4 = _mm256_xor_si256(s3, m0); -#undef simdutf_vec +template +simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, + size_t size) { + return count_code_points(in, size); +} - // 4. expand code units 16-bit => 32-bit - const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); - const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); +simdutf_really_inline void +change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { + size_t pos = 0; - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint32_t mask = (one_byte_bitmask & 0x55555555) | - (one_or_two_bytes_bitmask & 0xaaaaaaaa); - // Due to the wider registers, the following path is less likely to be - // useful. - /*if(mask == 0) { - // We only have three-byte code units. Use fast path. - const __m256i shuffle = - _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, - 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = - _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = - _mm256_shuffle_epi8(out1, shuffle); - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); - utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); - utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; - continue; - }*/ - const uint8_t mask0 = uint8_t(mask); - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); - const __m128i utf8_0 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); + while (pos < size / 32 * 32) { + simd16x32 input(reinterpret_cast(in + pos)); + input.swap_bytes(); + input.store(reinterpret_cast(output)); + pos += 32; + output += 32; + } - const uint8_t mask1 = static_cast(mask >> 8); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); - const __m128i utf8_1 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); + scalar::utf16::change_endianness_utf16(in + pos, size - pos, output); +} - const uint8_t mask2 = static_cast(mask >> 16); - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; - const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); - const __m128i utf8_2 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); +} // namespace utf16 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/utf16.h */ +/* begin file src/generic/utf8.h */ - const uint8_t mask3 = static_cast(mask >> 24); - const uint8_t *row3 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; - const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); - const __m128i utf8_3 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); +namespace simdutf { +namespace ppc64 { +namespace { +namespace utf8 { - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += row0[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += row1[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_2); - utf8_output += row2[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_3); - utf8_output += row3[0]; - buf += 16; - } else { - // case: at least one 32-bit word is larger than 0xFFFF <=> it will - // produce four UTF-8 bytes. Let us do a scalar fallback. It may seem - // wasteful to use scalar code, but being efficient with SIMD may require - // large, non-trivial tables? - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFFFF80) == 0) { // 1-byte (ASCII) - *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { // 2-byte - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { // 3-byte - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(nullptr, utf8_output); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { // 4-byte - if (word > 0x10FFFF) { - return std::make_pair(nullptr, utf8_output); - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while +using namespace simd; - // check for invalid input - const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff); - if (static_cast(_mm256_movemask_epi8(_mm256_cmpeq_epi32( - _mm256_max_epu32(running_max, v_10ffff), v_10ffff))) != 0xffffffff) { - return std::make_pair(nullptr, utf8_output); +simdutf_really_inline size_t count_code_points(const char *in, size_t size) { + size_t pos = 0; + size_t count = 0; + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t utf8_continuation_mask = input.gt(-65); + count += count_ones(utf8_continuation_mask); } + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} - if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != 0) { - return std::make_pair(nullptr, utf8_output); +simdutf_really_inline size_t utf16_length_from_utf8(const char *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + // We count one word for anything that is not a continuation (so + // leading bytes). + count += 64 - count_ones(utf8_continuation_mask); + int64_t utf8_4byte = input.gteq_unsigned(240); + count += count_ones(utf8_4byte); } - - return std::make_pair(buf, utf8_output); + return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); } +} // namespace utf8 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdutf +/* end file src/generic/utf8.h */ -// Todo: currently, this is just the haswell code, optimize for icelake kernel. -std::pair -avx512_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, - char *utf8_output) { - const char32_t *end = buf + len; - const char32_t *start = buf; - - const __m256i v_0000 = _mm256_setzero_si256(); - const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); - const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80); - const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800); - const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080); - const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff); - const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff); - - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 +// +// Implementation-specific overrides +// +namespace simdutf { +namespace ppc64 { - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); - __m256i nextin = _mm256_loadu_si256((__m256i *)buf + 1); - // Check for too large input - const __m256i max_input = - _mm256_max_epu32(_mm256_max_epu32(in, nextin), v_10ffff); - if (static_cast(_mm256_movemask_epi8( - _mm256_cmpeq_epi32(max_input, v_10ffff))) != 0xffffffff) { - return std::make_pair(result(error_code::TOO_LARGE, buf - start), - utf8_output); +simdutf_warn_unused int +implementation::detect_encodings(const char *input, + size_t length) const noexcept { + // If there is a BOM, then we trust it. + auto bom_encoding = simdutf::BOM::check_bom(input, length); + if (bom_encoding != encoding_type::unspecified) { + return bom_encoding; + } + // todo: reimplement as a one-pass algorithm. + int out = 0; + if (validate_utf8(input, length)) { + out |= encoding_type::UTF8; + } + if ((length % 2) == 0) { + if (validate_utf16(reinterpret_cast(input), length / 2)) { + out |= encoding_type::UTF16_LE; } - - // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned - // saturation - __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff), - _mm256_and_si256(nextin, v_7fffffff)); - in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000); - - // Try to apply UTF-16 => UTF-8 routine on 256 bits - // (haswell/avx2_convert_utf16_to_utf8.cpp) - - if (_mm256_testz_si256(in_16, v_ff80)) { // ASCII fast path!!!! - // 1. pack the bytes - const __m128i utf8_packed = _mm_packus_epi16( - _mm256_castsi256_si128(in_16), _mm256_extractf128_si256(in_16, 1)); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! + } + if ((length % 4) == 0) { + if (validate_utf32(reinterpret_cast(input), length / 4)) { + out |= encoding_type::UTF32_LE; } - // no bits set above 7th bit - const __m256i one_byte_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000); - const uint32_t one_byte_bitmask = - static_cast(_mm256_movemask_epi8(one_byte_bytemask)); - - // no bits set above 11th bit - const __m256i one_or_two_bytes_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000); - const uint32_t one_or_two_bytes_bitmask = - static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); - if (one_or_two_bytes_bitmask == 0xffffffff) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); - const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); + } - // t0 = [000a|aaaa|bbbb|bb00] - const __m256i t0 = _mm256_slli_epi16(in_16, 2); - // t1 = [000a|aaaa|0000|0000] - const __m256i t1 = _mm256_and_si256(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const __m256i t2 = _mm256_and_si256(in_16, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const __m256i t3 = _mm256_or_si256(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const __m256i t4 = _mm256_or_si256(t3, v_c080); + return out; +} - // 2. merge ASCII and 2-byte codewords - const __m256i utf8_unpacked = - _mm256_blendv_epi8(t4, in_16, one_byte_bytemask); +simdutf_warn_unused bool +implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return ppc64::utf8_validation::generic_validate_utf8(buf, len); +} - // 3. prepare bitmask for 8-bit lookup - const uint32_t M0 = one_byte_bitmask & 0x55555555; - const uint32_t M1 = M0 >> 7; - const uint32_t M2 = (M1 | M0) & 0x00ff00ff; - // 4. pack the bytes +simdutf_warn_unused result implementation::validate_utf8_with_errors( + const char *buf, size_t len) const noexcept { + return ppc64::utf8_validation::generic_validate_utf8_with_errors(buf, len); +} - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; - const uint8_t *row_2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> - 16)][0]; +simdutf_warn_unused bool +implementation::validate_ascii(const char *buf, size_t len) const noexcept { + return ppc64::utf8_validation::generic_validate_ascii(buf, len); +} - const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); - const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); +simdutf_warn_unused result implementation::validate_ascii_with_errors( + const char *buf, size_t len) const noexcept { + return ppc64::utf8_validation::generic_validate_ascii_with_errors(buf, len); +} - const __m256i utf8_packed = _mm256_shuffle_epi8( - utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); - // 5. store bytes - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_castsi256_si128(utf8_packed)); - utf8_output += row[0]; - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_extractf128_si256(utf8_packed, 1)); - utf8_output += row_2[0]; +simdutf_warn_unused bool +implementation::validate_utf16le(const char16_t *buf, + size_t len) const noexcept { + return scalar::utf16::validate(buf, len); +} - // 6. adjust pointers - buf += 16; - continue; - } - // Must check for overflow in packing - const __m256i saturation_bytemask = _mm256_cmpeq_epi32( - _mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000); - const uint32_t saturation_bitmask = - static_cast(_mm256_movemask_epi8(saturation_bytemask)); - if (saturation_bitmask == 0xffffffff) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes +simdutf_warn_unused bool +implementation::validate_utf16be(const char16_t *buf, + size_t len) const noexcept { + return scalar::utf16::validate(buf, len); +} - // Check for illegal surrogate code units - const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800); - const __m256i forbidden_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800); - if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != - 0x0) { - return std::make_pair(result(error_code::SURROGATE, buf - start), - utf8_output); - } +simdutf_warn_unused result implementation::validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept { + return scalar::utf16::validate_with_errors(buf, len); +} - const __m256i dup_even = _mm256_setr_epi16( - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); +simdutf_warn_unused result implementation::validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept { + return scalar::utf16::validate_with_errors(buf, len); +} - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes +simdutf_warn_unused result implementation::validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept { + return scalar::utf32::validate_with_errors(buf, len); +} - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. +simdutf_warn_unused bool +implementation::validate_utf32(const char16_t *buf, size_t len) const noexcept { + return scalar::utf32::validate(buf, len); +} - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( + const char * /*buf*/, size_t /*len*/, + char16_t * /*utf16_output*/) const noexcept { + return 0; // stub +} - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( + const char * /*buf*/, size_t /*len*/, + char16_t * /*utf16_output*/) const noexcept { + return 0; // stub +} - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); +simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( + const char * /*buf*/, size_t /*len*/, + char16_t * /*utf16_output*/) const noexcept { + return result(error_code::OTHER, 0); // stub +} - // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] - const __m256i s0 = _mm256_srli_epi16(in_16, 4); - // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] - const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); - // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] - const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); - // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); - const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, - simdutf_vec(0b0100000000000000)); - const __m256i s4 = _mm256_xor_si256(s3, m0); -#undef simdutf_vec +simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( + const char * /*buf*/, size_t /*len*/, + char16_t * /*utf16_output*/) const noexcept { + return result(error_code::OTHER, 0); // stub +} - // 4. expand code units 16-bit => 32-bit - const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); - const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( + const char * /*buf*/, size_t /*len*/, + char16_t * /*utf16_output*/) const noexcept { + return 0; // stub +} - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint32_t mask = (one_byte_bitmask & 0x55555555) | - (one_or_two_bytes_bitmask & 0xaaaaaaaa); - // Due to the wider registers, the following path is less likely to be - // useful. - /*if(mask == 0) { - // We only have three-byte code units. Use fast path. - const __m256i shuffle = - _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, - 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = - _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = - _mm256_shuffle_epi8(out1, shuffle); - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); - utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); - utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; - continue; - }*/ - const uint8_t mask0 = uint8_t(mask); - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); - const __m128i utf8_0 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( + const char * /*buf*/, size_t /*len*/, + char16_t * /*utf16_output*/) const noexcept { + return 0; // stub +} - const uint8_t mask1 = static_cast(mask >> 8); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); - const __m128i utf8_1 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); +simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( + const char * /*buf*/, size_t /*len*/, + char32_t * /*utf16_output*/) const noexcept { + return 0; // stub +} - const uint8_t mask2 = static_cast(mask >> 16); - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; - const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); - const __m128i utf8_2 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); +simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( + const char * /*buf*/, size_t /*len*/, + char32_t * /*utf16_output*/) const noexcept { + return result(error_code::OTHER, 0); // stub +} - const uint8_t mask3 = static_cast(mask >> 24); - const uint8_t *row3 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; - const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); - const __m128i utf8_3 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( + const char * /*buf*/, size_t /*len*/, + char32_t * /*utf16_output*/) const noexcept { + return 0; // stub +} - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += row0[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += row1[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_2); - utf8_output += row2[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_3); - utf8_output += row3[0]; - buf += 16; - } else { - // case: at least one 32-bit word is larger than 0xFFFF <=> it will - // produce four UTF-8 bytes. Let us do a scalar fallback. It may seem - // wasteful to use scalar code, but being efficient with SIMD may require - // large, non-trivial tables? - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFFFF80) == 0) { // 1-byte (ASCII) - *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { // 2-byte - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { // 3-byte - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k), utf8_output); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { // 4-byte - if (word > 0x10FFFF) { - return std::make_pair( - result(error_code::TOO_LARGE, buf - start + k), utf8_output); - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf16_to_utf8::convert(buf, len, + utf8_output); +} - return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf16_to_utf8::convert(buf, len, utf8_output); } -/* end file src/icelake/icelake_convert_utf32_to_utf8.inl.cpp */ -/* begin file src/icelake/icelake_convert_utf32_to_utf16.inl.cpp */ -// file included directly -// Todo: currently, this is just the haswell code, optimize for icelake kernel. -template -std::pair -avx512_convert_utf32_to_utf16(const char32_t *buf, size_t len, - char16_t *utf16_output) { - const char32_t *end = buf + len; +simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf16_to_utf8::convert_with_errors( + buf, len, utf8_output); +} - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - __m256i forbidden_bytemask = _mm256_setzero_si256(); +simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf16_to_utf8::convert_with_errors( + buf, len, utf8_output); +} - while (end - buf >= std::ptrdiff_t(8 + safety_margin)) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf16_to_utf8::convert_valid(buf, len, + utf8_output); +} - const __m256i v_00000000 = _mm256_setzero_si256(); - const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000); +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf16_to_utf8::convert_valid(buf, len, + utf8_output); +} - // no bits set above 16th bit <=> can pack to UTF16 without surrogate pairs - const __m256i saturation_bytemask = - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000); - const uint32_t saturation_bitmask = - static_cast(_mm256_movemask_epi8(saturation_bytemask)); +simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf32_to_utf8::convert(buf, len, utf8_output); +} - if (saturation_bitmask == 0xffffffff) { - const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800); - const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800); - forbidden_bytemask = _mm256_or_si256( - forbidden_bytemask, - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800)); +simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf32_to_utf8::convert_with_errors(buf, len, utf8_output); +} - __m128i utf16_packed = _mm_packus_epi32(_mm256_castsi256_si128(in), - _mm256_extractf128_si256(in, 1)); - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - utf16_packed = _mm_shuffle_epi8(utf16_packed, swap); - } - _mm_storeu_si128((__m128i *)utf16_output, utf16_packed); - utf16_output += 8; - buf += 8; - } else { - size_t forward = 7; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(nullptr, utf16_output); - } - *utf16_output++ = - big_endian - ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) - : char16_t(word); - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return std::make_pair(nullptr, utf16_output); - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (big_endian) { - high_surrogate = - uint16_t((high_surrogate >> 8) | (high_surrogate << 8)); - low_surrogate = - uint16_t((low_surrogate >> 8) | (low_surrogate << 8)); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - } - } - buf += k; - } - } +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + return scalar::utf32_to_utf8::convert_valid(buf, len, utf8_output); +} - // check for invalid input - if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != 0) { - return std::make_pair(nullptr, utf16_output); - } +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf32_to_utf16::convert(buf, len, + utf16_output); +} - return std::make_pair(buf, utf16_output); +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf32_to_utf16::convert(buf, len, + utf16_output); } -// Todo: currently, this is just the haswell code, optimize for icelake kernel. -template -std::pair -avx512_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, - char16_t *utf16_output) { - const char32_t *start = buf; - const char32_t *end = buf + len; +simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf32_to_utf16::convert_with_errors( + buf, len, utf16_output); +} - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 +simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf32_to_utf16::convert_with_errors( + buf, len, utf16_output); +} - while (end - buf >= std::ptrdiff_t(8 + safety_margin)) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf32_to_utf16::convert_valid( + buf, len, utf16_output); +} - const __m256i v_00000000 = _mm256_setzero_si256(); - const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000); +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return scalar::utf32_to_utf16::convert_valid(buf, len, + utf16_output); +} - // no bits set above 16th bit <=> can pack to UTF16 without surrogate pairs - const __m256i saturation_bytemask = - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000); - const uint32_t saturation_bitmask = - static_cast(_mm256_movemask_epi8(saturation_bytemask)); +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::utf16_to_utf32::convert(buf, len, + utf32_output); +} - if (saturation_bitmask == 0xffffffff) { - const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800); - const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800); - const __m256i forbidden_bytemask = - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800); - if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != - 0x0) { - return std::make_pair(result(error_code::SURROGATE, buf - start), - utf16_output); - } +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::utf16_to_utf32::convert(buf, len, + utf32_output); +} - __m128i utf16_packed = _mm_packus_epi32(_mm256_castsi256_si128(in), - _mm256_extractf128_si256(in, 1)); - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - utf16_packed = _mm_shuffle_epi8(utf16_packed, swap); - } - _mm_storeu_si128((__m128i *)utf16_output, utf16_packed); - utf16_output += 8; - buf += 8; - } else { - size_t forward = 7; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k), utf16_output); - } - *utf16_output++ = - big_endian - ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) - : char16_t(word); - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return std::make_pair( - result(error_code::TOO_LARGE, buf - start + k), utf16_output); - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (big_endian) { - high_surrogate = - uint16_t((high_surrogate >> 8) | (high_surrogate << 8)); - low_surrogate = - uint16_t((low_surrogate >> 8) | (low_surrogate << 8)); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - } - } - buf += k; - } - } +simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::utf16_to_utf32::convert_with_errors( + buf, len, utf32_output); +} - return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output); +simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::utf16_to_utf32::convert_with_errors( + buf, len, utf32_output); } -/* end file src/icelake/icelake_convert_utf32_to_utf16.inl.cpp */ -/* begin file src/icelake/icelake_ascii_validation.inl.cpp */ -// file included directly -bool validate_ascii(const char *buf, size_t len) { - const char *end = buf + len; - const __m512i ascii = _mm512_set1_epi8((uint8_t)0x80); - __m512i running_or = _mm512_setzero_si512(); - for (; end - buf >= 64; buf += 64) { - const __m512i utf8 = _mm512_loadu_si512((const __m512i *)buf); - running_or = _mm512_ternarylogic_epi32(running_or, utf8, ascii, - 0xf8); // running_or | (utf8 & ascii) - } - if (buf < end) { - const __m512i utf8 = _mm512_maskz_loadu_epi8( - (uint64_t(1) << (end - buf)) - 1, (const __m512i *)buf); - running_or = _mm512_ternarylogic_epi32(running_or, utf8, ascii, - 0xf8); // running_or | (utf8 & ascii) - } - return (_mm512_test_epi8_mask(running_or, running_or) == 0); +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::utf16_to_utf32::convert_valid( + buf, len, utf32_output); } -/* end file src/icelake/icelake_ascii_validation.inl.cpp */ -/* begin file src/icelake/icelake_utf32_validation.inl.cpp */ -// file included directly -const char32_t *validate_utf32(const char32_t *buf, size_t len) { - if (len < 16) { - return buf; - } - const char32_t *end = buf + len - 16; +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return scalar::utf16_to_utf32::convert_valid(buf, len, + utf32_output); +} - const __m512i offset = _mm512_set1_epi32((uint32_t)0xffff2000); - __m512i currentmax = _mm512_setzero_si512(); - __m512i currentoffsetmax = _mm512_setzero_si512(); +void implementation::change_endianness_utf16(const char16_t *input, + size_t length, + char16_t *output) const noexcept { + scalar::utf16::change_endianness_utf16(input, length, output); +} - while (buf <= end) { - __m512i utf32 = _mm512_loadu_si512((const __m512i *)buf); - buf += 16; - currentoffsetmax = - _mm512_max_epu32(_mm512_add_epi32(utf32, offset), currentoffsetmax); - currentmax = _mm512_max_epu32(utf32, currentmax); - } +simdutf_warn_unused size_t implementation::count_utf16le( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::count_code_points(input, length); +} - const __m512i standardmax = _mm512_set1_epi32((uint32_t)0x10ffff); - const __m512i standardoffsetmax = _mm512_set1_epi32((uint32_t)0xfffff7ff); - __m512i is_zero = - _mm512_xor_si512(_mm512_max_epu32(currentmax, standardmax), standardmax); - if (_mm512_test_epi8_mask(is_zero, is_zero) != 0) { - return nullptr; - } - is_zero = _mm512_xor_si512( - _mm512_max_epu32(currentoffsetmax, standardoffsetmax), standardoffsetmax); - if (_mm512_test_epi8_mask(is_zero, is_zero) != 0) { - return nullptr; - } +simdutf_warn_unused size_t implementation::count_utf16be( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::count_code_points(input, length); +} - return buf; +simdutf_warn_unused size_t +implementation::count_utf8(const char *input, size_t length) const noexcept { + return utf8::count_code_points(input, length); } -/* end file src/icelake/icelake_utf32_validation.inl.cpp */ -/* begin file src/icelake/icelake_convert_latin1_to_utf8.inl.cpp */ -// file included directly -static inline size_t latin1_to_utf8_avx512_vec(__m512i input, size_t input_len, - char *utf8_output, - int mask_output) { - __mmask64 nonascii = _mm512_movepi8_mask(input); - size_t output_size = input_len + (size_t)count_ones(nonascii); +simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::utf8_length_from_utf16(input, + length); +} - // Mask to denote whether the byte is a leading byte that is not ascii - __mmask64 sixth = _mm512_cmpge_epu8_mask( - input, _mm512_set1_epi8(-64)); // binary representation of -64: 1100 0000 +simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::utf8_length_from_utf16(input, length); +} - const uint64_t alternate_bits = UINT64_C(0x5555555555555555); - uint64_t ascii = ~nonascii; - // the bits in ascii are inverted and zeros are interspersed in between them - uint64_t maskA = ~_pdep_u64(ascii, alternate_bits); - uint64_t maskB = ~_pdep_u64(ascii >> 32, alternate_bits); +simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::utf32_length_from_utf16(input, + length); +} - // interleave bytes from top and bottom halves (abcd...ABCD -> aAbBcCdD) - __m512i input_interleaved = _mm512_permutexvar_epi8( - _mm512_set_epi32(0x3f1f3e1e, 0x3d1d3c1c, 0x3b1b3a1a, 0x39193818, - 0x37173616, 0x35153414, 0x33133212, 0x31113010, - 0x2f0f2e0e, 0x2d0d2c0c, 0x2b0b2a0a, 0x29092808, - 0x27072606, 0x25052404, 0x23032202, 0x21012000), - input); +simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return scalar::utf16::utf32_length_from_utf16(input, length); +} - // double size of each byte, and insert the leading byte 1100 0010 +simdutf_warn_unused size_t implementation::utf16_length_from_utf8( + const char *input, size_t length) const noexcept { + return scalar::utf8::utf16_length_from_utf8(input, length); +} - /* - upscale the bytes to 16-bit value, adding the 0b11000000 leading byte in the - process. We adjust for the bytes that have their two most significant bits. - This takes care of the first 32 bytes, assuming we interleaved the bytes. */ - __m512i outputA = - _mm512_shldi_epi16(input_interleaved, _mm512_set1_epi8(-62), 8); - outputA = _mm512_mask_add_epi16( - outputA, (__mmask32)sixth, outputA, - _mm512_set1_epi16(1 - 0x4000)); // 1- 0x4000 = 1100 0000 0000 0001???? +simdutf_warn_unused size_t implementation::utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + return scalar::utf32::utf8_length_from_utf32(input, length); +} - // in the second 32-bit half, set first or second option based on whether - // original input is leading byte (second case) or not (first case) - __m512i leadingB = - _mm512_mask_blend_epi16((__mmask32)(sixth >> 32), - _mm512_set1_epi16(0x00c2), // 0000 0000 1101 0010 - _mm512_set1_epi16(0x40c3)); // 0100 0000 1100 0011 - __m512i outputB = _mm512_ternarylogic_epi32( - input_interleaved, leadingB, _mm512_set1_epi16((short)0xff00), - (240 & 170) ^ 204); // (input_interleaved & 0xff00) ^ leadingB +simdutf_warn_unused size_t implementation::utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + return scalar::utf32::utf16_length_from_utf32(input, length); +} - // prune redundant bytes - outputA = _mm512_maskz_compress_epi8(maskA, outputA); - outputB = _mm512_maskz_compress_epi8(maskB, outputB); +simdutf_warn_unused size_t implementation::utf32_length_from_utf8( + const char *input, size_t length) const noexcept { + return scalar::utf8::count_code_points(input, length); +} - size_t output_sizeA = (size_t)count_ones((uint32_t)nonascii) + 32; +simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( + const char *input, size_t length) const noexcept { + return scalar::base64::maximal_binary_length_from_base64(input, length); +} - if (mask_output) { - if (input_len > 32) { // is the second half of the input vector used? - __mmask64 write_mask = _bzhi_u64(~0ULL, (unsigned int)output_sizeA); - _mm512_mask_storeu_epi8(utf8_output, write_mask, outputA); - utf8_output += output_sizeA; - write_mask = _bzhi_u64(~0ULL, (unsigned int)(output_size - output_sizeA)); - _mm512_mask_storeu_epi8(utf8_output, write_mask, outputB); - } else { - __mmask64 write_mask = _bzhi_u64(~0ULL, (unsigned int)output_size); - _mm512_mask_storeu_epi8(utf8_output, write_mask, outputA); +simdutf_warn_unused result implementation::base64_to_binary( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + // skip trailing spaces + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + size_t equallocation = + length; // location of the first padding character if any + size_t equalsigns = 0; + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + length -= 1; + equalsigns++; + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + equalsigns++; + length -= 1; } - } else { - _mm512_storeu_si512(utf8_output, outputA); - utf8_output += output_sizeA; - _mm512_storeu_si512(utf8_output, outputB); } - return output_size; + if (length == 0) { + if (equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation}; + } + return {SUCCESS, 0}; + } + result r = scalar::base64::base64_tail_decode( + output, input, length, equalsigns, options, last_chunk_options); + if (last_chunk_options != stop_before_partial && + r.error == error_code::SUCCESS && equalsigns > 0) { + // additional checks + if ((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation}; + } + } + return r; } -static inline size_t latin1_to_utf8_avx512_branch(__m512i input, - char *utf8_output) { - __mmask64 nonascii = _mm512_movepi8_mask(input); - if (nonascii) { - return latin1_to_utf8_avx512_vec(input, 64, utf8_output, 0); - } else { - _mm512_storeu_si512(utf8_output, input); - return 64; - } +simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( + const char16_t *input, size_t length) const noexcept { + return scalar::base64::maximal_binary_length_from_base64(input, length); } -size_t latin1_to_utf8_avx512_start(const char *buf, size_t len, - char *utf8_output) { - char *start = utf8_output; - size_t pos = 0; - // if there's at least 128 bytes remaining, we don't need to mask the output - for (; pos + 128 <= len; pos += 64) { - __m512i input = _mm512_loadu_si512((__m512i *)(buf + pos)); - utf8_output += latin1_to_utf8_avx512_branch(input, utf8_output); - } - // in the last 128 bytes, the first 64 may require masking the output - if (pos + 64 <= len) { - __m512i input = _mm512_loadu_si512((__m512i *)(buf + pos)); - utf8_output += latin1_to_utf8_avx512_vec(input, 64, utf8_output, 1); - pos += 64; +simdutf_warn_unused result implementation::base64_to_binary( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + // skip trailing spaces + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; } - // with the last 64 bytes, the input also needs to be masked - if (pos < len) { - __mmask64 load_mask = _bzhi_u64(~0ULL, (unsigned int)(len - pos)); - __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)(buf + pos)); - utf8_output += latin1_to_utf8_avx512_vec(input, len - pos, utf8_output, 1); + size_t equallocation = + length; // location of the first padding character if any + size_t equalsigns = 0; + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + length -= 1; + equalsigns++; + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + equalsigns++; + length -= 1; + } } - return (size_t)(utf8_output - start); -} -/* end file src/icelake/icelake_convert_latin1_to_utf8.inl.cpp */ -/* begin file src/icelake/icelake_convert_latin1_to_utf16.inl.cpp */ -// file included directly -template -size_t icelake_convert_latin1_to_utf16(const char *latin1_input, size_t len, - char16_t *utf16_output) { - size_t rounded_len = len & ~0x1F; // Round down to nearest multiple of 32 - - __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - for (size_t i = 0; i < rounded_len; i += 32) { - // Load 32 Latin1 characters into a 256-bit register - __m256i in = _mm256_loadu_si256((__m256i *)&latin1_input[i]); - // Zero extend each set of 8 Latin1 characters to 32 16-bit integers - __m512i out = _mm512_cvtepu8_epi16(in); - if (big_endian) { - out = _mm512_shuffle_epi8(out, byteflip); + if (length == 0) { + if (equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation}; } - // Store the results back to memory - _mm512_storeu_si512((__m512i *)&utf16_output[i], out); + return {SUCCESS, 0}; } - if (rounded_len != len) { - uint32_t mask = uint32_t(1 << (len - rounded_len)) - 1; - __m256i in = _mm256_maskz_loadu_epi8(mask, latin1_input + rounded_len); - - // Zero extend each set of 8 Latin1 characters to 32 16-bit integers - __m512i out = _mm512_cvtepu8_epi16(in); - if (big_endian) { - out = _mm512_shuffle_epi8(out, byteflip); + result r = scalar::base64::base64_tail_decode( + output, input, length, equalsigns, options, last_chunk_options); + if (last_chunk_options != stop_before_partial && + r.error == error_code::SUCCESS && equalsigns > 0) { + // additional checks + if ((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation}; } - // Store the results back to memory - _mm512_mask_storeu_epi16(utf16_output + rounded_len, mask, out); } + return r; +} - return len; +simdutf_warn_unused size_t implementation::base64_length_from_binary( + size_t length, base64_options options) const noexcept { + return scalar::base64::base64_length_from_binary(length, options); } -/* end file src/icelake/icelake_convert_latin1_to_utf16.inl.cpp */ -/* begin file src/icelake/icelake_convert_latin1_to_utf32.inl.cpp */ -std::pair -avx512_convert_latin1_to_utf32(const char *buf, size_t len, - char32_t *utf32_output) { - size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16 - for (size_t i = 0; i < rounded_len; i += 16) { - // Load 16 Latin1 characters into a 128-bit register - __m128i in = _mm_loadu_si128((__m128i *)&buf[i]); +size_t implementation::binary_to_base64(const char *input, size_t length, + char *output, + base64_options options) const noexcept { + return scalar::base64::binary_to_base64(input, length, output, options); +} +} // namespace ppc64 +} // namespace simdutf - // Zero extend each set of 8 Latin1 characters to 16 32-bit integers using - // vpmovzxbd - __m512i out = _mm512_cvtepu8_epi32(in); +/* begin file src/simdutf/ppc64/end.h */ +/* end file src/simdutf/ppc64/end.h */ +/* end file src/ppc64/implementation.cpp */ +#endif +#if SIMDUTF_IMPLEMENTATION_RVV +/* begin file src/rvv/implementation.cpp */ - // Store the results back to memory - _mm512_storeu_si512((__m512i *)&utf32_output[i], out); - } - // Return pointers pointing to where we left off - return std::make_pair(buf + rounded_len, utf32_output + rounded_len); -} -/* end file src/icelake/icelake_convert_latin1_to_utf32.inl.cpp */ -/* begin file src/icelake/icelake_base64.inl.cpp */ -// file included directly -/** - * References and further reading: - * - * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the - * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. - * https://arxiv.org/abs/1910.05109 - * - * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 - * Instructions, ACM Transactions on the Web 12 (3), 2018. - * https://arxiv.org/abs/1704.00605 - * - * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. - * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, - * Request for Comments: 4648. - * - * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. - * http://www.alfredklomp.com/programming/sse-base64/. (2014). - * - * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD - * acceleration. https://github.com/aklomp/base64. (2014). - * - * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). - * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ - * - * Nick Kopp. 2013. Base64 Encoding on a GPU. - * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). - */ -struct block64 { - __m512i chunks[1]; + + +/* begin file src/simdutf/rvv/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "rvv" +// #define SIMDUTF_IMPLEMENTATION rvv + +#if SIMDUTF_CAN_ALWAYS_RUN_RVV +// nothing needed. +#else +SIMDUTF_TARGET_RVV +#endif +/* end file src/simdutf/rvv/begin.h */ +namespace simdutf { +namespace rvv { +namespace { +#ifndef SIMDUTF_RVV_H + #error "rvv.h must be included" +#endif + +} // unnamed namespace +} // namespace rvv +} // namespace simdutf + +// +// Implementation-specific overrides +// +namespace simdutf { +namespace rvv { +/* begin file src/rvv/rvv_helpers.inl.cpp */ +template +simdutf_really_inline static size_t +rvv_utf32_store_utf16_m4(uint16_t *dst, vuint32m4_t utf32, size_t vl, + vbool4_t m4even) { + /* convert [000000000000aaaa|aaaaaabbbbbbbbbb] + * to [110111bbbbbbbbbb|110110aaaaaaaaaa] */ + vuint32m4_t sur = __riscv_vsub_vx_u32m4(utf32, 0x10000, vl); + sur = __riscv_vor_vv_u32m4(__riscv_vsll_vx_u32m4(sur, 16, vl), + __riscv_vsrl_vx_u32m4(sur, 10, vl), vl); + sur = __riscv_vand_vx_u32m4(sur, 0x3FF03FF, vl); + sur = __riscv_vor_vx_u32m4(sur, 0xDC00D800, vl); + /* merge 1 byte utf32 and 2 byte sur */ + vbool8_t m4 = __riscv_vmsgtu_vx_u32m4_b8(utf32, 0xFFFF, vl); + vuint16m4_t utf32_16 = __riscv_vreinterpret_v_u32m4_u16m4( + __riscv_vmerge_vvm_u32m4(utf32, sur, m4, vl)); + /* compress and store */ + vbool4_t mOut = __riscv_vmor_mm_b4( + __riscv_vmsne_vx_u16m4_b4(utf32_16, 0, vl * 2), m4even, vl * 2); + vuint16m4_t vout = __riscv_vcompress_vm_u16m4(utf32_16, mOut, vl * 2); + vl = __riscv_vcpop_m_b4(mOut, vl * 2); + __riscv_vse16_v_u16m4(dst, simdutf_byteflip(vout, vl), vl); + return vl; }; +/* end file src/rvv/rvv_helpers.inl.cpp */ -template -size_t encode_base64(char *dst, const char *src, size_t srclen, - base64_options options) { - // credit: Wojciech Muła - const uint8_t *input = (const uint8_t *)src; +/* begin file src/rvv/rvv_length_from.inl.cpp */ - uint8_t *out = (uint8_t *)dst; - static const char *lookup_tbl = - base64_url - ? "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" - : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +simdutf_warn_unused size_t +implementation::count_utf16le(const char16_t *src, size_t len) const noexcept { + return utf32_length_from_utf16le(src, len); +} - const __m512i shuffle_input = _mm512_setr_epi32( - 0x01020001, 0x04050304, 0x07080607, 0x0a0b090a, 0x0d0e0c0d, 0x10110f10, - 0x13141213, 0x16171516, 0x191a1819, 0x1c1d1b1c, 0x1f201e1f, 0x22232122, - 0x25262425, 0x28292728, 0x2b2c2a2b, 0x2e2f2d2e); - const __m512i lookup = - _mm512_loadu_si512(reinterpret_cast(lookup_tbl)); - const __m512i multi_shifts = _mm512_set1_epi64(UINT64_C(0x3036242a1016040a)); - size_t size = srclen; - __mmask64 input_mask = 0xffffffffffff; // (1 << 48) - 1 - while (size >= 48) { - const __m512i v = _mm512_maskz_loadu_epi8( - input_mask, reinterpret_cast(input)); - const __m512i in = _mm512_permutexvar_epi8(shuffle_input, v); - const __m512i indices = _mm512_multishift_epi64_epi8(multi_shifts, in); - const __m512i result = _mm512_permutexvar_epi8(indices, lookup); - _mm512_storeu_si512(reinterpret_cast<__m512i *>(out), result); - out += 64; - input += 48; - size -= 48; - } - input_mask = ((__mmask64)1 << size) - 1; - const __m512i v = _mm512_maskz_loadu_epi8( - input_mask, reinterpret_cast(input)); - const __m512i in = _mm512_permutexvar_epi8(shuffle_input, v); - const __m512i indices = _mm512_multishift_epi64_epi8(multi_shifts, in); - bool padding_needed = - (((options & base64_url) == 0) ^ - ((options & base64_reverse_padding) == base64_reverse_padding)); - size_t padding_amount = ((size % 3) > 0) ? (3 - (size % 3)) : 0; - size_t output_len = ((size + 2) / 3) * 4; - size_t non_padded_output_len = output_len - padding_amount; - if (!padding_needed) { - output_len = non_padded_output_len; - } - __mmask64 output_mask = output_len == 64 ? (__mmask64)UINT64_MAX - : ((__mmask64)1 << output_len) - 1; - __m512i result = _mm512_mask_permutexvar_epi8( - _mm512_set1_epi8('='), ((__mmask64)1 << non_padded_output_len) - 1, - indices, lookup); - _mm512_mask_storeu_epi8(reinterpret_cast<__m512i *>(out), output_mask, - result); - return (size_t)(out - (uint8_t *)dst) + output_len; +simdutf_warn_unused size_t +implementation::count_utf16be(const char16_t *src, size_t len) const noexcept { + return utf32_length_from_utf16be(src, len); } -template -static inline uint64_t to_base64_mask(block64 *b, bool *error) { - __m512i input = b->chunks[0]; - const __m512i ascii_space_tbl = _mm512_set_epi8( - 0, 0, 13, 12, 0, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 13, 12, 0, 10, - 9, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 13, 12, 0, 10, 9, 0, 0, 0, 0, 0, 0, - 0, 0, 32, 0, 0, 13, 12, 0, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 32); - __m512i lookup0; - if (base64_url) { - lookup0 = _mm512_set_epi8( - -128, -128, -128, -128, -128, -128, 61, 60, 59, 58, 57, 56, 55, 54, 53, - 52, -128, -128, 62, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -1, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -1, - -128, -128, -1, -1, -128, -128, -128, -128, -128, -128, -128, -128, -1); - } else { - lookup0 = _mm512_set_epi8( - -128, -128, -128, -128, -128, -128, 61, 60, 59, 58, 57, 56, 55, 54, 53, - 52, 63, -128, -128, -128, 62, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -1, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -1, -128, - -128, -1, -1, -128, -128, -128, -128, -128, -128, -128, -128, -128); - } - __m512i lookup1; - if (base64_url) { - lookup1 = _mm512_set_epi8( - -128, -128, -128, -128, -128, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, - 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, -128, - 63, -128, -128, -128, -128, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, - 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -128); - } else { - lookup1 = _mm512_set_epi8( - -128, -128, -128, -128, -128, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, - 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, -128, - -128, -128, -128, -128, -128, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, - 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -128); - } +simdutf_warn_unused size_t +implementation::count_utf8(const char *src, size_t len) const noexcept { + return utf32_length_from_utf8(src, len); +} - const __m512i translated = _mm512_permutex2var_epi8(lookup0, input, lookup1); - const __m512i combined = _mm512_or_si512(translated, input); - const __mmask64 mask = _mm512_movepi8_mask(combined); - if (mask) { - const __mmask64 spaces = _mm512_cmpeq_epi8_mask( - _mm512_shuffle_epi8(ascii_space_tbl, input), input); - *error |= (mask != spaces); - } - b->chunks[0] = translated; +simdutf_warn_unused size_t implementation::latin1_length_from_utf8( + const char *src, size_t len) const noexcept { + return utf32_length_from_utf8(src, len); +} - return mask; +simdutf_warn_unused size_t +implementation::latin1_length_from_utf16(size_t len) const noexcept { + return len; } -static inline void copy_block(block64 *b, char *output) { - _mm512_storeu_si512(reinterpret_cast<__m512i *>(output), b->chunks[0]); +simdutf_warn_unused size_t +implementation::latin1_length_from_utf32(size_t len) const noexcept { + return len; } -static inline uint64_t compress_block(block64 *b, uint64_t mask, char *output) { - uint64_t nmask = ~mask; - __m512i c = _mm512_maskz_compress_epi8(nmask, b->chunks[0]); - _mm512_storeu_si512(reinterpret_cast<__m512i *>(output), c); - return _mm_popcnt_u64(nmask); +simdutf_warn_unused size_t +implementation::utf16_length_from_latin1(size_t len) const noexcept { + return len; } -// The caller of this function is responsible to ensure that there are 64 bytes -// available from reading at src. The data is read into a block64 structure. -static inline void load_block(block64 *b, const char *src) { - b->chunks[0] = _mm512_loadu_si512(reinterpret_cast(src)); +simdutf_warn_unused size_t +implementation::utf32_length_from_latin1(size_t len) const noexcept { + return len; } -// The caller of this function is responsible to ensure that there are 128 bytes -// available from reading at src. The data is read into a block64 structure. -static inline void load_block(block64 *b, const char16_t *src) { - __m512i m1 = _mm512_loadu_si512(reinterpret_cast(src)); - __m512i m2 = _mm512_loadu_si512(reinterpret_cast(src + 32)); - __m512i p = _mm512_packus_epi16(m1, m2); - b->chunks[0] = - _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7), p); +simdutf_warn_unused size_t implementation::utf32_length_from_utf8( + const char *src, size_t len) const noexcept { + size_t count = 0; + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e8m8(len); + vint8m8_t v = __riscv_vle8_v_i8m8((int8_t *)src, vl); + vbool1_t mask = __riscv_vmsgt_vx_i8m8_b1(v, -65, vl); + count += __riscv_vcpop_m_b1(mask, vl); + } + return count; } -static inline void base64_decode(char *out, __m512i str) { - const __m512i merge_ab_and_bc = - _mm512_maddubs_epi16(str, _mm512_set1_epi32(0x01400140)); - const __m512i merged = - _mm512_madd_epi16(merge_ab_and_bc, _mm512_set1_epi32(0x00011000)); - const __m512i pack = _mm512_set_epi8( - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 61, 62, 56, 57, 58, - 52, 53, 54, 48, 49, 50, 44, 45, 46, 40, 41, 42, 36, 37, 38, 32, 33, 34, - 28, 29, 30, 24, 25, 26, 20, 21, 22, 16, 17, 18, 12, 13, 14, 8, 9, 10, 4, - 5, 6, 0, 1, 2); - const __m512i shuffled = _mm512_permutexvar_epi8(pack, merged); - _mm512_mask_storeu_epi8( - (__m512i *)out, 0xffffffffffff, - shuffled); // mask would be 0xffffffffffff since we write 48 bytes. +template +simdutf_really_inline static size_t +rvv_utf32_length_from_utf16(const char16_t *src, size_t len) { + size_t count = 0; + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e16m8(len); + vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); + v = simdutf_byteflip(v, vl); + vbool2_t notHigh = + __riscv_vmor_mm_b2(__riscv_vmsgtu_vx_u16m8_b2(v, 0xDFFF, vl), + __riscv_vmsltu_vx_u16m8_b2(v, 0xDC00, vl), vl); + count += __riscv_vcpop_m_b2(notHigh, vl); + } + return count; } -// decode 64 bytes and output 48 bytes -static inline void base64_decode_block(char *out, const char *src) { - base64_decode(out, - _mm512_loadu_si512(reinterpret_cast(src))); + +simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( + const char16_t *src, size_t len) const noexcept { + return rvv_utf32_length_from_utf16(src, len); } -static inline void base64_decode_block(char *out, block64 *b) { - base64_decode(out, b->chunks[0]); + +simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( + const char16_t *src, size_t len) const noexcept { + if (supports_zvbb()) + return rvv_utf32_length_from_utf16(src, len); + else + return rvv_utf32_length_from_utf16(src, len); } -template -full_result -compress_decode_base64(char *dst, const chartype *src, size_t srclen, - base64_options options, - last_chunk_handling_options last_chunk_options) { - const uint8_t *to_base64 = base64_url ? tables::base64::to_base64_url_value - : tables::base64::to_base64_value; - size_t equallocation = - srclen; // location of the first padding character if any - size_t equalsigns = 0; - // skip trailing spaces - while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - if (srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 1; - // skip trailing spaces - while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - if (srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 2; - } - } - if (srclen == 0) { - if (equalsigns > 0) { - return {INVALID_BASE64_CHARACTER, equallocation, 0}; - } - return {SUCCESS, 0, 0}; +simdutf_warn_unused size_t implementation::utf8_length_from_latin1( + const char *src, size_t len) const noexcept { + size_t count = len; + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e8m8(len); + vint8m8_t v = __riscv_vle8_v_i8m8((int8_t *)src, vl); + count += __riscv_vcpop_m_b1(__riscv_vmslt_vx_i8m8_b1(v, 0, vl), vl); } - const chartype *const srcinit = src; - const char *const dstinit = dst; - const chartype *const srcend = src + srclen; + return count; +} - // figure out why block_size == 2 is sometimes best??? - constexpr size_t block_size = 6; - char buffer[block_size * 64]; - char *bufferptr = buffer; - if (srclen >= 64) { - const chartype *const srcend64 = src + srclen - 64; - while (src <= srcend64) { - block64 b; - load_block(&b, src); - src += 64; - bool error = false; - uint64_t badcharmask = to_base64_mask(&b, &error); - if (error) { - src -= 64; - while (src < srcend && scalar::base64::is_eight_byte(*src) && - to_base64[uint8_t(*src)] <= 64) { - src++; - } - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - if (badcharmask != 0) { - // optimization opportunity: check for simple masks like those made of - // continuous 1s followed by continuous 0s. And masks containing a - // single bad character. - bufferptr += compress_block(&b, badcharmask, bufferptr); - } else if (bufferptr != buffer) { - copy_block(&b, bufferptr); - bufferptr += 64; - } else { - base64_decode_block(dst, &b); - dst += 48; - } - if (bufferptr >= (block_size - 1) * 64 + buffer) { - for (size_t i = 0; i < (block_size - 1); i++) { - base64_decode_block(dst, buffer + i * 64); - dst += 48; - } - std::memcpy(buffer, buffer + (block_size - 1) * 64, - 64); // 64 might be too much - bufferptr -= (block_size - 1) * 64; - } - } +template +simdutf_really_inline static size_t +rvv_utf8_length_from_utf16(const char16_t *src, size_t len) { + size_t count = 0; + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e16m8(len); + vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); + v = simdutf_byteflip(v, vl); + vbool2_t m234 = __riscv_vmsgtu_vx_u16m8_b2(v, 0x7F, vl); + vbool2_t m34 = __riscv_vmsgtu_vx_u16m8_b2(v, 0x7FF, vl); + vbool2_t notSur = + __riscv_vmor_mm_b2(__riscv_vmsltu_vx_u16m8_b2(v, 0xD800, vl), + __riscv_vmsgtu_vx_u16m8_b2(v, 0xDFFF, vl), vl); + vbool2_t m3 = __riscv_vmand_mm_b2(m34, notSur, vl); + count += vl + __riscv_vcpop_m_b2(m234, vl) + __riscv_vcpop_m_b2(m3, vl); } + return count; +} - char *buffer_start = buffer; - // Optimization note: if this is almost full, then it is worth our - // time, otherwise, we should just decode directly. - int last_block = (int)((bufferptr - buffer_start) % 64); - if (last_block != 0 && srcend - src + last_block >= 64) { +simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( + const char16_t *src, size_t len) const noexcept { + return rvv_utf8_length_from_utf16(src, len); +} - while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { - uint8_t val = to_base64[uint8_t(*src)]; - *bufferptr = char(val); - if (!scalar::base64::is_eight_byte(*src) || val > 64) { - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - bufferptr += (val <= 63); - src++; - } - } +simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( + const char16_t *src, size_t len) const noexcept { + if (supports_zvbb()) + return rvv_utf8_length_from_utf16(src, len); + else + return rvv_utf8_length_from_utf16(src, len); +} - for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { - base64_decode_block(dst, buffer_start); - dst += 48; - } - if ((bufferptr - buffer_start) % 64 != 0) { - while (buffer_start + 4 < bufferptr) { - uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + - (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + - (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + - (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) - << 8; - triple = scalar::utf32::swap_bytes(triple); - std::memcpy(dst, &triple, 4); - dst += 3; - buffer_start += 4; - } - if (buffer_start + 4 <= bufferptr) { - uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + - (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + - (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + - (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) - << 8; - triple = scalar::utf32::swap_bytes(triple); - std::memcpy(dst, &triple, 3); - dst += 3; - buffer_start += 4; - } - // we may have 1, 2 or 3 bytes left and we need to decode them so let us - // backtrack - int leftover = int(bufferptr - buffer_start); - while (leftover > 0) { - while (to_base64[uint8_t(*(src - 1))] == 64) { - src--; - } - src--; - leftover--; - } - } - if (src < srcend + equalsigns) { - full_result r = scalar::base64::base64_tail_decode( - dst, src, srcend - src, equalsigns, options, last_chunk_options); - if (r.error == error_code::INVALID_BASE64_CHARACTER || - r.error == error_code::BASE64_EXTRA_BITS) { - r.input_count += size_t(src - srcinit); - return r; - } else { - r.output_count += size_t(dst - dstinit); - } - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0) { - // additional checks - if ((r.output_count % 3 == 0) || - ((r.output_count % 3) + 1 + equalsigns != 4)) { - r.error = error_code::INVALID_BASE64_CHARACTER; - r.input_count = equallocation; - } - } - return r; - } - if (equalsigns > 0) { - if ((size_t(dst - dstinit) % 3 == 0) || - ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; - } +simdutf_warn_unused size_t implementation::utf8_length_from_utf32( + const char32_t *src, size_t len) const noexcept { + size_t count = 0; + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e32m8(len); + vuint32m8_t v = __riscv_vle32_v_u32m8((uint32_t *)src, vl); + vbool4_t m234 = __riscv_vmsgtu_vx_u32m8_b4(v, 0x7F, vl); + vbool4_t m34 = __riscv_vmsgtu_vx_u32m8_b4(v, 0x7FF, vl); + vbool4_t m4 = __riscv_vmsgtu_vx_u32m8_b4(v, 0xFFFF, vl); + count += vl + __riscv_vcpop_m_b4(m234, vl) + __riscv_vcpop_m_b4(m34, vl) + + __riscv_vcpop_m_b4(m4, vl); } - return {SUCCESS, srclen, size_t(dst - dstinit)}; + return count; } -/* end file src/icelake/icelake_base64.inl.cpp */ -#include +simdutf_warn_unused size_t implementation::utf16_length_from_utf8( + const char *src, size_t len) const noexcept { + size_t count = 0; + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e8m8(len); + vint8m8_t v = __riscv_vle8_v_i8m8((int8_t *)src, vl); + vbool1_t m1234 = __riscv_vmsgt_vx_i8m8_b1(v, -65, vl); + vbool1_t m4 = __riscv_vmsgtu_vx_u8m8_b1(__riscv_vreinterpret_u8m8(v), + (uint8_t)0b11101111, vl); + count += __riscv_vcpop_m_b1(m1234, vl) + __riscv_vcpop_m_b1(m4, vl); + } + return count; +} -} // namespace -} // namespace icelake -} // namespace simdutf +simdutf_warn_unused size_t implementation::utf16_length_from_utf32( + const char32_t *src, size_t len) const noexcept { + size_t count = 0; + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e32m8(len); + vuint32m8_t v = __riscv_vle32_v_u32m8((uint32_t *)src, vl); + vbool4_t m4 = __riscv_vmsgtu_vx_u32m8_b4(v, 0xFFFF, vl); + count += vl + __riscv_vcpop_m_b4(m4, vl); + } + return count; +} +/* end file src/rvv/rvv_length_from.inl.cpp */ +/* begin file src/rvv/rvv_validate.inl.cpp */ -namespace simdutf { -namespace icelake { -simdutf_warn_unused int -implementation::detect_encodings(const char *input, - size_t length) const noexcept { - // If there is a BOM, then we trust it. - auto bom_encoding = simdutf::BOM::check_bom(input, length); - // todo: convert to a one-pass algorithm - if (bom_encoding != encoding_type::unspecified) { - return bom_encoding; - } - int out = 0; - if (validate_utf8(input, length)) { - out |= encoding_type::UTF8; - } - if ((length % 2) == 0) { - if (validate_utf16le(reinterpret_cast(input), - length / 2)) { - out |= encoding_type::UTF16_LE; - } - } - if ((length % 4) == 0) { - if (validate_utf32(reinterpret_cast(input), length / 4)) { - out |= encoding_type::UTF32_LE; - } +simdutf_warn_unused bool +implementation::validate_ascii(const char *src, size_t len) const noexcept { + size_t vlmax = __riscv_vsetvlmax_e8m8(); + vint8m8_t mask = __riscv_vmv_v_x_i8m8(0, vlmax); + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e8m8(len); + vint8m8_t v = __riscv_vle8_v_i8m8((int8_t *)src, vl); + mask = __riscv_vor_vv_i8m8_tu(mask, mask, v, vl); } - return out; + return __riscv_vfirst_m_b1(__riscv_vmslt_vx_i8m8_b1(mask, 0, vlmax), vlmax) < + 0; } -simdutf_warn_unused bool -implementation::validate_utf8(const char *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - return true; - } - avx512_utf8_checker checker{}; - const char *ptr = buf; - const char *end = ptr + len; - for (; end - ptr >= 64; ptr += 64) { - const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); - checker.check_next_input(utf8); - } - if (end != ptr) { - const __m512i utf8 = _mm512_maskz_loadu_epi8( - ~UINT64_C(0) >> (64 - (end - ptr)), (const __m512i *)ptr); - checker.check_next_input(utf8); +simdutf_warn_unused result implementation::validate_ascii_with_errors( + const char *src, size_t len) const noexcept { + const char *beg = src; + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e8m8(len); + vint8m8_t v = __riscv_vle8_v_i8m8((int8_t *)src, vl); + long idx = __riscv_vfirst_m_b1(__riscv_vmslt_vx_i8m8_b1(v, 0, vl), vl); + if (idx >= 0) + return result(error_code::TOO_LARGE, src - beg + idx); } - checker.check_eof(); - return !checker.errors(); + return result(error_code::SUCCESS, src - beg); } -simdutf_warn_unused result implementation::validate_utf8_with_errors( - const char *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - return result(error_code::SUCCESS, len); - } - avx512_utf8_checker checker{}; - const char *ptr = buf; - const char *end = ptr + len; - size_t count{0}; - for (; end - ptr >= 64; ptr += 64) { - const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); - checker.check_next_input(utf8); - if (checker.errors()) { - if (count != 0) { - count--; - } // Sometimes the error is only detected in the next chunk - result res = scalar::utf8::rewind_and_validate_with_errors( - reinterpret_cast(buf), - reinterpret_cast(buf + count), len - count); - res.count += count; - return res; - } - count += 64; - } - if (end != ptr) { - const __m512i utf8 = _mm512_maskz_loadu_epi8( - ~UINT64_C(0) >> (64 - (end - ptr)), (const __m512i *)ptr); - checker.check_next_input(utf8); +/* Returns a close estimation of the number of valid UTF-8 bytes up to the + * first invalid one, but never overestimating. */ +simdutf_really_inline static size_t rvv_count_valid_utf8(const char *src, + size_t len) { + const char *beg = src; + if (len < 32) + return 0; + + /* validate first three bytes */ + { + size_t idx = 3; + while (idx < len && (src[idx] >> 6) == 0b10) + ++idx; + if (idx > 3 + 3 || !scalar::utf8::validate(src, idx)) + return 0; } - checker.check_eof(); - if (checker.errors()) { - if (count != 0) { - count--; - } // Sometimes the error is only detected in the next chunk - result res = scalar::utf8::rewind_and_validate_with_errors( - reinterpret_cast(buf), - reinterpret_cast(buf + count), len - count); - res.count += count; - return res; + + static const uint64_t err1m[] = {0x0202020202020202, 0x4915012180808080}; + static const uint64_t err2m[] = {0xCBCBCB8B8383A3E7, 0xCBCBDBCBCBCBCBCB}; + static const uint64_t err3m[] = {0x0101010101010101, 0X01010101BABAAEE6}; + + const vuint8m1_t err1tbl = + __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err1m, 2)); + const vuint8m1_t err2tbl = + __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err2m, 2)); + const vuint8m1_t err3tbl = + __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err3m, 2)); + + size_t tail = 3; + size_t n = len - tail; + + for (size_t vl; n > 0; n -= vl, src += vl) { + vl = __riscv_vsetvl_e8m4(n); + vuint8m4_t v0 = __riscv_vle8_v_u8m4((uint8_t const *)src, vl); + + uint8_t next0 = src[vl + 0]; + uint8_t next1 = src[vl + 1]; + uint8_t next2 = src[vl + 2]; + + /* fast path: ASCII */ + if (__riscv_vfirst_m_b2(__riscv_vmsgtu_vx_u8m4_b2(v0, 0b01111111, vl), vl) < + 0 && + (next0 | next1 | next2) < 0b10000000) + continue; + + /* see "Validating UTF-8 In Less Than One Instruction Per Byte" + * https://arxiv.org/abs/2010.03090 */ + vuint8m4_t v1 = __riscv_vslide1down_vx_u8m4(v0, next0, vl); + vuint8m4_t v2 = __riscv_vslide1down_vx_u8m4(v1, next1, vl); + vuint8m4_t v3 = __riscv_vslide1down_vx_u8m4(v2, next2, vl); + + vuint8m4_t s1 = __riscv_vreinterpret_v_u16m4_u8m4(__riscv_vsrl_vx_u16m4( + __riscv_vreinterpret_v_u8m4_u16m4(v2), 4, __riscv_vsetvlmax_e16m4())); + vuint8m4_t s3 = __riscv_vreinterpret_v_u16m4_u8m4(__riscv_vsrl_vx_u16m4( + __riscv_vreinterpret_v_u8m4_u16m4(v3), 4, __riscv_vsetvlmax_e16m4())); + + vuint8m4_t idx2 = __riscv_vand_vx_u8m4(v2, 0xF, vl); + vuint8m4_t idx1 = __riscv_vand_vx_u8m4(s1, 0xF, vl); + vuint8m4_t idx3 = __riscv_vand_vx_u8m4(s3, 0xF, vl); + + vuint8m4_t err1 = simdutf_vrgather_u8m1x4(err1tbl, idx1); + vuint8m4_t err2 = simdutf_vrgather_u8m1x4(err2tbl, idx2); + vuint8m4_t err3 = simdutf_vrgather_u8m1x4(err3tbl, idx3); + vint8m4_t errs = __riscv_vreinterpret_v_u8m4_i8m4( + __riscv_vand_vv_u8m4(__riscv_vand_vv_u8m4(err1, err2, vl), err3, vl)); + + vbool2_t is_3 = __riscv_vmsgtu_vx_u8m4_b2(v1, 0b11100000 - 1, vl); + vbool2_t is_4 = __riscv_vmsgtu_vx_u8m4_b2(v0, 0b11110000 - 1, vl); + vbool2_t is_34 = __riscv_vmor_mm_b2(is_3, is_4, vl); + vbool2_t err34 = + __riscv_vmxor_mm_b2(is_34, __riscv_vmslt_vx_i8m4_b2(errs, 0, vl), vl); + vbool2_t errm = + __riscv_vmor_mm_b2(__riscv_vmsgt_vx_i8m4_b2(errs, 0, vl), err34, vl); + if (__riscv_vfirst_m_b2(errm, vl) >= 0) + break; } - return result(error_code::SUCCESS, len); + + /* we need to validate the last character */ + while (tail < len && (src[0] >> 6) == 0b10) + --src, ++tail; + return src - beg; } simdutf_warn_unused bool -implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return icelake::validate_ascii(buf, len); +implementation::validate_utf8(const char *src, size_t len) const noexcept { + size_t count = rvv_count_valid_utf8(src, len); + return scalar::utf8::validate(src + count, len - count); } -simdutf_warn_unused result implementation::validate_ascii_with_errors( - const char *buf, size_t len) const noexcept { - const char *buf_orig = buf; - const char *end = buf + len; - const __m512i ascii = _mm512_set1_epi8((uint8_t)0x80); - for (; end - buf >= 64; buf += 64) { - const __m512i input = _mm512_loadu_si512((const __m512i *)buf); - __mmask64 notascii = _mm512_cmp_epu8_mask(input, ascii, _MM_CMPINT_NLT); - if (notascii) { - return result(error_code::TOO_LARGE, - buf - buf_orig + _tzcnt_u64(notascii)); - } - } - if (end != buf) { - const __m512i input = _mm512_maskz_loadu_epi8( - ~UINT64_C(0) >> (64 - (end - buf)), (const __m512i *)buf); - __mmask64 notascii = _mm512_cmp_epu8_mask(input, ascii, _MM_CMPINT_NLT); - if (notascii) { - return result(error_code::TOO_LARGE, - buf - buf_orig + _tzcnt_u64(notascii)); - } - } - return result(error_code::SUCCESS, len); +simdutf_warn_unused result implementation::validate_utf8_with_errors( + const char *src, size_t len) const noexcept { + size_t count = rvv_count_valid_utf8(src, len); + result res = scalar::utf8::validate_with_errors(src + count, len - count); + return result(res.error, count + res.count); } simdutf_warn_unused bool -implementation::validate_utf16le(const char16_t *buf, +implementation::validate_utf16le(const char16_t *src, size_t len) const noexcept { - const char16_t *end = buf + len; - - for (; end - buf >= 32;) { - __m512i in = _mm512_loadu_si512((__m512i *)buf); - __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); - __mmask32 surrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); - if (surrogates) { - __mmask32 highsurrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); - __mmask32 lowsurrogates = surrogates ^ highsurrogates; - // high must be followed by low - if ((highsurrogates << 1) != lowsurrogates) { - return false; - } - bool ends_with_high = ((highsurrogates & 0x80000000) != 0); - if (ends_with_high) { - buf += 31; // advance only by 31 code units so that we start with the - // high surrogate on the next round. - } else { - buf += 32; - } - } else { - buf += 32; - } - } - if (buf < end) { - __m512i in = - _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf); - __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); - __mmask32 surrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); - if (surrogates) { - __mmask32 highsurrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); - __mmask32 lowsurrogates = surrogates ^ highsurrogates; - // high must be followed by low - if ((highsurrogates << 1) != lowsurrogates) { - return false; - } - } - } - return true; + return validate_utf16le_with_errors(src, len).error == error_code::SUCCESS; } simdutf_warn_unused bool -implementation::validate_utf16be(const char16_t *buf, +implementation::validate_utf16be(const char16_t *src, size_t len) const noexcept { - const char16_t *end = buf + len; - const __m512i byteflip = _mm512_setr_epi64( - 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, - 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - for (; end - buf >= 32;) { - __m512i in = - _mm512_shuffle_epi8(_mm512_loadu_si512((__m512i *)buf), byteflip); - __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); - __mmask32 surrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); - if (surrogates) { - __mmask32 highsurrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); - __mmask32 lowsurrogates = surrogates ^ highsurrogates; - // high must be followed by low - if ((highsurrogates << 1) != lowsurrogates) { - return false; - } - bool ends_with_high = ((highsurrogates & 0x80000000) != 0); - if (ends_with_high) { - buf += 31; // advance only by 31 code units so that we start with the - // high surrogate on the next round. - } else { - buf += 32; - } - } else { - buf += 32; + return validate_utf16be_with_errors(src, len).error == error_code::SUCCESS; +} + +template +simdutf_really_inline static result +rvv_validate_utf16_with_errors(const char16_t *src, size_t len) { + const char16_t *beg = src; + uint16_t last = 0; + for (size_t vl; len > 0; + len -= vl, src += vl, last = simdutf_byteflip(src[-1])) { + vl = __riscv_vsetvl_e16m8(len); + vuint16m8_t v1 = __riscv_vle16_v_u16m8((const uint16_t *)src, vl); + v1 = simdutf_byteflip(v1, vl); + vuint16m8_t v0 = __riscv_vslide1up_vx_u16m8(v1, last, vl); + + vbool2_t surhi = __riscv_vmseq_vx_u16m8_b2( + __riscv_vand_vx_u16m8(v0, 0xFC00, vl), 0xD800, vl); + vbool2_t surlo = __riscv_vmseq_vx_u16m8_b2( + __riscv_vand_vx_u16m8(v1, 0xFC00, vl), 0xDC00, vl); + + long idx = __riscv_vfirst_m_b2(__riscv_vmxor_mm_b2(surhi, surlo, vl), vl); + if (idx >= 0) { + last = idx > 0 ? simdutf_byteflip(src[idx - 1]) : last; + return result(error_code::SURROGATE, + src - beg + idx - (last - 0xD800u < 0x400u)); + break; } } - if (buf < end) { - __m512i in = _mm512_shuffle_epi8( - _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf), - byteflip); - __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); - __mmask32 surrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); - if (surrogates) { - __mmask32 highsurrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); - __mmask32 lowsurrogates = surrogates ^ highsurrogates; - // high must be followed by low - if ((highsurrogates << 1) != lowsurrogates) { - return false; - } - } + if (last - 0xD800u < 0x400u) { + return result(error_code::SURROGATE, + src - beg - 1); /* end on high surrogate */ + } else { + return result(error_code::SUCCESS, src - beg); } - return true; } simdutf_warn_unused result implementation::validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept { - const char16_t *start_buf = buf; - const char16_t *end = buf + len; - for (; end - buf >= 32;) { - __m512i in = _mm512_loadu_si512((__m512i *)buf); - __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); - __mmask32 surrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); - if (surrogates) { - __mmask32 highsurrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); - __mmask32 lowsurrogates = surrogates ^ highsurrogates; - // high must be followed by low - if ((highsurrogates << 1) != lowsurrogates) { - uint32_t extra_low = _tzcnt_u32(lowsurrogates & ~(highsurrogates << 1)); - uint32_t extra_high = - _tzcnt_u32(highsurrogates & ~(lowsurrogates >> 1)); - return result(error_code::SURROGATE, - (buf - start_buf) + - (extra_low < extra_high ? extra_low : extra_high)); - } - bool ends_with_high = ((highsurrogates & 0x80000000) != 0); - if (ends_with_high) { - buf += 31; // advance only by 31 code units so that we start with the - // high surrogate on the next round. - } else { - buf += 32; - } - } else { - buf += 32; - } - } - if (buf < end) { - __m512i in = - _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf); - __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); - __mmask32 surrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); - if (surrogates) { - __mmask32 highsurrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); - __mmask32 lowsurrogates = surrogates ^ highsurrogates; - // high must be followed by low - if ((highsurrogates << 1) != lowsurrogates) { - uint32_t extra_low = _tzcnt_u32(lowsurrogates & ~(highsurrogates << 1)); - uint32_t extra_high = - _tzcnt_u32(highsurrogates & ~(lowsurrogates >> 1)); - return result(error_code::SURROGATE, - (buf - start_buf) + - (extra_low < extra_high ? extra_low : extra_high)); - } - } - } - return result(error_code::SUCCESS, len); + const char16_t *src, size_t len) const noexcept { + return rvv_validate_utf16_with_errors(src, len); } simdutf_warn_unused result implementation::validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept { - const char16_t *start_buf = buf; - const char16_t *end = buf + len; - const __m512i byteflip = _mm512_setr_epi64( - 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, - 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - for (; end - buf >= 32;) { - __m512i in = - _mm512_shuffle_epi8(_mm512_loadu_si512((__m512i *)buf), byteflip); - __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); - __mmask32 surrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); - if (surrogates) { - __mmask32 highsurrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); - __mmask32 lowsurrogates = surrogates ^ highsurrogates; - // high must be followed by low - if ((highsurrogates << 1) != lowsurrogates) { - uint32_t extra_low = _tzcnt_u32(lowsurrogates & ~(highsurrogates << 1)); - uint32_t extra_high = - _tzcnt_u32(highsurrogates & ~(lowsurrogates >> 1)); - return result(error_code::SURROGATE, - (buf - start_buf) + - (extra_low < extra_high ? extra_low : extra_high)); - } - bool ends_with_high = ((highsurrogates & 0x80000000) != 0); - if (ends_with_high) { - buf += 31; // advance only by 31 code units so that we start with the - // high surrogate on the next round. - } else { - buf += 32; - } - } else { - buf += 32; - } - } - if (buf < end) { - __m512i in = _mm512_shuffle_epi8( - _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf), - byteflip); - __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); - __mmask32 surrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); - if (surrogates) { - __mmask32 highsurrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); - __mmask32 lowsurrogates = surrogates ^ highsurrogates; - // high must be followed by low - if ((highsurrogates << 1) != lowsurrogates) { - uint32_t extra_low = _tzcnt_u32(lowsurrogates & ~(highsurrogates << 1)); - uint32_t extra_high = - _tzcnt_u32(highsurrogates & ~(lowsurrogates >> 1)); - return result(error_code::SURROGATE, - (buf - start_buf) + - (extra_low < extra_high ? extra_low : extra_high)); - } - } - } - return result(error_code::SUCCESS, len); + const char16_t *src, size_t len) const noexcept { + if (supports_zvbb()) + return rvv_validate_utf16_with_errors(src, len); + else + return rvv_validate_utf16_with_errors(src, len); } simdutf_warn_unused bool -implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { - const char32_t *tail = icelake::validate_utf32(buf, len); - if (tail) { - return scalar::utf32::validate(tail, len - (tail - buf)); - } else { - // we come here if there was an error, or buf was nullptr which may happen - // for empty input. - return len == 0; +implementation::validate_utf32(const char32_t *src, size_t len) const noexcept { + size_t vlmax = __riscv_vsetvlmax_e32m8(); + vuint32m8_t max = __riscv_vmv_v_x_u32m8(0x10FFFF, vlmax); + vuint32m8_t maxOff = __riscv_vmv_v_x_u32m8(0xFFFFF7FF, vlmax); + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e32m8(len); + vuint32m8_t v = __riscv_vle32_v_u32m8((uint32_t *)src, vl); + vuint32m8_t off = __riscv_vadd_vx_u32m8(v, 0xFFFF2000, vl); + max = __riscv_vmaxu_vv_u32m8_tu(max, max, v, vl); + maxOff = __riscv_vmaxu_vv_u32m8_tu(maxOff, maxOff, off, vl); } + return __riscv_vfirst_m_b4( + __riscv_vmor_mm_b4( + __riscv_vmsne_vx_u32m8_b4(max, 0x10FFFF, vlmax), + __riscv_vmsne_vx_u32m8_b4(maxOff, 0xFFFFF7FF, vlmax), vlmax), + vlmax) < 0; } simdutf_warn_unused result implementation::validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept { - const char32_t *buf_orig = buf; - if (len >= 16) { - const char32_t *end = buf + len - 16; - while (buf <= end) { - __m512i utf32 = _mm512_loadu_si512((const __m512i *)buf); - __mmask16 outside_range = _mm512_cmp_epu32_mask( - utf32, _mm512_set1_epi32(0x10ffff), _MM_CMPINT_GT); - - __m512i utf32_off = - _mm512_add_epi32(utf32, _mm512_set1_epi32(0xffff2000)); - - __mmask16 surrogate_range = _mm512_cmp_epu32_mask( - utf32_off, _mm512_set1_epi32(0xfffff7ff), _MM_CMPINT_GT); - if ((outside_range | surrogate_range)) { - auto outside_idx = _tzcnt_u32(outside_range); - auto surrogate_idx = _tzcnt_u32(surrogate_range); - - if (outside_idx < surrogate_idx) { - return result(error_code::TOO_LARGE, buf - buf_orig + outside_idx); - } - - return result(error_code::SURROGATE, buf - buf_orig + surrogate_idx); + const char32_t *src, size_t len) const noexcept { + const char32_t *beg = src; + for (size_t vl; len > 0; len -= vl, src += vl) { + vl = __riscv_vsetvl_e32m8(len); + vuint32m8_t v = __riscv_vle32_v_u32m8((uint32_t *)src, vl); + vuint32m8_t off = __riscv_vadd_vx_u32m8(v, 0xFFFF2000, vl); + long idx1 = + __riscv_vfirst_m_b4(__riscv_vmsgtu_vx_u32m8_b4(v, 0x10FFFF, vl), vl); + long idx2 = __riscv_vfirst_m_b4( + __riscv_vmsgtu_vx_u32m8_b4(off, 0xFFFFF7FF, vl), vl); + if (idx1 >= 0 && idx2 >= 0) { + if (idx1 <= idx2) { + return result(error_code::TOO_LARGE, src - beg + idx1); + } else { + return result(error_code::SURROGATE, src - beg + idx2); } - - buf += 16; } - } - if (len > 0) { - __m512i utf32 = _mm512_maskz_loadu_epi32( - __mmask16((1U << (buf_orig + len - buf)) - 1), (const __m512i *)buf); - __mmask16 outside_range = _mm512_cmp_epu32_mask( - utf32, _mm512_set1_epi32(0x10ffff), _MM_CMPINT_GT); - __m512i utf32_off = _mm512_add_epi32(utf32, _mm512_set1_epi32(0xffff2000)); - - __mmask16 surrogate_range = _mm512_cmp_epu32_mask( - utf32_off, _mm512_set1_epi32(0xfffff7ff), _MM_CMPINT_GT); - if ((outside_range | surrogate_range)) { - auto outside_idx = _tzcnt_u32(outside_range); - auto surrogate_idx = _tzcnt_u32(surrogate_range); - - if (outside_idx < surrogate_idx) { - return result(error_code::TOO_LARGE, buf - buf_orig + outside_idx); - } - - return result(error_code::SURROGATE, buf - buf_orig + surrogate_idx); + if (idx1 >= 0) { + return result(error_code::TOO_LARGE, src - beg + idx1); + } + if (idx2 >= 0) { + return result(error_code::SURROGATE, src - beg + idx2); } } - - return result(error_code::SUCCESS, len); + return result(error_code::SUCCESS, src - beg); } +/* end file src/rvv/rvv_validate.inl.cpp */ + +/* begin file src/rvv/rvv_latin1_to.inl.cpp */ simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept { - return icelake::latin1_to_utf8_avx512_start(buf, len, utf8_output); + const char *src, size_t len, char *dst) const noexcept { + char *beg = dst; + for (size_t vl, vlOut; len > 0; len -= vl, src += vl, dst += vlOut) { + vl = __riscv_vsetvl_e8m2(len); + vuint8m2_t v1 = __riscv_vle8_v_u8m2((uint8_t *)src, vl); + vbool4_t nascii = + __riscv_vmslt_vx_i8m2_b4(__riscv_vreinterpret_v_u8m2_i8m2(v1), 0, vl); + size_t cnt = __riscv_vcpop_m_b4(nascii, vl); + vlOut = vl + cnt; + if (cnt == 0) { + __riscv_vse8_v_u8m2((uint8_t *)dst, v1, vlOut); + continue; + } + + vuint8m2_t v0 = + __riscv_vor_vx_u8m2(__riscv_vsrl_vx_u8m2(v1, 6, vl), 0b11000000, vl); + v1 = __riscv_vand_vx_u8m2_mu(nascii, v1, v1, 0b10111111, vl); + + vuint8m4_t wide = + __riscv_vreinterpret_v_u16m4_u8m4(__riscv_vwmaccu_vx_u16m4( + __riscv_vwaddu_vv_u16m4(v0, v1, vl), 0xFF, v1, vl)); + vbool2_t mask = __riscv_vmsgtu_vx_u8m4_b2( + __riscv_vsub_vx_u8m4(wide, 0b11000000, vl * 2), 1, vl * 2); + vuint8m4_t comp = __riscv_vcompress_vm_u8m4(wide, mask, vl * 2); + + __riscv_vse8_v_u8m4((uint8_t *)dst, comp, vlOut); + } + return dst - beg; } simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return icelake_convert_latin1_to_utf16(buf, len, - utf16_output); + const char *src, size_t len, char16_t *dst) const noexcept { + char16_t *beg = dst; + for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { + vl = __riscv_vsetvl_e8m4(len); + vuint8m4_t v = __riscv_vle8_v_u8m4((uint8_t *)src, vl); + __riscv_vse16_v_u16m8((uint16_t *)dst, __riscv_vzext_vf2_u16m8(v, vl), vl); + } + return dst - beg; } simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return icelake_convert_latin1_to_utf16(buf, len, - utf16_output); + const char *src, size_t len, char16_t *dst) const noexcept { + char16_t *beg = dst; + for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { + vl = __riscv_vsetvl_e8m4(len); + vuint8m4_t v = __riscv_vle8_v_u8m4((uint8_t *)src, vl); + __riscv_vse16_v_u16m8( + (uint16_t *)dst, + __riscv_vsll_vx_u16m8(__riscv_vzext_vf2_u16m8(v, vl), 8, vl), vl); + } + return dst - beg; } simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - std::pair ret = - avx512_convert_latin1_to_utf32(buf, len, utf32_output); - if (ret.first == nullptr) { - return 0; + const char *src, size_t len, char32_t *dst) const noexcept { + char32_t *beg = dst; + for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { + vl = __riscv_vsetvl_e8m2(len); + vuint8m2_t v = __riscv_vle8_v_u8m2((uint8_t *)src, vl); + __riscv_vse32_v_u32m8((uint32_t *)dst, __riscv_vzext_vf4_u32m8(v, vl), vl); } - size_t converted_chars = ret.second - utf32_output; - if (ret.first != buf + len) { - const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_converted_chars == 0) { - return 0; - } - converted_chars += scalar_converted_chars; + return dst - beg; +} +/* end file src/rvv/rvv_latin1_to.inl.cpp */ +/* begin file src/rvv/rvv_utf16_to.inl.cpp */ +#include + +template +simdutf_really_inline static result +rvv_utf16_to_latin1_with_errors(const char16_t *src, size_t len, char *dst) { + const char16_t *const beg = src; + for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { + vl = __riscv_vsetvl_e16m8(len); + vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); + v = simdutf_byteflip(v, vl); + long idx = __riscv_vfirst_m_b2(__riscv_vmsgtu_vx_u16m8_b2(v, 255, vl), vl); + if (idx >= 0) + return result(error_code::TOO_LARGE, src - beg + idx); + __riscv_vse8_v_u8m4((uint8_t *)dst, __riscv_vncvt_x_x_w_u8m4(v, vl), vl); } - return converted_chars; + return result(error_code::SUCCESS, src - beg); } -simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept { - return icelake::utf8_to_latin1_avx512(buf, len, latin1_output); +simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( + const char16_t *src, size_t len, char *dst) const noexcept { + result res = convert_utf16le_to_latin1_with_errors(src, len, dst); + return res.error == error_code::SUCCESS ? res.count : 0; } -simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_output) const noexcept { - // First, try to convert as much as possible using the SIMD implementation. - const char *obuf = buf; - char *olatin1_output = latin1_output; - size_t written = icelake::utf8_to_latin1_avx512(obuf, len, olatin1_output); +simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( + const char16_t *src, size_t len, char *dst) const noexcept { + result res = convert_utf16be_to_latin1_with_errors(src, len, dst); + return res.error == error_code::SUCCESS ? res.count : 0; +} - // If we have completely converted the string - if (obuf == buf + len) { - return {simdutf::SUCCESS, written}; - } - size_t pos = obuf - buf; - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, buf + pos, len - pos, latin1_output); - res.count += pos; - return res; +simdutf_warn_unused result +implementation::convert_utf16le_to_latin1_with_errors( + const char16_t *src, size_t len, char *dst) const noexcept { + return rvv_utf16_to_latin1_with_errors(src, len, dst); } -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept { - return icelake::valid_utf8_to_latin1_avx512(buf, len, latin1_output); +simdutf_warn_unused result +implementation::convert_utf16be_to_latin1_with_errors( + const char16_t *src, size_t len, char *dst) const noexcept { + if (supports_zvbb()) + return rvv_utf16_to_latin1_with_errors(src, len, + dst); + else + return rvv_utf16_to_latin1_with_errors(src, len, dst); } -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16_result ret = - fast_avx512_convert_utf8_to_utf16(buf, len, - utf16_output); - if (ret.second == nullptr) { - return 0; +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( + const char16_t *src, size_t len, char *dst) const noexcept { + const char16_t *const beg = src; + for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { + vl = __riscv_vsetvl_e16m8(len); + vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); + __riscv_vse8_v_u8m4((uint8_t *)dst, __riscv_vncvt_x_x_w_u8m4(v, vl), vl); } - return ret.second - utf16_output; + return src - beg; } -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16_result ret = fast_avx512_convert_utf8_to_utf16( - buf, len, utf16_output); - if (ret.second == nullptr) { - return 0; +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( + const char16_t *src, size_t len, char *dst) const noexcept { + const char16_t *const beg = src; + for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { + vl = __riscv_vsetvl_e16m8(len); + vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); + __riscv_vse8_v_u8m4((uint8_t *)dst, __riscv_vnsrl_wx_u8m4(v, 8, vl), vl); } - return ret.second - utf16_output; + return src - beg; } -simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return fast_avx512_convert_utf8_to_utf16_with_errors( - buf, len, utf16_output); -} +template +simdutf_really_inline static result +rvv_utf16_to_utf8_with_errors(const char16_t *src, size_t len, char *dst) { + size_t n = len; + const char16_t *srcBeg = src; + const char *dstBeg = dst; + size_t vl8m4 = __riscv_vsetvlmax_e8m4(); + vbool2_t m4mulp2 = __riscv_vmseq_vx_u8m4_b2( + __riscv_vand_vx_u8m4(__riscv_vid_v_u8m4(vl8m4), 3, vl8m4), 2, vl8m4); -simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return fast_avx512_convert_utf8_to_utf16_with_errors( - buf, len, utf16_output); -} + for (size_t vl, vlOut; n > 0;) { + vl = __riscv_vsetvl_e16m2(n); -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16_result ret = - icelake::valid_utf8_to_fixed_length( - buf, len, utf16_output); - size_t saved_bytes = ret.second - utf16_output; - const char *end = buf + len; - if (ret.first == end) { - return saved_bytes; - } + vuint16m2_t v = __riscv_vle16_v_u16m2((uint16_t const *)src, vl); + v = simdutf_byteflip(v, vl); + vbool8_t m234 = __riscv_vmsgtu_vx_u16m2_b8(v, 0x80 - 1, vl); - // Note: AVX512 procedure looks up 4 bytes forward, and - // correctly converts multi-byte chars even if their - // continuation bytes lie outsiede 16-byte window. - // It meas, we have to skip continuation bytes from - // the beginning ret.first, as they were already consumed. - while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) { - ret.first += 1; - } + if (__riscv_vfirst_m_b8(m234, vl) < 0) { /* 1 byte utf8 */ + vlOut = vl; + __riscv_vse8_v_u8m1((uint8_t *)dst, __riscv_vncvt_x_x_w_u8m1(v, vlOut), + vlOut); + n -= vl, src += vl, dst += vlOut; + continue; + } - if (ret.first != end) { - const size_t scalar_saved_bytes = - scalar::utf8_to_utf16::convert_valid( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; + vbool8_t m34 = __riscv_vmsgtu_vx_u16m2_b8(v, 0x800 - 1, vl); + + if (__riscv_vfirst_m_b8(m34, vl) < 0) { /* 1/2 byte utf8 */ + /* 0: [ aaa|aabbbbbb] + * 1: [aabbbbbb| ] vsll 8 + * 2: [ | aaaaa] vsrl 6 + * 3: [00111111|00011111] + * 4: [ bbbbbb|000aaaaa] (1|2)&3 + * 5: [11000000|11000000] + * 6: [10bbbbbb|110aaaaa] 4|5 */ + vuint16m2_t twoByte = __riscv_vand_vx_u16m2( + __riscv_vor_vv_u16m2(__riscv_vsll_vx_u16m2(v, 8, vl), + __riscv_vsrl_vx_u16m2(v, 6, vl), vl), + 0b0011111100011111, vl); + vuint16m2_t vout16 = + __riscv_vor_vx_u16m2_mu(m234, v, twoByte, 0b1000000011000000, vl); + vuint8m2_t vout = __riscv_vreinterpret_v_u16m2_u8m2(vout16); + + /* Every high byte that is zero should be compressed + * low bytes should never be compressed, so we set them + * to all ones, and then create a non-zero bytes mask */ + vbool4_t mcomp = + __riscv_vmsne_vx_u8m2_b4(__riscv_vreinterpret_v_u16m2_u8m2( + __riscv_vor_vx_u16m2(vout16, 0xFF, vl)), + 0, vl * 2); + vlOut = __riscv_vcpop_m_b4(mcomp, vl * 2); + + vout = __riscv_vcompress_vm_u8m2(vout, mcomp, vl * 2); + __riscv_vse8_v_u8m2((uint8_t *)dst, vout, vlOut); + + n -= vl, src += vl, dst += vlOut; + continue; } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} + vbool8_t sur = __riscv_vmseq_vx_u16m2_b8( + __riscv_vand_vx_u16m2(v, 0xF800, vl), 0xD800, vl); + long first = __riscv_vfirst_m_b8(sur, vl); + size_t tail = vl - first; + vl = first < 0 ? vl : first; -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16_result ret = - icelake::valid_utf8_to_fixed_length( - buf, len, utf16_output); - size_t saved_bytes = ret.second - utf16_output; - const char *end = buf + len; - if (ret.first == end) { - return saved_bytes; - } + if (vl > 0) { /* 1/2/3 byte utf8 */ + /* in: [aaaabbbb|bbcccccc] + * v1: [0bcccccc| ] vsll 8 + * v1: [10cccccc| ] vsll 8 & 0b00111111 | 0b10000000 + * v2: [ |110bbbbb] vsrl 6 & 0b00111111 | 0b11000000 + * v2: [ |10bbbbbb] vsrl 6 & 0b00111111 | 0b10000000 + * v3: [ |1110aaaa] vsrl 12 | 0b11100000 + * 1: [00000000|0bcccccc|00000000|00000000] => [0bcccccc] + * 2: [00000000|10cccccc|110bbbbb|00000000] => [110bbbbb] [10cccccc] + * 3: [00000000|10cccccc|10bbbbbb|1110aaaa] => [1110aaaa] [10bbbbbb] + * [10cccccc] + */ + vuint16m2_t v1, v2, v3, v12; + v1 = __riscv_vor_vx_u16m2_mu( + m234, v, __riscv_vand_vx_u16m2(v, 0b00111111, vl), 0b10000000, vl); + v1 = __riscv_vsll_vx_u16m2(v1, 8, vl); - // Note: AVX512 procedure looks up 4 bytes forward, and - // correctly converts multi-byte chars even if their - // continuation bytes lie outsiede 16-byte window. - // It meas, we have to skip continuation bytes from - // the beginning ret.first, as they were already consumed. - while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) { - ret.first += 1; - } + v2 = __riscv_vor_vx_u16m2( + __riscv_vand_vx_u16m2(__riscv_vsrl_vx_u16m2(v, 6, vl), 0b00111111, + vl), + 0b10000000, vl); + v2 = __riscv_vor_vx_u16m2_mu(__riscv_vmnot_m_b8(m34, vl), v2, v2, + 0b01000000, vl); + v3 = __riscv_vor_vx_u16m2(__riscv_vsrl_vx_u16m2(v, 12, vl), 0b11100000, + vl); + v12 = __riscv_vor_vv_u16m2_mu(m234, v1, v1, v2, vl); - if (ret.first != end) { - const size_t scalar_saved_bytes = - scalar::utf8_to_utf16::convert_valid( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; + vuint32m4_t w12 = __riscv_vwmulu_vx_u32m4(v12, 1 << 8, vl); + vuint32m4_t w123 = __riscv_vwaddu_wv_u32m4_mu(m34, w12, w12, v3, vl); + vuint8m4_t vout = __riscv_vreinterpret_v_u32m4_u8m4(w123); + + vbool2_t mcomp = __riscv_vmor_mm_b2( + m4mulp2, __riscv_vmsne_vx_u8m4_b2(vout, 0, vl * 4), vl * 4); + vlOut = __riscv_vcpop_m_b2(mcomp, vl * 4); + + vout = __riscv_vcompress_vm_u8m4(vout, mcomp, vl * 4); + __riscv_vse8_v_u8m4((uint8_t *)dst, vout, vlOut); + + n -= vl, src += vl, dst += vlOut; } - saved_bytes += scalar_saved_bytes; + + if (tail) + while (n) { + uint16_t word = simdutf_byteflip(src[0]); + if ((word & 0xFF80) == 0) { + break; + } else if ((word & 0xF800) == 0) { + break; + } else if ((word & 0xF800) != 0xD800) { + break; + } else { + // must be a surrogate pair + if (n <= 1) + return result(error_code::SURROGATE, src - srcBeg); + uint16_t diff = word - 0xD800; + if (diff > 0x3FF) + return result(error_code::SURROGATE, src - srcBeg); + uint16_t diff2 = simdutf_byteflip(src[1]) - 0xDC00; + if (diff2 > 0x3FF) + return result(error_code::SURROGATE, src - srcBeg); + + uint32_t value = ((diff + 0x40) << 10) + diff2; + + // will generate four UTF-8 bytes + // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX + *dst++ = (char)((value >> 18) | 0b11110000); + *dst++ = (char)(((value >> 12) & 0b111111) | 0b10000000); + *dst++ = (char)(((value >> 6) & 0b111111) | 0b10000000); + *dst++ = (char)((value & 0b111111) | 0b10000000); + src += 2; + n -= 2; + } + } } - return saved_bytes; + return result(error_code::SUCCESS, dst - dstBeg); } -simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_out) const noexcept { - uint32_t *utf32_output = reinterpret_cast(utf32_out); - utf8_to_utf32_result ret = - icelake::validating_utf8_to_fixed_length( - buf, len, utf32_output); - if (ret.second == nullptr) - return 0; +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( + const char16_t *src, size_t len, char *dst) const noexcept { + result res = convert_utf16le_to_utf8_with_errors(src, len, dst); + return res.error == error_code::SUCCESS ? res.count : 0; +} - size_t saved_bytes = ret.second - utf32_output; - const char *end = buf + len; - if (ret.first == end) { - return saved_bytes; - } +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( + const char16_t *src, size_t len, char *dst) const noexcept { + result res = convert_utf16be_to_utf8_with_errors(src, len, dst); + return res.error == error_code::SUCCESS ? res.count : 0; +} - // Note: the AVX512 procedure looks up 4 bytes forward, and - // correctly converts multi-byte chars even if their - // continuation bytes lie outside 16-byte window. - // It means, we have to skip continuation bytes from - // the beginning ret.first, as they were already consumed. - while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) { - ret.first += 1; - } - if (ret.first != end) { - const size_t scalar_saved_bytes = scalar::utf8_to_utf32::convert( - ret.first, len - (ret.first - buf), utf32_out + saved_bytes); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } +simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( + const char16_t *src, size_t len, char *dst) const noexcept { + return rvv_utf16_to_utf8_with_errors(src, len, dst); +} - return saved_bytes; +simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( + const char16_t *src, size_t len, char *dst) const noexcept { + if (supports_zvbb()) + return rvv_utf16_to_utf8_with_errors(src, len, dst); + else + return rvv_utf16_to_utf8_with_errors(src, len, dst); } -simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32) const noexcept { - if (simdutf_unlikely(len == 0)) { - return {error_code::SUCCESS, 0}; - } - uint32_t *utf32_output = reinterpret_cast(utf32); - auto ret = icelake::validating_utf8_to_fixed_length_with_constant_checks< - endianness::LITTLE, uint32_t>(buf, len, utf32_output); +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( + const char16_t *src, size_t len, char *dst) const noexcept { + return convert_utf16le_to_utf8(src, len, dst); +} - if (!std::get<2>(ret)) { - size_t pos = std::get<0>(ret) - buf; - // We might have an error that occurs right before pos. - // This is only a concern if buf[pos] is not a continuation byte. - if ((buf[pos] & 0xc0) != 0x80 && pos >= 64) { - pos -= 1; - } else if ((buf[pos] & 0xc0) == 0x80 && pos >= 64) { - // We must check whether we are the fourth continuation byte - bool c1 = (buf[pos - 1] & 0xc0) == 0x80; - bool c2 = (buf[pos - 2] & 0xc0) == 0x80; - bool c3 = (buf[pos - 3] & 0xc0) == 0x80; - if (c1 && c2 && c3) { - return {simdutf::TOO_LONG, pos}; +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( + const char16_t *src, size_t len, char *dst) const noexcept { + return convert_utf16be_to_utf8(src, len, dst); +} + +template +simdutf_really_inline static result +rvv_utf16_to_utf32_with_errors(const char16_t *src, size_t len, char32_t *dst) { + const char16_t *const srcBeg = src; + char32_t *const dstBeg = dst; + + constexpr const uint16_t ANY_SURROGATE_MASK = 0xf800; + constexpr const uint16_t ANY_SURROGATE_VALUE = 0xd800; + constexpr const uint16_t LO_SURROGATE_MASK = 0xfc00; + constexpr const uint16_t LO_SURROGATE_VALUE = 0xdc00; + constexpr const uint16_t HI_SURROGATE_MASK = 0xfc00; + constexpr const uint16_t HI_SURROGATE_VALUE = 0xd800; + + uint16_t last = 0; + while (len > 0) { + size_t vl = __riscv_vsetvl_e16m2(len); + vuint16m2_t v0 = __riscv_vle16_v_u16m2((uint16_t const *)src, vl); + v0 = simdutf_byteflip(v0, vl); + + { // check fast-path + const vuint16m2_t v = __riscv_vand_vx_u16m2(v0, ANY_SURROGATE_MASK, vl); + const vbool8_t any_surrogate = + __riscv_vmseq_vx_u16m2_b8(v, ANY_SURROGATE_VALUE, vl); + if (__riscv_vfirst_m_b8(any_surrogate, vl) < 0) { + /* no surrogates */ + __riscv_vse32_v_u32m4((uint32_t *)dst, __riscv_vzext_vf2_u32m4(v0, vl), + vl); + len -= vl; + src += vl; + dst += vl; + continue; } } - // todo: we reset the output to utf32 instead of using std::get<2.(ret) as - // you'd expect. that is because - // validating_utf8_to_fixed_length_with_constant_checks may have processed - // data beyond the error. - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, buf + pos, len - pos, utf32); - res.count += pos; - return res; - } - size_t saved_bytes = std::get<1>(ret) - utf32_output; - const char *end = buf + len; - if (std::get<0>(ret) == end) { - return {simdutf::SUCCESS, saved_bytes}; - } - // Note: the AVX512 procedure looks up 4 bytes forward, and - // correctly converts multi-byte chars even if their - // continuation bytes lie outside 16-byte window. - // It means, we have to skip continuation bytes from - // the beginning ret.first, as they were already consumed. - while (std::get<0>(ret) != end and - ((uint8_t(*std::get<0>(ret)) & 0xc0) == 0x80)) { - std::get<0>(ret) += 1; - } + if ((simdutf_byteflip(src[0]) & LO_SURROGATE_MASK) == + LO_SURROGATE_VALUE) { + return result(error_code::SURROGATE, src - srcBeg); + } + + // decode surrogates + vuint16m2_t v1 = __riscv_vslide1down_vx_u16m2(v0, 0, vl); + vl = __riscv_vsetvl_e16m2(vl - 1); + if (vl == 0) { + return result(error_code::SURROGATE, src - srcBeg); + } + + const vbool8_t surhi = __riscv_vmseq_vx_u16m2_b8( + __riscv_vand_vx_u16m2(v0, HI_SURROGATE_MASK, vl), HI_SURROGATE_VALUE, + vl); + const vbool8_t surlo = __riscv_vmseq_vx_u16m2_b8( + __riscv_vand_vx_u16m2(v1, LO_SURROGATE_MASK, vl), LO_SURROGATE_VALUE, + vl); + + // compress everything but lo surrogates + const vbool8_t compress = __riscv_vmsne_vx_u16m2_b8( + __riscv_vand_vx_u16m2(v0, LO_SURROGATE_MASK, vl), LO_SURROGATE_VALUE, + vl); - if (std::get<0>(ret) != end) { - auto scalar_result = scalar::utf8_to_utf32::convert_with_errors( - std::get<0>(ret), len - (std::get<0>(ret) - buf), - reinterpret_cast(utf32_output) + saved_bytes); - if (scalar_result.error != simdutf::SUCCESS) { - scalar_result.count += (std::get<0>(ret) - buf); - } else { - scalar_result.count += saved_bytes; + { + const vbool8_t diff = __riscv_vmxor_mm_b8(surhi, surlo, vl); + const long idx = __riscv_vfirst_m_b8(diff, vl); + if (idx >= 0) { + uint16_t word = simdutf_byteflip(src[idx]); + if (word < 0xD800 || word > 0xDBFF) { + return result(error_code::SURROGATE, src - srcBeg + idx + 1); + } + return result(error_code::SURROGATE, src - srcBeg + idx); + } } - return scalar_result; - } - return {simdutf::SUCCESS, size_t(std::get<1>(ret) - utf32_output)}; -} + last = simdutf_byteflip(src[vl]); + vuint32m4_t utf32 = __riscv_vzext_vf2_u32m4(v0, vl); -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_out) const noexcept { - uint32_t *utf32_output = reinterpret_cast(utf32_out); - utf8_to_utf32_result ret = - icelake::valid_utf8_to_fixed_length( - buf, len, utf32_output); - size_t saved_bytes = ret.second - utf32_output; - const char *end = buf + len; - if (ret.first == end) { - return saved_bytes; - } + // v0 = 110110yyyyyyyyyy (0xd800 + yyyyyyyyyy) --- hi surrogate + // v1 = 110111xxxxxxxxxx (0xdc00 + xxxxxxxxxx) --- lo surrogate - // Note: AVX512 procedure looks up 4 bytes forward, and - // correctly converts multi-byte chars even if their - // continuation bytes lie outsiede 16-byte window. - // It meas, we have to skip continuation bytes from - // the beginning ret.first, as they were already consumed. - while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) { - ret.first += 1; - } + // t0 = u16( 0000_00yy_yyyy_yyyy) + const vuint32m4_t t0 = + __riscv_vzext_vf2_u32m4(__riscv_vand_vx_u16m2(v0, 0x03ff, vl), vl); + // t1 = u32(0000_0000_0000_yyyy_yyyy_yy00_0000_0000) + const vuint32m4_t t1 = __riscv_vsll_vx_u32m4(t0, 10, vl); - if (ret.first != end) { - const size_t scalar_saved_bytes = scalar::utf8_to_utf32::convert_valid( - ret.first, len - (ret.first - buf), utf32_out + saved_bytes); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } + // t2 = u32(0000_0000_0000_0000_0000_00xx_xxxx_xxxx) + const vuint32m4_t t2 = + __riscv_vzext_vf2_u32m4(__riscv_vand_vx_u16m2(v1, 0x03ff, vl), vl); - return saved_bytes; -} + // t3 = u32(0000_0000_0000_yyyy_yyyy_yyxx_xxxx_xxxx) + const vuint32m4_t t3 = __riscv_vor_vv_u32m4(t1, t2, vl); -simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return icelake_convert_utf16_to_latin1(buf, len, - latin1_output); -} + // t4 = utf32 from surrogate pairs + const vuint32m4_t t4 = __riscv_vadd_vx_u32m4(t3, 0x10000, vl); -simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return icelake_convert_utf16_to_latin1(buf, len, - latin1_output); -} + const vuint32m4_t result = __riscv_vmerge_vvm_u32m4(utf32, t4, surhi, vl); -simdutf_warn_unused result -implementation::convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return icelake_convert_utf16_to_latin1_with_errors( - buf, len, latin1_output) - .first; -} + const vuint32m4_t comp = __riscv_vcompress_vm_u32m4(result, compress, vl); + const size_t vlOut = __riscv_vcpop_m_b8(compress, vl); + __riscv_vse32_v_u32m4((uint32_t *)dst, comp, vlOut); -simdutf_warn_unused result -implementation::convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return icelake_convert_utf16_to_latin1_with_errors( - buf, len, latin1_output) - .first; -} + len -= vl; + src += vl; + dst += vlOut; -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: implement custom function - return convert_utf16be_to_latin1(buf, len, latin1_output); -} + if ((last & LO_SURROGATE_MASK) == LO_SURROGATE_VALUE) { + // last item is lo surrogate and got already consumed + len -= 1; + src += 1; + } + } -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: implement custom function - return convert_utf16le_to_latin1(buf, len, latin1_output); + return result(error_code::SUCCESS, dst - dstBeg); } -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - size_t outlen; - size_t inlen = utf16_to_utf8_avx512i( - buf, len, (unsigned char *)utf8_output, &outlen); - if (inlen != len) { - return 0; - } - return outlen; +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( + const char16_t *src, size_t len, char32_t *dst) const noexcept { + result res = convert_utf16le_to_utf32_with_errors(src, len, dst); + return res.error == error_code::SUCCESS ? res.count : 0; } -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - size_t outlen; - size_t inlen = utf16_to_utf8_avx512i( - buf, len, (unsigned char *)utf8_output, &outlen); - if (inlen != len) { - return 0; - } - return outlen; +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( + const char16_t *src, size_t len, char32_t *dst) const noexcept { + result res = convert_utf16be_to_utf32_with_errors(src, len, dst); + return res.error == error_code::SUCCESS ? res.count : 0; } -simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - size_t outlen; - size_t inlen = utf16_to_utf8_avx512i( - buf, len, (unsigned char *)utf8_output, &outlen); - if (inlen != len) { - result res = scalar::utf16_to_utf8::convert_with_errors( - buf + inlen, len - inlen, utf8_output + outlen); - res.count += inlen; - return res; - } - return {simdutf::SUCCESS, outlen}; +simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( + const char16_t *src, size_t len, char32_t *dst) const noexcept { + return rvv_utf16_to_utf32_with_errors(src, len, dst); } -simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - size_t outlen; - size_t inlen = utf16_to_utf8_avx512i( - buf, len, (unsigned char *)utf8_output, &outlen); - if (inlen != len) { - result res = scalar::utf16_to_utf8::convert_with_errors( - buf + inlen, len - inlen, utf8_output + outlen); - res.count += inlen; - return res; - } - return {simdutf::SUCCESS, outlen}; +simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( + const char16_t *src, size_t len, char32_t *dst) const noexcept { + if (supports_zvbb()) + return rvv_utf16_to_utf32_with_errors(src, len, + dst); + else + return rvv_utf16_to_utf32_with_errors(src, len, dst); } -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf16le_to_utf8(buf, len, utf8_output); +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( + const char16_t *src, size_t len, char32_t *dst) const noexcept { + return convert_utf16le_to_utf32(src, len, dst); } -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf16be_to_utf8(buf, len, utf8_output); +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( + const char16_t *src, size_t len, char32_t *dst) const noexcept { + return convert_utf16be_to_utf32(src, len, dst); } +/* end file src/rvv/rvv_utf16_to.inl.cpp */ +/* begin file src/rvv/rvv_utf32_to.inl.cpp */ simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - return icelake_convert_utf32_to_latin1(buf, len, latin1_output); + const char32_t *src, size_t len, char *dst) const noexcept { + result res = convert_utf32_to_latin1_with_errors(src, len, dst); + return res.error == error_code::SUCCESS ? res.count : 0; } simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - return icelake_convert_utf32_to_latin1_with_errors(buf, len, latin1_output) - .first; + const char32_t *src, size_t len, char *dst) const noexcept { + const char32_t *const beg = src; + for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { + vl = __riscv_vsetvl_e32m8(len); + vuint32m8_t v = __riscv_vle32_v_u32m8((uint32_t *)src, vl); + long idx = __riscv_vfirst_m_b4(__riscv_vmsgtu_vx_u32m8_b4(v, 255, vl), vl); + if (idx >= 0) + return result(error_code::TOO_LARGE, src - beg + idx); + /* We don't use vcompress here, because its performance varies widely on + * current platforms. This might be worth reconsidering once there is more + * hardware available. */ + __riscv_vse8_v_u8m2( + (uint8_t *)dst, + __riscv_vncvt_x_x_w_u8m2(__riscv_vncvt_x_x_w_u16m4(v, vl), vl), vl); + } + return result(error_code::SUCCESS, src - beg); } simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - return icelake_convert_utf32_to_latin1(buf, len, latin1_output); + const char32_t *src, size_t len, char *dst) const noexcept { + return convert_utf32_to_latin1(src, len, dst); } -simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - avx512_convert_utf32_to_utf8(buf, len, utf8_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf8_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; +simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( + const char32_t *src, size_t len, char *dst) const noexcept { + size_t n = len; + const char32_t *srcBeg = src; + const char *dstBeg = dst; + size_t vl8m4 = __riscv_vsetvlmax_e8m4(); + vbool2_t m4mulp2 = __riscv_vmseq_vx_u8m4_b2( + __riscv_vand_vx_u8m4(__riscv_vid_v_u8m4(vl8m4), 3, vl8m4), 2, vl8m4); + + for (size_t vl, vlOut; n > 0;) { + vl = __riscv_vsetvl_e32m4(n); + + vuint32m4_t v = __riscv_vle32_v_u32m4((uint32_t const *)src, vl); + vbool8_t m234 = __riscv_vmsgtu_vx_u32m4_b8(v, 0x80 - 1, vl); + vuint16m2_t vn = __riscv_vncvt_x_x_w_u16m2(v, vl); + + if (__riscv_vfirst_m_b8(m234, vl) < 0) { /* 1 byte utf8 */ + vlOut = vl; + __riscv_vse8_v_u8m1((uint8_t *)dst, __riscv_vncvt_x_x_w_u8m1(vn, vlOut), + vlOut); + n -= vl, src += vl, dst += vlOut; + continue; } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} -simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - icelake::avx512_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); - if (ret.first.count != len) { - result scalar_res = scalar::utf32_to_utf8::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; + vbool8_t m34 = __riscv_vmsgtu_vx_u32m4_b8(v, 0x800 - 1, vl); + + if (__riscv_vfirst_m_b8(m34, vl) < 0) { /* 1/2 byte utf8 */ + /* 0: [ aaa|aabbbbbb] + * 1: [aabbbbbb| ] vsll 8 + * 2: [ | aaaaa] vsrl 6 + * 3: [00111111|00111111] + * 4: [ bbbbbb|000aaaaa] (1|2)&3 + * 5: [10000000|11000000] + * 6: [10bbbbbb|110aaaaa] 4|5 */ + vuint16m2_t twoByte = __riscv_vand_vx_u16m2( + __riscv_vor_vv_u16m2(__riscv_vsll_vx_u16m2(vn, 8, vl), + __riscv_vsrl_vx_u16m2(vn, 6, vl), vl), + 0b0011111100111111, vl); + vuint16m2_t vout16 = + __riscv_vor_vx_u16m2_mu(m234, vn, twoByte, 0b1000000011000000, vl); + vuint8m2_t vout = __riscv_vreinterpret_v_u16m2_u8m2(vout16); + + /* Every high byte that is zero should be compressed + * low bytes should never be compressed, so we set them + * to all ones, and then create a non-zero bytes mask */ + vbool4_t mcomp = + __riscv_vmsne_vx_u8m2_b4(__riscv_vreinterpret_v_u16m2_u8m2( + __riscv_vor_vx_u16m2(vout16, 0xFF, vl)), + 0, vl * 2); + vlOut = __riscv_vcpop_m_b4(mcomp, vl * 2); + + vout = __riscv_vcompress_vm_u8m2(vout, mcomp, vl * 2); + __riscv_vse8_v_u8m2((uint8_t *)dst, vout, vlOut); + + n -= vl, src += vl, dst += vlOut; + continue; + } + long idx1 = + __riscv_vfirst_m_b8(__riscv_vmsgtu_vx_u32m4_b8(v, 0x10FFFF, vl), vl); + vbool8_t sur = __riscv_vmseq_vx_u32m4_b8( + __riscv_vand_vx_u32m4(v, 0xFFFFF800, vl), 0xD800, vl); + long idx2 = __riscv_vfirst_m_b8(sur, vl); + if (idx1 >= 0 && idx2 >= 0) { + if (idx1 <= idx2) { + return result(error_code::TOO_LARGE, src - srcBeg + idx1); + } else { + return result(error_code::SURROGATE, src - srcBeg + idx2); + } + } + if (idx1 >= 0) { + return result(error_code::TOO_LARGE, src - srcBeg + idx1); + } + if (idx2 >= 0) { + return result(error_code::SURROGATE, src - srcBeg + idx2); } - } - ret.first.count = - ret.second - - utf8_output; // Set count to the number of 8-bit code units written - return ret.first; -} -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf32_to_utf8(buf, len, utf8_output); -} + vbool8_t m4 = __riscv_vmsgtu_vx_u32m4_b8(v, 0x10000 - 1, vl); + long first = __riscv_vfirst_m_b8(m4, vl); + size_t tail = vl - first; + vl = first < 0 ? vl : first; -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - avx512_convert_utf32_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf32_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; + if (vl > 0) { /* 1/2/3 byte utf8 */ + /* vn: [aaaabbbb|bbcccccc] + * v1: [0bcccccc| ] vsll 8 + * v1: [10cccccc| ] vsll 8 & 0b00111111 | 0b10000000 + * v2: [ |110bbbbb] vsrl 6 & 0b00111111 | 0b11000000 + * v2: [ |10bbbbbb] vsrl 6 & 0b00111111 | 0b10000000 + * v3: [ |1110aaaa] vsrl 12 | 0b11100000 + * 1: [00000000|0bcccccc|00000000|00000000] => [0bcccccc] + * 2: [00000000|10cccccc|110bbbbb|00000000] => [110bbbbb] [10cccccc] + * 3: [00000000|10cccccc|10bbbbbb|1110aaaa] => [1110aaaa] [10bbbbbb] + * [10cccccc] + */ + vuint16m2_t v1, v2, v3, v12; + v1 = __riscv_vor_vx_u16m2_mu( + m234, vn, __riscv_vand_vx_u16m2(vn, 0b00111111, vl), 0b10000000, vl); + v1 = __riscv_vsll_vx_u16m2(v1, 8, vl); + + v2 = __riscv_vor_vx_u16m2( + __riscv_vand_vx_u16m2(__riscv_vsrl_vx_u16m2(vn, 6, vl), 0b00111111, + vl), + 0b10000000, vl); + v2 = __riscv_vor_vx_u16m2_mu(__riscv_vmnot_m_b8(m34, vl), v2, v2, + 0b01000000, vl); + v3 = __riscv_vor_vx_u16m2(__riscv_vsrl_vx_u16m2(vn, 12, vl), 0b11100000, + vl); + v12 = __riscv_vor_vv_u16m2_mu(m234, v1, v1, v2, vl); + + vuint32m4_t w12 = __riscv_vwmulu_vx_u32m4(v12, 1 << 8, vl); + vuint32m4_t w123 = __riscv_vwaddu_wv_u32m4_mu(m34, w12, w12, v3, vl); + vuint8m4_t vout = __riscv_vreinterpret_v_u32m4_u8m4(w123); + + vbool2_t mcomp = __riscv_vmor_mm_b2( + m4mulp2, __riscv_vmsne_vx_u8m4_b2(vout, 0, vl * 4), vl * 4); + vlOut = __riscv_vcpop_m_b2(mcomp, vl * 4); + + vout = __riscv_vcompress_vm_u8m4(vout, mcomp, vl * 4); + __riscv_vse8_v_u8m4((uint8_t *)dst, vout, vlOut); + + n -= vl, src += vl, dst += vlOut; } - saved_bytes += scalar_saved_bytes; + + if (tail) + while (n) { + uint32_t word = src[0]; + if (word < 0x10000) + break; + if (word > 0x10FFFF) + return result(error_code::TOO_LARGE, src - srcBeg); + *dst++ = (uint8_t)((word >> 18) | 0b11110000); + *dst++ = (uint8_t)(((word >> 12) & 0b111111) | 0b10000000); + *dst++ = (uint8_t)(((word >> 6) & 0b111111) | 0b10000000); + *dst++ = (uint8_t)((word & 0b111111) | 0b10000000); + ++src; + --n; + } } - return saved_bytes; + + return result(error_code::SUCCESS, dst - dstBeg); } -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - avx512_convert_utf32_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf32_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; +simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( + const char32_t *src, size_t len, char *dst) const noexcept { + result res = convert_utf32_to_utf8_with_errors(src, len, dst); + return res.error == error_code::SUCCESS ? res.count : 0; } -simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - avx512_convert_utf32_to_utf16_with_errors( - buf, len, utf16_output); - if (ret.first.count != len) { - result scalar_res = - scalar::utf32_to_utf16::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf16_output; // Set count to the number of 8-bit code units written - return ret.first; +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( + const char32_t *src, size_t len, char *dst) const noexcept { + return convert_utf32_to_utf8(src, len, dst); } -simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - avx512_convert_utf32_to_utf16_with_errors(buf, len, - utf16_output); - if (ret.first.count != len) { - result scalar_res = - scalar::utf32_to_utf16::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; +template +simdutf_really_inline static result +rvv_convert_utf32_to_utf16_with_errors(const char32_t *src, size_t len, + char16_t *dst) { + size_t vl8m2 = __riscv_vsetvlmax_e8m2(); + vbool4_t m4even = __riscv_vmseq_vx_u8m2_b4( + __riscv_vand_vx_u8m2(__riscv_vid_v_u8m2(vl8m2), 1, vl8m2), 0, vl8m2); + const char16_t *dstBeg = dst; + const char32_t *srcBeg = src; + for (size_t vl, vlOut; len > 0; len -= vl, src += vl, dst += vlOut) { + vl = __riscv_vsetvl_e32m4(len); + vuint32m4_t v = __riscv_vle32_v_u32m4((uint32_t *)src, vl); + vuint32m4_t off = __riscv_vadd_vx_u32m4(v, 0xFFFF2000, vl); + long idx1 = + __riscv_vfirst_m_b8(__riscv_vmsgtu_vx_u32m4_b8(v, 0x10FFFF, vl), vl); + long idx2 = __riscv_vfirst_m_b8( + __riscv_vmsgtu_vx_u32m4_b8(off, 0xFFFFF7FF, vl), vl); + if (idx1 >= 0 && idx2 >= 0) { + if (idx1 <= idx2) + return result(error_code::TOO_LARGE, src - srcBeg + idx1); + return result(error_code::SURROGATE, src - srcBeg + idx2); + } + if (idx1 >= 0) + return result(error_code::TOO_LARGE, src - srcBeg + idx1); + if (idx2 >= 0) + return result(error_code::SURROGATE, src - srcBeg + idx2); + long idx = + __riscv_vfirst_m_b8(__riscv_vmsgtu_vx_u32m4_b8(v, 0xFFFF, vl), vl); + if (idx < 0) { + vlOut = vl; + vuint16m2_t n = + simdutf_byteflip(__riscv_vncvt_x_x_w_u16m2(v, vlOut), vlOut); + __riscv_vse16_v_u16m2((uint16_t *)dst, n, vlOut); + continue; } + vlOut = rvv_utf32_store_utf16_m4((uint16_t *)dst, v, vl, m4even); } - ret.first.count = - ret.second - - utf16_output; // Set count to the number of 8-bit code units written - return ret.first; + return result(error_code::SUCCESS, dst - dstBeg); } -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return convert_utf32_to_utf16le(buf, len, utf16_output); +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( + const char32_t *src, size_t len, char16_t *dst) const noexcept { + result res = convert_utf32_to_utf16le_with_errors(src, len, dst); + return res.error == error_code::SUCCESS ? res.count : 0; } -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return convert_utf32_to_utf16be(buf, len, utf16_output); +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( + const char32_t *src, size_t len, char16_t *dst) const noexcept { + result res = convert_utf32_to_utf16be_with_errors(src, len, dst); + return res.error == error_code::SUCCESS ? res.count : 0; } -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::tuple ret = - icelake::convert_utf16_to_utf32(buf, len, - utf32_output); - if (!std::get<2>(ret)) { - return 0; - } - size_t saved_bytes = std::get<1>(ret) - utf32_output; - if (std::get<0>(ret) != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf32::convert( - std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; +simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( + const char32_t *src, size_t len, char16_t *dst) const noexcept { + return rvv_convert_utf32_to_utf16_with_errors( + src, len, dst); } -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::tuple ret = - icelake::convert_utf16_to_utf32(buf, len, utf32_output); - if (!std::get<2>(ret)) { - return 0; - } - size_t saved_bytes = std::get<1>(ret) - utf32_output; - if (std::get<0>(ret) != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf32::convert( - std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; +simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( + const char32_t *src, size_t len, char16_t *dst) const noexcept { + if (supports_zvbb()) + return rvv_convert_utf32_to_utf16_with_errors( + src, len, dst); + else + return rvv_convert_utf32_to_utf16_with_errors(src, len, + dst); } -simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::tuple ret = - icelake::convert_utf16_to_utf32(buf, len, - utf32_output); - if (!std::get<2>(ret)) { - result scalar_res = - scalar::utf16_to_utf32::convert_with_errors( - std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); - scalar_res.count += (std::get<0>(ret) - buf); - return scalar_res; - } - size_t saved_bytes = std::get<1>(ret) - utf32_output; - if (std::get<0>(ret) != buf + len) { - result scalar_res = - scalar::utf16_to_utf32::convert_with_errors( - std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); - if (scalar_res.error) { - scalar_res.count += (std::get<0>(ret) - buf); - return scalar_res; - } else { - scalar_res.count += saved_bytes; - return scalar_res; +template +simdutf_really_inline static size_t +rvv_convert_valid_utf32_to_utf16(const char32_t *src, size_t len, + char16_t *dst) { + size_t vl8m2 = __riscv_vsetvlmax_e8m2(); + vbool4_t m4even = __riscv_vmseq_vx_u8m2_b4( + __riscv_vand_vx_u8m2(__riscv_vid_v_u8m2(vl8m2), 1, vl8m2), 0, vl8m2); + char16_t *dstBeg = dst; + for (size_t vl, vlOut; len > 0; len -= vl, src += vl, dst += vlOut) { + vl = __riscv_vsetvl_e32m4(len); + vuint32m4_t v = __riscv_vle32_v_u32m4((uint32_t *)src, vl); + if (__riscv_vfirst_m_b8(__riscv_vmsgtu_vx_u32m4_b8(v, 0xFFFF, vl), vl) < + 0) { + vlOut = vl; + vuint16m2_t n = + simdutf_byteflip(__riscv_vncvt_x_x_w_u16m2(v, vlOut), vlOut); + __riscv_vse16_v_u16m2((uint16_t *)dst, n, vlOut); + continue; } + vlOut = rvv_utf32_store_utf16_m4((uint16_t *)dst, v, vl, m4even); } - return simdutf::result(simdutf::SUCCESS, saved_bytes); + return dst - dstBeg; } -simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::tuple ret = - icelake::convert_utf16_to_utf32(buf, len, utf32_output); - if (!std::get<2>(ret)) { - result scalar_res = - scalar::utf16_to_utf32::convert_with_errors( - std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); - scalar_res.count += (std::get<0>(ret) - buf); - return scalar_res; - } - size_t saved_bytes = std::get<1>(ret) - utf32_output; - if (std::get<0>(ret) != buf + len) { - result scalar_res = - scalar::utf16_to_utf32::convert_with_errors( - std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); - if (scalar_res.error) { - scalar_res.count += (std::get<0>(ret) - buf); - return scalar_res; - } else { - scalar_res.count += saved_bytes; - return scalar_res; - } - } - return simdutf::result(simdutf::SUCCESS, saved_bytes); +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( + const char32_t *src, size_t len, char16_t *dst) const noexcept { + return rvv_convert_valid_utf32_to_utf16(src, len, + dst); } -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::tuple ret = - icelake::convert_utf16_to_utf32(buf, len, - utf32_output); - if (!std::get<2>(ret)) { - return 0; - } - size_t saved_bytes = std::get<1>(ret) - utf32_output; - if (std::get<0>(ret) != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf32::convert( - std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( + const char32_t *src, size_t len, char16_t *dst) const noexcept { + if (supports_zvbb()) + return rvv_convert_valid_utf32_to_utf16(src, len, + dst); + else + return rvv_convert_valid_utf32_to_utf16(src, len, dst); } +/* end file src/rvv/rvv_utf32_to.inl.cpp */ +/* begin file src/rvv/rvv_utf8_to.inl.cpp */ +template +simdutf_really_inline static size_t rvv_utf8_to_common(char const *src, + size_t len, Tdst *dst) { + static_assert(std::is_same() || + std::is_same(), + "invalid type"); + constexpr bool is16 = std::is_same(); + constexpr endianness endian = + bflip == simdutf_ByteFlip::NONE ? endianness::LITTLE : endianness::BIG; + const auto scalar = [](char const *in, size_t count, Tdst *out) { + return is16 ? scalar::utf8_to_utf16::convert(in, count, + (char16_t *)out) + : scalar::utf8_to_utf32::convert(in, count, (char32_t *)out); + }; -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::tuple ret = - icelake::convert_utf16_to_utf32(buf, len, utf32_output); - if (!std::get<2>(ret)) { - return 0; - } - size_t saved_bytes = std::get<1>(ret) - utf32_output; - if (std::get<0>(ret) != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf32::convert( - std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); - if (scalar_saved_bytes == 0) { + if (len < 32) + return scalar(src, len, dst); + + /* validate first three bytes */ + if (validate) { + size_t idx = 3; + while (idx < len && (src[idx] >> 6) == 0b10) + ++idx; + if (idx > 3 + 3 || !scalar::utf8::validate(src, idx)) return 0; - } - saved_bytes += scalar_saved_bytes; } - return saved_bytes; -} -void implementation::change_endianness_utf16(const char16_t *input, - size_t length, - char16_t *output) const noexcept { - size_t pos = 0; - const __m512i byteflip = _mm512_setr_epi64( - 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, - 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - while (pos + 32 <= length) { - __m512i utf16 = _mm512_loadu_si512((const __m512i *)(input + pos)); - utf16 = _mm512_shuffle_epi8(utf16, byteflip); - _mm512_storeu_si512(output + pos, utf16); - pos += 32; - } - if (pos < length) { - __mmask32 m((1U << (length - pos)) - 1); - __m512i utf16 = _mm512_maskz_loadu_epi16(m, (const __m512i *)(input + pos)); - utf16 = _mm512_shuffle_epi8(utf16, byteflip); - _mm512_mask_storeu_epi16(output + pos, m, utf16); - } -} + size_t tail = 3; + size_t n = len - tail; + Tdst *beg = dst; -simdutf_warn_unused size_t implementation::count_utf16le( - const char16_t *input, size_t length) const noexcept { - const char16_t *ptr = input; - size_t count{0}; + static const uint64_t err1m[] = {0x0202020202020202, 0x4915012180808080}; + static const uint64_t err2m[] = {0xCBCBCB8B8383A3E7, 0xCBCBDBCBCBCBCBCB}; + static const uint64_t err3m[] = {0x0101010101010101, 0X01010101BABAAEE6}; - if (length >= 32) { - const char16_t *end = input + length - 32; + const vuint8m1_t err1tbl = + __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err1m, 2)); + const vuint8m1_t err2tbl = + __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err2m, 2)); + const vuint8m1_t err3tbl = + __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err3m, 2)); - const __m512i low = _mm512_set1_epi16((uint16_t)0xdc00); - const __m512i high = _mm512_set1_epi16((uint16_t)0xdfff); + size_t vl8m2 = __riscv_vsetvlmax_e8m2(); + vbool4_t m4even = __riscv_vmseq_vx_u8m2_b4( + __riscv_vand_vx_u8m2(__riscv_vid_v_u8m2(vl8m2), 1, vl8m2), 0, vl8m2); - while (ptr <= end) { - __m512i utf16 = _mm512_loadu_si512((const __m512i *)ptr); - ptr += 32; - uint64_t not_high_surrogate = - static_cast(_mm512_cmpgt_epu16_mask(utf16, high) | - _mm512_cmplt_epu16_mask(utf16, low)); - count += count_ones(not_high_surrogate); + for (size_t vl, vlOut; n > 0; n -= vl, src += vl, dst += vlOut) { + vl = __riscv_vsetvl_e8m2(n); + + vuint8m2_t v0 = __riscv_vle8_v_u8m2((uint8_t const *)src, vl); + uint64_t max = __riscv_vmv_x_s_u8m1_u8( + __riscv_vredmaxu_vs_u8m2_u8m1(v0, __riscv_vmv_s_x_u8m1(0, vl), vl)); + + uint8_t next0 = src[vl + 0]; + uint8_t next1 = src[vl + 1]; + uint8_t next2 = src[vl + 2]; + + /* fast path: ASCII */ + if ((max | next0 | next1 | next2) < 0b10000000) { + vlOut = vl; + if (is16) + __riscv_vse16_v_u16m4( + (uint16_t *)dst, + simdutf_byteflip(__riscv_vzext_vf2_u16m4(v0, vlOut), vlOut), + vlOut); + else + __riscv_vse32_v_u32m8((uint32_t *)dst, + __riscv_vzext_vf4_u32m8(v0, vlOut), vlOut); + continue; } - } - return count + scalar::utf16::count_code_points( - ptr, length - (ptr - input)); -} + /* see "Validating UTF-8 In Less Than One Instruction Per Byte" + * https://arxiv.org/abs/2010.03090 */ + vuint8m2_t v1 = __riscv_vslide1down_vx_u8m2(v0, next0, vl); + vuint8m2_t v2 = __riscv_vslide1down_vx_u8m2(v1, next1, vl); + vuint8m2_t v3 = __riscv_vslide1down_vx_u8m2(v2, next2, vl); -simdutf_warn_unused size_t implementation::count_utf16be( - const char16_t *input, size_t length) const noexcept { - const char16_t *ptr = input; - size_t count{0}; - if (length >= 32) { + if (validate) { + vuint8m2_t s1 = __riscv_vreinterpret_v_u16m2_u8m2(__riscv_vsrl_vx_u16m2( + __riscv_vreinterpret_v_u8m2_u16m2(v2), 4, __riscv_vsetvlmax_e16m2())); + vuint8m2_t s3 = __riscv_vreinterpret_v_u16m2_u8m2(__riscv_vsrl_vx_u16m2( + __riscv_vreinterpret_v_u8m2_u16m2(v3), 4, __riscv_vsetvlmax_e16m2())); - const char16_t *end = input + length - 32; + vuint8m2_t idx2 = __riscv_vand_vx_u8m2(v2, 0xF, vl); + vuint8m2_t idx1 = __riscv_vand_vx_u8m2(s1, 0xF, vl); + vuint8m2_t idx3 = __riscv_vand_vx_u8m2(s3, 0xF, vl); - const __m512i low = _mm512_set1_epi16((uint16_t)0xdc00); - const __m512i high = _mm512_set1_epi16((uint16_t)0xdfff); + vuint8m2_t err1 = simdutf_vrgather_u8m1x2(err1tbl, idx1); + vuint8m2_t err2 = simdutf_vrgather_u8m1x2(err2tbl, idx2); + vuint8m2_t err3 = simdutf_vrgather_u8m1x2(err3tbl, idx3); + vint8m2_t errs = __riscv_vreinterpret_v_u8m2_i8m2( + __riscv_vand_vv_u8m2(__riscv_vand_vv_u8m2(err1, err2, vl), err3, vl)); - const __m512i byteflip = _mm512_setr_epi64( - 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, - 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - while (ptr <= end) { - __m512i utf16 = - _mm512_shuffle_epi8(_mm512_loadu_si512((__m512i *)ptr), byteflip); - ptr += 32; - uint64_t not_high_surrogate = - static_cast(_mm512_cmpgt_epu16_mask(utf16, high) | - _mm512_cmplt_epu16_mask(utf16, low)); - count += count_ones(not_high_surrogate); + vbool4_t is_3 = __riscv_vmsgtu_vx_u8m2_b4(v1, 0b11100000 - 1, vl); + vbool4_t is_4 = __riscv_vmsgtu_vx_u8m2_b4(v0, 0b11110000 - 1, vl); + vbool4_t is_34 = __riscv_vmor_mm_b4(is_3, is_4, vl); + vbool4_t err34 = + __riscv_vmxor_mm_b4(is_34, __riscv_vmslt_vx_i8m2_b4(errs, 0, vl), vl); + vbool4_t errm = + __riscv_vmor_mm_b4(__riscv_vmsgt_vx_i8m2_b4(errs, 0, vl), err34, vl); + if (__riscv_vfirst_m_b4(errm, vl) >= 0) + return 0; } - } - return count + scalar::utf16::count_code_points( - ptr, length - (ptr - input)); -} + /* decoding */ -simdutf_warn_unused size_t -implementation::count_utf8(const char *input, size_t length) const noexcept { - const uint8_t *str = reinterpret_cast(input); - size_t answer = - length / sizeof(__m512i) * - sizeof(__m512i); // Number of 512-bit chunks that fits into the length. - size_t i = 0; - __m512i unrolled_popcount{0}; + /* mask of non continuation bytes */ + vbool4_t m = + __riscv_vmsgt_vx_i8m2_b4(__riscv_vreinterpret_v_u8m2_i8m2(v0), -65, vl); + vlOut = __riscv_vcpop_m_b4(m, vl); - const __m512i continuation = _mm512_set1_epi8(char(0b10111111)); + /* extract first and second bytes */ + vuint8m2_t b1 = __riscv_vcompress_vm_u8m2(v0, m, vl); + vuint8m2_t b2 = __riscv_vcompress_vm_u8m2(v1, m, vl); - while (i + sizeof(__m512i) <= length) { - size_t iterations = (length - i) / sizeof(__m512i); + /* fast path: one and two byte */ + if (max < 0b11100000) { + b2 = __riscv_vand_vx_u8m2(b2, 0b00111111, vlOut); - size_t max_i = i + iterations * sizeof(__m512i) - sizeof(__m512i); - for (; i + 8 * sizeof(__m512i) <= max_i; i += 8 * sizeof(__m512i)) { - __m512i input1 = _mm512_loadu_si512((const __m512i *)(str + i)); - __m512i input2 = - _mm512_loadu_si512((const __m512i *)(str + i + sizeof(__m512i))); - __m512i input3 = - _mm512_loadu_si512((const __m512i *)(str + i + 2 * sizeof(__m512i))); - __m512i input4 = - _mm512_loadu_si512((const __m512i *)(str + i + 3 * sizeof(__m512i))); - __m512i input5 = - _mm512_loadu_si512((const __m512i *)(str + i + 4 * sizeof(__m512i))); - __m512i input6 = - _mm512_loadu_si512((const __m512i *)(str + i + 5 * sizeof(__m512i))); - __m512i input7 = - _mm512_loadu_si512((const __m512i *)(str + i + 6 * sizeof(__m512i))); - __m512i input8 = - _mm512_loadu_si512((const __m512i *)(str + i + 7 * sizeof(__m512i))); + vbool4_t m1 = __riscv_vmsgtu_vx_u8m2_b4(b1, 0b10111111, vlOut); + b1 = __riscv_vand_vx_u8m2_mu(m1, b1, b1, 63, vlOut); - __mmask64 mask1 = _mm512_cmple_epi8_mask(input1, continuation); - __mmask64 mask2 = _mm512_cmple_epi8_mask(input2, continuation); - __mmask64 mask3 = _mm512_cmple_epi8_mask(input3, continuation); - __mmask64 mask4 = _mm512_cmple_epi8_mask(input4, continuation); - __mmask64 mask5 = _mm512_cmple_epi8_mask(input5, continuation); - __mmask64 mask6 = _mm512_cmple_epi8_mask(input6, continuation); - __mmask64 mask7 = _mm512_cmple_epi8_mask(input7, continuation); - __mmask64 mask8 = _mm512_cmple_epi8_mask(input8, continuation); + vuint16m4_t b12 = __riscv_vwmulu_vv_u16m4( + b1, + __riscv_vmerge_vxm_u8m2(__riscv_vmv_v_x_u8m2(1, vlOut), 1 << 6, m1, + vlOut), + vlOut); + b12 = __riscv_vwaddu_wv_u16m4_mu(m1, b12, b12, b2, vlOut); + if (is16) + __riscv_vse16_v_u16m4((uint16_t *)dst, + simdutf_byteflip(b12, vlOut), vlOut); + else + __riscv_vse32_v_u32m8((uint32_t *)dst, + __riscv_vzext_vf2_u32m8(b12, vlOut), vlOut); + continue; + } - __m512i mask_register = _mm512_set_epi64(mask8, mask7, mask6, mask5, - mask4, mask3, mask2, mask1); + /* fast path: one, two and three byte */ + if (max < 0b11110000) { + vuint8m2_t b3 = __riscv_vcompress_vm_u8m2(v2, m, vl); - unrolled_popcount = _mm512_add_epi64(unrolled_popcount, - _mm512_popcnt_epi64(mask_register)); - } + b2 = __riscv_vand_vx_u8m2(b2, 0b00111111, vlOut); + b3 = __riscv_vand_vx_u8m2(b3, 0b00111111, vlOut); - for (; i <= max_i; i += sizeof(__m512i)) { - __m512i more_input = _mm512_loadu_si512((const __m512i *)(str + i)); - uint64_t continuation_bitmask = static_cast( - _mm512_cmple_epi8_mask(more_input, continuation)); - answer -= count_ones(continuation_bitmask); + vbool4_t m1 = __riscv_vmsgtu_vx_u8m2_b4(b1, 0b10111111, vlOut); + vbool4_t m3 = __riscv_vmsgtu_vx_u8m2_b4(b1, 0b11011111, vlOut); + + vuint8m2_t t1 = __riscv_vand_vx_u8m2_mu(m1, b1, b1, 63, vlOut); + b1 = __riscv_vand_vx_u8m2_mu(m3, t1, b1, 15, vlOut); + + vuint16m4_t b12 = __riscv_vwmulu_vv_u16m4( + b1, + __riscv_vmerge_vxm_u8m2(__riscv_vmv_v_x_u8m2(1, vlOut), 1 << 6, m1, + vlOut), + vlOut); + b12 = __riscv_vwaddu_wv_u16m4_mu(m1, b12, b12, b2, vlOut); + vuint16m4_t b123 = __riscv_vwaddu_wv_u16m4_mu( + m3, b12, __riscv_vsll_vx_u16m4_mu(m3, b12, b12, 6, vlOut), b3, vlOut); + if (is16) + __riscv_vse16_v_u16m4((uint16_t *)dst, + simdutf_byteflip(b123, vlOut), vlOut); + else + __riscv_vse32_v_u32m8((uint32_t *)dst, + __riscv_vzext_vf2_u32m8(b123, vlOut), vlOut); + continue; } - } - __m256i first_half = _mm512_extracti64x4_epi64(unrolled_popcount, 0); - __m256i second_half = _mm512_extracti64x4_epi64(unrolled_popcount, 1); - answer -= (size_t)_mm256_extract_epi64(first_half, 0) + - (size_t)_mm256_extract_epi64(first_half, 1) + - (size_t)_mm256_extract_epi64(first_half, 2) + - (size_t)_mm256_extract_epi64(first_half, 3) + - (size_t)_mm256_extract_epi64(second_half, 0) + - (size_t)_mm256_extract_epi64(second_half, 1) + - (size_t)_mm256_extract_epi64(second_half, 2) + - (size_t)_mm256_extract_epi64(second_half, 3); + /* extract third and fourth bytes */ + vuint8m2_t b3 = __riscv_vcompress_vm_u8m2(v2, m, vl); + vuint8m2_t b4 = __riscv_vcompress_vm_u8m2(v3, m, vl); + + /* remove prefix from leading bytes + * + * We could also use vrgather here, but it increases register pressure, + * and its performance varies widely on current platforms. It might be + * worth reconsidering, though, once there is more hardware available. + * Same goes for the __riscv_vsrl_vv_u32m4 correction step. + * + * We shift left and then right by the number of bytes in the prefix, + * which can be calculated as follows: + * x max(x-10, 0) + * 0xxx -> 0000-0111 -> sift by 0 or 1 -> 0 + * 10xx -> 1000-1011 -> don't care + * 110x -> 1100,1101 -> sift by 3 -> 2,3 + * 1110 -> 1110 -> sift by 4 -> 4 + * 1111 -> 1111 -> sift by 5 -> 5 + * + * vssubu.vx v, 10, (max(x-10, 0)) almost gives us what we want, we + * just need to manually detect and handle the one special case: + */ +#define SIMDUTF_RVV_UTF8_TO_COMMON_M1(idx) \ + vuint8m1_t c1 = __riscv_vget_v_u8m2_u8m1(b1, idx); \ + vuint8m1_t c2 = __riscv_vget_v_u8m2_u8m1(b2, idx); \ + vuint8m1_t c3 = __riscv_vget_v_u8m2_u8m1(b3, idx); \ + vuint8m1_t c4 = __riscv_vget_v_u8m2_u8m1(b4, idx); \ + /* remove prefix from trailing bytes */ \ + c2 = __riscv_vand_vx_u8m1(c2, 0b00111111, vlOut); \ + c3 = __riscv_vand_vx_u8m1(c3, 0b00111111, vlOut); \ + c4 = __riscv_vand_vx_u8m1(c4, 0b00111111, vlOut); \ + vuint8m1_t shift = __riscv_vsrl_vx_u8m1(c1, 4, vlOut); \ + shift = __riscv_vmerge_vxm_u8m1(__riscv_vssubu_vx_u8m1(shift, 10, vlOut), 3, \ + __riscv_vmseq_vx_u8m1_b8(shift, 12, vlOut), \ + vlOut); \ + c1 = __riscv_vsll_vv_u8m1(c1, shift, vlOut); \ + c1 = __riscv_vsrl_vv_u8m1(c1, shift, vlOut); \ + /* unconditionally widen and combine to c1234 */ \ + vuint16m2_t c34 = __riscv_vwaddu_wv_u16m2( \ + __riscv_vwmulu_vx_u16m2(c3, 1 << 6, vlOut), c4, vlOut); \ + vuint16m2_t c12 = __riscv_vwaddu_wv_u16m2( \ + __riscv_vwmulu_vx_u16m2(c1, 1 << 6, vlOut), c2, vlOut); \ + vuint32m4_t c1234 = __riscv_vwaddu_wv_u32m4( \ + __riscv_vwmulu_vx_u32m4(c12, 1 << 12, vlOut), c34, vlOut); \ + /* derive required right-shift amount from `shift` to reduce \ + * c1234 to the required number of bytes */ \ + c1234 = __riscv_vsrl_vv_u32m4( \ + c1234, \ + __riscv_vzext_vf4_u32m4( \ + __riscv_vmul_vx_u8m1( \ + __riscv_vrsub_vx_u8m1(__riscv_vssubu_vx_u8m1(shift, 2, vlOut), \ + 3, vlOut), \ + 6, vlOut), \ + vlOut), \ + vlOut); \ + /* store result in desired format */ \ + if (is16) \ + vlDst = rvv_utf32_store_utf16_m4((uint16_t *)dst, c1234, vlOut, \ + m4even); \ + else \ + vlDst = vlOut, __riscv_vse32_v_u32m4((uint32_t *)dst, c1234, vlOut); - return answer + scalar::utf8::count_code_points( - reinterpret_cast(str + i), length - i); -} + /* Unrolling this manually reduces register pressure and allows + * us to terminate early. */ + { + size_t vlOutm2 = vlOut, vlDst; + vlOut = __riscv_vsetvl_e8m1(vlOut); + SIMDUTF_RVV_UTF8_TO_COMMON_M1(0) + if (vlOutm2 == vlOut) { + vlOut = vlDst; + continue; + } -simdutf_warn_unused size_t implementation::latin1_length_from_utf8( - const char *buf, size_t len) const noexcept { - return count_utf8(buf, len); -} + dst += vlDst; + vlOut = vlOutm2 - vlOut; + } + { + size_t vlDst; + SIMDUTF_RVV_UTF8_TO_COMMON_M1(1) + vlOut = vlDst; + } -simdutf_warn_unused size_t -implementation::latin1_length_from_utf16(size_t length) const noexcept { - return scalar::utf16::latin1_length_from_utf16(length); -} +#undef SIMDUTF_RVV_UTF8_TO_COMMON_M1 + } -simdutf_warn_unused size_t -implementation::latin1_length_from_utf32(size_t length) const noexcept { - return scalar::utf32::latin1_length_from_utf32(length); + /* validate the last character and reparse it + tail */ + if (len > tail) { + if ((src[0] >> 6) == 0b10) + --dst; + while ((src[0] >> 6) == 0b10 && tail < len) + --src, ++tail; + if (is16) { + /* go back one more, when on high surrogate */ + if (simdutf_byteflip((uint16_t)dst[-1]) >= 0xD800 && + simdutf_byteflip((uint16_t)dst[-1]) <= 0xDBFF) + --dst; + } + } + size_t ret = scalar(src, tail, dst); + if (ret == 0) + return 0; + return (size_t)(dst - beg) + ret; } -simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - const char16_t *ptr = input; - size_t count{0}; - if (length >= 32) { - const char16_t *end = input + length - 32; - - const __m512i v_007f = _mm512_set1_epi16((uint16_t)0x007f); - const __m512i v_07ff = _mm512_set1_epi16((uint16_t)0x07ff); - const __m512i v_dfff = _mm512_set1_epi16((uint16_t)0xdfff); - const __m512i v_d800 = _mm512_set1_epi16((uint16_t)0xd800); +simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( + const char *src, size_t len, char *dst) const noexcept { + const char *beg = dst; + uint8_t last = 0; + for (size_t vl, vlOut; len > 0; + len -= vl, src += vl, dst += vlOut, last = src[-1]) { + vl = __riscv_vsetvl_e8m2(len); + vuint8m2_t v1 = __riscv_vle8_v_u8m2((uint8_t *)src, vl); + // check which bytes are ASCII + vbool4_t ascii = __riscv_vmsltu_vx_u8m2_b4(v1, 0b10000000, vl); + // count ASCII bytes + vlOut = __riscv_vcpop_m_b4(ascii, vl); + // The original code would only enter the next block after this check: + // vbool4_t m = __riscv_vmsltu_vx_u8m2_b4(v1, 0b11000000, vl); + // vlOut = __riscv_vcpop_m_b4(m, vl); + // if (vlOut != vl || last > 0b01111111) {...}q + // So that everything is ASCII or continuation bytes, we just proceeded + // without any processing, going straight to __riscv_vse8_v_u8m2. + // But you need the __riscv_vslide1up_vx_u8m2 whenever there is a non-ASCII + // byte. + if (vlOut != vl) { // If not pure ASCII + // Non-ASCII characters + // We now want to mark the ascii and continuation bytes + vbool4_t m = __riscv_vmsltu_vx_u8m2_b4(v1, 0b11000000, vl); + // We count them, that's our new vlOut (output vector length) + vlOut = __riscv_vcpop_m_b4(m, vl); - while (ptr <= end) { - __m512i utf16 = _mm512_loadu_si512((const __m512i *)ptr); - ptr += 32; - __mmask32 ascii_bitmask = _mm512_cmple_epu16_mask(utf16, v_007f); - __mmask32 two_bytes_bitmask = - _mm512_mask_cmple_epu16_mask(~ascii_bitmask, utf16, v_07ff); - __mmask32 not_one_two_bytes = ~(ascii_bitmask | two_bytes_bitmask); - __mmask32 surrogates_bitmask = - _mm512_mask_cmple_epu16_mask(not_one_two_bytes, utf16, v_dfff) & - _mm512_mask_cmpge_epu16_mask(not_one_two_bytes, utf16, v_d800); + vuint8m2_t v0 = __riscv_vslide1up_vx_u8m2(v1, last, vl); - size_t ascii_count = count_ones(ascii_bitmask); - size_t two_bytes_count = count_ones(two_bytes_bitmask); - size_t surrogate_bytes_count = count_ones(surrogates_bitmask); - size_t three_bytes_count = - 32 - ascii_count - two_bytes_count - surrogate_bytes_count; + vbool4_t leading0 = __riscv_vmsgtu_vx_u8m2_b4(v0, 0b10111111, vl); + vbool4_t trailing1 = __riscv_vmslt_vx_i8m2_b4( + __riscv_vreinterpret_v_u8m2_i8m2(v1), (uint8_t)0b11000000, vl); + // -62 i 0b11000010, so we check whether any of v0 is too big + vbool4_t tobig = __riscv_vmand_mm_b4( + leading0, + __riscv_vmsgtu_vx_u8m2_b4(__riscv_vxor_vx_u8m2(v0, (uint8_t)-62, vl), + 1, vl), + vl); + if (__riscv_vfirst_m_b4( + __riscv_vmor_mm_b4( + tobig, __riscv_vmxor_mm_b4(leading0, trailing1, vl), vl), + vl) >= 0) + return 0; - count += ascii_count + 2 * two_bytes_count + 3 * three_bytes_count + - 2 * surrogate_bytes_count; + v1 = __riscv_vor_vx_u8m2_mu(__riscv_vmseq_vx_u8m2_b4(v0, 0b11000011, vl), + v1, v1, 0b01000000, vl); + v1 = __riscv_vcompress_vm_u8m2(v1, m, vl); + } else if (last >= 0b11000000) { // If last byte is a leading byte and we + // got only ASCII, error! + return 0; } + __riscv_vse8_v_u8m2((uint8_t *)dst, v1, vlOut); } - - return count + scalar::utf16::utf8_length_from_utf16( - ptr, length - (ptr - input)); + if (last > 0b10111111) + return 0; + return dst - beg; } -simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - const char16_t *ptr = input; - size_t count{0}; - - if (length >= 32) { - const char16_t *end = input + length - 32; - - const __m512i v_007f = _mm512_set1_epi16((uint16_t)0x007f); - const __m512i v_07ff = _mm512_set1_epi16((uint16_t)0x07ff); - const __m512i v_dfff = _mm512_set1_epi16((uint16_t)0xdfff); - const __m512i v_d800 = _mm512_set1_epi16((uint16_t)0xd800); - - const __m512i byteflip = _mm512_setr_epi64( - 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, - 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - while (ptr <= end) { - __m512i utf16 = _mm512_loadu_si512((const __m512i *)ptr); - utf16 = _mm512_shuffle_epi8(utf16, byteflip); - ptr += 32; - __mmask32 ascii_bitmask = _mm512_cmple_epu16_mask(utf16, v_007f); - __mmask32 two_bytes_bitmask = - _mm512_mask_cmple_epu16_mask(~ascii_bitmask, utf16, v_07ff); - __mmask32 not_one_two_bytes = ~(ascii_bitmask | two_bytes_bitmask); - __mmask32 surrogates_bitmask = - _mm512_mask_cmple_epu16_mask(not_one_two_bytes, utf16, v_dfff) & - _mm512_mask_cmpge_epu16_mask(not_one_two_bytes, utf16, v_d800); +simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( + const char *src, size_t len, char *dst) const noexcept { + size_t res = convert_utf8_to_latin1(src, len, dst); + if (res) + return result(error_code::SUCCESS, res); + return scalar::utf8_to_latin1::convert_with_errors(src, len, dst); +} - size_t ascii_count = count_ones(ascii_bitmask); - size_t two_bytes_count = count_ones(two_bytes_bitmask); - size_t surrogate_bytes_count = count_ones(surrogates_bitmask); - size_t three_bytes_count = - 32 - ascii_count - two_bytes_count - surrogate_bytes_count; - count += ascii_count + 2 * two_bytes_count + 3 * three_bytes_count + - 2 * surrogate_bytes_count; +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( + const char *src, size_t len, char *dst) const noexcept { + const char *beg = dst; + uint8_t last = 0; + for (size_t vl, vlOut; len > 0; + len -= vl, src += vl, dst += vlOut, last = src[-1]) { + vl = __riscv_vsetvl_e8m2(len); + vuint8m2_t v1 = __riscv_vle8_v_u8m2((uint8_t *)src, vl); + vbool4_t ascii = __riscv_vmsltu_vx_u8m2_b4(v1, 0b10000000, vl); + vlOut = __riscv_vcpop_m_b4(ascii, vl); + if (vlOut != vl) { // If not pure ASCII + vbool4_t m = __riscv_vmsltu_vx_u8m2_b4(v1, 0b11000000, vl); + vlOut = __riscv_vcpop_m_b4(m, vl); + vuint8m2_t v0 = __riscv_vslide1up_vx_u8m2(v1, last, vl); + v1 = __riscv_vor_vx_u8m2_mu(__riscv_vmseq_vx_u8m2_b4(v0, 0b11000011, vl), + v1, v1, 0b01000000, vl); + v1 = __riscv_vcompress_vm_u8m2(v1, m, vl); } + __riscv_vse8_v_u8m2((uint8_t *)dst, v1, vlOut); } - - return count + scalar::utf16::utf8_length_from_utf16( - ptr, length - (ptr - input)); + return dst - beg; } -simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return implementation::count_utf16le(input, length); +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( + const char *src, size_t len, char16_t *dst) const noexcept { + return rvv_utf8_to_common(src, len, + (uint16_t *)dst); } -simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return implementation::count_utf16be(input, length); +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( + const char *src, size_t len, char16_t *dst) const noexcept { + if (supports_zvbb()) + return rvv_utf8_to_common( + src, len, (uint16_t *)dst); + else + return rvv_utf8_to_common(src, len, + (uint16_t *)dst); } -simdutf_warn_unused size_t -implementation::utf16_length_from_latin1(size_t length) const noexcept { - return scalar::latin1::utf16_length_from_latin1(length); +simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( + const char *src, size_t len, char16_t *dst) const noexcept { + size_t res = convert_utf8_to_utf16le(src, len, dst); + if (res) + return result(error_code::SUCCESS, res); + return scalar::utf8_to_utf16::convert_with_errors( + src, len, dst); } -simdutf_warn_unused size_t -implementation::utf32_length_from_latin1(size_t length) const noexcept { - return scalar::latin1::utf32_length_from_latin1(length); +simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( + const char *src, size_t len, char16_t *dst) const noexcept { + size_t res = convert_utf8_to_utf16be(src, len, dst); + if (res) + return result(error_code::SUCCESS, res); + return scalar::utf8_to_utf16::convert_with_errors(src, len, + dst); } -simdutf_warn_unused size_t implementation::utf8_length_from_latin1( - const char *input, size_t length) const noexcept { - const uint8_t *str = reinterpret_cast(input); - size_t answer = length / sizeof(__m512i) * sizeof(__m512i); - size_t i = 0; - if (answer >= 2048) { // long strings optimization - unsigned char v_0xFF = 0xff; - __m512i eight_64bits = _mm512_setzero_si512(); - while (i + sizeof(__m512i) <= length) { - __m512i runner = _mm512_setzero_si512(); - size_t iterations = (length - i) / sizeof(__m512i); - if (iterations > 255) { - iterations = 255; - } - size_t max_i = i + iterations * sizeof(__m512i) - sizeof(__m512i); - for (; i + 4 * sizeof(__m512i) <= max_i; i += 4 * sizeof(__m512i)) { - // Load four __m512i vectors - __m512i input1 = _mm512_loadu_si512((const __m512i *)(str + i)); - __m512i input2 = - _mm512_loadu_si512((const __m512i *)(str + i + sizeof(__m512i))); - __m512i input3 = _mm512_loadu_si512( - (const __m512i *)(str + i + 2 * sizeof(__m512i))); - __m512i input4 = _mm512_loadu_si512( - (const __m512i *)(str + i + 3 * sizeof(__m512i))); - - // Generate four masks - __mmask64 mask1 = - _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input1); - __mmask64 mask2 = - _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input2); - __mmask64 mask3 = - _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input3); - __mmask64 mask4 = - _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input4); - // Apply the masks and subtract from the runner - __m512i not_ascii1 = - _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask1, v_0xFF); - __m512i not_ascii2 = - _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask2, v_0xFF); - __m512i not_ascii3 = - _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask3, v_0xFF); - __m512i not_ascii4 = - _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask4, v_0xFF); - - runner = _mm512_sub_epi8(runner, not_ascii1); - runner = _mm512_sub_epi8(runner, not_ascii2); - runner = _mm512_sub_epi8(runner, not_ascii3); - runner = _mm512_sub_epi8(runner, not_ascii4); - } - - for (; i <= max_i; i += sizeof(__m512i)) { - __m512i more_input = _mm512_loadu_si512((const __m512i *)(str + i)); - - __mmask64 mask = - _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), more_input); - __m512i not_ascii = - _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask, v_0xFF); - runner = _mm512_sub_epi8(runner, not_ascii); - } - - eight_64bits = _mm512_add_epi64( - eight_64bits, _mm512_sad_epu8(runner, _mm512_setzero_si512())); - } - - __m256i first_half = _mm512_extracti64x4_epi64(eight_64bits, 0); - __m256i second_half = _mm512_extracti64x4_epi64(eight_64bits, 1); - answer += (size_t)_mm256_extract_epi64(first_half, 0) + - (size_t)_mm256_extract_epi64(first_half, 1) + - (size_t)_mm256_extract_epi64(first_half, 2) + - (size_t)_mm256_extract_epi64(first_half, 3) + - (size_t)_mm256_extract_epi64(second_half, 0) + - (size_t)_mm256_extract_epi64(second_half, 1) + - (size_t)_mm256_extract_epi64(second_half, 2) + - (size_t)_mm256_extract_epi64(second_half, 3); - } else if (answer > 0) { - for (; i + sizeof(__m512i) <= length; i += sizeof(__m512i)) { - __m512i latin = _mm512_loadu_si512((const __m512i *)(str + i)); - uint64_t non_ascii = _mm512_movepi8_mask(latin); - answer += count_ones(non_ascii); - } - } - return answer + scalar::latin1::utf8_length_from_latin1( - reinterpret_cast(str + i), length - i); +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( + const char *src, size_t len, char16_t *dst) const noexcept { + return rvv_utf8_to_common( + src, len, (uint16_t *)dst); } -simdutf_warn_unused size_t implementation::utf16_length_from_utf8( - const char *input, size_t length) const noexcept { - size_t pos = 0; - size_t count = 0; - // This algorithm could no doubt be improved! - for (; pos + 64 <= length; pos += 64) { - __m512i utf8 = _mm512_loadu_si512((const __m512i *)(input + pos)); - uint64_t utf8_continuation_mask = - _mm512_cmplt_epi8_mask(utf8, _mm512_set1_epi8(-65 + 1)); - // We count one word for anything that is not a continuation (so - // leading bytes). - count += 64 - count_ones(utf8_continuation_mask); - uint64_t utf8_4byte = - _mm512_cmpge_epu8_mask(utf8, _mm512_set1_epi8(int8_t(240))); - count += count_ones(utf8_4byte); - } - return count + - scalar::utf8::utf16_length_from_utf8(input + pos, length - pos); +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( + const char *src, size_t len, char16_t *dst) const noexcept { + if (supports_zvbb()) + return rvv_utf8_to_common( + src, len, (uint16_t *)dst); + else + return rvv_utf8_to_common( + src, len, (uint16_t *)dst); } -simdutf_warn_unused size_t implementation::utf8_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - const char32_t *ptr = input; - size_t count{0}; - - if (length >= 16) { - const char32_t *end = input + length - 16; +simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( + const char *src, size_t len, char32_t *dst) const noexcept { + return rvv_utf8_to_common(src, len, + (uint32_t *)dst); +} - const __m512i v_0000_007f = _mm512_set1_epi32((uint32_t)0x7f); - const __m512i v_0000_07ff = _mm512_set1_epi32((uint32_t)0x7ff); - const __m512i v_0000_ffff = _mm512_set1_epi32((uint32_t)0x0000ffff); +simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( + const char *src, size_t len, char32_t *dst) const noexcept { + size_t res = convert_utf8_to_utf32(src, len, dst); + if (res) + return result(error_code::SUCCESS, res); + return scalar::utf8_to_utf32::convert_with_errors(src, len, dst); +} - while (ptr <= end) { - __m512i utf32 = _mm512_loadu_si512((const __m512i *)ptr); - ptr += 16; - __mmask16 ascii_bitmask = _mm512_cmple_epu32_mask(utf32, v_0000_007f); - __mmask16 two_bytes_bitmask = _mm512_mask_cmple_epu32_mask( - _knot_mask16(ascii_bitmask), utf32, v_0000_07ff); - __mmask16 three_bytes_bitmask = _mm512_mask_cmple_epu32_mask( - _knot_mask16(_mm512_kor(ascii_bitmask, two_bytes_bitmask)), utf32, - v_0000_ffff); +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( + const char *src, size_t len, char32_t *dst) const noexcept { + return rvv_utf8_to_common( + src, len, (uint32_t *)dst); +} +/* end file src/rvv/rvv_utf8_to.inl.cpp */ - size_t ascii_count = count_ones(ascii_bitmask); - size_t two_bytes_count = count_ones(two_bytes_bitmask); - size_t three_bytes_count = count_ones(three_bytes_bitmask); - size_t four_bytes_count = - 16 - ascii_count - two_bytes_count - three_bytes_count; - count += ascii_count + 2 * two_bytes_count + 3 * three_bytes_count + - 4 * four_bytes_count; - } +simdutf_warn_unused int +implementation::detect_encodings(const char *input, + size_t length) const noexcept { + // If there is a BOM, then we trust it. + auto bom_encoding = simdutf::BOM::check_bom(input, length); + if (bom_encoding != encoding_type::unspecified) + return bom_encoding; + // todo: reimplement as a one-pass algorithm. + int out = 0; + if (validate_utf8(input, length)) + out |= encoding_type::UTF8; + if (length % 2 == 0) { + if (validate_utf16(reinterpret_cast(input), length / 2)) + out |= encoding_type::UTF16_LE; + } + if (length % 4 == 0) { + if (validate_utf32(reinterpret_cast(input), length / 4)) + out |= encoding_type::UTF32_LE; } - return count + - scalar::utf32::utf8_length_from_utf32(ptr, length - (ptr - input)); + return out; } -simdutf_warn_unused size_t implementation::utf16_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - const char32_t *ptr = input; - size_t count{0}; - - if (length >= 16) { - const char32_t *end = input + length - 16; - - const __m512i v_0000_ffff = _mm512_set1_epi32((uint32_t)0x0000ffff); - - while (ptr <= end) { - __m512i utf32 = _mm512_loadu_si512((const __m512i *)ptr); - ptr += 16; - __mmask16 surrogates_bitmask = - _mm512_cmpgt_epu32_mask(utf32, v_0000_ffff); - - count += 16 + count_ones(surrogates_bitmask); - } +template +simdutf_really_inline static void +rvv_change_endianness_utf16(const char16_t *src, size_t len, char16_t *dst) { + for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { + vl = __riscv_vsetvl_e16m8(len); + vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); + __riscv_vse16_v_u16m8((uint16_t *)dst, simdutf_byteflip(v, vl), vl); } - - return count + - scalar::utf32::utf16_length_from_utf32(ptr, length - (ptr - input)); } -simdutf_warn_unused size_t implementation::utf32_length_from_utf8( - const char *input, size_t length) const noexcept { - return implementation::count_utf8(input, length); +void implementation::change_endianness_utf16(const char16_t *src, size_t len, + char16_t *dst) const noexcept { + if (supports_zvbb()) + return rvv_change_endianness_utf16(src, len, dst); + else + return rvv_change_endianness_utf16(src, len, dst); } simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( @@ -25294,21 +37828,86 @@ simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( simdutf_warn_unused result implementation::base64_to_binary( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { - return (options & base64_url) - ? compress_decode_base64(output, input, length, options, - last_chunk_options) - : compress_decode_base64(output, input, length, options, - last_chunk_options); + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + size_t equallocation = + length; // location of the first padding character if any + size_t equalsigns = 0; + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + length -= 1; + equalsigns++; + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + equalsigns++; + length -= 1; + } + } + if (length == 0) { + if (equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation}; + } + return {SUCCESS, 0}; + } + result r = scalar::base64::base64_tail_decode( + output, input, length, equalsigns, options, last_chunk_options); + if (last_chunk_options != stop_before_partial && + r.error == error_code::SUCCESS && equalsigns > 0) { + // additional checks + if ((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation}; + } + } + return r; } simdutf_warn_unused full_result implementation::base64_to_binary_details( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { - return (options & base64_url) - ? compress_decode_base64(output, input, length, options, - last_chunk_options) - : compress_decode_base64(output, input, length, options, - last_chunk_options); + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + size_t equallocation = + length; // location of the first padding character if any + size_t equalsigns = 0; + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + length -= 1; + equalsigns++; + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + equalsigns++; + length -= 1; + } + } + if (length == 0) { + if (equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, 0, 0}; + } + full_result r = scalar::base64::base64_tail_decode( + output, input, length, equalsigns, options, last_chunk_options); + if (last_chunk_options != stop_before_partial && + r.error == error_code::SUCCESS && equalsigns > 0) { + // additional checks + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; + } + } + return r; } simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( @@ -25319,21 +37918,86 @@ simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( simdutf_warn_unused result implementation::base64_to_binary( const char16_t *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { - return (options & base64_url) - ? compress_decode_base64(output, input, length, options, - last_chunk_options) - : compress_decode_base64(output, input, length, options, - last_chunk_options); + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + size_t equallocation = + length; // location of the first padding character if any + auto equalsigns = 0; + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + length -= 1; + equalsigns++; + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + equalsigns++; + length -= 1; + } + } + if (length == 0) { + if (equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation}; + } + return {SUCCESS, 0}; + } + result r = scalar::base64::base64_tail_decode( + output, input, length, equalsigns, options, last_chunk_options); + if (last_chunk_options != stop_before_partial && + r.error == error_code::SUCCESS && equalsigns > 0) { + // additional checks + if ((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation}; + } + } + return r; } simdutf_warn_unused full_result implementation::base64_to_binary_details( const char16_t *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { - return (options & base64_url) - ? compress_decode_base64(output, input, length, options, - last_chunk_options) - : compress_decode_base64(output, input, length, options, - last_chunk_options); + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + size_t equallocation = + length; // location of the first padding character if any + size_t equalsigns = 0; + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + length -= 1; + equalsigns++; + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + equalsigns++; + length -= 1; + } + } + if (length == 0) { + if (equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, 0, 0}; + } + full_result r = scalar::base64::base64_tail_decode( + output, input, length, equalsigns, options, last_chunk_options); + if (last_chunk_options != stop_before_partial && + r.error == error_code::SUCCESS && equalsigns > 0) { + // additional checks + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; + } + } + return r; } simdutf_warn_unused size_t implementation::base64_length_from_binary( @@ -25344,56 +38008,38 @@ simdutf_warn_unused size_t implementation::base64_length_from_binary( size_t implementation::binary_to_base64(const char *input, size_t length, char *output, base64_options options) const noexcept { - if (options & base64_url) { - return encode_base64(output, input, length, options); - } else { - return encode_base64(output, input, length, options); - } + return scalar::base64::tail_encode_base64(output, input, length, options); } - -} // namespace icelake +} // namespace rvv } // namespace simdutf -/* begin file src/simdutf/icelake/end.h */ -#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE +/* begin file src/simdutf/rvv/end.h */ +#if SIMDUTF_CAN_ALWAYS_RUN_RVV // nothing needed. #else SIMDUTF_UNTARGET_REGION #endif - -#if SIMDUTF_GCC11ORMORE // workaround for - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 -SIMDUTF_POP_DISABLE_WARNINGS -#endif // end of workaround -/* end file src/simdutf/icelake/end.h */ -/* end file src/icelake/implementation.cpp */ +/* end file src/simdutf/rvv/end.h */ +/* end file src/rvv/implementation.cpp */ #endif -#if SIMDUTF_IMPLEMENTATION_HASWELL -/* begin file src/haswell/implementation.cpp */ - -/* begin file src/simdutf/haswell/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "haswell" -// #define SIMDUTF_IMPLEMENTATION haswell +#if SIMDUTF_IMPLEMENTATION_WESTMERE +/* begin file src/westmere/implementation.cpp */ +/* begin file src/simdutf/westmere/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "westmere" +// #define SIMDUTF_IMPLEMENTATION westmere -#if SIMDUTF_CAN_ALWAYS_RUN_HASWELL +#if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE // nothing needed. #else -SIMDUTF_TARGET_HASWELL +SIMDUTF_TARGET_WESTMERE #endif - -#if SIMDUTF_GCC11ORMORE // workaround for - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 -// clang-format off -SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) -// clang-format on -#endif // end of workaround -/* end file src/simdutf/haswell/begin.h */ +/* end file src/simdutf/westmere/begin.h */ namespace simdutf { -namespace haswell { +namespace westmere { namespace { -#ifndef SIMDUTF_HASWELL_H - #error "haswell.h must be included" +#ifndef SIMDUTF_WESTMERE_H + #error "westmere.h must be included" #endif using namespace simd; @@ -25420,13 +38066,90 @@ simdutf_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { simd8 is_third_byte = - prev2.saturating_sub(0xe0u - 0x80); // Only 111_____ will be > 0x80 + prev2.saturating_sub(0xe0u - 0x80); // Only 111_____ will be >= 0x80 simd8 is_fourth_byte = - prev3.saturating_sub(0xf0u - 0x80); // Only 1111____ will be > 0x80 + prev3.saturating_sub(0xf0u - 0x80); // Only 1111____ will be >= 0x80 return simd8(is_third_byte | is_fourth_byte); } -/* begin file src/haswell/avx2_validate_utf16.cpp */ +/* begin file src/westmere/internal/loader.cpp */ +namespace internal { +namespace westmere { + +/* begin file src/westmere/internal/write_v_u16_11bits_to_utf8.cpp */ +/* + * reads a vector of uint16 values + * bits after 11th are ignored + * first 11 bits are encoded into utf8 + * !important! utf8_output must have at least 16 writable bytes + */ + +inline void write_v_u16_11bits_to_utf8(const __m128i v_u16, char *&utf8_output, + const __m128i one_byte_bytemask, + const uint16_t one_byte_bitmask) { + // 0b1100_0000_1000_0000 + const __m128i v_c080 = _mm_set1_epi16((int16_t)0xc080); + // 0b0001_1111_0000_0000 + const __m128i v_1f00 = _mm_set1_epi16((int16_t)0x1f00); + // 0b0000_0000_0011_1111 + const __m128i v_003f = _mm_set1_epi16((int16_t)0x003f); + + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + + // t0 = [000a|aaaa|bbbb|bb00] + const __m128i t0 = _mm_slli_epi16(v_u16, 2); + // t1 = [000a|aaaa|0000|0000] + const __m128i t1 = _mm_and_si128(t0, v_1f00); + // t2 = [0000|0000|00bb|bbbb] + const __m128i t2 = _mm_and_si128(v_u16, v_003f); + // t3 = [000a|aaaa|00bb|bbbb] + const __m128i t3 = _mm_or_si128(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m128i t4 = _mm_or_si128(t3, v_c080); + + // 2. merge ASCII and 2-byte codewords + const __m128i utf8_unpacked = _mm_blendv_epi8(t4, v_u16, one_byte_bytemask); + + // 3. prepare bitmask for 8-bit lookup + // one_byte_bitmask = hhggffeeddccbbaa -- the bits are doubled (h - MSB, a + // - LSB) + const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a + const uint16_t m1 = static_cast(m0 >> 7); // m1 = 00000000h0g0f0e0 + const uint8_t m2 = static_cast((m0 | m1) & 0xff); // m2 = hdgcfbea + // 4. pack the bytes + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; + const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); + const __m128i utf8_packed = _mm_shuffle_epi8(utf8_unpacked, shuffle); + + // 5. store bytes + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + + // 6. adjust pointers + utf8_output += row[0]; +} + +inline void write_v_u16_11bits_to_utf8(const __m128i v_u16, char *&utf8_output, + const __m128i v_0000, + const __m128i v_ff80) { + // no bits set above 7th bit + const __m128i one_byte_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(v_u16, v_ff80), v_0000); + const uint16_t one_byte_bitmask = + static_cast(_mm_movemask_epi8(one_byte_bytemask)); + + write_v_u16_11bits_to_utf8(v_u16, utf8_output, one_byte_bytemask, + one_byte_bitmask); +} +/* end file src/westmere/internal/write_v_u16_11bits_to_utf8.cpp */ + +} // namespace westmere +} // namespace internal +/* end file src/westmere/internal/loader.cpp */ + +/* begin file src/westmere/sse_validate_utf16.cpp */ /* In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. @@ -25447,7 +38170,7 @@ must_be_2_3_continuation(const simd8 prev2, - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) - there must not be sole low surrogate nor high surrogate - We're going to build three bitmasks based on the 3rd nibble: + We are going to build three bitmasks based on the 3rd nibble: - V = valid word, - L = low surrogate (0xd800 .. 0xdbff) - H = high surrogate (0xdc00 .. 0xdfff) @@ -25474,7 +38197,7 @@ must_be_2_3_continuation(const simd8 prev2, - nullptr if an error was detected. */ template -const char16_t *avx2_validate_utf16(const char16_t *input, size_t size) { +const char16_t *sse_validate_utf16(const char16_t *input, size_t size) { const char16_t *end = input + size; const auto v_d8 = simd8::splat(0xd8); @@ -25482,13 +38205,13 @@ const char16_t *avx2_validate_utf16(const char16_t *input, size_t size) { const auto v_fc = simd8::splat(0xfc); const auto v_dc = simd8::splat(0xdc); - while (input + simd16::ELEMENTS * 2 < end) { + while (input + simd16::SIZE * 2 < end) { // 0. Load data: since the validation takes into account only higher // byte of each word, we compress the two vectors into one which // consists only the higher bytes. auto in0 = simd16(input); - auto in1 = simd16(input + simd16::ELEMENTS); - + auto in1 = + simd16(input + simd16::SIZE / sizeof(char16_t)); if (big_endian) { in0 = in0.swap_bytes(); in1 = in1.swap_bytes(); @@ -25501,9 +38224,10 @@ const char16_t *avx2_validate_utf16(const char16_t *input, size_t size) { // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). const auto surrogates_wordmask = (in & v_f8) == v_d8; - const uint32_t surrogates_bitmask = surrogates_wordmask.to_bitmask(); - if (surrogates_bitmask == 0x0) { - input += simd16::ELEMENTS * 2; + const uint16_t surrogates_bitmask = + static_cast(surrogates_wordmask.to_bitmask()); + if (surrogates_bitmask == 0x0000) { + input += 16; } else { // 2. We have some surrogates that have to be distinguished: // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) @@ -25513,35 +38237,36 @@ const char16_t *avx2_validate_utf16(const char16_t *input, size_t size) { // V - non-surrogate code units // V = not surrogates_wordmask - const uint32_t V = ~surrogates_bitmask; + const uint16_t V = static_cast(~surrogates_bitmask); // H - word-mask for high surrogates: the six highest bits are 0b1101'11 const auto vH = (in & v_fc) == v_dc; - const uint32_t H = vH.to_bitmask(); + const uint16_t H = static_cast(vH.to_bitmask()); // L - word mask for low surrogates // L = not H and surrogates_wordmask - const uint32_t L = ~H & surrogates_bitmask; + const uint16_t L = static_cast(~H & surrogates_bitmask); - const uint32_t a = - L & (H >> 1); // A low surrogate must be followed by high one. - // (A low surrogate placed in the 7th register's word - // is an exception we handle.) - const uint32_t b = - a << 1; // Just mark that the opposite fact is hold, - // thanks to that we have only two masks for valid case. - const uint32_t c = V | a | b; // Combine all the masks into the final one. + const uint16_t a = static_cast( + L & (H >> 1)); // A low surrogate must be followed by high one. + // (A low surrogate placed in the 7th register's word + // is an exception we handle.) + const uint16_t b = static_cast( + a << 1); // Just mark that the opinput - startite fact is hold, + // thanks to that we have only two masks for valid case. + const uint16_t c = static_cast( + V | a | b); // Combine all the masks into the final one. - if (c == 0xffffffff) { + if (c == 0xffff) { // The whole input register contains valid UTF-16, i.e., // either single code units or proper surrogate pairs. - input += simd16::ELEMENTS * 2; - } else if (c == 0x7fffffff) { - // The 31 lower code units of the input register contains valid UTF-16. - // The 31 word may be either a low or high surrogate. It the next + input += 16; + } else if (c == 0x7fff) { + // The 15 lower code units of the input register contains valid UTF-16. + // The 15th word may be either a low or high surrogate. It the next // iteration we 1) check if the low surrogate is followed by a high // one, 2) reject sole high surrogate. - input += simd16::ELEMENTS * 2 - 1; + input += 15; } else { return nullptr; } @@ -25552,8 +38277,8 @@ const char16_t *avx2_validate_utf16(const char16_t *input, size_t size) { } template -const result avx2_validate_utf16_with_errors(const char16_t *input, - size_t size) { +const result sse_validate_utf16_with_errors(const char16_t *input, + size_t size) { if (simdutf_unlikely(size == 0)) { return result(error_code::SUCCESS, 0); } @@ -25565,12 +38290,13 @@ const result avx2_validate_utf16_with_errors(const char16_t *input, const auto v_fc = simd8::splat(0xfc); const auto v_dc = simd8::splat(0xdc); - while (input + simd16::ELEMENTS * 2 < end) { + while (input + simd16::SIZE * 2 < end) { // 0. Load data: since the validation takes into account only higher // byte of each word, we compress the two vectors into one which // consists only the higher bytes. auto in0 = simd16(input); - auto in1 = simd16(input + simd16::ELEMENTS); + auto in1 = + simd16(input + simd16::SIZE / sizeof(char16_t)); if (big_endian) { in0 = in0.swap_bytes(); @@ -25584,9 +38310,10 @@ const result avx2_validate_utf16_with_errors(const char16_t *input, // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). const auto surrogates_wordmask = (in & v_f8) == v_d8; - const uint32_t surrogates_bitmask = surrogates_wordmask.to_bitmask(); - if (surrogates_bitmask == 0x0) { - input += simd16::ELEMENTS * 2; + const uint16_t surrogates_bitmask = + static_cast(surrogates_wordmask.to_bitmask()); + if (surrogates_bitmask == 0x0000) { + input += 16; } else { // 2. We have some surrogates that have to be distinguished: // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) @@ -25596,35 +38323,36 @@ const result avx2_validate_utf16_with_errors(const char16_t *input, // V - non-surrogate code units // V = not surrogates_wordmask - const uint32_t V = ~surrogates_bitmask; + const uint16_t V = static_cast(~surrogates_bitmask); // H - word-mask for high surrogates: the six highest bits are 0b1101'11 const auto vH = (in & v_fc) == v_dc; - const uint32_t H = vH.to_bitmask(); + const uint16_t H = static_cast(vH.to_bitmask()); // L - word mask for low surrogates // L = not H and surrogates_wordmask - const uint32_t L = ~H & surrogates_bitmask; + const uint16_t L = static_cast(~H & surrogates_bitmask); - const uint32_t a = - L & (H >> 1); // A low surrogate must be followed by high one. - // (A low surrogate placed in the 7th register's word - // is an exception we handle.) - const uint32_t b = - a << 1; // Just mark that the opposite fact is hold, - // thanks to that we have only two masks for valid case. - const uint32_t c = V | a | b; // Combine all the masks into the final one. + const uint16_t a = static_cast( + L & (H >> 1)); // A low surrogate must be followed by high one. + // (A low surrogate placed in the 7th register's word + // is an exception we handle.) + const uint16_t b = static_cast( + a << 1); // Just mark that the opinput - startite fact is hold, + // thanks to that we have only two masks for valid case. + const uint16_t c = static_cast( + V | a | b); // Combine all the masks into the final one. - if (c == 0xffffffff) { + if (c == 0xffff) { // The whole input register contains valid UTF-16, i.e., // either single code units or proper surrogate pairs. - input += simd16::ELEMENTS * 2; - } else if (c == 0x7fffffff) { - // The 31 lower code units of the input register contains valid UTF-16. - // The 31 word may be either a low or high surrogate. It the next + input += 16; + } else if (c == 0x7fff) { + // The 15 lower code units of the input register contains valid UTF-16. + // The 15th word may be either a low or high surrogate. It the next // iteration we 1) check if the low surrogate is followed by a high // one, 2) reject sole high surrogate. - input += simd16::ELEMENTS * 2 - 1; + input += 15; } else { return result(error_code::SURROGATE, input - start); } @@ -25633,228 +38361,210 @@ const result avx2_validate_utf16_with_errors(const char16_t *input, return result(error_code::SUCCESS, input - start); } -/* end file src/haswell/avx2_validate_utf16.cpp */ -/* begin file src/haswell/avx2_validate_utf32le.cpp */ +/* end file src/westmere/sse_validate_utf16.cpp */ +/* begin file src/westmere/sse_validate_utf32le.cpp */ /* Returns: - pointer to the last unprocessed character (a scalar fallback should check the rest); - nullptr if an error was detected. */ -const char32_t *avx2_validate_utf32le(const char32_t *input, size_t size) { +const char32_t *sse_validate_utf32le(const char32_t *input, size_t size) { const char32_t *end = input + size; - const __m256i standardmax = _mm256_set1_epi32(0x10ffff); - const __m256i offset = _mm256_set1_epi32(0xffff2000); - const __m256i standardoffsetmax = _mm256_set1_epi32(0xfffff7ff); - __m256i currentmax = _mm256_setzero_si256(); - __m256i currentoffsetmax = _mm256_setzero_si256(); + const __m128i standardmax = _mm_set1_epi32(0x10ffff); + const __m128i offset = _mm_set1_epi32(0xffff2000); + const __m128i standardoffsetmax = _mm_set1_epi32(0xfffff7ff); + __m128i currentmax = _mm_setzero_si128(); + __m128i currentoffsetmax = _mm_setzero_si128(); - while (input + 8 < end) { - const __m256i in = _mm256_loadu_si256((__m256i *)input); - currentmax = _mm256_max_epu32(in, currentmax); + while (input + 4 < end) { + const __m128i in = _mm_loadu_si128((__m128i *)input); + currentmax = _mm_max_epu32(in, currentmax); currentoffsetmax = - _mm256_max_epu32(_mm256_add_epi32(in, offset), currentoffsetmax); - input += 8; + _mm_max_epu32(_mm_add_epi32(in, offset), currentoffsetmax); + input += 4; } - __m256i is_zero = - _mm256_xor_si256(_mm256_max_epu32(currentmax, standardmax), standardmax); - if (_mm256_testz_si256(is_zero, is_zero) == 0) { + __m128i is_zero = + _mm_xor_si128(_mm_max_epu32(currentmax, standardmax), standardmax); + if (_mm_test_all_zeros(is_zero, is_zero) == 0) { return nullptr; } - is_zero = _mm256_xor_si256( - _mm256_max_epu32(currentoffsetmax, standardoffsetmax), standardoffsetmax); - if (_mm256_testz_si256(is_zero, is_zero) == 0) { + is_zero = _mm_xor_si128(_mm_max_epu32(currentoffsetmax, standardoffsetmax), + standardoffsetmax); + if (_mm_test_all_zeros(is_zero, is_zero) == 0) { return nullptr; } return input; } -const result avx2_validate_utf32le_with_errors(const char32_t *input, - size_t size) { +const result sse_validate_utf32le_with_errors(const char32_t *input, + size_t size) { const char32_t *start = input; const char32_t *end = input + size; - const __m256i standardmax = _mm256_set1_epi32(0x10ffff); - const __m256i offset = _mm256_set1_epi32(0xffff2000); - const __m256i standardoffsetmax = _mm256_set1_epi32(0xfffff7ff); - __m256i currentmax = _mm256_setzero_si256(); - __m256i currentoffsetmax = _mm256_setzero_si256(); + const __m128i standardmax = _mm_set1_epi32(0x10ffff); + const __m128i offset = _mm_set1_epi32(0xffff2000); + const __m128i standardoffsetmax = _mm_set1_epi32(0xfffff7ff); + __m128i currentmax = _mm_setzero_si128(); + __m128i currentoffsetmax = _mm_setzero_si128(); - while (input + 8 < end) { - const __m256i in = _mm256_loadu_si256((__m256i *)input); - currentmax = _mm256_max_epu32(in, currentmax); + while (input + 4 < end) { + const __m128i in = _mm_loadu_si128((__m128i *)input); + currentmax = _mm_max_epu32(in, currentmax); currentoffsetmax = - _mm256_max_epu32(_mm256_add_epi32(in, offset), currentoffsetmax); + _mm_max_epu32(_mm_add_epi32(in, offset), currentoffsetmax); - __m256i is_zero = _mm256_xor_si256( - _mm256_max_epu32(currentmax, standardmax), standardmax); - if (_mm256_testz_si256(is_zero, is_zero) == 0) { + __m128i is_zero = + _mm_xor_si128(_mm_max_epu32(currentmax, standardmax), standardmax); + if (_mm_test_all_zeros(is_zero, is_zero) == 0) { return result(error_code::TOO_LARGE, input - start); - } - - is_zero = - _mm256_xor_si256(_mm256_max_epu32(currentoffsetmax, standardoffsetmax), - standardoffsetmax); - if (_mm256_testz_si256(is_zero, is_zero) == 0) { - return result(error_code::SURROGATE, input - start); - } - input += 8; - } - - return result(error_code::SUCCESS, input - start); -} -/* end file src/haswell/avx2_validate_utf32le.cpp */ - -/* begin file src/haswell/avx2_convert_latin1_to_utf8.cpp */ -std::pair -avx2_convert_latin1_to_utf8(const char *latin1_input, size_t len, - char *utf8_output) { - const char *end = latin1_input + len; - const __m256i v_0000 = _mm256_setzero_si256(); - const __m256i v_c080 = _mm256_set1_epi16((int16_t)0xc080); - const __m256i v_ff80 = _mm256_set1_epi16((int16_t)0xff80); - const size_t safety_margin = 12; - - while (end - latin1_input >= std::ptrdiff_t(16 + safety_margin)) { - __m128i in8 = _mm_loadu_si128((__m128i *)latin1_input); - // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes - const __m128i v_80 = _mm_set1_epi8((char)0x80); - if (_mm_testz_si128(in8, v_80)) { // ASCII fast path!!!! - // 1. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, in8); - // 2. adjust pointers - latin1_input += 16; - utf8_output += 16; - continue; // we are done for this round! - } - // We proceed only with the first 16 bytes. - const __m256i in = _mm256_cvtepu8_epi16((in8)); - - // 1. prepare 2-byte values - // input 16-bit word : [0000|0000|aabb|bbbb] x 8 - // expected output : [1100|00aa|10bb|bbbb] x 8 - const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); - const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); - - // t0 = [0000|00aa|bbbb|bb00] - const __m256i t0 = _mm256_slli_epi16(in, 2); - // t1 = [0000|00aa|0000|0000] - const __m256i t1 = _mm256_and_si256(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const __m256i t2 = _mm256_and_si256(in, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const __m256i t3 = _mm256_or_si256(t1, t2); - // t4 = [1100|00aa|10bb|bbbb] - const __m256i t4 = _mm256_or_si256(t3, v_c080); - - // 2. merge ASCII and 2-byte codewords - - // no bits set above 7th bit - const __m256i one_byte_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in, v_ff80), v_0000); - const uint32_t one_byte_bitmask = - static_cast(_mm256_movemask_epi8(one_byte_bytemask)); - - const __m256i utf8_unpacked = _mm256_blendv_epi8(t4, in, one_byte_bytemask); - - // 3. prepare bitmask for 8-bit lookup - const uint32_t M0 = one_byte_bitmask & 0x55555555; - const uint32_t M1 = M0 >> 7; - const uint32_t M2 = (M1 | M0) & 0x00ff00ff; - // 4. pack the bytes - - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; - const uint8_t *row_2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> 16)] - [0]; - - const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); - const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); - - const __m256i utf8_packed = _mm256_shuffle_epi8( - utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); - // 5. store bytes - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_castsi256_si128(utf8_packed)); - utf8_output += row[0]; - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_extractf128_si256(utf8_packed, 1)); - utf8_output += row_2[0]; - - // 6. adjust pointers - latin1_input += 16; - continue; + } - } // while - return std::make_pair(latin1_input, utf8_output); + is_zero = _mm_xor_si128(_mm_max_epu32(currentoffsetmax, standardoffsetmax), + standardoffsetmax); + if (_mm_test_all_zeros(is_zero, is_zero) == 0) { + return result(error_code::SURROGATE, input - start); + } + input += 4; + } + + return result(error_code::SUCCESS, input - start); } -/* end file src/haswell/avx2_convert_latin1_to_utf8.cpp */ -/* begin file src/haswell/avx2_convert_latin1_to_utf16.cpp */ -template -std::pair -avx2_convert_latin1_to_utf16(const char *latin1_input, size_t len, - char16_t *utf16_output) { - size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 32 +/* end file src/westmere/sse_validate_utf32le.cpp */ - size_t i = 0; - for (; i < rounded_len; i += 16) { - // Load 16 bytes from the address (input + i) into a xmm register - __m128i xmm0 = - _mm_loadu_si128(reinterpret_cast(latin1_input + i)); +/* begin file src/westmere/sse_convert_latin1_to_utf8.cpp */ +std::pair +sse_convert_latin1_to_utf8(const char *latin_input, + const size_t latin_input_length, char *utf8_output) { + const char *end = latin_input + latin_input_length; - // Zero extend each byte in xmm0 to word and put it in another xmm register - __m128i xmm1 = _mm_cvtepu8_epi16(xmm0); + const __m128i v_0000 = _mm_setzero_si128(); + // 0b1000_0000 + const __m128i v_80 = _mm_set1_epi8((uint8_t)0x80); + // 0b1111_1111_1000_0000 + const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80); - // Shift xmm0 to the right by 8 bytes - xmm0 = _mm_srli_si128(xmm0, 8); + const __m128i latin_1_half_into_u16_byte_mask = + _mm_setr_epi8(0, '\x80', 1, '\x80', 2, '\x80', 3, '\x80', 4, '\x80', 5, + '\x80', 6, '\x80', 7, '\x80'); - // Zero extend each byte in the shifted xmm0 to word in xmm0 - xmm0 = _mm_cvtepu8_epi16(xmm0); + const __m128i latin_2_half_into_u16_byte_mask = + _mm_setr_epi8(8, '\x80', 9, '\x80', 10, '\x80', 11, '\x80', 12, '\x80', + 13, '\x80', 14, '\x80', 15, '\x80'); - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - xmm0 = _mm_shuffle_epi8(xmm0, swap); - xmm1 = _mm_shuffle_epi8(xmm1, swap); + // each latin1 takes 1-2 utf8 bytes + // slow path writes useful 8-15 bytes twice (eagerly writes 16 bytes and then + // adjust the pointer) so the last write can exceed the utf8_output size by + // 8-1 bytes by reserving 8 extra input bytes, we expect the output to have + // 8-16 bytes free + while (end - latin_input >= 16 + 8) { + // Load 16 Latin1 characters (16 bytes) into a 128-bit register + __m128i v_latin = _mm_loadu_si128((__m128i *)latin_input); + + if (_mm_testz_si128(v_latin, v_80)) { // ASCII fast path!!!! + _mm_storeu_si128((__m128i *)utf8_output, v_latin); + latin_input += 16; + utf8_output += 16; + continue; } - // Store the contents of xmm1 into the address pointed by (output + i) - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf16_output + i), xmm1); + // assuming a/b are bytes and A/B are uint16 of the same value + // aaaa_aaaa_bbbb_bbbb -> AAAA_AAAA + __m128i v_u16_latin_1_half = + _mm_shuffle_epi8(v_latin, latin_1_half_into_u16_byte_mask); + // aaaa_aaaa_bbbb_bbbb -> BBBB_BBBB + __m128i v_u16_latin_2_half = + _mm_shuffle_epi8(v_latin, latin_2_half_into_u16_byte_mask); - // Store the contents of xmm0 into the address pointed by (output + i + 8) - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf16_output + i + 8), xmm0); + internal::westmere::write_v_u16_11bits_to_utf8(v_u16_latin_1_half, + utf8_output, v_0000, v_ff80); + internal::westmere::write_v_u16_11bits_to_utf8(v_u16_latin_2_half, + utf8_output, v_0000, v_ff80); + latin_input += 16; + } + + if (end - latin_input >= 16) { + // Load 16 Latin1 characters (16 bytes) into a 128-bit register + __m128i v_latin = _mm_loadu_si128((__m128i *)latin_input); + + if (_mm_testz_si128(v_latin, v_80)) { // ASCII fast path!!!! + _mm_storeu_si128((__m128i *)utf8_output, v_latin); + latin_input += 16; + utf8_output += 16; + } else { + // assuming a/b are bytes and A/B are uint16 of the same value + // aaaa_aaaa_bbbb_bbbb -> AAAA_AAAA + __m128i v_u16_latin_1_half = + _mm_shuffle_epi8(v_latin, latin_1_half_into_u16_byte_mask); + internal::westmere::write_v_u16_11bits_to_utf8( + v_u16_latin_1_half, utf8_output, v_0000, v_ff80); + latin_input += 8; + } } + return std::make_pair(latin_input, utf8_output); +} +/* end file src/westmere/sse_convert_latin1_to_utf8.cpp */ +/* begin file src/westmere/sse_convert_latin1_to_utf16.cpp */ +template +std::pair +sse_convert_latin1_to_utf16(const char *latin1_input, size_t len, + char16_t *utf16_output) { + size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16 + for (size_t i = 0; i < rounded_len; i += 16) { + // Load 16 Latin1 characters into a 128-bit register + __m128i in = + _mm_loadu_si128(reinterpret_cast(&latin1_input[i])); + __m128i out1 = big_endian ? _mm_unpacklo_epi8(_mm_setzero_si128(), in) + : _mm_unpacklo_epi8(in, _mm_setzero_si128()); + __m128i out2 = big_endian ? _mm_unpackhi_epi8(_mm_setzero_si128(), in) + : _mm_unpackhi_epi8(in, _mm_setzero_si128()); + // Zero extend each Latin1 character to 16-bit integers and store the + // results back to memory + _mm_storeu_si128(reinterpret_cast<__m128i *>(&utf16_output[i]), out1); + _mm_storeu_si128(reinterpret_cast<__m128i *>(&utf16_output[i + 8]), out2); + } + // return pointers pointing to where we left off return std::make_pair(latin1_input + rounded_len, utf16_output + rounded_len); } -/* end file src/haswell/avx2_convert_latin1_to_utf16.cpp */ -/* begin file src/haswell/avx2_convert_latin1_to_utf32.cpp */ +/* end file src/westmere/sse_convert_latin1_to_utf16.cpp */ +/* begin file src/westmere/sse_convert_latin1_to_utf32.cpp */ std::pair -avx2_convert_latin1_to_utf32(const char *buf, size_t len, - char32_t *utf32_output) { - size_t rounded_len = ((len | 7) ^ 7); // Round down to nearest multiple of 8 +sse_convert_latin1_to_utf32(const char *buf, size_t len, + char32_t *utf32_output) { + const char *end = buf + len; - for (size_t i = 0; i < rounded_len; i += 8) { - // Load 8 Latin1 characters into a 64-bit register - __m128i in = _mm_loadl_epi64((__m128i *)&buf[i]); + while (end - buf >= 16) { + // Load 16 Latin1 characters (16 bytes) into a 128-bit register + __m128i in = _mm_loadu_si128((__m128i *)buf); - // Zero extend each set of 8 Latin1 characters to 8 32-bit integers using - // vpmovzxbd - __m256i out = _mm256_cvtepu8_epi32(in); + // Shift input to process next 4 bytes + __m128i in_shifted1 = _mm_srli_si128(in, 4); + __m128i in_shifted2 = _mm_srli_si128(in, 8); + __m128i in_shifted3 = _mm_srli_si128(in, 12); - // Store the results back to memory - _mm256_storeu_si256((__m256i *)&utf32_output[i], out); + // expand 8-bit to 32-bit unit + __m128i out1 = _mm_cvtepu8_epi32(in); + __m128i out2 = _mm_cvtepu8_epi32(in_shifted1); + __m128i out3 = _mm_cvtepu8_epi32(in_shifted2); + __m128i out4 = _mm_cvtepu8_epi32(in_shifted3); + + _mm_storeu_si128((__m128i *)utf32_output, out1); + _mm_storeu_si128((__m128i *)(utf32_output + 4), out2); + _mm_storeu_si128((__m128i *)(utf32_output + 8), out3); + _mm_storeu_si128((__m128i *)(utf32_output + 12), out4); + + utf32_output += 16; + buf += 16; } - // return pointers pointing to where we left off - return std::make_pair(buf + rounded_len, utf32_output + rounded_len); + return std::make_pair(buf, utf32_output); } -/* end file src/haswell/avx2_convert_latin1_to_utf32.cpp */ +/* end file src/westmere/sse_convert_latin1_to_utf32.cpp */ -/* begin file src/haswell/avx2_convert_utf8_to_utf16.cpp */ +/* begin file src/westmere/sse_convert_utf8_to_utf16.cpp */ // depends on "tables/utf8_to_utf16_tables.h" // Convert up to 12 bytes from utf8 to utf16 using a mask indicating the @@ -25883,18 +38593,20 @@ size_t convert_masked_utf8_to_utf16(const char *input, utf8_end_of_code_point_mask & 0xfff; if (utf8_end_of_code_point_mask == 0xfff) { // We process the data in chunks of 12 bytes. - __m256i ascii = _mm256_cvtepu8_epi16(in); + // Note: using 16 bytes is unsafe, see issue_ossfuzz_71218 + __m128i ascii_first = _mm_cvtepu8_epi16(in); + __m128i ascii_second = _mm_cvtepu8_epi16(_mm_srli_si128(in, 8)); if (big_endian) { - const __m256i swap256 = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, - 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); - ascii = _mm256_shuffle_epi8(ascii, swap256); + ascii_first = _mm_shuffle_epi8(ascii_first, swap); + ascii_second = _mm_shuffle_epi8(ascii_second, swap); } - _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf16_output), ascii); + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf16_output), ascii_first); + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf16_output + 8), + ascii_second); utf16_output += 12; // We wrote 12 16-bit characters. return 12; // We consumed 12 bytes. } - if (((utf8_end_of_code_point_mask & 0xffff) == 0xaaaa)) { + if (((utf8_end_of_code_point_mask & 0xFFFF) == 0xaaaa)) { // We want to take 8 2-byte UTF-8 code units and turn them into 8 2-byte // UTF-16 code units. There is probably a more efficient sequence, but the // following might do. @@ -25934,11 +38646,12 @@ size_t convert_masked_utf8_to_utf16(const char *input, utf16_output += 4; return 12; } + /// We do not have a fast path available, so we fallback. - const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][0]; - const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][1]; + const uint8_t idx = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; if (idx < 64) { // SIX (6) input code-code units // this is a relatively easy scenario @@ -25946,8 +38659,8 @@ size_t convert_masked_utf8_to_utf16(const char *input, // code code units spanning between 1 and 2 bytes each is 12 bytes. On // processors where pdep/pext is fast, we might be able to use a small // lookup table. - const __m128i sh = _mm_loadu_si128( - (const __m128i *)simdutf::tables::utf8_to_utf16::shufutf8[idx]); + const __m128i sh = + _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); const __m128i perm = _mm_shuffle_epi8(in, sh); const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); @@ -25955,12 +38668,11 @@ size_t convert_masked_utf8_to_utf16(const char *input, if (big_endian) composed = _mm_shuffle_epi8(composed, swap); _mm_storeu_si128((__m128i *)utf16_output, composed); - utf16_output += 6; // We wrote 12 bytes, 6 code points. There is a potential - // overflow of 4 bytes. + utf16_output += 6; // We wrote 12 bytes, 6 code points. } else if (idx < 145) { // FOUR (4) input code-code units - const __m128i sh = _mm_loadu_si128( - (const __m128i *)simdutf::tables::utf8_to_utf16::shufutf8[idx]); + const __m128i sh = + _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); const __m128i perm = _mm_shuffle_epi8(in, sh); const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits @@ -25976,7 +38688,7 @@ size_t convert_masked_utf8_to_utf16(const char *input, if (big_endian) composed_repacked = _mm_shuffle_epi8(composed_repacked, swap); _mm_storeu_si128((__m128i *)utf16_output, composed_repacked); - utf16_output += 4; // Here we overflow by 8 bytes. + utf16_output += 4; } else if (idx < 209) { // TWO (2) input code-code units ////////////// @@ -25988,8 +38700,8 @@ size_t convert_masked_utf8_to_utf16(const char *input, // only leading bytes at least as large as 0xf0 generate surrogate pairs. We // do as at the cost of an extra mask. ///////////// - const __m128i sh = _mm_loadu_si128( - (const __m128i *)simdutf::tables::utf8_to_utf16::shufutf8[idx]); + const __m128i sh = + _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); const __m128i perm = _mm_shuffle_epi8(in, sh); const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi32(0x7f)); const __m128i middlebyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); @@ -26050,8 +38762,8 @@ size_t convert_masked_utf8_to_utf16(const char *input, } return consumed; } -/* end file src/haswell/avx2_convert_utf8_to_utf16.cpp */ -/* begin file src/haswell/avx2_convert_utf8_to_utf32.cpp */ +/* end file src/westmere/sse_convert_utf8_to_utf16.cpp */ +/* begin file src/westmere/sse_convert_utf8_to_utf32.cpp */ // depends on "tables/utf8_to_utf16_tables.h" // Convert up to 12 bytes from utf8 to utf32 using a mask indicating the @@ -26077,10 +38789,14 @@ size_t convert_masked_utf8_to_utf32(const char *input, utf8_end_of_code_point_mask & 0xfff; if (utf8_end_of_code_point_mask == 0xfff) { // We process the data in chunks of 12 bytes. - _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output), - _mm256_cvtepu8_epi32(in)); - _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output + 8), - _mm256_cvtepu8_epi32(_mm_srli_si128(in, 8))); + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), + _mm_cvtepu8_epi32(in)); + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 4), + _mm_cvtepu8_epi32(_mm_srli_si128(in, 4))); + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 8), + _mm_cvtepu8_epi32(_mm_srli_si128(in, 8))); + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 12), + _mm_cvtepu8_epi32(_mm_srli_si128(in, 12))); utf32_output += 12; // We wrote 12 32-bit characters. return 12; // We consumed 12 bytes. } @@ -26094,9 +38810,11 @@ size_t convert_masked_utf8_to_utf32(const char *input, const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); const __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); - _mm256_storeu_si256((__m256i *)utf32_output, - _mm256_cvtepu16_epi32(composed)); - utf32_output += 8; // We wrote 16 bytes, 8 code points. + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), + _mm_cvtepu16_epi32(composed)); + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 4), + _mm_cvtepu16_epi32(_mm_srli_si128(composed, 8))); + utf32_output += 8; // We wrote 32 bytes, 8 code points. return 16; } if (input_utf8_end_of_code_point_mask == 0x924) { @@ -26139,10 +38857,11 @@ size_t convert_masked_utf8_to_utf32(const char *input, const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); const __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); - _mm256_storeu_si256((__m256i *)utf32_output, - _mm256_cvtepu16_epi32(composed)); - utf32_output += 6; // We wrote 24 bytes, 6 code points. There is a potential - // overflow of 32 - 24 = 8 bytes. + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), + _mm_cvtepu16_epi32(composed)); + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 4), + _mm_cvtepu16_epi32(_mm_srli_si128(composed, 8))); + utf32_output += 6; // We wrote 12 bytes, 6 code points. } else if (idx < 145) { // FOUR (4) input code-code units const __m128i sh = @@ -26180,46 +38899,99 @@ size_t convert_masked_utf8_to_utf32(const char *input, _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), _mm_or_si128(highbyte_shifted, middlehighbyte_shifted)); _mm_storeu_si128((__m128i *)utf32_output, composed); - utf32_output += - 3; // We wrote 3 * 4 bytes, there is a potential overflow of 4 bytes. + utf32_output += 3; } else { // here we know that there is an error but we do not handle errors } return consumed; } -/* end file src/haswell/avx2_convert_utf8_to_utf32.cpp */ +/* end file src/westmere/sse_convert_utf8_to_utf32.cpp */ +/* begin file src/westmere/sse_convert_utf8_to_latin1.cpp */ +// depends on "tables/utf8_to_utf16_tables.h" -/* begin file src/haswell/avx2_convert_utf16_to_latin1.cpp */ +// Convert up to 12 bytes from utf8 to latin1 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 12). +size_t convert_masked_utf8_to_latin1(const char *input, + uint64_t utf8_end_of_code_point_mask, + char *&latin1_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // + const __m128i in = _mm_loadu_si128((__m128i *)input); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & + 0xfff; // we are only processing 12 bytes in case it is not all ASCII + if (utf8_end_of_code_point_mask == 0xfff) { + // We process the data in chunks of 12 bytes. + _mm_storeu_si128(reinterpret_cast<__m128i *>(latin1_output), in); + latin1_output += 12; // We wrote 12 characters. + return 12; // We consumed 12 bytes. + } + /// We do not have a fast path available, so we fallback. + const uint8_t idx = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = + tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; + // this indicates an invalid input: + if (idx >= 64) { + return consumed; + } + // Here we should have (idx < 64), if not, there is a bug in the validation or + // elsewhere. SIX (6) input code-code units this is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. On + // processors where pdep/pext is fast, we might be able to use a small lookup + // table. + const __m128i sh = + _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); + const __m128i perm = _mm_shuffle_epi8(in, sh); + const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); + __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); + const __m128i latin1_packed = _mm_packus_epi16(composed, composed); + // writing 8 bytes even though we only care about the first 6 bytes. + // performance note: it would be faster to use _mm_storeu_si128, we should + // investigate. + _mm_storel_epi64((__m128i *)latin1_output, latin1_packed); + latin1_output += 6; // We wrote 6 bytes. + return consumed; +} +/* end file src/westmere/sse_convert_utf8_to_latin1.cpp */ + +/* begin file src/westmere/sse_convert_utf16_to_latin1.cpp */ template std::pair -avx2_convert_utf16_to_latin1(const char16_t *buf, size_t len, - char *latin1_output) { +sse_convert_utf16_to_latin1(const char16_t *buf, size_t len, + char *latin1_output) { const char16_t *end = buf + len; - while (end - buf >= 16) { - // Load 16 UTF-16 characters into 256-bit AVX2 register - __m256i in = _mm256_loadu_si256(reinterpret_cast(buf)); + while (end - buf >= 8) { + // Load 8 UTF-16 characters into 128-bit SSE register + __m128i in = _mm_loadu_si128(reinterpret_cast(buf)); if (!match_system(big_endian)) { - const __m256i swap = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, - 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); - in = _mm256_shuffle_epi8(in, swap); + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + in = _mm_shuffle_epi8(in, swap); } - __m256i high_byte_mask = _mm256_set1_epi16((int16_t)0xFF00); - if (_mm256_testz_si256(in, high_byte_mask)) { + __m128i high_byte_mask = _mm_set1_epi16((int16_t)0xFF00); + if (_mm_testz_si128(in, high_byte_mask)) { // Pack 16-bit characters into 8-bit and store in latin1_output - __m128i lo = _mm256_extractf128_si256(in, 0); - __m128i hi = _mm256_extractf128_si256(in, 1); - __m128i latin1_packed_lo = _mm_packus_epi16(lo, lo); - __m128i latin1_packed_hi = _mm_packus_epi16(hi, hi); + __m128i latin1_packed = _mm_packus_epi16(in, in); _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output), - latin1_packed_lo); - _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output + 8), - latin1_packed_hi); + latin1_packed); // Adjust pointers for next iteration - buf += 16; - latin1_output += 16; + buf += 8; + latin1_output += 8; } else { return std::make_pair(nullptr, reinterpret_cast(latin1_output)); } @@ -26229,54 +39001,47 @@ avx2_convert_utf16_to_latin1(const char16_t *buf, size_t len, template std::pair -avx2_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, - char *latin1_output) { +sse_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, + char *latin1_output) { const char16_t *start = buf; const char16_t *end = buf + len; - while (end - buf >= 16) { - __m256i in = _mm256_loadu_si256(reinterpret_cast(buf)); + while (end - buf >= 8) { + __m128i in = _mm_loadu_si128(reinterpret_cast(buf)); if (!match_system(big_endian)) { - const __m256i swap = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, - 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); - in = _mm256_shuffle_epi8(in, swap); + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + in = _mm_shuffle_epi8(in, swap); } - __m256i high_byte_mask = _mm256_set1_epi16((int16_t)0xFF00); - if (_mm256_testz_si256(in, high_byte_mask)) { - __m128i lo = _mm256_extractf128_si256(in, 0); - __m128i hi = _mm256_extractf128_si256(in, 1); - __m128i latin1_packed_lo = _mm_packus_epi16(lo, lo); - __m128i latin1_packed_hi = _mm_packus_epi16(hi, hi); + __m128i high_byte_mask = _mm_set1_epi16((int16_t)0xFF00); + if (_mm_testz_si128(in, high_byte_mask)) { + __m128i latin1_packed = _mm_packus_epi16(in, in); _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output), - latin1_packed_lo); - _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output + 8), - latin1_packed_hi); - buf += 16; - latin1_output += 16; + latin1_packed); + buf += 8; + latin1_output += 8; } else { // Fallback to scalar code for handling errors - for (int k = 0; k < 16; k++) { + for (int k = 0; k < 8; k++) { uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k]; if (word <= 0xff) { *latin1_output++ = char(word); } else { - return std::make_pair( - result{error_code::TOO_LARGE, (size_t)(buf - start + k)}, - latin1_output); + return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), + latin1_output); } } - buf += 16; + buf += 8; } } // while - return std::make_pair(result{error_code::SUCCESS, (size_t)(buf - start)}, + return std::make_pair(result(error_code::SUCCESS, buf - start), latin1_output); } -/* end file src/haswell/avx2_convert_utf16_to_latin1.cpp */ -/* begin file src/haswell/avx2_convert_utf16_to_utf8.cpp */ +/* end file src/westmere/sse_convert_utf16_to_latin1.cpp */ +/* begin file src/westmere/sse_convert_utf16_to_utf8.cpp */ /* The vectorized algorithm works on single SSE register i.e., it loads eight 16-bit code units. @@ -26332,117 +39097,91 @@ avx2_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, */ template std::pair -avx2_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_output) { +sse_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_output) { + const char16_t *end = buf + len; - const __m256i v_0000 = _mm256_setzero_si256(); - const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800); - const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800); - const __m256i v_c080 = _mm256_set1_epi16((int16_t)0xc080); + + const __m128i v_0000 = _mm_setzero_si128(); + const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800); + const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800); const size_t safety_margin = 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); + __m128i in = _mm_loadu_si128((__m128i *)buf); if (big_endian) { - const __m256i swap = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, - 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); - in = _mm256_shuffle_epi8(in, swap); + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + in = _mm_shuffle_epi8(in, swap); } // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes - const __m256i v_ff80 = _mm256_set1_epi16((int16_t)0xff80); - if (_mm256_testz_si256(in, v_ff80)) { // ASCII fast path!!!! - // 1. pack the bytes - const __m128i utf8_packed = _mm_packus_epi16( - _mm256_castsi256_si128(in), _mm256_extractf128_si256(in, 1)); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! + const __m128i v_ff80 = _mm_set1_epi16((int16_t)0xff80); + if (_mm_testz_si128(in, v_ff80)) { // ASCII fast path!!!! + __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); + if (big_endian) { + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + nextin = _mm_shuffle_epi8(nextin, swap); + } + if (!_mm_testz_si128(nextin, v_ff80)) { + // 1. pack the bytes + // obviously suboptimal. + const __m128i utf8_packed = _mm_packus_epi16(in, in); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + in = nextin; + } else { + // 1. pack the bytes + // obviously suboptimal. + const __m128i utf8_packed = _mm_packus_epi16(in, nextin); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } } + // no bits set above 7th bit - const __m256i one_byte_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in, v_ff80), v_0000); - const uint32_t one_byte_bitmask = - static_cast(_mm256_movemask_epi8(one_byte_bytemask)); + const __m128i one_byte_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in, v_ff80), v_0000); + const uint16_t one_byte_bitmask = + static_cast(_mm_movemask_epi8(one_byte_bytemask)); // no bits set above 11th bit - const __m256i one_or_two_bytes_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_0000); - const uint32_t one_or_two_bytes_bitmask = - static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); - if (one_or_two_bytes_bitmask == 0xffffffff) { - - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); - const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); - - // t0 = [000a|aaaa|bbbb|bb00] - const __m256i t0 = _mm256_slli_epi16(in, 2); - // t1 = [000a|aaaa|0000|0000] - const __m256i t1 = _mm256_and_si256(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const __m256i t2 = _mm256_and_si256(in, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const __m256i t3 = _mm256_or_si256(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const __m256i t4 = _mm256_or_si256(t3, v_c080); - - // 2. merge ASCII and 2-byte codewords - const __m256i utf8_unpacked = - _mm256_blendv_epi8(t4, in, one_byte_bytemask); - - // 3. prepare bitmask for 8-bit lookup - const uint32_t M0 = one_byte_bitmask & 0x55555555; - const uint32_t M1 = M0 >> 7; - const uint32_t M2 = (M1 | M0) & 0x00ff00ff; - // 4. pack the bytes - - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; - const uint8_t *row_2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> - 16)][0]; - - const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); - const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); - - const __m256i utf8_packed = _mm256_shuffle_epi8( - utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); - // 5. store bytes - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_castsi256_si128(utf8_packed)); - utf8_output += row[0]; - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_extractf128_si256(utf8_packed, 1)); - utf8_output += row_2[0]; + const __m128i one_or_two_bytes_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_0000); + const uint16_t one_or_two_bytes_bitmask = + static_cast(_mm_movemask_epi8(one_or_two_bytes_bytemask)); - // 6. adjust pointers - buf += 16; + if (one_or_two_bytes_bitmask == 0xffff) { + internal::westmere::write_v_u16_11bits_to_utf8( + in, utf8_output, one_byte_bytemask, one_byte_bitmask); + buf += 8; continue; } + // 1. Check if there are any surrogate word in the input chunk. // We have also deal with situation when there is a surrogate word // at the end of a chunk. - const __m256i surrogates_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_d800); + const __m128i surrogates_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_d800); // bitmask = 0x0000 if there are no surrogates // = 0xc000 if the last word is a surrogate - const uint32_t surrogates_bitmask = - static_cast(_mm256_movemask_epi8(surrogates_bytemask)); + const uint16_t surrogates_bitmask = + static_cast(_mm_movemask_epi8(surrogates_bytemask)); // It might seem like checking for surrogates_bitmask == 0xc000 could help. // However, it is likely an uncommon occurrence. - if (surrogates_bitmask == 0x00000000) { + if (surrogates_bitmask == 0x0000) { // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - const __m256i dup_even = _mm256_setr_epi16( - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); + const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606, + 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); /* In this branch we handle three cases: 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - @@ -26471,90 +39210,67 @@ avx2_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_output) { * t2 => [0ccc|cccc] [10cc|cccc] * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) */ -#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) +#define simdutf_vec(x) _mm_set1_epi16(static_cast(x)) // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const __m256i t0 = _mm256_shuffle_epi8(in, dup_even); + const __m128i t0 = _mm_shuffle_epi8(in, dup_even); // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); + const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111)); // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); + const __m128i t2 = _mm_or_si128(t1, simdutf_vec(0b1000000000000000)); // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] - const __m256i s0 = _mm256_srli_epi16(in, 4); + const __m128i s0 = _mm_srli_epi16(in, 4); // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] - const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); + const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100)); // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] - const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); + const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140)); // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); - const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, - simdutf_vec(0b0100000000000000)); - const __m256i s4 = _mm256_xor_si256(s3, m0); + const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000)); + const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, + simdutf_vec(0b0100000000000000)); + const __m128i s4 = _mm_xor_si128(s3, m0); #undef simdutf_vec // 4. expand code units 16-bit => 32-bit - const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); - const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); + const __m128i out0 = _mm_unpacklo_epi16(t2, s4); + const __m128i out1 = _mm_unpackhi_epi16(t2, s4); // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint32_t mask = (one_byte_bitmask & 0x55555555) | - (one_or_two_bytes_bitmask & 0xaaaaaaaa); - // Due to the wider registers, the following path is less likely to be - // useful. - /*if(mask == 0) { + const uint16_t mask = + (one_byte_bitmask & 0x5555) | (one_or_two_bytes_bitmask & 0xaaaa); + if (mask == 0) { // We only have three-byte code units. Use fast path. - const __m256i shuffle = - _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, - 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = - _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = - _mm256_shuffle_epi8(out1, shuffle); - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); + const __m128i shuffle = _mm_setr_epi8(2, 3, 1, 6, 7, 5, 10, 11, 9, 14, + 15, 13, -1, -1, -1, -1); + const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle); + const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle); + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; + buf += 8; continue; - }*/ + } const uint8_t mask0 = uint8_t(mask); + const uint8_t *row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); - const __m128i utf8_0 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); + const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0); const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); - const __m128i utf8_1 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); - - const uint8_t mask2 = static_cast(mask >> 16); - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; - const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); - const __m128i utf8_2 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); - - const uint8_t mask3 = static_cast(mask >> 24); - const uint8_t *row3 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; - const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); - const __m128i utf8_3 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); + const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1); _mm_storeu_si128((__m128i *)utf8_output, utf8_0); utf8_output += row0[0]; _mm_storeu_si128((__m128i *)utf8_output, utf8_1); utf8_output += row1[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_2); - utf8_output += row2[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_3); - utf8_output += row3[0]; - buf += 16; + + buf += 8; // surrogate pair(s) in a register } else { // Let us do a scalar fallback. @@ -26596,6 +39312,7 @@ avx2_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_output) { buf += k; } } // while + return std::make_pair(buf, utf8_output); } @@ -26608,120 +39325,92 @@ avx2_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_output) { */ template std::pair -avx2_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, - char *utf8_output) { +sse_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, + char *utf8_output) { const char16_t *start = buf; const char16_t *end = buf + len; - const __m256i v_0000 = _mm256_setzero_si256(); - const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800); - const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800); - const __m256i v_c080 = _mm256_set1_epi16((int16_t)0xc080); + const __m128i v_0000 = _mm_setzero_si128(); + const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800); + const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800); const size_t safety_margin = 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); + __m128i in = _mm_loadu_si128((__m128i *)buf); if (big_endian) { - const __m256i swap = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, - 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); - in = _mm256_shuffle_epi8(in, swap); + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + in = _mm_shuffle_epi8(in, swap); } // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes - const __m256i v_ff80 = _mm256_set1_epi16((int16_t)0xff80); - if (_mm256_testz_si256(in, v_ff80)) { // ASCII fast path!!!! - // 1. pack the bytes - const __m128i utf8_packed = _mm_packus_epi16( - _mm256_castsi256_si128(in), _mm256_extractf128_si256(in, 1)); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! + const __m128i v_ff80 = _mm_set1_epi16((int16_t)0xff80); + if (_mm_testz_si128(in, v_ff80)) { // ASCII fast path!!!! + __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); + if (big_endian) { + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + nextin = _mm_shuffle_epi8(nextin, swap); + } + if (!_mm_testz_si128(nextin, v_ff80)) { + // 1. pack the bytes + // obviously suboptimal. + const __m128i utf8_packed = _mm_packus_epi16(in, in); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + in = nextin; + } else { + // 1. pack the bytes + // obviously suboptimal. + const __m128i utf8_packed = _mm_packus_epi16(in, nextin); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } } + // no bits set above 7th bit - const __m256i one_byte_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in, v_ff80), v_0000); - const uint32_t one_byte_bitmask = - static_cast(_mm256_movemask_epi8(one_byte_bytemask)); + const __m128i one_byte_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in, v_ff80), v_0000); + const uint16_t one_byte_bitmask = + static_cast(_mm_movemask_epi8(one_byte_bytemask)); // no bits set above 11th bit - const __m256i one_or_two_bytes_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_0000); - const uint32_t one_or_two_bytes_bitmask = - static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); - if (one_or_two_bytes_bitmask == 0xffffffff) { - - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); - const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); - - // t0 = [000a|aaaa|bbbb|bb00] - const __m256i t0 = _mm256_slli_epi16(in, 2); - // t1 = [000a|aaaa|0000|0000] - const __m256i t1 = _mm256_and_si256(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const __m256i t2 = _mm256_and_si256(in, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const __m256i t3 = _mm256_or_si256(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const __m256i t4 = _mm256_or_si256(t3, v_c080); - - // 2. merge ASCII and 2-byte codewords - const __m256i utf8_unpacked = - _mm256_blendv_epi8(t4, in, one_byte_bytemask); - - // 3. prepare bitmask for 8-bit lookup - const uint32_t M0 = one_byte_bitmask & 0x55555555; - const uint32_t M1 = M0 >> 7; - const uint32_t M2 = (M1 | M0) & 0x00ff00ff; - // 4. pack the bytes - - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; - const uint8_t *row_2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> - 16)][0]; - - const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); - const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); - - const __m256i utf8_packed = _mm256_shuffle_epi8( - utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); - // 5. store bytes - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_castsi256_si128(utf8_packed)); - utf8_output += row[0]; - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_extractf128_si256(utf8_packed, 1)); - utf8_output += row_2[0]; + const __m128i one_or_two_bytes_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_0000); + const uint16_t one_or_two_bytes_bitmask = + static_cast(_mm_movemask_epi8(one_or_two_bytes_bytemask)); - // 6. adjust pointers - buf += 16; + if (one_or_two_bytes_bitmask == 0xffff) { + internal::westmere::write_v_u16_11bits_to_utf8( + in, utf8_output, one_byte_bytemask, one_byte_bitmask); + buf += 8; continue; } + // 1. Check if there are any surrogate word in the input chunk. // We have also deal with situation when there is a surrogate word // at the end of a chunk. - const __m256i surrogates_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_d800); + const __m128i surrogates_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_d800); // bitmask = 0x0000 if there are no surrogates // = 0xc000 if the last word is a surrogate - const uint32_t surrogates_bitmask = - static_cast(_mm256_movemask_epi8(surrogates_bytemask)); + const uint16_t surrogates_bitmask = + static_cast(_mm_movemask_epi8(surrogates_bytemask)); // It might seem like checking for surrogates_bitmask == 0xc000 could help. // However, it is likely an uncommon occurrence. - if (surrogates_bitmask == 0x00000000) { + if (surrogates_bitmask == 0x0000) { // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - const __m256i dup_even = _mm256_setr_epi16( - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); + const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606, + 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); /* In this branch we handle three cases: 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - @@ -26750,90 +39439,67 @@ avx2_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, * t2 => [0ccc|cccc] [10cc|cccc] * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) */ -#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) +#define simdutf_vec(x) _mm_set1_epi16(static_cast(x)) // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const __m256i t0 = _mm256_shuffle_epi8(in, dup_even); + const __m128i t0 = _mm_shuffle_epi8(in, dup_even); // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); + const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111)); // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); + const __m128i t2 = _mm_or_si128(t1, simdutf_vec(0b1000000000000000)); // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] - const __m256i s0 = _mm256_srli_epi16(in, 4); + const __m128i s0 = _mm_srli_epi16(in, 4); // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] - const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); + const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100)); // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] - const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); + const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140)); // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); - const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, - simdutf_vec(0b0100000000000000)); - const __m256i s4 = _mm256_xor_si256(s3, m0); + const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000)); + const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, + simdutf_vec(0b0100000000000000)); + const __m128i s4 = _mm_xor_si128(s3, m0); #undef simdutf_vec // 4. expand code units 16-bit => 32-bit - const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); - const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); + const __m128i out0 = _mm_unpacklo_epi16(t2, s4); + const __m128i out1 = _mm_unpackhi_epi16(t2, s4); // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint32_t mask = (one_byte_bitmask & 0x55555555) | - (one_or_two_bytes_bitmask & 0xaaaaaaaa); - // Due to the wider registers, the following path is less likely to be - // useful. - /*if(mask == 0) { + const uint16_t mask = + (one_byte_bitmask & 0x5555) | (one_or_two_bytes_bitmask & 0xaaaa); + if (mask == 0) { // We only have three-byte code units. Use fast path. - const __m256i shuffle = - _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, - 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = - _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = - _mm256_shuffle_epi8(out1, shuffle); - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); + const __m128i shuffle = _mm_setr_epi8(2, 3, 1, 6, 7, 5, 10, 11, 9, 14, + 15, 13, -1, -1, -1, -1); + const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle); + const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle); + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; + buf += 8; continue; - }*/ + } const uint8_t mask0 = uint8_t(mask); + const uint8_t *row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); - const __m128i utf8_0 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); + const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0); const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); - const __m128i utf8_1 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); - - const uint8_t mask2 = static_cast(mask >> 16); - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; - const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); - const __m128i utf8_2 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); - - const uint8_t mask3 = static_cast(mask >> 24); - const uint8_t *row3 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; - const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); - const __m128i utf8_3 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); + const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1); _mm_storeu_si128((__m128i *)utf8_output, utf8_0); utf8_output += row0[0]; _mm_storeu_si128((__m128i *)utf8_output, utf8_1); utf8_output += row1[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_2); - utf8_output += row2[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_3); - utf8_output += row3[0]; - buf += 16; + + buf += 8; // surrogate pair(s) in a register } else { // Let us do a scalar fallback. @@ -26877,10 +39543,11 @@ avx2_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, buf += k; } } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); } -/* end file src/haswell/avx2_convert_utf16_to_utf8.cpp */ -/* begin file src/haswell/avx2_convert_utf16_to_utf32.cpp */ +/* end file src/westmere/sse_convert_utf16_to_utf8.cpp */ +/* begin file src/westmere/sse_convert_utf16_to_utf32.cpp */ /* The vectorized algorithm works on single SSE register i.e., it loads eight 16-bit code units. @@ -26889,14 +39556,14 @@ avx2_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, 1. an input register contains no surrogates and each value is in range 0x0000 .. 0x07ff. 2. an input register contains no surrogates and values are - in range 0x0000 .. 0xffff. + is in range 0x0000 .. 0xffff. 3. an input register contains surrogates --- i.e. codepoints can have 16 or 32 bits. Ad 1. When values are less than 0x0800, it means that a 16-bit code unit - can be converted into: 1) single UTF8 byte (when it is an ASCII + can be converted into: 1) single UTF8 byte (when it's an ASCII char) or 2) two UTF8 bytes. For this case we do only some shuffle to obtain these 2-byte @@ -26931,48 +39598,47 @@ avx2_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, */ /* - Returns a pair: the first unprocessed byte from buf and utf32_output + Returns a pair: the first unprocessed byte from buf and utf8_output A scalar routing should carry on the conversion of the tail. */ template std::pair -avx2_convert_utf16_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_output) { +sse_convert_utf16_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_output) { const char16_t *end = buf + len; - const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800); - const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800); - while (end - buf >= 16) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); + const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800); + const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800); + + while (end - buf >= 8) { + __m128i in = _mm_loadu_si128((__m128i *)buf); + if (big_endian) { - const __m256i swap = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, - 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); - in = _mm256_shuffle_epi8(in, swap); + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + in = _mm_shuffle_epi8(in, swap); } // 1. Check if there are any surrogate word in the input chunk. // We have also deal with situation when there is a surrogate word // at the end of a chunk. - const __m256i surrogates_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_d800); + const __m128i surrogates_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_d800); // bitmask = 0x0000 if there are no surrogates // = 0xc000 if the last word is a surrogate - const uint32_t surrogates_bitmask = - static_cast(_mm256_movemask_epi8(surrogates_bytemask)); + const uint16_t surrogates_bitmask = + static_cast(_mm_movemask_epi8(surrogates_bytemask)); // It might seem like checking for surrogates_bitmask == 0xc000 could help. // However, it is likely an uncommon occurrence. - if (surrogates_bitmask == 0x00000000) { - // case: we extend all sixteen 16-bit code units to sixteen 32-bit code - // units - _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output), - _mm256_cvtepu16_epi32(_mm256_castsi256_si128(in))); - _mm256_storeu_si256( - reinterpret_cast<__m256i *>(utf32_output + 8), - _mm256_cvtepu16_epi32(_mm256_extractf128_si256(in, 1))); - utf32_output += 16; - buf += 16; + if (surrogates_bitmask == 0x0000) { + // case: no surrogate pair, extend 16-bit code units to 32-bit code units + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), + _mm_cvtepu16_epi32(in)); + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 4), + _mm_cvtepu16_epi32(_mm_srli_si128(in, 8))); + utf32_output += 8; + buf += 8; // surrogate pair(s) in a register } else { // Let us do a scalar fallback. @@ -26986,7 +39652,6 @@ avx2_convert_utf16_to_utf32(const char16_t *buf, size_t len, for (; k < forward; k++) { uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k]; if ((word & 0xF800) != 0xD800) { - // No surrogate pair *utf32_output++ = char32_t(word); } else { // must be a surrogate pair @@ -27017,44 +39682,43 @@ avx2_convert_utf16_to_utf32(const char16_t *buf, size_t len, */ template std::pair -avx2_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, - char32_t *utf32_output) { +sse_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, + char32_t *utf32_output) { const char16_t *start = buf; const char16_t *end = buf + len; - const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800); - const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800); - while (end - buf >= 16) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); + const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800); + const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800); + + while (end - buf >= 8) { + __m128i in = _mm_loadu_si128((__m128i *)buf); + if (big_endian) { - const __m256i swap = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, - 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); - in = _mm256_shuffle_epi8(in, swap); + const __m128i swap = + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + in = _mm_shuffle_epi8(in, swap); } // 1. Check if there are any surrogate word in the input chunk. // We have also deal with situation when there is a surrogate word // at the end of a chunk. - const __m256i surrogates_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_d800); + const __m128i surrogates_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_d800); // bitmask = 0x0000 if there are no surrogates // = 0xc000 if the last word is a surrogate - const uint32_t surrogates_bitmask = - static_cast(_mm256_movemask_epi8(surrogates_bytemask)); + const uint16_t surrogates_bitmask = + static_cast(_mm_movemask_epi8(surrogates_bytemask)); // It might seem like checking for surrogates_bitmask == 0xc000 could help. // However, it is likely an uncommon occurrence. - if (surrogates_bitmask == 0x00000000) { - // case: we extend all sixteen 16-bit code units to sixteen 32-bit code - // units - _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output), - _mm256_cvtepu16_epi32(_mm256_castsi256_si128(in))); - _mm256_storeu_si256( - reinterpret_cast<__m256i *>(utf32_output + 8), - _mm256_cvtepu16_epi32(_mm256_extractf128_si256(in, 1))); - utf32_output += 16; - buf += 16; + if (surrogates_bitmask == 0x0000) { + // case: no surrogate pair, extend 16-bit code units to 32-bit code units + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), + _mm_cvtepu16_epi32(in)); + _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 4), + _mm_cvtepu16_epi32(_mm_srli_si128(in, 8))); + utf32_output += 8; + buf += 8; // surrogate pair(s) in a register } else { // Let us do a scalar fallback. @@ -27068,7 +39732,6 @@ avx2_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, for (; k < forward; k++) { uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k]; if ((word & 0xF800) != 0xD800) { - // No surrogate pair *utf32_output++ = char32_t(word); } else { // must be a surrogate pair @@ -27091,229 +39754,290 @@ avx2_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, } // while return std::make_pair(result(error_code::SUCCESS, buf - start), utf32_output); } -/* end file src/haswell/avx2_convert_utf16_to_utf32.cpp */ +/* end file src/westmere/sse_convert_utf16_to_utf32.cpp */ -/* begin file src/haswell/avx2_convert_utf32_to_latin1.cpp */ +/* begin file src/westmere/sse_convert_utf32_to_latin1.cpp */ std::pair -avx2_convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) { - const size_t rounded_len = - len & ~0x1F; // Round down to nearest multiple of 32 - - __m256i high_bytes_mask = _mm256_set1_epi32(0xFFFFFF00); +sse_convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) { + const size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16 - __m256i shufmask = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 12, 8, 4, 0, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 12, 8, 4, 0); + __m128i high_bytes_mask = _mm_set1_epi32(0xFFFFFF00); + __m128i shufmask = + _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0); for (size_t i = 0; i < rounded_len; i += 16) { - __m256i in1 = _mm256_loadu_si256((__m256i *)buf); - __m256i in2 = _mm256_loadu_si256((__m256i *)(buf + 8)); + __m128i in1 = _mm_loadu_si128((__m128i *)buf); + __m128i in2 = _mm_loadu_si128((__m128i *)(buf + 4)); + __m128i in3 = _mm_loadu_si128((__m128i *)(buf + 8)); + __m128i in4 = _mm_loadu_si128((__m128i *)(buf + 12)); - __m256i check_combined = _mm256_or_si256(in1, in2); + __m128i check_combined = _mm_or_si128(in1, in2); + check_combined = _mm_or_si128(check_combined, in3); + check_combined = _mm_or_si128(check_combined, in4); - if (!_mm256_testz_si256(check_combined, high_bytes_mask)) { + if (!_mm_testz_si128(check_combined, high_bytes_mask)) { return std::make_pair(nullptr, latin1_output); } - - // Turn UTF32 bytes into latin 1 bytes - __m256i shuffled1 = _mm256_shuffle_epi8(in1, shufmask); - __m256i shuffled2 = _mm256_shuffle_epi8(in2, shufmask); - - // move Latin1 bytes to their correct spot - __m256i idx1 = _mm256_set_epi32(-1, -1, -1, -1, -1, -1, 4, 0); - __m256i idx2 = _mm256_set_epi32(-1, -1, -1, -1, 4, 0, -1, -1); - __m256i reshuffled1 = _mm256_permutevar8x32_epi32(shuffled1, idx1); - __m256i reshuffled2 = _mm256_permutevar8x32_epi32(shuffled2, idx2); - - __m256i result = _mm256_or_si256(reshuffled1, reshuffled2); - _mm_storeu_si128((__m128i *)latin1_output, _mm256_castsi256_si128(result)); - + __m128i pack1 = _mm_unpacklo_epi32(_mm_shuffle_epi8(in1, shufmask), + _mm_shuffle_epi8(in2, shufmask)); + __m128i pack2 = _mm_unpacklo_epi32(_mm_shuffle_epi8(in3, shufmask), + _mm_shuffle_epi8(in4, shufmask)); + __m128i pack = _mm_unpacklo_epi64(pack1, pack2); + _mm_storeu_si128((__m128i *)latin1_output, pack); latin1_output += 16; buf += 16; } return std::make_pair(buf, latin1_output); } -std::pair -avx2_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) { - const size_t rounded_len = - len & ~0x1F; // Round down to nearest multiple of 32 - - __m256i high_bytes_mask = _mm256_set1_epi32(0xFFFFFF00); - __m256i shufmask = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 12, 8, 4, 0, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 12, 8, 4, 0); +std::pair +sse_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) { const char32_t *start = buf; + const size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16 + + __m128i high_bytes_mask = _mm_set1_epi32(0xFFFFFF00); + __m128i shufmask = + _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0); for (size_t i = 0; i < rounded_len; i += 16) { - __m256i in1 = _mm256_loadu_si256((__m256i *)buf); - __m256i in2 = _mm256_loadu_si256((__m256i *)(buf + 8)); + __m128i in1 = _mm_loadu_si128((__m128i *)buf); + __m128i in2 = _mm_loadu_si128((__m128i *)(buf + 4)); + __m128i in3 = _mm_loadu_si128((__m128i *)(buf + 8)); + __m128i in4 = _mm_loadu_si128((__m128i *)(buf + 12)); - __m256i check_combined = _mm256_or_si256(in1, in2); + __m128i check_combined = _mm_or_si128(in1, in2); + check_combined = _mm_or_si128(check_combined, in3); + check_combined = _mm_or_si128(check_combined, in4); - if (!_mm256_testz_si256(check_combined, high_bytes_mask)) { + if (!_mm_testz_si128(check_combined, high_bytes_mask)) { // Fallback to scalar code for handling errors - for (int k = 0; k < 8; k++) { + for (int k = 0; k < 16; k++) { char32_t codepoint = buf[k]; - if (codepoint <= 0xFF) { - *latin1_output++ = static_cast(codepoint); + if (codepoint <= 0xff) { + *latin1_output++ = char(codepoint); } else { return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), latin1_output); } } - buf += 8; - } else { - __m256i shuffled1 = _mm256_shuffle_epi8(in1, shufmask); - __m256i shuffled2 = _mm256_shuffle_epi8(in2, shufmask); - - __m256i idx1 = _mm256_set_epi32(-1, -1, -1, -1, -1, -1, 4, 0); - __m256i idx2 = _mm256_set_epi32(-1, -1, -1, -1, 4, 0, -1, -1); - __m256i reshuffled1 = _mm256_permutevar8x32_epi32(shuffled1, idx1); - __m256i reshuffled2 = _mm256_permutevar8x32_epi32(shuffled2, idx2); - - __m256i result = _mm256_or_si256(reshuffled1, reshuffled2); - _mm_storeu_si128((__m128i *)latin1_output, - _mm256_castsi256_si128(result)); - - latin1_output += 16; buf += 16; + continue; } + __m128i pack1 = _mm_unpacklo_epi32(_mm_shuffle_epi8(in1, shufmask), + _mm_shuffle_epi8(in2, shufmask)); + __m128i pack2 = _mm_unpacklo_epi32(_mm_shuffle_epi8(in3, shufmask), + _mm_shuffle_epi8(in4, shufmask)); + __m128i pack = _mm_unpacklo_epi64(pack1, pack2); + _mm_storeu_si128((__m128i *)latin1_output, pack); + latin1_output += 16; + buf += 16; } return std::make_pair(result(error_code::SUCCESS, buf - start), latin1_output); } -/* end file src/haswell/avx2_convert_utf32_to_latin1.cpp */ -/* begin file src/haswell/avx2_convert_utf32_to_utf8.cpp */ +/* end file src/westmere/sse_convert_utf32_to_latin1.cpp */ +/* begin file src/westmere/sse_convert_utf32_to_utf8.cpp */ std::pair -avx2_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_output) { +sse_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_output) { const char32_t *end = buf + len; - const __m256i v_0000 = _mm256_setzero_si256(); - const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); - const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80); - const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800); - const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080); - const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff); - __m256i running_max = _mm256_setzero_si256(); - __m256i forbidden_bytemask = _mm256_setzero_si256(); + const __m128i v_0000 = _mm_setzero_si128(); //__m128 = 128 bits + const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800); // 1111 1000 0000 + // 0000 + const __m128i v_c080 = _mm_set1_epi16((uint16_t)0xc080); // 1100 0000 1000 + // 0000 + const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80); // 1111 1111 1000 + // 0000 + const __m128i v_ffff0000 = _mm_set1_epi32( + (uint32_t)0xffff0000); // 1111 1111 1111 1111 0000 0000 0000 0000 + const __m128i v_7fffffff = _mm_set1_epi32( + (uint32_t)0x7fffffff); // 0111 1111 1111 1111 1111 1111 1111 1111 + __m128i running_max = _mm_setzero_si128(); + __m128i forbidden_bytemask = _mm_setzero_si128(); const size_t safety_margin = 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); - __m256i nextin = _mm256_loadu_si256((__m256i *)buf + 1); - running_max = _mm256_max_epu32(_mm256_max_epu32(in, running_max), nextin); + while (end - buf >= + std::ptrdiff_t( + 16 + safety_margin)) { // buf is a char32_t pointer, each char32_t + // has 4 bytes or 32 bits, thus buf + 16 * + // char_32t = 512 bits = 64 bytes + // We load two 16 bytes registers for a total of 32 bytes or 16 characters. + __m128i in = _mm_loadu_si128((__m128i *)buf); + __m128i nextin = _mm_loadu_si128( + (__m128i *)buf + 1); // These two values can hold only 8 UTF32 chars + running_max = _mm_max_epu32( + _mm_max_epu32(in, running_max), // take element-wise max char32_t from + // in and running_max vector + nextin); // and take element-wise max element from nextin and + // running_max vector // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned // saturation - __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff), - _mm256_and_si256(nextin, v_7fffffff)); - in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000); + __m128i in_16 = _mm_packus_epi32( + _mm_and_si128(in, v_7fffffff), + _mm_and_si128( + nextin, + v_7fffffff)); // in this context pack the two __m128 into a single + // By ensuring the highest bit is set to 0(&v_7fffffff), we are making sure + // all values are interpreted as non-negative, or specifically, the values + // are within the range of valid Unicode code points. remember : having + // leading byte 0 means a positive number by the two complements system. + // Unicode is well beneath the range where you'll start getting issues so + // that's OK. - // Try to apply UTF-16 => UTF-8 routine on 256 bits - // (haswell/avx2_convert_utf16_to_utf8.cpp) + // Try to apply UTF-16 => UTF-8 from ./sse_convert_utf16_to_utf8.cpp - if (_mm256_testz_si256(in_16, v_ff80)) { // ASCII fast path!!!! - // 1. pack the bytes - const __m128i utf8_packed = _mm_packus_epi16( - _mm256_castsi256_si128(in_16), _mm256_extractf128_si256(in_16, 1)); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! + // Check for ASCII fast path + + // ASCII fast path!!!! + // We eagerly load another 32 bytes, hoping that they will be ASCII too. + // The intuition is that we try to collect 16 ASCII characters which + // requires a total of 64 bytes of input. If we fail, we just pass thirdin + // and fourthin as our new inputs. + if (_mm_testz_si128(in_16, v_ff80)) { // if the first two blocks are ASCII + __m128i thirdin = _mm_loadu_si128((__m128i *)buf + 2); + __m128i fourthin = _mm_loadu_si128((__m128i *)buf + 3); + running_max = _mm_max_epu32( + _mm_max_epu32(thirdin, running_max), + fourthin); // take the running max of all 4 vectors thus far + __m128i nextin_16 = _mm_packus_epi32( + _mm_and_si128(thirdin, v_7fffffff), + _mm_and_si128(fourthin, + v_7fffffff)); // pack into 1 vector, now you have two + if (!_mm_testz_si128( + nextin_16, + v_ff80)) { // checks if the second packed vector is ASCII, if not: + // 1. pack the bytes + // obviously suboptimal. + const __m128i utf8_packed = _mm_packus_epi16( + in_16, in_16); // creates two copy of in_16 in 1 vector + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, + utf8_packed); // put them into the output + // 3. adjust pointers + buf += 8; // the char32_t buffer pointer goes up 8 char32_t chars* 32 + // bits = 256 bits + utf8_output += + 8; // same with output, e.g. lift the first two blocks alone. + // Proceed with next input + in_16 = nextin_16; + // We need to update in and nextin because they are used later. + in = thirdin; + nextin = fourthin; + } else { + // 1. pack the bytes + const __m128i utf8_packed = _mm_packus_epi16(in_16, nextin_16); + // 2. store (16 bytes) + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } } - // no bits set above 7th bit - const __m256i one_byte_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000); - const uint32_t one_byte_bitmask = - static_cast(_mm256_movemask_epi8(one_byte_bytemask)); + + // no bits set above 7th bit -- find out all the ASCII characters + const __m128i one_byte_bytemask = + _mm_cmpeq_epi16( // this takes four bytes at a time and compares: + _mm_and_si128(in_16, v_ff80), // the vector that get only the first + // 9 bits of each 16-bit/2-byte units + v_0000 // + ); // they should be all zero if they are ASCII. E.g. ASCII in UTF32 is + // of format 0000 0000 0000 0XXX XXXX + // _mm_cmpeq_epi16 should now return a 1111 1111 1111 1111 for equals, and + // 0000 0000 0000 0000 if not for each 16-bit/2-byte units + const uint16_t one_byte_bitmask = static_cast(_mm_movemask_epi8( + one_byte_bytemask)); // collect the MSB from previous vector and put + // them into uint16_t mas // no bits set above 11th bit - const __m256i one_or_two_bytes_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000); - const uint32_t one_or_two_bytes_bitmask = - static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); - if (one_or_two_bytes_bitmask == 0xffffffff) { + const __m128i one_or_two_bytes_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_0000); + const uint16_t one_or_two_bytes_bitmask = + static_cast(_mm_movemask_epi8(one_or_two_bytes_bytemask)); + + if (one_or_two_bytes_bitmask == 0xffff) { + // case: all code units either produce 1 or 2 UTF-8 bytes (at least one + // produces 2 bytes) // 1. prepare 2-byte values // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 // expected output : [110a|aaaa|10bb|bbbb] x 8 - const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); - const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); + const __m128i v_1f00 = + _mm_set1_epi16((int16_t)0x1f00); // 0001 1111 0000 0000 + const __m128i v_003f = + _mm_set1_epi16((int16_t)0x003f); // 0000 0000 0011 1111 // t0 = [000a|aaaa|bbbb|bb00] - const __m256i t0 = _mm256_slli_epi16(in_16, 2); + const __m128i t0 = _mm_slli_epi16(in_16, 2); // shift packed vector by two // t1 = [000a|aaaa|0000|0000] - const __m256i t1 = _mm256_and_si256(t0, v_1f00); + const __m128i t1 = + _mm_and_si128(t0, v_1f00); // potentital first utf8 byte // t2 = [0000|0000|00bb|bbbb] - const __m256i t2 = _mm256_and_si256(in_16, v_003f); + const __m128i t2 = + _mm_and_si128(in_16, v_003f); // potential second utf8 byte // t3 = [000a|aaaa|00bb|bbbb] - const __m256i t3 = _mm256_or_si256(t1, t2); + const __m128i t3 = + _mm_or_si128(t1, t2); // first and second potential utf8 byte together // t4 = [110a|aaaa|10bb|bbbb] - const __m256i t4 = _mm256_or_si256(t3, v_c080); + const __m128i t4 = _mm_or_si128( + t3, + v_c080); // t3 | 1100 0000 1000 0000 = full potential 2-byte utf8 unit // 2. merge ASCII and 2-byte codewords - const __m256i utf8_unpacked = - _mm256_blendv_epi8(t4, in_16, one_byte_bytemask); + const __m128i utf8_unpacked = + _mm_blendv_epi8(t4, in_16, one_byte_bytemask); // 3. prepare bitmask for 8-bit lookup - const uint32_t M0 = one_byte_bitmask & 0x55555555; - const uint32_t M1 = M0 >> 7; - const uint32_t M2 = (M1 | M0) & 0x00ff00ff; + // one_byte_bitmask = hhggffeeddccbbaa -- the bits are doubled (h - + // MSB, a - LSB) + const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a + const uint16_t m1 = + static_cast(m0 >> 7); // m1 = 00000000h0g0f0e0 + const uint8_t m2 = + static_cast((m0 | m1) & 0xff); // m2 = hdgcfbea // 4. pack the bytes - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; - const uint8_t *row_2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> - 16)][0]; - + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); - const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); + const __m128i utf8_packed = _mm_shuffle_epi8(utf8_unpacked, shuffle); - const __m256i utf8_packed = _mm256_shuffle_epi8( - utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); // 5. store bytes - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_castsi256_si128(utf8_packed)); - utf8_output += row[0]; - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_extractf128_si256(utf8_packed, 1)); - utf8_output += row_2[0]; + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); // 6. adjust pointers - buf += 16; + buf += 8; + utf8_output += row[0]; continue; } - // Must check for overflow in packing - const __m256i saturation_bytemask = _mm256_cmpeq_epi32( - _mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000); + + // Check for overflow in packing + + const __m128i saturation_bytemask = _mm_cmpeq_epi32( + _mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000); const uint32_t saturation_bitmask = - static_cast(_mm256_movemask_epi8(saturation_bytemask)); - if (saturation_bitmask == 0xffffffff) { + static_cast(_mm_movemask_epi8(saturation_bytemask)); + if (saturation_bitmask == 0xffff) { // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800); - forbidden_bytemask = _mm256_or_si256( - forbidden_bytemask, - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800)); + const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800); + forbidden_bytemask = + _mm_or_si128(forbidden_bytemask, + _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_d800)); - const __m256i dup_even = _mm256_setr_epi16( - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); + const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606, + 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three UTF-8 bytes We expand the input word (16-bit) into two code units (32-bit), thus @@ -27335,95 +40059,72 @@ avx2_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_output) { * t2 => [0ccc|cccc] [10cc|cccc] * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) */ -#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) +#define simdutf_vec(x) _mm_set1_epi16(static_cast(x)) // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even); + const __m128i t0 = _mm_shuffle_epi8(in_16, dup_even); // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); + const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111)); // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); + const __m128i t2 = _mm_or_si128(t1, simdutf_vec(0b1000000000000000)); // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] - const __m256i s0 = _mm256_srli_epi16(in_16, 4); + const __m128i s0 = _mm_srli_epi16(in_16, 4); // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] - const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); + const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100)); // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] - const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); + const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140)); // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); - const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, - simdutf_vec(0b0100000000000000)); - const __m256i s4 = _mm256_xor_si256(s3, m0); + const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000)); + const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, + simdutf_vec(0b0100000000000000)); + const __m128i s4 = _mm_xor_si128(s3, m0); #undef simdutf_vec // 4. expand code units 16-bit => 32-bit - const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); - const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); + const __m128i out0 = _mm_unpacklo_epi16(t2, s4); + const __m128i out1 = _mm_unpackhi_epi16(t2, s4); // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint32_t mask = (one_byte_bitmask & 0x55555555) | - (one_or_two_bytes_bitmask & 0xaaaaaaaa); - // Due to the wider registers, the following path is less likely to be - // useful. - /*if(mask == 0) { + const uint16_t mask = + (one_byte_bitmask & 0x5555) | (one_or_two_bytes_bitmask & 0xaaaa); + if (mask == 0) { // We only have three-byte code units. Use fast path. - const __m256i shuffle = - _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, - 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = - _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = - _mm256_shuffle_epi8(out1, shuffle); - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); + const __m128i shuffle = _mm_setr_epi8(2, 3, 1, 6, 7, 5, 10, 11, 9, 14, + 15, 13, -1, -1, -1, -1); + const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle); + const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle); + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; + buf += 8; continue; - }*/ + } const uint8_t mask0 = uint8_t(mask); + const uint8_t *row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); - const __m128i utf8_0 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); + const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0); const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); - const __m128i utf8_1 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); - - const uint8_t mask2 = static_cast(mask >> 16); - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; - const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); - const __m128i utf8_2 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); - - const uint8_t mask3 = static_cast(mask >> 24); - const uint8_t *row3 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; - const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); - const __m128i utf8_3 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); + const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1); _mm_storeu_si128((__m128i *)utf8_output, utf8_0); utf8_output += row0[0]; _mm_storeu_si128((__m128i *)utf8_output, utf8_1); utf8_output += row1[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_2); - utf8_output += row2[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_3); - utf8_output += row3[0]; - buf += 16; + + buf += 8; } else { - // case: at least one 32-bit word is larger than 0xFFFF <=> it will - // produce four UTF-8 bytes. Let us do a scalar fallback. It may seem - // wasteful to use scalar code, but being efficient with SIMD may require - // large, non-trivial tables? + // case: at least one 32-bit word produce a surrogate pair in UTF-16 <=> + // will produce four UTF-8 bytes Let us do a scalar fallback. It may seem + // wasteful to use scalar code, but being efficient with SIMD in the + // presence of surrogate pairs may require non-trivial tables. size_t forward = 15; size_t k = 0; if (size_t(end - buf) < forward + 1) { @@ -27431,19 +40132,19 @@ avx2_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_output) { } for (; k < forward; k++) { uint32_t word = buf[k]; - if ((word & 0xFFFFFF80) == 0) { // 1-byte (ASCII) + if ((word & 0xFFFFFF80) == 0) { *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { // 2-byte + } else if ((word & 0xFFFFF800) == 0) { *utf8_output++ = char((word >> 6) | 0b11000000); *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { // 3-byte + } else if ((word & 0xFFFF0000) == 0) { if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(nullptr, utf8_output); } *utf8_output++ = char((word >> 12) | 0b11100000); *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { // 4-byte + } else { if (word > 0x10FFFF) { return std::make_pair(nullptr, utf8_output); } @@ -27458,13 +40159,13 @@ avx2_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_output) { } // while // check for invalid input - const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff); - if (static_cast(_mm256_movemask_epi8(_mm256_cmpeq_epi32( - _mm256_max_epu32(running_max, v_10ffff), v_10ffff))) != 0xffffffff) { + const __m128i v_10ffff = _mm_set1_epi32((uint32_t)0x10ffff); + if (static_cast(_mm_movemask_epi8(_mm_cmpeq_epi32( + _mm_max_epu32(running_max, v_10ffff), v_10ffff))) != 0xffff) { return std::make_pair(nullptr, utf8_output); } - if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != 0) { + if (static_cast(_mm_movemask_epi8(forbidden_bytemask)) != 0) { return std::make_pair(nullptr, utf8_output); } @@ -27472,145 +40173,141 @@ avx2_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_output) { } std::pair -avx2_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, - char *utf8_output) { +sse_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, + char *utf8_output) { const char32_t *end = buf + len; const char32_t *start = buf; - const __m256i v_0000 = _mm256_setzero_si256(); - const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); - const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80); - const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800); - const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080); - const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff); - const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff); + const __m128i v_0000 = _mm_setzero_si128(); + const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800); + const __m128i v_c080 = _mm_set1_epi16((uint16_t)0xc080); + const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80); + const __m128i v_ffff0000 = _mm_set1_epi32((uint32_t)0xffff0000); + const __m128i v_7fffffff = _mm_set1_epi32((uint32_t)0x7fffffff); + const __m128i v_10ffff = _mm_set1_epi32((uint32_t)0x10ffff); const size_t safety_margin = 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); - __m256i nextin = _mm256_loadu_si256((__m256i *)buf + 1); + // We load two 16 bytes registers for a total of 32 bytes or 8 characters. + __m128i in = _mm_loadu_si128((__m128i *)buf); + __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); // Check for too large input - const __m256i max_input = - _mm256_max_epu32(_mm256_max_epu32(in, nextin), v_10ffff); - if (static_cast(_mm256_movemask_epi8( - _mm256_cmpeq_epi32(max_input, v_10ffff))) != 0xffffffff) { + __m128i max_input = _mm_max_epu32(_mm_max_epu32(in, nextin), v_10ffff); + if (static_cast(_mm_movemask_epi8( + _mm_cmpeq_epi32(max_input, v_10ffff))) != 0xffff) { return std::make_pair(result(error_code::TOO_LARGE, buf - start), utf8_output); } // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned // saturation - __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff), - _mm256_and_si256(nextin, v_7fffffff)); - in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000); + __m128i in_16 = _mm_packus_epi32(_mm_and_si128(in, v_7fffffff), + _mm_and_si128(nextin, v_7fffffff)); - // Try to apply UTF-16 => UTF-8 routine on 256 bits - // (haswell/avx2_convert_utf16_to_utf8.cpp) + // Try to apply UTF-16 => UTF-8 from ./sse_convert_utf16_to_utf8.cpp - if (_mm256_testz_si256(in_16, v_ff80)) { // ASCII fast path!!!! + // Check for ASCII fast path + if (_mm_testz_si128(in_16, v_ff80)) { // ASCII fast path!!!! // 1. pack the bytes - const __m128i utf8_packed = _mm_packus_epi16( - _mm256_castsi256_si128(in_16), _mm256_extractf128_si256(in_16, 1)); + // obviously suboptimal. + const __m128i utf8_packed = _mm_packus_epi16(in_16, in_16); // 2. store (16 bytes) _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! + buf += 8; + utf8_output += 8; + continue; } + // no bits set above 7th bit - const __m256i one_byte_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000); - const uint32_t one_byte_bitmask = - static_cast(_mm256_movemask_epi8(one_byte_bytemask)); + const __m128i one_byte_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in_16, v_ff80), v_0000); + const uint16_t one_byte_bitmask = + static_cast(_mm_movemask_epi8(one_byte_bytemask)); // no bits set above 11th bit - const __m256i one_or_two_bytes_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000); - const uint32_t one_or_two_bytes_bitmask = - static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); - if (one_or_two_bytes_bitmask == 0xffffffff) { + const __m128i one_or_two_bytes_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_0000); + const uint16_t one_or_two_bytes_bitmask = + static_cast(_mm_movemask_epi8(one_or_two_bytes_bytemask)); + + if (one_or_two_bytes_bitmask == 0xffff) { + // case: all code units either produce 1 or 2 UTF-8 bytes (at least one + // produces 2 bytes) // 1. prepare 2-byte values // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 // expected output : [110a|aaaa|10bb|bbbb] x 8 - const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); - const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); + const __m128i v_1f00 = _mm_set1_epi16((int16_t)0x1f00); + const __m128i v_003f = _mm_set1_epi16((int16_t)0x003f); // t0 = [000a|aaaa|bbbb|bb00] - const __m256i t0 = _mm256_slli_epi16(in_16, 2); + const __m128i t0 = _mm_slli_epi16(in_16, 2); // t1 = [000a|aaaa|0000|0000] - const __m256i t1 = _mm256_and_si256(t0, v_1f00); + const __m128i t1 = _mm_and_si128(t0, v_1f00); // t2 = [0000|0000|00bb|bbbb] - const __m256i t2 = _mm256_and_si256(in_16, v_003f); + const __m128i t2 = _mm_and_si128(in_16, v_003f); // t3 = [000a|aaaa|00bb|bbbb] - const __m256i t3 = _mm256_or_si256(t1, t2); + const __m128i t3 = _mm_or_si128(t1, t2); // t4 = [110a|aaaa|10bb|bbbb] - const __m256i t4 = _mm256_or_si256(t3, v_c080); + const __m128i t4 = _mm_or_si128(t3, v_c080); // 2. merge ASCII and 2-byte codewords - const __m256i utf8_unpacked = - _mm256_blendv_epi8(t4, in_16, one_byte_bytemask); + const __m128i utf8_unpacked = + _mm_blendv_epi8(t4, in_16, one_byte_bytemask); // 3. prepare bitmask for 8-bit lookup - const uint32_t M0 = one_byte_bitmask & 0x55555555; - const uint32_t M1 = M0 >> 7; - const uint32_t M2 = (M1 | M0) & 0x00ff00ff; + // one_byte_bitmask = hhggffeeddccbbaa -- the bits are doubled (h - + // MSB, a - LSB) + const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a + const uint16_t m1 = + static_cast(m0 >> 7); // m1 = 00000000h0g0f0e0 + const uint8_t m2 = + static_cast((m0 | m1) & 0xff); // m2 = hdgcfbea // 4. pack the bytes - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; - const uint8_t *row_2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> - 16)][0]; - - const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); - const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); - - const __m256i utf8_packed = _mm256_shuffle_epi8( - utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); - // 5. store bytes - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_castsi256_si128(utf8_packed)); - utf8_output += row[0]; - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_extractf128_si256(utf8_packed, 1)); - utf8_output += row_2[0]; + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; + const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); + const __m128i utf8_packed = _mm_shuffle_epi8(utf8_unpacked, shuffle); + + // 5. store bytes + _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); // 6. adjust pointers - buf += 16; + buf += 8; + utf8_output += row[0]; continue; } - // Must check for overflow in packing - const __m256i saturation_bytemask = _mm256_cmpeq_epi32( - _mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000); + + // Check for overflow in packing + const __m128i saturation_bytemask = _mm_cmpeq_epi32( + _mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000); const uint32_t saturation_bitmask = - static_cast(_mm256_movemask_epi8(saturation_bytemask)); - if (saturation_bitmask == 0xffffffff) { + static_cast(_mm_movemask_epi8(saturation_bytemask)); + + if (saturation_bitmask == 0xffff) { // case: code units from register produce either 1, 2 or 3 UTF-8 bytes // Check for illegal surrogate code units - const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800); - const __m256i forbidden_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800); - if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != - 0x0) { + const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800); + const __m128i forbidden_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_d800); + if (static_cast(_mm_movemask_epi8(forbidden_bytemask)) != 0) { return std::make_pair(result(error_code::SURROGATE, buf - start), utf8_output); } - const __m256i dup_even = _mm256_setr_epi16( - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); + const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606, + 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three UTF-8 bytes We expand the input word (16-bit) into two code units (32-bit), thus @@ -27632,95 +40329,72 @@ avx2_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, * t2 => [0ccc|cccc] [10cc|cccc] * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) */ -#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) +#define simdutf_vec(x) _mm_set1_epi16(static_cast(x)) // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even); + const __m128i t0 = _mm_shuffle_epi8(in_16, dup_even); // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); + const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111)); // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); + const __m128i t2 = _mm_or_si128(t1, simdutf_vec(0b1000000000000000)); // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] - const __m256i s0 = _mm256_srli_epi16(in_16, 4); + const __m128i s0 = _mm_srli_epi16(in_16, 4); // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] - const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); + const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100)); // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] - const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); + const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140)); // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); - const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, - simdutf_vec(0b0100000000000000)); - const __m256i s4 = _mm256_xor_si256(s3, m0); + const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000)); + const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, + simdutf_vec(0b0100000000000000)); + const __m128i s4 = _mm_xor_si128(s3, m0); #undef simdutf_vec // 4. expand code units 16-bit => 32-bit - const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); - const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); + const __m128i out0 = _mm_unpacklo_epi16(t2, s4); + const __m128i out1 = _mm_unpackhi_epi16(t2, s4); // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint32_t mask = (one_byte_bitmask & 0x55555555) | - (one_or_two_bytes_bitmask & 0xaaaaaaaa); - // Due to the wider registers, the following path is less likely to be - // useful. - /*if(mask == 0) { + const uint16_t mask = + (one_byte_bitmask & 0x5555) | (one_or_two_bytes_bitmask & 0xaaaa); + if (mask == 0) { // We only have three-byte code units. Use fast path. - const __m256i shuffle = - _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, - 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = - _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = - _mm256_shuffle_epi8(out1, shuffle); - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); + const __m128i shuffle = _mm_setr_epi8(2, 3, 1, 6, 7, 5, 10, 11, 9, 14, + 15, 13, -1, -1, -1, -1); + const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle); + const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle); + _mm_storeu_si128((__m128i *)utf8_output, utf8_0); utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); + _mm_storeu_si128((__m128i *)utf8_output, utf8_1); utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; + buf += 8; continue; - }*/ + } const uint8_t mask0 = uint8_t(mask); + const uint8_t *row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); - const __m128i utf8_0 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); + const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0); const uint8_t mask1 = static_cast(mask >> 8); + const uint8_t *row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); - const __m128i utf8_1 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); - - const uint8_t mask2 = static_cast(mask >> 16); - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; - const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); - const __m128i utf8_2 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); - - const uint8_t mask3 = static_cast(mask >> 24); - const uint8_t *row3 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; - const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); - const __m128i utf8_3 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); + const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1); _mm_storeu_si128((__m128i *)utf8_output, utf8_0); utf8_output += row0[0]; _mm_storeu_si128((__m128i *)utf8_output, utf8_1); utf8_output += row1[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_2); - utf8_output += row2[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_3); - utf8_output += row3[0]; - buf += 16; + + buf += 8; } else { - // case: at least one 32-bit word is larger than 0xFFFF <=> it will - // produce four UTF-8 bytes. Let us do a scalar fallback. It may seem - // wasteful to use scalar code, but being efficient with SIMD may require - // large, non-trivial tables? + // case: at least one 32-bit word produce a surrogate pair in UTF-16 <=> + // will produce four UTF-8 bytes Let us do a scalar fallback. It may seem + // wasteful to use scalar code, but being efficient with SIMD in the + // presence of surrogate pairs may require non-trivial tables. size_t forward = 15; size_t k = 0; if (size_t(end - buf) < forward + 1) { @@ -27728,12 +40402,12 @@ avx2_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, } for (; k < forward; k++) { uint32_t word = buf[k]; - if ((word & 0xFFFFFF80) == 0) { // 1-byte (ASCII) + if ((word & 0xFFFFFF80) == 0) { *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { // 2-byte + } else if ((word & 0xFFFFF800) == 0) { *utf8_output++ = char((word >> 6) | 0b11000000); *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { // 3-byte + } else if ((word & 0xFFFF0000) == 0) { if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair( result(error_code::SURROGATE, buf - start + k), utf8_output); @@ -27741,7 +40415,7 @@ avx2_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, *utf8_output++ = char((word >> 12) | 0b11100000); *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { // 4-byte + } else { if (word > 0x10FFFF) { return std::make_pair( result(error_code::TOO_LARGE, buf - start + k), utf8_output); @@ -27755,48 +40429,46 @@ avx2_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, buf += k; } } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); } -/* end file src/haswell/avx2_convert_utf32_to_utf8.cpp */ -/* begin file src/haswell/avx2_convert_utf32_to_utf16.cpp */ +/* end file src/westmere/sse_convert_utf32_to_utf8.cpp */ +/* begin file src/westmere/sse_convert_utf32_to_utf16.cpp */ template std::pair -avx2_convert_utf32_to_utf16(const char32_t *buf, size_t len, - char16_t *utf16_output) { - const char32_t *end = buf + len; - - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - __m256i forbidden_bytemask = _mm256_setzero_si256(); +sse_convert_utf32_to_utf16(const char32_t *buf, size_t len, + char16_t *utf16_output) { - while (end - buf >= std::ptrdiff_t(8 + safety_margin)) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); + const char32_t *end = buf + len; - const __m256i v_00000000 = _mm256_setzero_si256(); - const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000); + const __m128i v_0000 = _mm_setzero_si128(); + const __m128i v_ffff0000 = _mm_set1_epi32((int32_t)0xffff0000); + __m128i forbidden_bytemask = _mm_setzero_si128(); - // no bits set above 16th bit <=> can pack to UTF16 without surrogate pairs - const __m256i saturation_bytemask = - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000); + while (end - buf >= 8) { + __m128i in = _mm_loadu_si128((__m128i *)buf); + __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); + const __m128i saturation_bytemask = _mm_cmpeq_epi32( + _mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000); const uint32_t saturation_bitmask = - static_cast(_mm256_movemask_epi8(saturation_bytemask)); + static_cast(_mm_movemask_epi8(saturation_bytemask)); - if (saturation_bitmask == 0xffffffff) { - const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800); - const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800); - forbidden_bytemask = _mm256_or_si256( + // Check if no bits set above 16th + if (saturation_bitmask == 0xffff) { + // Pack UTF-32 to UTF-16 + __m128i utf16_packed = _mm_packus_epi32(in, nextin); + + const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800); + const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800); + forbidden_bytemask = _mm_or_si128( forbidden_bytemask, - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800)); + _mm_cmpeq_epi16(_mm_and_si128(utf16_packed, v_f800), v_d800)); - __m128i utf16_packed = _mm_packus_epi32(_mm256_castsi256_si128(in), - _mm256_extractf128_si256(in, 1)); if (big_endian) { const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); utf16_packed = _mm_shuffle_epi8(utf16_packed, swap); } + _mm_storeu_si128((__m128i *)utf16_output, utf16_packed); utf16_output += 8; buf += 8; @@ -27840,7 +40512,7 @@ avx2_convert_utf32_to_utf16(const char32_t *buf, size_t len, } // check for invalid input - if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != 0) { + if (static_cast(_mm_movemask_epi8(forbidden_bytemask)) != 0) { return std::make_pair(nullptr, utf16_output); } @@ -27849,45 +40521,42 @@ avx2_convert_utf32_to_utf16(const char32_t *buf, size_t len, template std::pair -avx2_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, - char16_t *utf16_output) { +sse_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, + char16_t *utf16_output) { const char32_t *start = buf; const char32_t *end = buf + len; - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (end - buf >= std::ptrdiff_t(8 + safety_margin)) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); - - const __m256i v_00000000 = _mm256_setzero_si256(); - const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000); + const __m128i v_0000 = _mm_setzero_si128(); + const __m128i v_ffff0000 = _mm_set1_epi32((int32_t)0xffff0000); - // no bits set above 16th bit <=> can pack to UTF16 without surrogate pairs - const __m256i saturation_bytemask = - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000); + while (end - buf >= 8) { + __m128i in = _mm_loadu_si128((__m128i *)buf); + __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); + const __m128i saturation_bytemask = _mm_cmpeq_epi32( + _mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000); const uint32_t saturation_bitmask = - static_cast(_mm256_movemask_epi8(saturation_bytemask)); + static_cast(_mm_movemask_epi8(saturation_bytemask)); - if (saturation_bitmask == 0xffffffff) { - const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800); - const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800); - const __m256i forbidden_bytemask = - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800); - if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != - 0x0) { + // Check if no bits set above 16th + if (saturation_bitmask == 0xffff) { + // Pack UTF-32 to UTF-16 + __m128i utf16_packed = _mm_packus_epi32(in, nextin); + + const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800); + const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800); + const __m128i forbidden_bytemask = + _mm_cmpeq_epi16(_mm_and_si128(utf16_packed, v_f800), v_d800); + if (static_cast(_mm_movemask_epi8(forbidden_bytemask)) != 0) { return std::make_pair(result(error_code::SURROGATE, buf - start), utf16_output); } - __m128i utf16_packed = _mm_packus_epi32(_mm256_castsi256_si128(in), - _mm256_extractf128_si256(in, 1)); if (big_endian) { const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); utf16_packed = _mm_shuffle_epi8(utf16_packed, swap); } + _mm_storeu_si128((__m128i *)utf16_output, utf16_packed); utf16_output += 8; buf += 8; @@ -27934,72 +40603,8 @@ avx2_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output); } -/* end file src/haswell/avx2_convert_utf32_to_utf16.cpp */ - -/* begin file src/haswell/avx2_convert_utf8_to_latin1.cpp */ -// depends on "tables/utf8_to_utf16_tables.h" - -// Convert up to 12 bytes from utf8 to latin1 using a mask indicating the -// end of the code points. Only the least significant 12 bits of the mask -// are accessed. -// It returns how many bytes were consumed (up to 12). -size_t convert_masked_utf8_to_latin1(const char *input, - uint64_t utf8_end_of_code_point_mask, - char *&latin1_output) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - // - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. - // - const __m128i in = _mm_loadu_si128((__m128i *)input); - - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & - 0xfff; // we are only processing 12 bytes in case it is not all ASCII - - if (utf8_end_of_code_point_mask == 0xfff) { - // We process the data in chunks of 12 bytes. - _mm_storeu_si128(reinterpret_cast<__m128i *>(latin1_output), in); - latin1_output += 12; // We wrote 12 characters. - return 12; // We consumed 1 bytes. - } - /// We do not have a fast path available, so we fallback. - const uint8_t idx = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; - const uint8_t consumed = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; - // this indicates an invalid input: - if (idx >= 64) { - return consumed; - } - // Here we should have (idx < 64), if not, there is a bug in the validation or - // elsewhere. SIX (6) input code-code units this is a relatively easy scenario - // we process SIX (6) input code-code units. The max length in bytes of six - // code code units spanning between 1 and 2 bytes each is 12 bytes. On - // processors where pdep/pext is fast, we might be able to use a small lookup - // table. - const __m128i sh = - _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); - __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); - const __m128i latin1_packed = _mm_packus_epi16(composed, composed); - // writing 8 bytes even though we only care about the first 6 bytes. - // performance note: it would be faster to use _mm_storeu_si128, we should - // investigate. - _mm_storel_epi64((__m128i *)latin1_output, latin1_packed); - latin1_output += 6; // We wrote 6 bytes. - return consumed; -} -/* end file src/haswell/avx2_convert_utf8_to_latin1.cpp */ - -/* begin file src/haswell/avx2_base64.cpp */ +/* end file src/westmere/sse_convert_utf32_to_utf16.cpp */ +/* begin file src/westmere/sse_base64.cpp */ /** * References and further reading: * @@ -28027,151 +40632,155 @@ size_t convert_masked_utf8_to_latin1(const char *input, * Nick Kopp. 2013. Base64 Encoding on a GPU. * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). */ - -template -simdutf_really_inline __m256i lookup_pshufb_improved(const __m256i input) { +template __m128i lookup_pshufb_improved(const __m128i input) { // credit: Wojciech Muła - __m256i result = _mm256_subs_epu8(input, _mm256_set1_epi8(51)); - const __m256i less = _mm256_cmpgt_epi8(_mm256_set1_epi8(26), input); - result = - _mm256_or_si256(result, _mm256_and_si256(less, _mm256_set1_epi8(13))); - __m256i shift_LUT; - if (base64_url) { - shift_LUT = _mm256_setr_epi8( - 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, - '0' - 52, '0' - 52, '0' - 52, '0' - 52, '-' - 62, '_' - 63, 'A', 0, 0, + // reduce 0..51 -> 0 + // 52..61 -> 1 .. 10 + // 62 -> 11 + // 63 -> 12 + __m128i result = _mm_subs_epu8(input, _mm_set1_epi8(51)); - 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, - '0' - 52, '0' - 52, '0' - 52, '0' - 52, '-' - 62, '_' - 63, 'A', 0, 0); - } else { - shift_LUT = _mm256_setr_epi8( - 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, - '0' - 52, '0' - 52, '0' - 52, '0' - 52, '+' - 62, '/' - 63, 'A', 0, 0, + // distinguish between ranges 0..25 and 26..51: + // 0 .. 25 -> remains 0 + // 26 .. 51 -> becomes 13 + const __m128i less = _mm_cmpgt_epi8(_mm_set1_epi8(26), input); + result = _mm_or_si128(result, _mm_and_si128(less, _mm_set1_epi8(13))); - 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, - '0' - 52, '0' - 52, '0' - 52, '0' - 52, '+' - 62, '/' - 63, 'A', 0, 0); + __m128i shift_LUT; + if (base64_url) { + shift_LUT = _mm_setr_epi8('a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '-' - 62, '_' - 63, 'A', 0, 0); + } else { + shift_LUT = _mm_setr_epi8('a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, + '0' - 52, '+' - 62, '/' - 63, 'A', 0, 0); } - result = _mm256_shuffle_epi8(shift_LUT, result); - return _mm256_add_epi8(result, input); + // read shift + result = _mm_shuffle_epi8(shift_LUT, result); + + return _mm_add_epi8(result, input); } template size_t encode_base64(char *dst, const char *src, size_t srclen, base64_options options) { // credit: Wojciech Muła + // SSE (lookup: pshufb improved unrolled) const uint8_t *input = (const uint8_t *)src; uint8_t *out = (uint8_t *)dst; - const __m256i shuf = - _mm256_set_epi8(10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1, + const __m128i shuf = + _mm_set_epi8(10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1); - 10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1); size_t i = 0; - for (; i + 100 <= srclen; i += 96) { - const __m128i lo0 = _mm_loadu_si128( + for (; i + 52 <= srclen; i += 48) { + __m128i in0 = _mm_loadu_si128( reinterpret_cast(input + i + 4 * 3 * 0)); - const __m128i hi0 = _mm_loadu_si128( + __m128i in1 = _mm_loadu_si128( reinterpret_cast(input + i + 4 * 3 * 1)); - const __m128i lo1 = _mm_loadu_si128( + __m128i in2 = _mm_loadu_si128( reinterpret_cast(input + i + 4 * 3 * 2)); - const __m128i hi1 = _mm_loadu_si128( + __m128i in3 = _mm_loadu_si128( reinterpret_cast(input + i + 4 * 3 * 3)); - const __m128i lo2 = _mm_loadu_si128( - reinterpret_cast(input + i + 4 * 3 * 4)); - const __m128i hi2 = _mm_loadu_si128( - reinterpret_cast(input + i + 4 * 3 * 5)); - const __m128i lo3 = _mm_loadu_si128( - reinterpret_cast(input + i + 4 * 3 * 6)); - const __m128i hi3 = _mm_loadu_si128( - reinterpret_cast(input + i + 4 * 3 * 7)); - __m256i in0 = _mm256_shuffle_epi8(_mm256_set_m128i(hi0, lo0), shuf); - __m256i in1 = _mm256_shuffle_epi8(_mm256_set_m128i(hi1, lo1), shuf); - __m256i in2 = _mm256_shuffle_epi8(_mm256_set_m128i(hi2, lo2), shuf); - __m256i in3 = _mm256_shuffle_epi8(_mm256_set_m128i(hi3, lo3), shuf); + in0 = _mm_shuffle_epi8(in0, shuf); + in1 = _mm_shuffle_epi8(in1, shuf); + in2 = _mm_shuffle_epi8(in2, shuf); + in3 = _mm_shuffle_epi8(in3, shuf); - const __m256i t0_0 = _mm256_and_si256(in0, _mm256_set1_epi32(0x0fc0fc00)); - const __m256i t0_1 = _mm256_and_si256(in1, _mm256_set1_epi32(0x0fc0fc00)); - const __m256i t0_2 = _mm256_and_si256(in2, _mm256_set1_epi32(0x0fc0fc00)); - const __m256i t0_3 = _mm256_and_si256(in3, _mm256_set1_epi32(0x0fc0fc00)); + const __m128i t0_0 = _mm_and_si128(in0, _mm_set1_epi32(0x0fc0fc00)); + const __m128i t0_1 = _mm_and_si128(in1, _mm_set1_epi32(0x0fc0fc00)); + const __m128i t0_2 = _mm_and_si128(in2, _mm_set1_epi32(0x0fc0fc00)); + const __m128i t0_3 = _mm_and_si128(in3, _mm_set1_epi32(0x0fc0fc00)); - const __m256i t1_0 = - _mm256_mulhi_epu16(t0_0, _mm256_set1_epi32(0x04000040)); - const __m256i t1_1 = - _mm256_mulhi_epu16(t0_1, _mm256_set1_epi32(0x04000040)); - const __m256i t1_2 = - _mm256_mulhi_epu16(t0_2, _mm256_set1_epi32(0x04000040)); - const __m256i t1_3 = - _mm256_mulhi_epu16(t0_3, _mm256_set1_epi32(0x04000040)); + const __m128i t1_0 = _mm_mulhi_epu16(t0_0, _mm_set1_epi32(0x04000040)); + const __m128i t1_1 = _mm_mulhi_epu16(t0_1, _mm_set1_epi32(0x04000040)); + const __m128i t1_2 = _mm_mulhi_epu16(t0_2, _mm_set1_epi32(0x04000040)); + const __m128i t1_3 = _mm_mulhi_epu16(t0_3, _mm_set1_epi32(0x04000040)); - const __m256i t2_0 = _mm256_and_si256(in0, _mm256_set1_epi32(0x003f03f0)); - const __m256i t2_1 = _mm256_and_si256(in1, _mm256_set1_epi32(0x003f03f0)); - const __m256i t2_2 = _mm256_and_si256(in2, _mm256_set1_epi32(0x003f03f0)); - const __m256i t2_3 = _mm256_and_si256(in3, _mm256_set1_epi32(0x003f03f0)); + const __m128i t2_0 = _mm_and_si128(in0, _mm_set1_epi32(0x003f03f0)); + const __m128i t2_1 = _mm_and_si128(in1, _mm_set1_epi32(0x003f03f0)); + const __m128i t2_2 = _mm_and_si128(in2, _mm_set1_epi32(0x003f03f0)); + const __m128i t2_3 = _mm_and_si128(in3, _mm_set1_epi32(0x003f03f0)); - const __m256i t3_0 = - _mm256_mullo_epi16(t2_0, _mm256_set1_epi32(0x01000010)); - const __m256i t3_1 = - _mm256_mullo_epi16(t2_1, _mm256_set1_epi32(0x01000010)); - const __m256i t3_2 = - _mm256_mullo_epi16(t2_2, _mm256_set1_epi32(0x01000010)); - const __m256i t3_3 = - _mm256_mullo_epi16(t2_3, _mm256_set1_epi32(0x01000010)); + const __m128i t3_0 = _mm_mullo_epi16(t2_0, _mm_set1_epi32(0x01000010)); + const __m128i t3_1 = _mm_mullo_epi16(t2_1, _mm_set1_epi32(0x01000010)); + const __m128i t3_2 = _mm_mullo_epi16(t2_2, _mm_set1_epi32(0x01000010)); + const __m128i t3_3 = _mm_mullo_epi16(t2_3, _mm_set1_epi32(0x01000010)); - const __m256i input0 = _mm256_or_si256(t1_0, t3_0); - const __m256i input1 = _mm256_or_si256(t1_1, t3_1); - const __m256i input2 = _mm256_or_si256(t1_2, t3_2); - const __m256i input3 = _mm256_or_si256(t1_3, t3_3); + const __m128i input0 = _mm_or_si128(t1_0, t3_0); + const __m128i input1 = _mm_or_si128(t1_1, t3_1); + const __m128i input2 = _mm_or_si128(t1_2, t3_2); + const __m128i input3 = _mm_or_si128(t1_3, t3_3); - _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), - lookup_pshufb_improved(input0)); - out += 32; + _mm_storeu_si128(reinterpret_cast<__m128i *>(out), + lookup_pshufb_improved(input0)); + out += 16; - _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), - lookup_pshufb_improved(input1)); - out += 32; + _mm_storeu_si128(reinterpret_cast<__m128i *>(out), + lookup_pshufb_improved(input1)); + out += 16; - _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), - lookup_pshufb_improved(input2)); - out += 32; - _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), - lookup_pshufb_improved(input3)); - out += 32; + _mm_storeu_si128(reinterpret_cast<__m128i *>(out), + lookup_pshufb_improved(input2)); + out += 16; + + _mm_storeu_si128(reinterpret_cast<__m128i *>(out), + lookup_pshufb_improved(input3)); + out += 16; } - for (; i + 28 <= srclen; i += 24) { - // lo = [xxxx|DDDC|CCBB|BAAA] - // hi = [xxxx|HHHG|GGFF|FEEE] - const __m128i lo = - _mm_loadu_si128(reinterpret_cast(input + i)); - const __m128i hi = - _mm_loadu_si128(reinterpret_cast(input + i + 4 * 3)); + for (; i + 16 <= srclen; i += 12) { + + __m128i in = _mm_loadu_si128(reinterpret_cast(input + i)); // bytes from groups A, B and C are needed in separate 32-bit lanes - // in = [0HHH|0GGG|0FFF|0EEE[0DDD|0CCC|0BBB|0AAA] - __m256i in = _mm256_shuffle_epi8(_mm256_set_m128i(hi, lo), shuf); + // in = [DDDD|CCCC|BBBB|AAAA] + // + // an input triplet has layout + // [????????|ccdddddd|bbbbcccc|aaaaaabb] + // byte 3 byte 2 byte 1 byte 0 -- byte 3 comes from the next + // triplet + // + // shuffling changes the order of bytes: 1, 0, 2, 1 + // [bbbbcccc|ccdddddd|aaaaaabb|bbbbcccc] + // ^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^ + // processed bits + in = _mm_shuffle_epi8(in, shuf); - // this part is well commented in encode.sse.cpp + // unpacking - const __m256i t0 = _mm256_and_si256(in, _mm256_set1_epi32(0x0fc0fc00)); - const __m256i t1 = _mm256_mulhi_epu16(t0, _mm256_set1_epi32(0x04000040)); - const __m256i t2 = _mm256_and_si256(in, _mm256_set1_epi32(0x003f03f0)); - const __m256i t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010)); - const __m256i indices = _mm256_or_si256(t1, t3); + // t0 = [0000cccc|cc000000|aaaaaa00|00000000] + const __m128i t0 = _mm_and_si128(in, _mm_set1_epi32(0x0fc0fc00)); + // t1 = [00000000|00cccccc|00000000|00aaaaaa] + // (c * (1 << 10), a * (1 << 6)) >> 16 (note: an unsigned + // multiplication) + const __m128i t1 = _mm_mulhi_epu16(t0, _mm_set1_epi32(0x04000040)); - _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), - lookup_pshufb_improved(indices)); - out += 32; + // t2 = [00000000|00dddddd|000000bb|bbbb0000] + const __m128i t2 = _mm_and_si128(in, _mm_set1_epi32(0x003f03f0)); + // t3 = [00dddddd|00000000|00bbbbbb|00000000]( + // (d * (1 << 8), b * (1 << 4)) + const __m128i t3 = _mm_mullo_epi16(t2, _mm_set1_epi32(0x01000010)); + + // res = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] = t1 | t3 + const __m128i indices = _mm_or_si128(t1, t3); + + _mm_storeu_si128(reinterpret_cast<__m128i *>(out), + lookup_pshufb_improved(indices)); + out += 16; } + return i / 3 * 4 + scalar::base64::tail_encode_base64((char *)out, src + i, srclen - i, options); } - static inline void compress(__m128i data, uint16_t mask, char *output) { if (mask == 0) { _mm_storeu_si128(reinterpret_cast<__m128i *>(output), data); return; } + // this particular implementation was inspired by work done by @animetosho // we do it in two steps, first 8 bytes and then second 8 bytes uint8_t mask1 = uint8_t(mask); // least significant 8 bits @@ -28197,196 +40806,209 @@ static inline void compress(__m128i data, uint16_t mask, char *output) { __m128i compactmask = _mm_loadu_si128(reinterpret_cast( tables::base64::pshufb_combine_table + pop1 * 8)); __m128i answer = _mm_shuffle_epi8(pruned, compactmask); - _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); } -static inline void compress(__m256i data, uint32_t mask, char *output) { - if (mask == 0) { - _mm256_storeu_si256(reinterpret_cast<__m256i *>(output), data); - return; - } - compress(_mm256_castsi256_si128(data), uint16_t(mask), output); - compress(_mm256_extracti128_si256(data, 1), uint16_t(mask >> 16), - output + _mm_popcnt_u32(~mask & 0xFFFF)); -} - struct block64 { - __m256i chunks[2]; + __m128i chunks[4]; }; template -static inline uint32_t to_base64_mask(__m256i *src, bool *error) { - const __m256i ascii_space_tbl = - _mm256_setr_epi8(0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9, 0xa, - 0x0, 0xc, 0xd, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x9, 0xa, 0x0, 0xc, 0xd, 0x0, 0x0); +static inline uint16_t to_base64_mask(__m128i *src, uint32_t *error) { + const __m128i ascii_space_tbl = + _mm_setr_epi8(0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9, 0xa, 0x0, + 0xc, 0xd, 0x0, 0x0); // credit: aqrit - __m256i delta_asso; + __m128i delta_asso; if (base64_url) { - delta_asso = - _mm256_setr_epi8(0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, 0x0, 0x0, - 0x0, 0x0, 0xF, 0x0, 0xF, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, - 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0xF, 0x0, 0xF); + delta_asso = _mm_setr_epi8(0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, 0x0, + 0x0, 0x0, 0x0, 0xF, 0x0, 0xF); } else { - delta_asso = _mm256_setr_epi8( - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x0F, 0x00, 0x0F, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F); - } - __m256i delta_values; + delta_asso = _mm_setr_epi8(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F); + } + __m128i delta_values; if (base64_url) { - delta_values = _mm256_setr_epi8( - 0x0, 0x0, 0x0, 0x13, 0x4, uint8_t(0xBF), uint8_t(0xBF), uint8_t(0xB9), - uint8_t(0xB9), 0x0, 0x11, uint8_t(0xC3), uint8_t(0xBF), uint8_t(0xE0), - uint8_t(0xB9), uint8_t(0xB9), 0x0, 0x0, 0x0, 0x13, 0x4, uint8_t(0xBF), - uint8_t(0xBF), uint8_t(0xB9), uint8_t(0xB9), 0x0, 0x11, uint8_t(0xC3), - uint8_t(0xBF), uint8_t(0xE0), uint8_t(0xB9), uint8_t(0xB9)); + delta_values = _mm_setr_epi8(0x0, 0x0, 0x0, 0x13, 0x4, uint8_t(0xBF), + uint8_t(0xBF), uint8_t(0xB9), uint8_t(0xB9), + 0x0, 0x11, uint8_t(0xC3), uint8_t(0xBF), + uint8_t(0xE0), uint8_t(0xB9), uint8_t(0xB9)); } else { - delta_values = _mm256_setr_epi8( - int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), int8_t(0x04), - int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9), int8_t(0x00), - int8_t(0x10), int8_t(0xC3), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), - int8_t(0xB9), int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), - int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9), - int8_t(0x00), int8_t(0x10), int8_t(0xC3), int8_t(0xBF), int8_t(0xBF), - int8_t(0xB9), int8_t(0xB9)); - } - __m256i check_asso; + delta_values = + _mm_setr_epi8(int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0x10), int8_t(0xC3), + int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9)); + } + __m128i check_asso; if (base64_url) { - check_asso = - _mm256_setr_epi8(0xD, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x3, - 0x7, 0xB, 0xE, 0xB, 0x6, 0xD, 0x1, 0x1, 0x1, 0x1, 0x1, - 0x1, 0x1, 0x1, 0x1, 0x3, 0x7, 0xB, 0xE, 0xB, 0x6); + check_asso = _mm_setr_epi8(0xD, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, + 0x3, 0x7, 0xB, 0xE, 0xB, 0x6); } else { - check_asso = _mm256_setr_epi8( - 0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, - 0x0B, 0x0B, 0x0B, 0x0F, 0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F); + check_asso = _mm_setr_epi8(0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F); } - __m256i check_values; + __m128i check_values; if (base64_url) { - check_values = _mm256_setr_epi8( - uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), - uint8_t(0xCF), uint8_t(0xBF), uint8_t(0xB6), uint8_t(0xA6), - uint8_t(0xB5), uint8_t(0xA1), 0x0, uint8_t(0x80), 0x0, uint8_t(0x80), - 0x0, uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), - uint8_t(0x80), uint8_t(0xCF), uint8_t(0xBF), uint8_t(0xB6), - uint8_t(0xA6), uint8_t(0xB5), uint8_t(0xA1), 0x0, uint8_t(0x80), 0x0, - uint8_t(0x80), 0x0, uint8_t(0x80)); + check_values = _mm_setr_epi8(uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), + uint8_t(0x80), uint8_t(0xCF), uint8_t(0xBF), + uint8_t(0xB6), uint8_t(0xA6), uint8_t(0xB5), + uint8_t(0xA1), 0x0, uint8_t(0x80), 0x0, + uint8_t(0x80), 0x0, uint8_t(0x80)); } else { - check_values = _mm256_setr_epi8( - int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0xCF), - int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), int8_t(0xB5), int8_t(0x86), - int8_t(0xD1), int8_t(0x80), int8_t(0xB1), int8_t(0x80), int8_t(0x91), - int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), - int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), int8_t(0xB5), - int8_t(0x86), int8_t(0xD1), int8_t(0x80), int8_t(0xB1), int8_t(0x80), - int8_t(0x91), int8_t(0x80)); + + check_values = + _mm_setr_epi8(int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), + int8_t(0xB5), int8_t(0x86), int8_t(0xD1), int8_t(0x80), + int8_t(0xB1), int8_t(0x80), int8_t(0x91), int8_t(0x80)); } - const __m256i shifted = _mm256_srli_epi32(*src, 3); - const __m256i delta_hash = - _mm256_avg_epu8(_mm256_shuffle_epi8(delta_asso, *src), shifted); - const __m256i check_hash = - _mm256_avg_epu8(_mm256_shuffle_epi8(check_asso, *src), shifted); - const __m256i out = - _mm256_adds_epi8(_mm256_shuffle_epi8(delta_values, delta_hash), *src); - const __m256i chk = - _mm256_adds_epi8(_mm256_shuffle_epi8(check_values, check_hash), *src); - const int mask = _mm256_movemask_epi8(chk); + const __m128i shifted = _mm_srli_epi32(*src, 3); + + const __m128i delta_hash = + _mm_avg_epu8(_mm_shuffle_epi8(delta_asso, *src), shifted); + const __m128i check_hash = + _mm_avg_epu8(_mm_shuffle_epi8(check_asso, *src), shifted); + + const __m128i out = + _mm_adds_epi8(_mm_shuffle_epi8(delta_values, delta_hash), *src); + const __m128i chk = + _mm_adds_epi8(_mm_shuffle_epi8(check_values, check_hash), *src); + const int mask = _mm_movemask_epi8(chk); if (mask) { - __m256i ascii_space = - _mm256_cmpeq_epi8(_mm256_shuffle_epi8(ascii_space_tbl, *src), *src); - *error |= (mask != _mm256_movemask_epi8(ascii_space)); + __m128i ascii_space = + _mm_cmpeq_epi8(_mm_shuffle_epi8(ascii_space_tbl, *src), *src); + *error = (mask ^ _mm_movemask_epi8(ascii_space)); } *src = out; - return (uint32_t)mask; + return (uint16_t)mask; } template -static inline uint64_t to_base64_mask(block64 *b, bool *error) { - *error = 0; - uint64_t m0 = to_base64_mask(&b->chunks[0], error); - uint64_t m1 = to_base64_mask(&b->chunks[1], error); - return m0 | (m1 << 32); +static inline uint64_t to_base64_mask(block64 *b, uint64_t *error) { + uint32_t err0 = 0; + uint32_t err1 = 0; + uint32_t err2 = 0; + uint32_t err3 = 0; + uint64_t m0 = to_base64_mask(&b->chunks[0], &err0); + uint64_t m1 = to_base64_mask(&b->chunks[1], &err1); + uint64_t m2 = to_base64_mask(&b->chunks[2], &err2); + uint64_t m3 = to_base64_mask(&b->chunks[3], &err3); + *error = (err0) | ((uint64_t)err1 << 16) | ((uint64_t)err2 << 32) | + ((uint64_t)err3 << 48); + return m0 | (m1 << 16) | (m2 << 32) | (m3 << 48); +} + +#if defined(_MSC_VER) && !defined(__clang__) +static inline size_t simdutf_tzcnt_u64(uint64_t num) { + unsigned long ret; + if (num == 0) { + return 64; + } + _BitScanForward64(&ret, num); + return ret; } +#else // GCC or Clang +static inline size_t simdutf_tzcnt_u64(uint64_t num) { + return num ? __builtin_ctzll(num) : 64; +} +#endif static inline void copy_block(block64 *b, char *output) { - _mm256_storeu_si256(reinterpret_cast<__m256i *>(output), b->chunks[0]); - _mm256_storeu_si256(reinterpret_cast<__m256i *>(output + 32), b->chunks[1]); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), b->chunks[0]); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output + 16), b->chunks[1]); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output + 32), b->chunks[2]); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output + 48), b->chunks[3]); } static inline uint64_t compress_block(block64 *b, uint64_t mask, char *output) { uint64_t nmask = ~mask; - compress(b->chunks[0], uint32_t(mask), output); - compress(b->chunks[1], uint32_t(mask >> 32), + compress(b->chunks[0], uint16_t(mask), output); + compress(b->chunks[1], uint16_t(mask >> 16), + output + _mm_popcnt_u64(nmask & 0xFFFF)); + compress(b->chunks[2], uint16_t(mask >> 32), output + _mm_popcnt_u64(nmask & 0xFFFFFFFF)); + compress(b->chunks[3], uint16_t(mask >> 48), + output + _mm_popcnt_u64(nmask & 0xFFFFFFFFFFFFULL)); return _mm_popcnt_u64(nmask); } // The caller of this function is responsible to ensure that there are 64 bytes // available from reading at src. The data is read into a block64 structure. static inline void load_block(block64 *b, const char *src) { - b->chunks[0] = _mm256_loadu_si256(reinterpret_cast(src)); - b->chunks[1] = - _mm256_loadu_si256(reinterpret_cast(src + 32)); + b->chunks[0] = _mm_loadu_si128(reinterpret_cast(src)); + b->chunks[1] = _mm_loadu_si128(reinterpret_cast(src + 16)); + b->chunks[2] = _mm_loadu_si128(reinterpret_cast(src + 32)); + b->chunks[3] = _mm_loadu_si128(reinterpret_cast(src + 48)); } // The caller of this function is responsible to ensure that there are 128 bytes // available from reading at src. The data is read into a block64 structure. static inline void load_block(block64 *b, const char16_t *src) { - __m256i m1 = _mm256_loadu_si256(reinterpret_cast(src)); - __m256i m2 = _mm256_loadu_si256(reinterpret_cast(src + 16)); - __m256i m3 = _mm256_loadu_si256(reinterpret_cast(src + 32)); - __m256i m4 = _mm256_loadu_si256(reinterpret_cast(src + 48)); - __m256i m1p = _mm256_permute2x128_si256(m1, m2, 0x20); - __m256i m2p = _mm256_permute2x128_si256(m1, m2, 0x31); - __m256i m3p = _mm256_permute2x128_si256(m3, m4, 0x20); - __m256i m4p = _mm256_permute2x128_si256(m3, m4, 0x31); - b->chunks[0] = _mm256_packus_epi16(m1p, m2p); - b->chunks[1] = _mm256_packus_epi16(m3p, m4p); + __m128i m1 = _mm_loadu_si128(reinterpret_cast(src)); + __m128i m2 = _mm_loadu_si128(reinterpret_cast(src + 8)); + __m128i m3 = _mm_loadu_si128(reinterpret_cast(src + 16)); + __m128i m4 = _mm_loadu_si128(reinterpret_cast(src + 24)); + __m128i m5 = _mm_loadu_si128(reinterpret_cast(src + 32)); + __m128i m6 = _mm_loadu_si128(reinterpret_cast(src + 40)); + __m128i m7 = _mm_loadu_si128(reinterpret_cast(src + 48)); + __m128i m8 = _mm_loadu_si128(reinterpret_cast(src + 56)); + b->chunks[0] = _mm_packus_epi16(m1, m2); + b->chunks[1] = _mm_packus_epi16(m3, m4); + b->chunks[2] = _mm_packus_epi16(m5, m6); + b->chunks[3] = _mm_packus_epi16(m7, m8); } -static inline void base64_decode(char *out, __m256i str) { +static inline void base64_decode(char *out, __m128i str) { // credit: aqrit - const __m256i pack_shuffle = - _mm256_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1, - 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1); - const __m256i t0 = _mm256_maddubs_epi16(str, _mm256_set1_epi32(0x01400140)); - const __m256i t1 = _mm256_madd_epi16(t0, _mm256_set1_epi32(0x00011000)); - const __m256i t2 = _mm256_shuffle_epi8(t1, pack_shuffle); + const __m128i pack_shuffle = + _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1); + + const __m128i t0 = _mm_maddubs_epi16(str, _mm_set1_epi32(0x01400140)); + const __m128i t1 = _mm_madd_epi16(t0, _mm_set1_epi32(0x00011000)); + const __m128i t2 = _mm_shuffle_epi8(t1, pack_shuffle); // Store the output: - _mm_storeu_si128((__m128i *)out, _mm256_castsi256_si128(t2)); - _mm_storeu_si128((__m128i *)(out + 12), _mm256_extracti128_si256(t2, 1)); + // this writes 16 bytes, but we only need 12. + _mm_storeu_si128((__m128i *)out, t2); } // decode 64 bytes and output 48 bytes static inline void base64_decode_block(char *out, const char *src) { - base64_decode(out, - _mm256_loadu_si256(reinterpret_cast(src))); - base64_decode(out + 24, _mm256_loadu_si256( - reinterpret_cast(src + 32))); + base64_decode(out, _mm_loadu_si128(reinterpret_cast(src))); + base64_decode(out + 12, + _mm_loadu_si128(reinterpret_cast(src + 16))); + base64_decode(out + 24, + _mm_loadu_si128(reinterpret_cast(src + 32))); + base64_decode(out + 36, + _mm_loadu_si128(reinterpret_cast(src + 48))); } static inline void base64_decode_block_safe(char *out, const char *src) { - base64_decode(out, - _mm256_loadu_si256(reinterpret_cast(src))); - char buffer[32]; // We enforce safety with a buffer. - base64_decode( - buffer, _mm256_loadu_si256(reinterpret_cast(src + 32))); - std::memcpy(out + 24, buffer, 24); + base64_decode(out, _mm_loadu_si128(reinterpret_cast(src))); + base64_decode(out + 12, + _mm_loadu_si128(reinterpret_cast(src + 16))); + base64_decode(out + 24, + _mm_loadu_si128(reinterpret_cast(src + 32))); + char buffer[16]; + base64_decode(buffer, + _mm_loadu_si128(reinterpret_cast(src + 48))); + std::memcpy(out + 36, buffer, 12); } static inline void base64_decode_block(char *out, block64 *b) { base64_decode(out, b->chunks[0]); - base64_decode(out + 24, b->chunks[1]); + base64_decode(out + 12, b->chunks[1]); + base64_decode(out + 24, b->chunks[2]); + base64_decode(out + 36, b->chunks[3]); } static inline void base64_decode_block_safe(char *out, block64 *b) { base64_decode(out, b->chunks[0]); - char buffer[32]; // We enforce safety with a buffer. - base64_decode(buffer, b->chunks[1]); - std::memcpy(out + 24, buffer, 24); + base64_decode(out + 12, b->chunks[1]); + base64_decode(out + 24, b->chunks[2]); + char buffer[16]; + base64_decode(buffer, b->chunks[3]); + std::memcpy(out + 36, buffer, 12); } template @@ -28433,7 +41055,7 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, const chartype *const srcend = src + srclen; constexpr size_t block_size = 6; - static_assert(block_size >= 2, "block_size must be at least two"); + static_assert(block_size >= 2, "block should of size 2 or more"); char buffer[block_size * 64]; char *bufferptr = buffer; if (srclen >= 64) { @@ -28442,16 +41064,13 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, block64 b; load_block(&b, src); src += 64; - bool error = false; + uint64_t error = 0; uint64_t badcharmask = to_base64_mask(&b, &error); if (error) { src -= 64; - while (src < srcend && scalar::base64::is_eight_byte(*src) && - to_base64[uint8_t(*src)] <= 64) { - src++; - } - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; + size_t error_offset = simdutf_tzcnt_u64(error); + return {error_code::INVALID_BASE64_CHARACTER, + size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; } if (badcharmask != 0) { // optimization opportunity: check for simple masks like those made of @@ -28492,7 +41111,6 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, // time, otherwise, we should just decode directly. int last_block = (int)((bufferptr - buffer_start) % 64); if (last_block != 0 && srcend - src + last_block >= 64) { - while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { uint8_t val = to_base64[uint8_t(*src)]; *bufferptr = char(val); @@ -28552,9 +41170,9 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, if (src < srcend + equalsigns) { full_result r = scalar::base64::base64_tail_decode( dst, src, srcend - src, equalsigns, options, last_chunk_options); + r.input_count += size_t(src - srcinit); if (r.error == error_code::INVALID_BASE64_CHARACTER || r.error == error_code::BASE64_EXTRA_BITS) { - r.input_count += size_t(src - srcinit); return r; } else { r.output_count += size_t(dst - dstinit); @@ -28578,15 +41196,15 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, } return {SUCCESS, srclen, size_t(dst - dstinit)}; } -/* end file src/haswell/avx2_base64.cpp */ +/* end file src/westmere/sse_base64.cpp */ } // unnamed namespace -} // namespace haswell +} // namespace westmere } // namespace simdutf /* begin file src/generic/buf_block_reader.h */ namespace simdutf { -namespace haswell { +namespace westmere { namespace { // Walks through a buffer in block-sized increments, loading the last part with @@ -28692,12 +41310,12 @@ simdutf_really_inline void buf_block_reader::advance() { } } // unnamed namespace -} // namespace haswell +} // namespace westmere } // namespace simdutf /* end file src/generic/buf_block_reader.h */ /* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ namespace simdutf { -namespace haswell { +namespace westmere { namespace { namespace utf8_validation { @@ -28917,12 +41535,12 @@ struct utf8_checker { using utf8_validation::utf8_checker; } // unnamed namespace -} // namespace haswell +} // namespace westmere } // namespace simdutf /* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ /* begin file src/generic/utf8_validation/utf8_validator.h */ namespace simdutf { -namespace haswell { +namespace westmere { namespace { namespace utf8_validation { @@ -29057,14 +41675,14 @@ result generic_validate_ascii_with_errors(const char *input, size_t length) { } // namespace utf8_validation } // unnamed namespace -} // namespace haswell +} // namespace westmere } // namespace simdutf /* end file src/generic/utf8_validation/utf8_validator.h */ // transcoding from UTF-8 to UTF-16 /* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ namespace simdutf { -namespace haswell { +namespace westmere { namespace { namespace utf8_to_utf16 { @@ -29135,13 +41753,13 @@ simdutf_warn_unused size_t convert_valid(const char *input, size_t size, } // namespace utf8_to_utf16 } // unnamed namespace -} // namespace haswell +} // namespace westmere } // namespace simdutf /* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ /* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ namespace simdutf { -namespace haswell { +namespace westmere { namespace { namespace utf8_to_utf16 { using namespace simd; @@ -29470,14 +42088,14 @@ struct validating_transcoder { }; // struct utf8_checker } // namespace utf8_to_utf16 } // unnamed namespace -} // namespace haswell +} // namespace westmere } // namespace simdutf /* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ // transcoding from UTF-8 to UTF-32 /* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ namespace simdutf { -namespace haswell { +namespace westmere { namespace { namespace utf8_to_utf32 { @@ -29516,13 +42134,13 @@ simdutf_warn_unused size_t convert_valid(const char *input, size_t size, } // namespace utf8_to_utf32 } // unnamed namespace -} // namespace haswell +} // namespace westmere } // namespace simdutf /* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ /* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ namespace simdutf { -namespace haswell { +namespace westmere { namespace { namespace utf8_to_utf32 { using namespace simd; @@ -29837,14 +42455,14 @@ struct validating_transcoder { }; // struct utf8_checker } // namespace utf8_to_utf32 } // unnamed namespace -} // namespace haswell +} // namespace westmere } // namespace simdutf /* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ // other functions /* begin file src/generic/utf8.h */ namespace simdutf { -namespace haswell { +namespace westmere { namespace { namespace utf8 { @@ -29879,12 +42497,12 @@ simdutf_really_inline size_t utf16_length_from_utf8(const char *in, } } // namespace utf8 } // unnamed namespace -} // namespace haswell +} // namespace westmere } // namespace simdutf /* end file src/generic/utf8.h */ /* begin file src/generic/utf16.h */ namespace simdutf { -namespace haswell { +namespace westmere { namespace { namespace utf16 { @@ -29954,15 +42572,14 @@ change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { } // namespace utf16 } // unnamed namespace -} // namespace haswell +} // namespace westmere } // namespace simdutf /* end file src/generic/utf16.h */ - // transcoding from UTF-8 to Latin 1 /* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ namespace simdutf { -namespace haswell { +namespace westmere { namespace { namespace utf8_to_latin1 { using namespace simd; @@ -30272,13 +42889,13 @@ struct validating_transcoder { }; // struct utf8_checker } // namespace utf8_to_latin1 } // unnamed namespace -} // namespace haswell +} // namespace westmere } // namespace simdutf /* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ /* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ namespace simdutf { -namespace haswell { +namespace westmere { namespace { namespace utf8_to_latin1 { using namespace simd; @@ -30352,19 +42969,24 @@ simdutf_really_inline size_t convert_valid(const char *in, size_t size, } // namespace utf8_to_latin1 } // namespace -} // namespace haswell +} // namespace westmere } // namespace simdutf // namespace simdutf /* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +// +// Implementation-specific overrides +// + namespace simdutf { -namespace haswell { +namespace westmere { simdutf_warn_unused int implementation::detect_encodings(const char *input, size_t length) const noexcept { // If there is a BOM, then we trust it. auto bom_encoding = simdutf::BOM::check_bom(input, length); + // todo: reimplement as a one-pass algorithm. if (bom_encoding != encoding_type::unspecified) { return bom_encoding; } @@ -30388,22 +43010,23 @@ implementation::detect_encodings(const char *input, simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return haswell::utf8_validation::generic_validate_utf8(buf, len); + return westmere::utf8_validation::generic_validate_utf8(buf, len); } simdutf_warn_unused result implementation::validate_utf8_with_errors( const char *buf, size_t len) const noexcept { - return haswell::utf8_validation::generic_validate_utf8_with_errors(buf, len); + return westmere::utf8_validation::generic_validate_utf8_with_errors(buf, len); } simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return haswell::utf8_validation::generic_validate_ascii(buf, len); + return westmere::utf8_validation::generic_validate_ascii(buf, len); } simdutf_warn_unused result implementation::validate_ascii_with_errors( const char *buf, size_t len) const noexcept { - return haswell::utf8_validation::generic_validate_ascii_with_errors(buf, len); + return westmere::utf8_validation::generic_validate_ascii_with_errors(buf, + len); } simdutf_warn_unused bool @@ -30414,7 +43037,7 @@ implementation::validate_utf16le(const char16_t *buf, // handling nullptr return true; } - const char16_t *tail = avx2_validate_utf16(buf, len); + const char16_t *tail = sse_validate_utf16(buf, len); if (tail) { return scalar::utf16::validate(tail, len - (tail - buf)); @@ -30431,7 +43054,7 @@ implementation::validate_utf16be(const char16_t *buf, // handling nullptr return true; } - const char16_t *tail = avx2_validate_utf16(buf, len); + const char16_t *tail = sse_validate_utf16(buf, len); if (tail) { return scalar::utf16::validate(tail, len - (tail - buf)); } else { @@ -30441,7 +43064,7 @@ implementation::validate_utf16be(const char16_t *buf, simdutf_warn_unused result implementation::validate_utf16le_with_errors( const char16_t *buf, size_t len) const noexcept { - result res = avx2_validate_utf16_with_errors(buf, len); + result res = sse_validate_utf16_with_errors(buf, len); if (res.count != len) { result scalar_res = scalar::utf16::validate_with_errors( buf + res.count, len - res.count); @@ -30453,7 +43076,7 @@ simdutf_warn_unused result implementation::validate_utf16le_with_errors( simdutf_warn_unused result implementation::validate_utf16be_with_errors( const char16_t *buf, size_t len) const noexcept { - result res = avx2_validate_utf16_with_errors(buf, len); + result res = sse_validate_utf16_with_errors(buf, len); if (res.count != len) { result scalar_res = scalar::utf16::validate_with_errors( buf + res.count, len - res.count); @@ -30470,7 +43093,7 @@ implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { // handling nullptr return true; } - const char32_t *tail = avx2_validate_utf32le(buf, len); + const char32_t *tail = sse_validate_utf32le(buf, len); if (tail) { return scalar::utf32::validate(tail, len - (tail - buf)); } else { @@ -30480,12 +43103,12 @@ implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { simdutf_warn_unused result implementation::validate_utf32_with_errors( const char32_t *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { + if (len == 0) { // empty input is valid UTF-32. protect the implementation from // handling nullptr return result(error_code::SUCCESS, 0); } - result res = avx2_validate_utf32le_with_errors(buf, len); + result res = sse_validate_utf32le_with_errors(buf, len); if (res.count != len) { result scalar_res = scalar::utf32::validate_with_errors(buf + res.count, len - res.count); @@ -30497,8 +43120,9 @@ simdutf_warn_unused result implementation::validate_utf32_with_errors( simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( const char *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = - avx2_convert_latin1_to_utf8(buf, len, utf8_output); + sse_convert_latin1_to_utf8(buf, len, utf8_output); size_t converted_chars = ret.second - utf8_output; if (ret.first != buf + len) { @@ -30513,7 +43137,7 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( const char *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = - avx2_convert_latin1_to_utf16(buf, len, utf16_output); + sse_convert_latin1_to_utf16(buf, len, utf16_output); if (ret.first == nullptr) { return 0; } @@ -30533,7 +43157,7 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( const char *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = - avx2_convert_latin1_to_utf16(buf, len, utf16_output); + sse_convert_latin1_to_utf16(buf, len, utf16_output); if (ret.first == nullptr) { return 0; } @@ -30553,7 +43177,7 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( const char *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = - avx2_convert_latin1_to_utf32(buf, len, utf32_output); + sse_convert_latin1_to_utf32(buf, len, utf32_output); if (ret.first == nullptr) { return 0; } @@ -30582,8 +43206,8 @@ simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( } simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( - const char *input, size_t size, char *latin1_output) const noexcept { - return utf8_to_latin1::convert_valid(input, size, latin1_output); + const char *buf, size_t len, char *latin1_output) const noexcept { + return westmere::utf8_to_latin1::convert_valid(buf, len, latin1_output); } simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( @@ -30643,12 +43267,12 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( const char16_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - haswell::avx2_convert_utf16_to_latin1(buf, len, - latin1_output); + sse_convert_utf16_to_latin1(buf, len, latin1_output); if (ret.first == nullptr) { return 0; } size_t saved_bytes = ret.second - latin1_output; + if (ret.first != buf + len) { const size_t scalar_saved_bytes = scalar::utf16_to_latin1::convert( @@ -30664,12 +43288,12 @@ simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( const char16_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - haswell::avx2_convert_utf16_to_latin1(buf, len, - latin1_output); + sse_convert_utf16_to_latin1(buf, len, latin1_output); if (ret.first == nullptr) { return 0; } size_t saved_bytes = ret.second - latin1_output; + if (ret.first != buf + len) { const size_t scalar_saved_bytes = scalar::utf16_to_latin1::convert( @@ -30686,7 +43310,7 @@ simdutf_warn_unused result implementation::convert_utf16le_to_latin1_with_errors( const char16_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - avx2_convert_utf16_to_latin1_with_errors( + sse_convert_utf16_to_latin1_with_errors( buf, len, latin1_output); if (ret.first.error) { return ret.first; @@ -30713,8 +43337,8 @@ simdutf_warn_unused result implementation::convert_utf16be_to_latin1_with_errors( const char16_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - avx2_convert_utf16_to_latin1_with_errors(buf, len, - latin1_output); + sse_convert_utf16_to_latin1_with_errors(buf, len, + latin1_output); if (ret.first.error) { return ret.first; } // Can return directly since scalar fallback already found correct @@ -30738,21 +43362,20 @@ implementation::convert_utf16be_to_latin1_with_errors( simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: implement a custom function + // optimization opportunity: we could provide an optimized function. return convert_utf16be_to_latin1(buf, len, latin1_output); } simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: implement a custom function + // optimization opportunity: we could provide an optimized function. return convert_utf16le_to_latin1(buf, len, latin1_output); } simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = - haswell::avx2_convert_utf16_to_utf8(buf, len, - utf8_output); + sse_convert_utf16_to_utf8(buf, len, utf8_output); if (ret.first == nullptr) { return 0; } @@ -30772,8 +43395,7 @@ simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = - haswell::avx2_convert_utf16_to_utf8(buf, len, - utf8_output); + sse_convert_utf16_to_utf8(buf, len, utf8_output); if (ret.first == nullptr) { return 0; } @@ -30795,7 +43417,7 @@ simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - haswell::avx2_convert_utf16_to_utf8_with_errors( + westmere::sse_convert_utf16_to_utf8_with_errors( buf, len, utf8_output); if (ret.first.error) { return ret.first; @@ -30823,7 +43445,7 @@ simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - haswell::avx2_convert_utf16_to_utf8_with_errors( + westmere::sse_convert_utf16_to_utf8_with_errors( buf, len, utf8_output); if (ret.first.error) { return ret.first; @@ -30856,34 +43478,16 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( return convert_utf16be_to_utf8(buf, len, utf8_output); } -simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - avx2_convert_utf32_to_utf8(buf, len, utf8_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf8_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( const char32_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - avx2_convert_utf32_to_latin1(buf, len, latin1_output); + sse_convert_utf32_to_latin1(buf, len, latin1_output); if (ret.first == nullptr) { return 0; } size_t saved_bytes = ret.second - latin1_output; - if (ret.first != buf + len) { + // if (ret.first != buf + len) { + if (ret.first < buf + len) { const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert( ret.first, len - (ret.first - buf), ret.second); if (scalar_saved_bytes == 0) { @@ -30899,7 +43503,8 @@ simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - avx2_convert_utf32_to_latin1_with_errors(buf, len, latin1_output); + westmere::sse_convert_utf32_to_latin1_with_errors(buf, len, + latin1_output); if (ret.first.count != len) { result scalar_res = scalar::utf32_to_latin1::convert_with_errors( buf + ret.first.count, len - ret.first.count, ret.second); @@ -30918,15 +43523,35 @@ simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( const char32_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: we could provide an optimized function. return convert_utf32_to_latin1(buf, len, latin1_output); } +simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + sse_convert_utf32_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( const char32_t *buf, size_t len, char *utf8_output) const noexcept { // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - haswell::avx2_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); + westmere::sse_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); if (ret.first.count != len) { result scalar_res = scalar::utf32_to_utf8::convert_with_errors( buf + ret.first.count, len - ret.first.count, ret.second); @@ -30946,8 +43571,7 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = - haswell::avx2_convert_utf16_to_utf32(buf, len, - utf32_output); + sse_convert_utf16_to_utf32(buf, len, utf32_output); if (ret.first == nullptr) { return 0; } @@ -30967,8 +43591,7 @@ simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = - haswell::avx2_convert_utf16_to_utf32(buf, len, - utf32_output); + sse_convert_utf16_to_utf32(buf, len, utf32_output); if (ret.first == nullptr) { return 0; } @@ -30990,7 +43613,7 @@ simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - haswell::avx2_convert_utf16_to_utf32_with_errors( + westmere::sse_convert_utf16_to_utf32_with_errors( buf, len, utf32_output); if (ret.first.error) { return ret.first; @@ -31018,7 +43641,7 @@ simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - haswell::avx2_convert_utf16_to_utf32_with_errors( + westmere::sse_convert_utf16_to_utf32_with_errors( buf, len, utf32_output); if (ret.first.error) { return ret.first; @@ -31049,7 +43672,7 @@ simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = - avx2_convert_utf32_to_utf16(buf, len, utf16_output); + sse_convert_utf32_to_utf16(buf, len, utf16_output); if (ret.first == nullptr) { return 0; } @@ -31069,7 +43692,7 @@ simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = - avx2_convert_utf32_to_utf16(buf, len, utf16_output); + sse_convert_utf32_to_utf16(buf, len, utf16_output); if (ret.first == nullptr) { return 0; } @@ -31091,7 +43714,7 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - haswell::avx2_convert_utf32_to_utf16_with_errors( + westmere::sse_convert_utf32_to_utf16_with_errors( buf, len, utf16_output); if (ret.first.count != len) { result scalar_res = @@ -31115,7 +43738,7 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - haswell::avx2_convert_utf32_to_utf16_with_errors( + westmere::sse_convert_utf32_to_utf16_with_errors( buf, len, utf16_output); if (ret.first.count != len) { result scalar_res = @@ -31200,6 +43823,76 @@ simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( return utf16::utf8_length_from_utf16(input, length); } +simdutf_warn_unused size_t +implementation::utf16_length_from_latin1(size_t length) const noexcept { + return scalar::latin1::utf16_length_from_latin1(length); +} + +simdutf_warn_unused size_t +implementation::utf32_length_from_latin1(size_t length) const noexcept { + return scalar::latin1::utf32_length_from_latin1(length); +} + +simdutf_warn_unused size_t implementation::utf8_length_from_latin1( + const char *input, size_t len) const noexcept { + const uint8_t *str = reinterpret_cast(input); + size_t answer = len / sizeof(__m128i) * sizeof(__m128i); + size_t i = 0; + if (answer >= 2048) { // long strings optimization + __m128i two_64bits = _mm_setzero_si128(); + while (i + sizeof(__m128i) <= len) { + __m128i runner = _mm_setzero_si128(); + size_t iterations = (len - i) / sizeof(__m128i); + if (iterations > 255) { + iterations = 255; + } + size_t max_i = i + iterations * sizeof(__m128i) - sizeof(__m128i); + for (; i + 4 * sizeof(__m128i) <= max_i; i += 4 * sizeof(__m128i)) { + __m128i input1 = _mm_loadu_si128((const __m128i *)(str + i)); + __m128i input2 = + _mm_loadu_si128((const __m128i *)(str + i + sizeof(__m128i))); + __m128i input3 = + _mm_loadu_si128((const __m128i *)(str + i + 2 * sizeof(__m128i))); + __m128i input4 = + _mm_loadu_si128((const __m128i *)(str + i + 3 * sizeof(__m128i))); + __m128i input12 = + _mm_add_epi8(_mm_cmpgt_epi8(_mm_setzero_si128(), input1), + _mm_cmpgt_epi8(_mm_setzero_si128(), input2)); + __m128i input34 = + _mm_add_epi8(_mm_cmpgt_epi8(_mm_setzero_si128(), input3), + _mm_cmpgt_epi8(_mm_setzero_si128(), input4)); + __m128i input1234 = _mm_add_epi8(input12, input34); + runner = _mm_sub_epi8(runner, input1234); + } + for (; i <= max_i; i += sizeof(__m128i)) { + __m128i more_input = _mm_loadu_si128((const __m128i *)(str + i)); + runner = _mm_sub_epi8(runner, + _mm_cmpgt_epi8(_mm_setzero_si128(), more_input)); + } + two_64bits = + _mm_add_epi64(two_64bits, _mm_sad_epu8(runner, _mm_setzero_si128())); + } + answer += + _mm_extract_epi64(two_64bits, 0) + _mm_extract_epi64(two_64bits, 1); + } else if (answer > 0) { // short string optimization + for (; i + 2 * sizeof(__m128i) <= len; i += 2 * sizeof(__m128i)) { + __m128i latin = _mm_loadu_si128((const __m128i *)(input + i)); + uint16_t non_ascii = (uint16_t)_mm_movemask_epi8(latin); + answer += count_ones(non_ascii); + latin = _mm_loadu_si128((const __m128i *)(input + i) + 1); + non_ascii = (uint16_t)_mm_movemask_epi8(latin); + answer += count_ones(non_ascii); + } + for (; i + sizeof(__m128i) <= len; i += sizeof(__m128i)) { + __m128i latin = _mm_loadu_si128((const __m128i *)(input + i)); + uint16_t non_ascii = (uint16_t)_mm_movemask_epi8(latin); + answer += count_ones(non_ascii); + } + } + return answer + scalar::latin1::utf8_length_from_latin1( + reinterpret_cast(str + i), len - i); +} + simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( const char16_t *input, size_t length) const noexcept { return utf16::utf32_length_from_utf16(input, length); @@ -31210,108 +43903,42 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( return utf16::utf32_length_from_utf16(input, length); } -simdutf_warn_unused size_t -implementation::utf16_length_from_latin1(size_t length) const noexcept { - return scalar::latin1::utf16_length_from_latin1(length); -} - simdutf_warn_unused size_t implementation::utf16_length_from_utf8( const char *input, size_t length) const noexcept { return utf8::utf16_length_from_utf8(input, length); } -simdutf_warn_unused size_t -implementation::utf32_length_from_latin1(size_t length) const noexcept { - return scalar::latin1::utf32_length_from_latin1(length); -} - -simdutf_warn_unused size_t implementation::utf8_length_from_latin1( - const char *input, size_t len) const noexcept { - const uint8_t *data = reinterpret_cast(input); - size_t answer = len / sizeof(__m256i) * sizeof(__m256i); - size_t i = 0; - if (answer >= 2048) { // long strings optimization - __m256i four_64bits = _mm256_setzero_si256(); - while (i + sizeof(__m256i) <= len) { - __m256i runner = _mm256_setzero_si256(); - // We can do up to 255 loops without overflow. - size_t iterations = (len - i) / sizeof(__m256i); - if (iterations > 255) { - iterations = 255; - } - size_t max_i = i + iterations * sizeof(__m256i) - sizeof(__m256i); - for (; i + 4 * sizeof(__m256i) <= max_i; i += 4 * sizeof(__m256i)) { - __m256i input1 = _mm256_loadu_si256((const __m256i *)(data + i)); - __m256i input2 = - _mm256_loadu_si256((const __m256i *)(data + i + sizeof(__m256i))); - __m256i input3 = _mm256_loadu_si256( - (const __m256i *)(data + i + 2 * sizeof(__m256i))); - __m256i input4 = _mm256_loadu_si256( - (const __m256i *)(data + i + 3 * sizeof(__m256i))); - __m256i input12 = - _mm256_add_epi8(_mm256_cmpgt_epi8(_mm256_setzero_si256(), input1), - _mm256_cmpgt_epi8(_mm256_setzero_si256(), input2)); - __m256i input23 = - _mm256_add_epi8(_mm256_cmpgt_epi8(_mm256_setzero_si256(), input3), - _mm256_cmpgt_epi8(_mm256_setzero_si256(), input4)); - __m256i input1234 = _mm256_add_epi8(input12, input23); - runner = _mm256_sub_epi8(runner, input1234); - } - for (; i <= max_i; i += sizeof(__m256i)) { - __m256i input_256_chunk = - _mm256_loadu_si256((const __m256i *)(data + i)); - runner = _mm256_sub_epi8( - runner, _mm256_cmpgt_epi8(_mm256_setzero_si256(), input_256_chunk)); - } - four_64bits = _mm256_add_epi64( - four_64bits, _mm256_sad_epu8(runner, _mm256_setzero_si256())); - } - answer += _mm256_extract_epi64(four_64bits, 0) + - _mm256_extract_epi64(four_64bits, 1) + - _mm256_extract_epi64(four_64bits, 2) + - _mm256_extract_epi64(four_64bits, 3); - } else if (answer > 0) { - for (; i + sizeof(__m256i) <= len; i += sizeof(__m256i)) { - __m256i latin = _mm256_loadu_si256((const __m256i *)(data + i)); - uint32_t non_ascii = _mm256_movemask_epi8(latin); - answer += count_ones(non_ascii); - } - } - return answer + scalar::latin1::utf8_length_from_latin1( - reinterpret_cast(data + i), len - i); -} - simdutf_warn_unused size_t implementation::utf8_length_from_utf32( const char32_t *input, size_t length) const noexcept { - const __m256i v_00000000 = _mm256_setzero_si256(); - const __m256i v_ffffff80 = _mm256_set1_epi32((uint32_t)0xffffff80); - const __m256i v_fffff800 = _mm256_set1_epi32((uint32_t)0xfffff800); - const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); + const __m128i v_00000000 = _mm_setzero_si128(); + const __m128i v_ffffff80 = _mm_set1_epi32((uint32_t)0xffffff80); + const __m128i v_fffff800 = _mm_set1_epi32((uint32_t)0xfffff800); + const __m128i v_ffff0000 = _mm_set1_epi32((uint32_t)0xffff0000); size_t pos = 0; size_t count = 0; - for (; pos + 8 <= length; pos += 8) { - __m256i in = _mm256_loadu_si256((__m256i *)(input + pos)); - const __m256i ascii_bytes_bytemask = - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffffff80), v_00000000); - const __m256i one_two_bytes_bytemask = - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_fffff800), v_00000000); - const __m256i two_bytes_bytemask = - _mm256_xor_si256(one_two_bytes_bytemask, ascii_bytes_bytemask); - const __m256i one_two_three_bytes_bytemask = - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000); - const __m256i three_bytes_bytemask = - _mm256_xor_si256(one_two_three_bytes_bytemask, one_two_bytes_bytemask); - const uint32_t ascii_bytes_bitmask = - static_cast(_mm256_movemask_epi8(ascii_bytes_bytemask)); - const uint32_t two_bytes_bitmask = - static_cast(_mm256_movemask_epi8(two_bytes_bytemask)); - const uint32_t three_bytes_bitmask = - static_cast(_mm256_movemask_epi8(three_bytes_bytemask)); + for (; pos + 4 <= length; pos += 4) { + __m128i in = _mm_loadu_si128((__m128i *)(input + pos)); + const __m128i ascii_bytes_bytemask = + _mm_cmpeq_epi32(_mm_and_si128(in, v_ffffff80), v_00000000); + const __m128i one_two_bytes_bytemask = + _mm_cmpeq_epi32(_mm_and_si128(in, v_fffff800), v_00000000); + const __m128i two_bytes_bytemask = + _mm_xor_si128(one_two_bytes_bytemask, ascii_bytes_bytemask); + const __m128i one_two_three_bytes_bytemask = + _mm_cmpeq_epi32(_mm_and_si128(in, v_ffff0000), v_00000000); + const __m128i three_bytes_bytemask = + _mm_xor_si128(one_two_three_bytes_bytemask, one_two_bytes_bytemask); + const uint16_t ascii_bytes_bitmask = + static_cast(_mm_movemask_epi8(ascii_bytes_bytemask)); + const uint16_t two_bytes_bitmask = + static_cast(_mm_movemask_epi8(two_bytes_bytemask)); + const uint16_t three_bytes_bitmask = + static_cast(_mm_movemask_epi8(three_bytes_bytemask)); size_t ascii_count = count_ones(ascii_bytes_bitmask) / 4; size_t two_bytes_count = count_ones(two_bytes_bitmask) / 4; size_t three_bytes_count = count_ones(three_bytes_bitmask) / 4; - count += 32 - 3 * ascii_count - 2 * two_bytes_count - three_bytes_count; + count += 16 - 3 * ascii_count - 2 * two_bytes_count - three_bytes_count; } return count + scalar::utf32::utf8_length_from_utf32(input + pos, length - pos); @@ -31319,18 +43946,18 @@ simdutf_warn_unused size_t implementation::utf8_length_from_utf32( simdutf_warn_unused size_t implementation::utf16_length_from_utf32( const char32_t *input, size_t length) const noexcept { - const __m256i v_00000000 = _mm256_setzero_si256(); - const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); + const __m128i v_00000000 = _mm_setzero_si128(); + const __m128i v_ffff0000 = _mm_set1_epi32((uint32_t)0xffff0000); size_t pos = 0; size_t count = 0; - for (; pos + 8 <= length; pos += 8) { - __m256i in = _mm256_loadu_si256((__m256i *)(input + pos)); - const __m256i surrogate_bytemask = - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000); - const uint32_t surrogate_bitmask = - static_cast(_mm256_movemask_epi8(surrogate_bytemask)); - size_t surrogate_count = (32 - count_ones(surrogate_bitmask)) / 4; - count += 8 + surrogate_count; + for (; pos + 4 <= length; pos += 4) { + __m128i in = _mm_loadu_si128((__m128i *)(input + pos)); + const __m128i surrogate_bytemask = + _mm_cmpeq_epi32(_mm_and_si128(in, v_ffff0000), v_00000000); + const uint16_t surrogate_bitmask = + static_cast(_mm_movemask_epi8(surrogate_bytemask)); + size_t surrogate_count = (16 - count_ones(surrogate_bitmask)) / 4; + count += 4 + surrogate_count; } return count + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos); @@ -31381,3597 +44008,5852 @@ simdutf_warn_unused result implementation::base64_to_binary( last_chunk_options); } -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - return (options & base64_url) - ? compress_decode_base64(output, input, length, options, - last_chunk_options) - : compress_decode_base64(output, input, length, options, - last_chunk_options); -} +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return (options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); +} + +simdutf_warn_unused size_t implementation::base64_length_from_binary( + size_t length, base64_options options) const noexcept { + return scalar::base64::base64_length_from_binary(length, options); +} + +size_t implementation::binary_to_base64(const char *input, size_t length, + char *output, + base64_options options) const noexcept { + if (options & base64_url) { + return encode_base64(output, input, length, options); + } else { + return encode_base64(output, input, length, options); + } +} +} // namespace westmere +} // namespace simdutf + +/* begin file src/simdutf/westmere/end.h */ +#if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE +// nothing needed. +#else +SIMDUTF_UNTARGET_REGION +#endif + +/* end file src/simdutf/westmere/end.h */ +/* end file src/westmere/implementation.cpp */ +#endif +#if SIMDUTF_IMPLEMENTATION_LSX +/* begin file src/lsx/implementation.cpp */ +/* begin file src/simdutf/lsx/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "lsx" +// #define SIMDUTF_IMPLEMENTATION lsx +/* end file src/simdutf/lsx/begin.h */ +namespace simdutf { +namespace lsx { +namespace { +#ifndef SIMDUTF_LSX_H + #error "lsx.h must be included" +#endif +using namespace simd; + +// convert vmskltz/vmskgez/vmsknz to +// simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes index +const uint8_t lsx_1_2_utf8_bytes_mask[] = { + 0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84, + 85, 2, 3, 6, 7, 18, 19, 22, 23, 66, 67, 70, 71, 82, 83, + 86, 87, 8, 9, 12, 13, 24, 25, 28, 29, 72, 73, 76, 77, 88, + 89, 92, 93, 10, 11, 14, 15, 26, 27, 30, 31, 74, 75, 78, 79, + 90, 91, 94, 95, 32, 33, 36, 37, 48, 49, 52, 53, 96, 97, 100, + 101, 112, 113, 116, 117, 34, 35, 38, 39, 50, 51, 54, 55, 98, 99, + 102, 103, 114, 115, 118, 119, 40, 41, 44, 45, 56, 57, 60, 61, 104, + 105, 108, 109, 120, 121, 124, 125, 42, 43, 46, 47, 58, 59, 62, 63, + 106, 107, 110, 111, 122, 123, 126, 127, 128, 129, 132, 133, 144, 145, 148, + 149, 192, 193, 196, 197, 208, 209, 212, 213, 130, 131, 134, 135, 146, 147, + 150, 151, 194, 195, 198, 199, 210, 211, 214, 215, 136, 137, 140, 141, 152, + 153, 156, 157, 200, 201, 204, 205, 216, 217, 220, 221, 138, 139, 142, 143, + 154, 155, 158, 159, 202, 203, 206, 207, 218, 219, 222, 223, 160, 161, 164, + 165, 176, 177, 180, 181, 224, 225, 228, 229, 240, 241, 244, 245, 162, 163, + 166, 167, 178, 179, 182, 183, 226, 227, 230, 231, 242, 243, 246, 247, 168, + 169, 172, 173, 184, 185, 188, 189, 232, 233, 236, 237, 248, 249, 252, 253, + 170, 171, 174, 175, 186, 187, 190, 191, 234, 235, 238, 239, 250, 251, 254, + 255}; + +simdutf_really_inline __m128i lsx_swap_bytes(__m128i vec) { + // const v16u8 shuf = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}; + // return __lsx_vshuf_b(__lsx_vldi(0), vec, shuf); + return __lsx_vshuf4i_b(vec, 0b10110001); + // return __lsx_vor_v(__lsx_vslli_h(vec, 8), __lsx_vsrli_h(vec, 8)); +} + +simdutf_really_inline bool is_ascii(const simd8x64 &input) { + return input.is_ascii(); +} + +simdutf_unused simdutf_really_inline simd8 +must_be_continuation(const simd8 prev1, const simd8 prev2, + const simd8 prev3) { + simd8 is_second_byte = prev1 >= uint8_t(0b11000000u); + simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); + simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); + // Use ^ instead of | for is_*_byte, because ^ is commutative, and the caller + // is using ^ as well. This will work fine because we only have to report + // errors for cases with 0-1 lead bytes. Multiple lead bytes implies 2 + // overlapping multibyte characters, and if that happens, there is guaranteed + // to be at least *one* lead byte that is part of only 1 other multibyte + // character. The error will be detected there. + return is_second_byte ^ is_third_byte ^ is_fourth_byte; +} + +simdutf_really_inline simd8 +must_be_2_3_continuation(const simd8 prev2, + const simd8 prev3) { + simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); + simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); + return is_third_byte ^ is_fourth_byte; +} + +// common functions for utf8 conversions +simdutf_really_inline __m128i convert_utf8_3_byte_to_utf16(__m128i in) { + // Low half contains 10bbbbbb|10cccccc + // High half contains 1110aaaa|1110aaaa + const v16u8 sh = {2, 1, 5, 4, 8, 7, 11, 10, 0, 0, 3, 3, 6, 6, 9, 9}; + const v8u16 v0fff = {0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff}; + + __m128i perm = __lsx_vshuf_b(__lsx_vldi(0), in, (__m128i)sh); + // 1110aaaa => aaaa0000 + __m128i perm_high = __lsx_vslli_b(__lsx_vbsrl_v(perm, 8), 4); + // 10bbbbbb 10cccccc => 0010bbbb bbcccccc + __m128i composed = __lsx_vbitsel_v(__lsx_vsrli_h(perm, 2), /* perm >> 2*/ + perm, __lsx_vrepli_h(0x3f) /* 0x003f */); + // 0010bbbb bbcccccc => aaaabbbb bbcccccc + composed = __lsx_vbitsel_v(perm_high, composed, (__m128i)v0fff); + + return composed; +} + +simdutf_really_inline __m128i convert_utf8_2_byte_to_utf16(__m128i in) { + // 10bbbbb 110aaaaa => 00bbbbb 000aaaaa + __m128i composed = __lsx_vand_v(in, __lsx_vldi(0x3f)); + // 00bbbbbb 000aaaaa => 00000aaa aabbbbbb + composed = __lsx_vbitsel_v( + __lsx_vsrli_h(__lsx_vslli_h(composed, 8), 2), /* (aaaaa << 8) >> 2 */ + __lsx_vsrli_h(composed, 8), /* bbbbbb >> 8 */ + __lsx_vrepli_h(0x3f)); /* 0x003f */ + return composed; +} + +simdutf_really_inline __m128i +convert_utf8_1_to_2_byte_to_utf16(__m128i in, size_t shufutf8_idx) { + // Converts 6 1-2 byte UTF-8 characters to 6 UTF-16 characters. + // This is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. + __m128i sh = + __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[shufutf8_idx]), + 0); + // Shuffle + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 110aaaaa 10bbbbbb + __m128i perm = __lsx_vshuf_b(__lsx_vldi(0), in, sh); + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 00000000 00bbbbbb + __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_h(0x7f)); // 6 or 7 bits + // 1 byte: 00000000 00000000 + // 2 byte: 00000aaa aa000000 + const __m128i v1f00 = __lsx_vldi(-2785); // -2785(13bit) => 151f + __m128i composed = __lsx_vsrli_h(__lsx_vand_v(perm, v1f00), 2); // 5 bits + // Combine with a shift right accumulate + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 00000aaa aabbbbbb + composed = __lsx_vadd_h(ascii, composed); + return composed; +} + +/* begin file src/lsx/lsx_validate_utf16.cpp */ +/* + In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. + + In a vectorized algorithm we want to examine the most significant + nibble in order to select a fast path. If none of highest nibbles + are 0xD (13), than we are sure that UTF-16 chunk in a vector + register is valid. + + Let us analyze what we need to check if the nibble is 0xD. The + value of the preceding nibble determines what we have: + + 0xd000 .. 0xd7ff - a valid word + 0xd800 .. 0xdbff - low surrogate + 0xdc00 .. 0xdfff - high surrogate + + Other constraints we have to consider: + - there must not be two consecutive low surrogates (0xd800 .. 0xdbff) + - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) + - there must not be sole low surrogate nor high surrogate + + We're going to build three bitmasks based on the 3rd nibble: + - V = valid word, + - L = low surrogate (0xd800 .. 0xdbff) + - H = high surrogate (0xdc00 .. 0xdfff) + + 0 1 2 3 4 5 6 7 <--- word index + [ V | L | H | L | H | V | V | L ] + 1 0 0 0 0 1 1 0 - V = valid masks + 0 1 0 1 0 0 0 1 - L = low surrogate + 0 0 1 0 1 0 0 0 - H high surrogate + + + 1 0 0 0 0 1 1 0 V = valid masks + 0 1 0 1 0 0 0 0 a = L & (H >> 1) + 0 0 1 0 1 0 0 0 b = a << 1 + 1 1 1 1 1 1 1 0 c = V | a | b + ^ + the last bit can be zero, we just consume 7 + code units and recheck this word in the next iteration +*/ + +/* Returns: + - pointer to the last unprocessed character (a scalar fallback should check + the rest); + - nullptr if an error was detected. +*/ +template +const char16_t *lsx_validate_utf16(const char16_t *input, size_t size) { + const char16_t *end = input + size; + + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); + + while (input + simd16::SIZE * 2 < end) { + // 0. Load data: since the validation takes into account only higher + // byte of each word, we compress the two vectors into one which + // consists only the higher bytes. + auto in0 = simd16(input); + auto in1 = + simd16(input + simd16::SIZE / sizeof(char16_t)); + if (big_endian) { + in0 = in0.swap_bytes(); + in1 = in1.swap_bytes(); + } + const auto in = simd8(__lsx_vssrlni_bu_h(in1.value, in0.value, 8)); + + // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). + const auto surrogates_wordmask = (in & v_f8) == v_d8; + const uint16_t surrogates_bitmask = + static_cast(surrogates_wordmask.to_bitmask()); + if (surrogates_bitmask == 0x0000) { + input += 16; + } else { + // 2. We have some surrogates that have to be distinguished: + // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) + // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) + // + // Fact: high surrogate has 11th bit set (3rd bit in the higher word) + + // V - non-surrogate code units + // V = not surrogates_wordmask + const uint16_t V = static_cast(~surrogates_bitmask); + + // H - word-mask for high surrogates: the six highest bits are 0b1101'11 + const auto vH = (in & v_fc) == v_dc; + const uint16_t H = static_cast(vH.to_bitmask()); + + // L - word mask for low surrogates + // L = not H and surrogates_wordmask + const uint16_t L = static_cast(~H & surrogates_bitmask); + + const uint16_t a = static_cast( + L & (H >> 1)); // A low surrogate must be followed by high one. + // (A low surrogate placed in the 7th register's word + // is an exception we handle.) + const uint16_t b = static_cast( + a << 1); // Just mark that the opinput - startite fact is hold, + // thanks to that we have only two masks for valid case. + const uint16_t c = static_cast( + V | a | b); // Combine all the masks into the final one. + + if (c == 0xffff) { + // The whole input register contains valid UTF-16, i.e., + // either single code units or proper surrogate pairs. + input += 16; + } else if (c == 0x7fff) { + // The 15 lower code units of the input register contains valid UTF-16. + // The 15th word may be either a low or high surrogate. It the next + // iteration we 1) check if the low surrogate is followed by a high + // one, 2) reject sole high surrogate. + input += 15; + } else { + return nullptr; + } + } + } + + return input; +} + +template +const result lsx_validate_utf16_with_errors(const char16_t *input, + size_t size) { + const char16_t *start = input; + const char16_t *end = input + size; + + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); + + while (input + simd16::SIZE * 2 < end) { + // 0. Load data: since the validation takes into account only higher + // byte of each word, we compress the two vectors into one which + // consists only the higher bytes. + auto in0 = simd16(input); + auto in1 = + simd16(input + simd16::SIZE / sizeof(char16_t)); + + if (big_endian) { + in0 = in0.swap_bytes(); + in1 = in1.swap_bytes(); + } + + const auto in = simd8(__lsx_vssrlni_bu_h(in1.value, in0.value, 8)); + + // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). + const auto surrogates_wordmask = (in & v_f8) == v_d8; + const uint16_t surrogates_bitmask = + static_cast(surrogates_wordmask.to_bitmask()); + if (surrogates_bitmask == 0x0000) { + input += 16; + } else { + // 2. We have some surrogates that have to be distinguished: + // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) + // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) + // + // Fact: high surrogate has 11th bit set (3rd bit in the higher word) + + // V - non-surrogate code units + // V = not surrogates_wordmask + const uint16_t V = static_cast(~surrogates_bitmask); + + // H - word-mask for high surrogates: the six highest bits are 0b1101'11 + const auto vH = (in & v_fc) == v_dc; + const uint16_t H = static_cast(vH.to_bitmask()); + + // L - word mask for low surrogates + // L = not H and surrogates_wordmask + const uint16_t L = static_cast(~H & surrogates_bitmask); + + const uint16_t a = static_cast( + L & (H >> 1)); // A low surrogate must be followed by high one. + // (A low surrogate placed in the 7th register's word + // is an exception we handle.) + const uint16_t b = static_cast( + a << 1); // Just mark that the opinput - startite fact is hold, + // thanks to that we have only two masks for valid case. + const uint16_t c = static_cast( + V | a | b); // Combine all the masks into the final one. + + if (c == 0xffff) { + // The whole input register contains valid UTF-16, i.e., + // either single code units or proper surrogate pairs. + input += 16; + } else if (c == 0x7fff) { + // The 15 lower code units of the input register contains valid UTF-16. + // The 15th word may be either a low or high surrogate. It the next + // iteration we 1) check if the low surrogate is followed by a high + // one, 2) reject sole high surrogate. + input += 15; + } else { + return result(error_code::SURROGATE, input - start); + } + } + } + + return result(error_code::SUCCESS, input - start); +} +/* end file src/lsx/lsx_validate_utf16.cpp */ +/* begin file src/lsx/lsx_validate_utf32le.cpp */ + +const char32_t *lsx_validate_utf32le(const char32_t *input, size_t size) { + const char32_t *end = input + size; + + __m128i offset = __lsx_vreplgr2vr_w(uint32_t(0xffff2000)); + __m128i standardoffsetmax = __lsx_vreplgr2vr_w(uint32_t(0xfffff7ff)); + __m128i standardmax = __lsx_vldi(-2288); /*0x10ffff*/ + __m128i currentmax = __lsx_vldi(0x0); + __m128i currentoffsetmax = __lsx_vldi(0x0); + + while (input + 4 < end) { + __m128i in = __lsx_vld(reinterpret_cast(input), 0); + currentmax = __lsx_vmax_wu(in, currentmax); + // 0xD8__ + 0x2000 = 0xF8__ => 0xF8__ > 0xF7FF + currentoffsetmax = + __lsx_vmax_wu(__lsx_vadd_w(in, offset), currentoffsetmax); + + input += 4; + } + + __m128i is_zero = + __lsx_vxor_v(__lsx_vmax_wu(currentmax, standardmax), standardmax); + if (__lsx_bnz_v(is_zero)) { + return nullptr; + } + + is_zero = __lsx_vxor_v(__lsx_vmax_wu(currentoffsetmax, standardoffsetmax), + standardoffsetmax); + if (__lsx_bnz_v(is_zero)) { + return nullptr; + } + + return input; +} + +const result lsx_validate_utf32le_with_errors(const char32_t *input, + size_t size) { + const char32_t *start = input; + const char32_t *end = input + size; + + __m128i offset = __lsx_vreplgr2vr_w(uint32_t(0xffff2000)); + __m128i standardoffsetmax = __lsx_vreplgr2vr_w(uint32_t(0xfffff7ff)); + __m128i standardmax = __lsx_vldi(-2288); /*0x10ffff*/ + __m128i currentmax = __lsx_vldi(0x0); + __m128i currentoffsetmax = __lsx_vldi(0x0); + + while (input + 4 < end) { + __m128i in = __lsx_vld(reinterpret_cast(input), 0); + currentmax = __lsx_vmax_wu(in, currentmax); + currentoffsetmax = + __lsx_vmax_wu(__lsx_vadd_w(in, offset), currentoffsetmax); + + __m128i is_zero = + __lsx_vxor_v(__lsx_vmax_wu(currentmax, standardmax), standardmax); + if (__lsx_bnz_v(is_zero)) { + return result(error_code::TOO_LARGE, input - start); + } + + is_zero = __lsx_vxor_v(__lsx_vmax_wu(currentoffsetmax, standardoffsetmax), + standardoffsetmax); + if (__lsx_bnz_v(is_zero)) { + return result(error_code::SURROGATE, input - start); + } + + input += 4; + } + + return result(error_code::SUCCESS, input - start); +} +/* end file src/lsx/lsx_validate_utf32le.cpp */ + +/* begin file src/lsx/lsx_convert_latin1_to_utf8.cpp */ +/* + Returns a pair: the first unprocessed byte from buf and utf8_output + A scalar routing should carry on the conversion of the tail. +*/ + +std::pair +lsx_convert_latin1_to_utf8(const char *latin1_input, size_t len, + char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char *end = latin1_input + len; + + __m128i zero = __lsx_vldi(0); + // We always write 16 bytes, of which more than the first 8 bytes + // are valid. A safety margin of 8 is more than sufficient. + while (latin1_input + 16 <= end) { + __m128i in8 = __lsx_vld(reinterpret_cast(latin1_input), 0); + uint32_t ascii = __lsx_vpickve2gr_hu(__lsx_vmskgez_b(in8), 0); + if (ascii == 0xffff) { // ASCII fast path!!!! + __lsx_vst(in8, utf8_output, 0); + utf8_output += 16; + latin1_input += 16; + continue; + } + // We just fallback on UTF-16 code. This could be optimized/simplified + // further. + __m128i in16 = __lsx_vilvl_b(zero, in8); + // 1. prepare 2-byte values + // input 8-bit word : [aabb|bbbb] x 8 + // expected output : [1100|00aa|10bb|bbbb] x 8 + // t0 = [0000|00aa|bbbb|bb00] + __m128i t0 = __lsx_vslli_h(in16, 2); + // t1 = [0000|00aa|0000|0000] + __m128i t1 = __lsx_vand_v(t0, __lsx_vldi(-2785)); + // t3 = [0000|00aa|00bb|bbbb] + __m128i t2 = __lsx_vbitsel_v(t1, in16, __lsx_vrepli_h(0x3f)); + // t4 = [1100|00aa|10bb|bbbb] + __m128i t3 = __lsx_vor_v(t2, __lsx_vreplgr2vr_h(uint16_t(0xc080))); + // merge ASCII and 2-byte codewords + __m128i one_byte_bytemask = __lsx_vsle_hu(in16, __lsx_vrepli_h(0x7F)); + __m128i utf8_unpacked = __lsx_vbitsel_v(t3, in16, one_byte_bytemask); + + const uint8_t *row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lsx_1_2_utf8_bytes_mask[(ascii & 0xff)]][0]; + __m128i shuffle = __lsx_vld(row + 1, 0); + __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); + + // store bytes + __lsx_vst(utf8_packed, utf8_output, 0); + // adjust pointers + latin1_input += 8; + utf8_output += row[0]; + + } // while + + return std::make_pair(latin1_input, reinterpret_cast(utf8_output)); +} +/* end file src/lsx/lsx_convert_latin1_to_utf8.cpp */ +/* begin file src/lsx/lsx_convert_latin1_to_utf16.cpp */ +std::pair +lsx_convert_latin1_to_utf16le(const char *buf, size_t len, + char16_t *utf16_output) { + const char *end = buf + len; + + __m128i zero = __lsx_vldi(0); + while (buf + 16 <= end) { + __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); -simdutf_warn_unused size_t implementation::base64_length_from_binary( - size_t length, base64_options options) const noexcept { - return scalar::base64::base64_length_from_binary(length, options); + __m128i inlow = __lsx_vilvl_b(zero, in8); + __m128i inhigh = __lsx_vilvh_b(zero, in8); + __lsx_vst(inlow, reinterpret_cast(utf16_output), 0); + __lsx_vst(inhigh, reinterpret_cast(utf16_output), 16); + + utf16_output += 16; + buf += 16; + } + + return std::make_pair(buf, utf16_output); } -size_t implementation::binary_to_base64(const char *input, size_t length, - char *output, - base64_options options) const noexcept { - if (options & base64_url) { - return encode_base64(output, input, length, options); - } else { - return encode_base64(output, input, length, options); +std::pair +lsx_convert_latin1_to_utf16be(const char *buf, size_t len, + char16_t *utf16_output) { + const char *end = buf + len; + __m128i zero = __lsx_vldi(0); + while (buf + 16 <= end) { + __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); + + __m128i inlow = __lsx_vilvl_b(in8, zero); + __m128i inhigh = __lsx_vilvh_b(in8, zero); + __lsx_vst(inlow, reinterpret_cast(utf16_output), 0); + __lsx_vst(inhigh, reinterpret_cast(utf16_output), 16); + utf16_output += 16; + buf += 16; } + + return std::make_pair(buf, utf16_output); } -} // namespace haswell -} // namespace simdutf +/* end file src/lsx/lsx_convert_latin1_to_utf16.cpp */ +/* begin file src/lsx/lsx_convert_latin1_to_utf32.cpp */ +std::pair +lsx_convert_latin1_to_utf32(const char *buf, size_t len, + char32_t *utf32_output) { + const char *end = buf + len; -/* begin file src/simdutf/haswell/end.h */ -#if SIMDUTF_CAN_ALWAYS_RUN_HASWELL -// nothing needed. -#else -SIMDUTF_UNTARGET_REGION -#endif + while (buf + 16 <= end) { + __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); + __m128i zero = __lsx_vldi(0); + __m128i in16low = __lsx_vilvl_b(zero, in8); + __m128i in16high = __lsx_vilvh_b(zero, in8); + __m128i in32_0 = __lsx_vilvl_h(zero, in16low); + __m128i in32_1 = __lsx_vilvh_h(zero, in16low); + __m128i in32_2 = __lsx_vilvl_h(zero, in16high); + __m128i in32_3 = __lsx_vilvh_h(zero, in16high); -#if SIMDUTF_GCC11ORMORE // workaround for - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 -SIMDUTF_POP_DISABLE_WARNINGS -#endif // end of workaround -/* end file src/simdutf/haswell/end.h */ -/* end file src/haswell/implementation.cpp */ -#endif -#if SIMDUTF_IMPLEMENTATION_PPC64 -/* begin file src/ppc64/implementation.cpp */ + __lsx_vst(in32_0, reinterpret_cast(utf32_output), 0); + __lsx_vst(in32_1, reinterpret_cast(utf32_output + 4), 0); + __lsx_vst(in32_2, reinterpret_cast(utf32_output + 8), 0); + __lsx_vst(in32_3, reinterpret_cast(utf32_output + 12), 0); + utf32_output += 16; + buf += 16; + } + return std::make_pair(buf, utf32_output); +} +/* end file src/lsx/lsx_convert_latin1_to_utf32.cpp */ +/* begin file src/lsx/lsx_convert_utf8_to_utf16.cpp */ +// Convert up to 16 bytes from utf8 to utf16 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 16, usually 12). +template +size_t convert_masked_utf8_to_utf16(const char *input, + uint64_t utf8_end_of_code_point_mask, + char16_t *&utf16_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + __m128i in = __lsx_vld(reinterpret_cast(input), 0); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // We first try a few fast paths. + // The obvious first test is ASCII, which actually consumes the full 16. + if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xFFFF) { + // We process in chunks of 16 bytes + // The routine in simd.h is reused. + simd8 temp{in}; + temp.store_ascii_as_utf16(utf16_output); + utf16_output += 16; // We wrote 16 16-bit characters. + return 16; // We consumed 16 bytes. + } -/* begin file src/simdutf/ppc64/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "ppc64" -// #define SIMDUTF_IMPLEMENTATION ppc64 -/* end file src/simdutf/ppc64/begin.h */ -namespace simdutf { -namespace ppc64 { -namespace { -#ifndef SIMDUTF_PPC64_H - #error "ppc64.h must be included" -#endif -using namespace simd; + uint64_t buffer[2]; + // 3 byte sequences are the next most common, as seen in CJK, which has long + // sequences of these. + if (input_utf8_end_of_code_point_mask == 0x924) { + // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte + // UTF-16 code units. + __m128i composed = convert_utf8_3_byte_to_utf16(in); + // Byte swap if necessary + if (!match_system(big_endian)) { + composed = lsx_swap_bytes(composed); + } -simdutf_really_inline bool is_ascii(const simd8x64 &input) { - // careful: 0x80 is not ascii. - return input.reduce_or().saturating_sub(0b01111111u).bits_not_set_anywhere(); -} + __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + utf16_output += 4; // We wrote 4 16-bit characters. + return 12; // We consumed 12 bytes. + } -simdutf_unused simdutf_really_inline simd8 -must_be_continuation(const simd8 prev1, const simd8 prev2, - const simd8 prev3) { - simd8 is_second_byte = - prev1.saturating_sub(0b11000000u - 1); // Only 11______ will be > 0 - simd8 is_third_byte = - prev2.saturating_sub(0b11100000u - 1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = - prev3.saturating_sub(0b11110000u - 1); // Only 1111____ will be > 0 - // Caller requires a bool (all 1's). All values resulting from the subtraction - // will be <= 64, so signed comparison is fine. - return simd8(is_second_byte | is_third_byte | is_fourth_byte) > - int8_t(0); -} + // 2 byte sequences occur in short bursts in languages like Greek and Russian. + if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xAAAA) { + // We want to take 6 2-byte UTF-8 code units and turn them into 6 2-byte + // UTF-16 code units. + __m128i composed = convert_utf8_2_byte_to_utf16(in); + // Byte swap if necessary + if (!match_system(big_endian)) { + composed = lsx_swap_bytes(composed); + } -simdutf_really_inline simd8 -must_be_2_3_continuation(const simd8 prev2, - const simd8 prev3) { - simd8 is_third_byte = - prev2.saturating_sub(0xe0u - 0x80); // Only 111_____ will be >= 0x80 - simd8 is_fourth_byte = - prev3.saturating_sub(0xf0u - 0x80); // Only 1111____ will be >= 0x80 - // Caller requires a bool (all 1's). All values resulting from the subtraction - // will be <= 64, so signed comparison is fine. - return simd8(is_third_byte | is_fourth_byte); -} + __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + utf16_output += 6; // We wrote 6 16-bit characters. + return 12; // We consumed 12 bytes. + } -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf + /// We do not have a fast path available, or the fast path is unimportant, so + /// we fallback. + const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][0]; -/* begin file src/generic/buf_block_reader.h */ -namespace simdutf { -namespace ppc64 { -namespace { + const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][1]; + const __m128i zero = __lsx_vldi(0); + if (idx < 64) { + // SIX (6) input code-code units + // Convert to UTF-16 + __m128i composed = convert_utf8_1_to_2_byte_to_utf16(in, idx); + // Byte swap if necessary + if (!match_system(big_endian)) { + composed = lsx_swap_bytes(composed); + } + // Store + __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + utf16_output += 6; // We wrote 6 16-bit characters. + return consumed; + } else if (idx < 145) { + // FOUR (4) input code-code units + // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing. + __m128i sh = __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx]), + 0); + // XXX: depending on the system scalar instructions might be faster. + // 1 byte: 00000000 00000000 0ccccccc + // 2 byte: 00000000 110bbbbb 10cccccc + // 3 byte: 1110aaaa 10bbbbbb 10cccccc + sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); + __m128i perm = __lsx_vshuf_b(zero, in, sh); + // 1 byte: 00000000 0ccccccc + // 2 byte: xx0bbbbb x0cccccc + // 3 byte: xxbbbbbb x0cccccc + __m128i lowperm = __lsx_vpickev_h(perm, perm); + // 1 byte: 00000000 00000000 + // 2 byte: 00000000 00000000 + // 3 byte: 00000000 1110aaaa + __m128i highperm = __lsx_vpickod_h(perm, perm); + // 3 byte: aaaa0000 00000000 + highperm = __lsx_vslli_h(highperm, 12); + // ASCII + // 1 byte: 00000000 0ccccccc + // 2+byte: 00000000 00cccccc + __m128i ascii = __lsx_vand_v(lowperm, __lsx_vrepli_h(0x7f)); + // 1 byte: 00000000 00000000 + // 2 byte: xx0bbbbb 00000000 + // 3 byte: xxbbbbbb 00000000 + __m128i middlebyte = __lsx_vand_v(lowperm, __lsx_vldi(-2561) /*0xFF00*/); + // 1 byte: 00000000 0ccccccc + // 2 byte: 0010bbbb bbcccccc + // 3 byte: 0010bbbb bbcccccc + __m128i composed = __lsx_vor_v(__lsx_vsrli_h(middlebyte, 2), ascii); -// Walks through a buffer in block-sized increments, loading the last part with -// spaces -template struct buf_block_reader { -public: - simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdutf_really_inline size_t block_index(); - simdutf_really_inline bool has_full_block() const; - simdutf_really_inline const uint8_t *full_block() const; - /** - * Get the last block, padded with spaces. - * - * There will always be a last block, with at least 1 byte, unless len == 0 - * (in which case this function fills the buffer with spaces and returns 0. In - * particular, if len == STEP_SIZE there will be 0 full_blocks and 1 remainder - * block with STEP_SIZE bytes and no spaces for padding. - * - * @return the number of effective characters in the last block. - */ - simdutf_really_inline size_t get_remainder(uint8_t *dst) const; - simdutf_really_inline void advance(); + __m128i v0fff = __lsx_vreplgr2vr_h(uint16_t(0xfff)); + // aaaabbbb bbcccccc + composed = __lsx_vbitsel_v(highperm, composed, v0fff); -private: - const uint8_t *buf; - const size_t len; - const size_t lenminusstep; - size_t idx; -}; + if (!match_system(big_endian)) { + composed = lsx_swap_bytes(composed); + } -// Routines to print masks and text for debugging bitmask operations -simdutf_unused static char *format_input_text_64(const uint8_t *text) { - static char *buf = - reinterpret_cast(malloc(sizeof(simd8x64) + 1)); - for (size_t i = 0; i < sizeof(simd8x64); i++) { - buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} + __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + utf16_output += 4; // We wrote 4 16-bit codepoints + return consumed; + } else if (idx < 209) { + // THREE (3) input code-code units + if (input_utf8_end_of_code_point_mask == 0x888) { + // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte + // UTF-16 pairs. Generating surrogate pairs is a little tricky though, but + // it is easier when we can assume they are all pairs. This version does + // not use the LUT, but 4 byte sequences are less common and the overhead + // of the extra memory access is less important than the early branch + // overhead in shorter sequences. -// Routines to print masks and text for debugging bitmask operations -simdutf_unused static char *format_input_text(const simd8x64 &in) { - static char *buf = - reinterpret_cast(malloc(sizeof(simd8x64) + 1)); - in.store(reinterpret_cast(buf)); - for (size_t i = 0; i < sizeof(simd8x64); i++) { - if (buf[i] < ' ') { - buf[i] = '_'; + // Swap byte pairs + // 10dddddd 10cccccc|10bbbbbb 11110aaa + // 10cccccc 10dddddd|11110aaa 10bbbbbb + __m128i swap = lsx_swap_bytes(in); + // Shift left 2 bits + // cccccc00 dddddd00 xxxxxxxx bbbbbb00 + __m128i shift = __lsx_vslli_b(swap, 2); + // Create a magic number containing the low 2 bits of the trail surrogate + // and all the corrections needed to create the pair. UTF-8 4b prefix = + // -0x0000|0xF000 surrogate offset = -0x0000|0x0040 (0x10000 << 6) + // surrogate high = +0x0000|0xD800 + // surrogate low = +0xDC00|0x0000 + // ------------------------------- + // = +0xDC00|0xE7C0 + __m128i magic = __lsx_vreplgr2vr_w(uint32_t(0xDC00E7C0)); + // Generate unadjusted trail surrogate minus lowest 2 bits + // vec(0000FF00) = __lsx_vldi(-1758) + // xxxxxxxx xxxxxxxx|11110aaa bbbbbb00 + __m128i trail = + __lsx_vbitsel_v(shift, swap, __lsx_vldi(-1758 /*0000FF00*/)); + // Insert low 2 bits of trail surrogate to magic number for later + // 11011100 00000000 11100111 110000cc + __m128i magic_with_low_2 = __lsx_vor_v(__lsx_vsrli_w(shift, 30), magic); + + // Generate lead surrogate + // xxxxcccc ccdddddd|xxxxxxxx xxxxxxxx + // 000000cc ccdddddd|xxxxxxxx xxxxxxxx + __m128i lead = __lsx_vbitsel_v( + __lsx_vsrli_h(__lsx_vand_v(shift, __lsx_vldi(0x3F)), 4), swap, + __lsx_vrepli_h(0x3f /* 0x003f*/)); + + // Blend pairs + // __lsx_vldi(-1741) => vec(0x0000FFFF) + // 000000cc ccdddddd|11110aaa bbbbbb00 + __m128i blend = + __lsx_vbitsel_v(lead, trail, __lsx_vldi(-1741) /* (0x0000FFFF)*4 */); + + // Add magic number to finish the result + // 110111CC CCDDDDDD|110110AA BBBBBBCC + __m128i composed = __lsx_vadd_h(blend, magic_with_low_2); + // Byte swap if necessary + if (!match_system(big_endian)) { + composed = lsx_swap_bytes(composed); + } + // __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + __lsx_vst(composed, reinterpret_cast(buffer), 0); + std::memcpy(utf16_output, buffer, 12); + utf16_output += 6; // We 3 32-bit surrogate pairs. + return 12; // We consumed 12 bytes. + } + // 3 1-4 byte sequences + __m128i sh = __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx]), + 0); + // 1 byte: 00000000 00000000 00000000 0ddddddd + // 3 byte: 00000000 00000000 110ccccc 10dddddd + // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd + // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd + sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); + __m128i perm = __lsx_vshuf_b(zero, in, sh); + // added to fix issue https://github.com/simdutf/simdutf/issues/514 + // We only want to write 2 * 16-bit code units when that is actually what we + // have. Unfortunately, we cannot trust the input. So it is possible to get + // 0xff as an input byte and it should not result in a surrogate pair. We + // need to check for that. + uint32_t permbuffer[4]; + __lsx_vst(perm, permbuffer, 0); + // Mask the low and middle bytes + // 00000000 00000000 00000000 0ddddddd + __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_w(0x7f)); + // Because the surrogates need more work, the high surrogate is computed + // first. + __m128i middlehigh = __lsx_vslli_w(perm, 2); + // 00000000 00000000 00cccccc 00000000 + __m128i middlebyte = __lsx_vand_v(perm, __lsx_vldi(-3777) /* 0x00003F00 */); + // Start assembling the sequence. Since the 4th byte is in the same position + // as it would be in a surrogate and there is no dependency, shift left + // instead of right. 3 byte: 00000000 10bbbbxx xxxxxxxx xxxxxxxx 4 byte: + // 11110aaa bbbbbbxx xxxxxxxx xxxxxxxx + __m128i ab = + __lsx_vbitsel_v(middlehigh, perm, __lsx_vldi(-1656) /*0xFF000000*/); + // Top 16 bits contains the high ten bits of the surrogate pair before + // correction 3 byte: 00000000 10bbbbcc|cccc0000 00000000 4 byte: 11110aaa + // bbbbbbcc|cccc0000 00000000 - high 10 bits correct w/o correction + __m128i v_fffc0000 = __lsx_vreplgr2vr_w(uint32_t(0xFFFC0000)); + __m128i abc = __lsx_vbitsel_v(__lsx_vslli_w(middlebyte, 4), ab, v_fffc0000); + // Combine the low 6 or 7 bits by a shift right accumulate + // 3 byte: 00000000 00000010|bbbbcccc ccdddddd - low 16 bits correct + // 4 byte: 00000011 110aaabb|bbbbcccc ccdddddd - low 10 bits correct w/o + // correction + __m128i composed = __lsx_vor_v(ascii, __lsx_vsrli_w(abc, 6)); + // After this is for surrogates + // Blend the low and high surrogates + // 4 byte: 11110aaa bbbbbbcc|bbbbcccc ccdddddd + __m128i mixed = + __lsx_vbitsel_v(abc, composed, __lsx_vldi(-1741) /*0x0000FFFF*/); + // Clear the upper 6 bits of the low surrogate. Don't clear the upper bits + // yet as 0x10000 was not subtracted from the codepoint yet. 4 byte: + // 11110aaa bbbbbbcc|000000cc ccdddddd + __m128i v_ffff03ff = __lsx_vreplgr2vr_w(uint32_t(0xFFFF03FF)); + __m128i masked_pair = __lsx_vand_v(mixed, v_ffff03ff); + // Correct the remaining UTF-8 prefix, surrogate offset, and add the + // surrogate prefixes in one magic 16-bit addition. similar magic number but + // without the continue byte adjust and halfword swapped UTF-8 4b prefix = + // -0xF000|0x0000 surrogate offset = -0x0040|0x0000 (0x10000 << 6) + // surrogate high = +0xD800|0x0000 + // surrogate low = +0x0000|0xDC00 + // ----------------------------------- + // = +0xE7C0|0xDC00 + __m128i magic = __lsx_vreplgr2vr_w(uint32_t(0xE7C0DC00)); + // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD - surrogate pair complete + __m128i surrogates = __lsx_vadd_w(masked_pair, magic); + // If the high bit is 1 (s32 less than zero), this needs a surrogate pair + __m128i is_pair = __lsx_vslt_w(perm, zero); + // Select either the 4 byte surrogate pair or the 2 byte solo codepoint + // 3 byte: 0xxxxxxx xxxxxxxx|bbbbcccc ccdddddd + // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD + __m128i selected = __lsx_vbitsel_v(composed, surrogates, is_pair); + // Byte swap if necessary + if (!match_system(big_endian)) { + selected = lsx_swap_bytes(selected); + } + // Attempting to shuffle and store would be complex, just scalarize. + uint32_t buffer_tmp[4]; + __lsx_vst(selected, buffer_tmp, 0); + // Test for the top bit of the surrogate mask. Remove due to issue 514 + // const uint32_t SURROGATE_MASK = match_system(big_endian) ? 0x80000000 : + // 0x00800000; + for (size_t i = 0; i < 3; i++) { + // Surrogate + // Used to be if (buffer[i] & SURROGATE_MASK) { + // See discussion above. + // patch for issue https://github.com/simdutf/simdutf/issues/514 + if ((permbuffer[i] & 0xf8000000) == 0xf0000000) { + utf16_output[0] = uint16_t(buffer_tmp[i] >> 16); + utf16_output[1] = uint16_t(buffer_tmp[i] & 0xFFFF); + utf16_output += 2; + } else { + utf16_output[0] = uint16_t(buffer_tmp[i] & 0xFFFF); + utf16_output++; + } } + return consumed; + } else { + // here we know that there is an error but we do not handle errors + return 12; } - buf[sizeof(simd8x64)] = '\0'; - return buf; } +/* end file src/lsx/lsx_convert_utf8_to_utf16.cpp */ +/* begin file src/lsx/lsx_convert_utf8_to_utf32.cpp */ +// Convert up to 12 bytes from utf8 to utf32 using a mask indicating the +// end of the code points. Only the least significant 12 bits of the mask +// are accessed. +// It returns how many bytes were consumed (up to 12). +size_t convert_masked_utf8_to_utf32(const char *input, + uint64_t utf8_end_of_code_point_mask, + char32_t *&utf32_out) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + uint32_t *&utf32_output = reinterpret_cast(utf32_out); + __m128i in = __lsx_vld(reinterpret_cast(input), 0); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xFFF; + // + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + // + // We first try a few fast paths. + if ((utf8_end_of_code_point_mask & 0xffff) == 0xffff) { + // We process in chunks of 16 bytes. + // use fast implementation in src/simdutf/arm64/simd.h + // Ideally the compiler can keep the tables in registers. + simd8 temp{in}; + temp.store_ascii_as_utf32_tbl(utf32_out); + utf32_output += 16; // We wrote 16 32-bit characters. + return 16; // We consumed 16 bytes. + } + __m128i zero = __lsx_vldi(0); + if (input_utf8_end_of_code_point_mask == 0x924) { + // We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte + // UTF-32 code units. Convert to UTF-16 + __m128i composed_utf16 = convert_utf8_3_byte_to_utf16(in); + __m128i utf32_low = __lsx_vilvl_h(zero, composed_utf16); -simdutf_unused static char *format_mask(uint64_t mask) { - static char *buf = reinterpret_cast(malloc(64 + 1)); - for (size_t i = 0; i < 64; i++) { - buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + __lsx_vst(utf32_low, reinterpret_cast(utf32_output), 0); + utf32_output += 4; // We wrote 4 32-bit characters. + return 12; // We consumed 12 bytes. } - buf[64] = '\0'; - return buf; -} + // 2 byte sequences occur in short bursts in languages like Greek and Russian. + if (input_utf8_end_of_code_point_mask == 0xaaa) { + // We want to take 6 2-byte UTF-8 code units and turn them into 6 4-byte + // UTF-32 code units. Convert to UTF-16 + __m128i composed_utf16 = convert_utf8_2_byte_to_utf16(in); -template -simdutf_really_inline -buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) - : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, - idx{0} {} + __m128i utf32_low = __lsx_vilvl_h(zero, composed_utf16); + __m128i utf32_high = __lsx_vilvh_h(zero, composed_utf16); -template -simdutf_really_inline size_t buf_block_reader::block_index() { - return idx; + __lsx_vst(utf32_low, reinterpret_cast(utf32_output), 0); + __lsx_vst(utf32_high, reinterpret_cast(utf32_output), 16); + utf32_output += 6; + return 12; // We consumed 12 bytes. + } + /// Either no fast path or an unimportant fast path. + + const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][1]; + + if (idx < 64) { + // SIX (6) input code-code units + // Convert to UTF-16 + __m128i composed_utf16 = convert_utf8_1_to_2_byte_to_utf16(in, idx); + __m128i utf32_low = __lsx_vilvl_h(zero, composed_utf16); + __m128i utf32_high = __lsx_vilvh_h(zero, composed_utf16); + + __lsx_vst(utf32_low, reinterpret_cast(utf32_output), 0); + __lsx_vst(utf32_high, reinterpret_cast(utf32_output), 16); + utf32_output += 6; + return consumed; + } else if (idx < 145) { + // FOUR (4) input code-code units + // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing. + __m128i sh = __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx]), + 0); + // Shuffle + // 1 byte: 00000000 00000000 0ccccccc + // 2 byte: 00000000 110bbbbb 10cccccc + // 3 byte: 1110aaaa 10bbbbbb 10cccccc + sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); + __m128i perm = __lsx_vshuf_b(zero, in, sh); + // Split + // 00000000 00000000 0ccccccc + __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_w(0x7F)); // 6 or 7 bits + // Note: unmasked + // xxxxxxxx aaaaxxxx xxxxxxxx + __m128i high = + __lsx_vsrli_w(__lsx_vand_v(perm, __lsx_vldi(0xf)), 4); // 4 bits + // Use 16 bit bic instead of and. + // The top bits will be corrected later in the bsl + // 00000000 10bbbbbb 00000000 + __m128i middle = + __lsx_vand_v(perm, __lsx_vldi(-1758 /*0x0000FF00*/)); // 5 or 6 bits + // Combine low and middle with shift right accumulate + // 00000000 00xxbbbb bbcccccc + __m128i lowmid = __lsx_vor_v(ascii, __lsx_vsrli_w(middle, 2)); + // Insert top 4 bits from high byte with bitwise select + // 00000000 aaaabbbb bbcccccc + __m128i composed = + __lsx_vbitsel_v(lowmid, high, __lsx_vldi(-3600 /*0x0000F000*/)); + __lsx_vst(composed, utf32_output, 0); + utf32_output += 4; // We wrote 4 32-bit characters. + return consumed; + } else if (idx < 209) { + // THREE (3) input code-code units + if (input_utf8_end_of_code_point_mask == 0x888) { + // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte + // UTF-32 code units. This uses the same method as the fixed 3 byte + // version, reversing and shift left insert. However, there is no need for + // a shuffle mask now, just rev16 and rev32. + // + // This version does not use the LUT, but 4 byte sequences are less common + // and the overhead of the extra memory access is less important than the + // early branch overhead in shorter sequences, so it comes last. + + // Swap pairs of bytes + // 10dddddd|10cccccc|10bbbbbb|11110aaa + // 10cccccc 10dddddd|11110aaa 10bbbbbb + __m128i swap = lsx_swap_bytes(in); + // Shift left and insert + // xxxxcccc ccdddddd|xxxxxxxa aabbbbbb + __m128i merge1 = __lsx_vbitsel_v(__lsx_vsrli_h(swap, 2), swap, + __lsx_vrepli_h(0x3f /*0x003F*/)); + // Shift insert again + // xxxxxxxx xxxaaabb bbbbcccc ccdddddd + __m128i merge2 = + __lsx_vbitsel_v(__lsx_vslli_w(merge1, 12), /* merge1 << 12 */ + __lsx_vsrli_w(merge1, 16), /* merge1 >> 16 */ + __lsx_vldi(-2545)); /*0x00000FFF*/ + // Clear the garbage + // 00000000 000aaabb bbbbcccc ccdddddd + __m128i composed = __lsx_vand_v(merge2, __lsx_vldi(-2273 /*0x1FFFFF*/)); + // Store + __lsx_vst(composed, utf32_output, 0); + utf32_output += 3; // We wrote 3 32-bit characters. + return 12; // We consumed 12 bytes. + } + // Unlike UTF-16, doing a fast codepath doesn't have nearly as much benefit + // due to surrogates no longer being involved. + __m128i sh = __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx]), + 0); + // 1 byte: 00000000 00000000 00000000 0ddddddd + // 2 byte: 00000000 00000000 110ccccc 10dddddd + // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd + // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd + sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); + __m128i perm = __lsx_vshuf_b(zero, in, sh); + + // Ascii + __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_w(0x7F)); + __m128i middle = __lsx_vand_v(perm, __lsx_vldi(-3777 /*0x00003f00*/)); + // 00000000 00000000 0000cccc ccdddddd + __m128i cd = + __lsx_vbitsel_v(__lsx_vsrli_w(middle, 2), ascii, __lsx_vrepli_w(0x3f)); + + __m128i correction = __lsx_vand_v(perm, __lsx_vldi(-3520 /*0x00400000*/)); + __m128i corrected = __lsx_vadd_b(perm, __lsx_vsrli_w(correction, 1)); + // Insert twice + // 00000000 000aaabb bbbbxxxx xxxxxxxx + __m128i corrected_srli2 = + __lsx_vsrli_w(__lsx_vand_v(corrected, __lsx_vrepli_b(0x7)), 2); + __m128i ab = + __lsx_vbitsel_v(corrected_srli2, corrected, __lsx_vrepli_h(0x3f)); + ab = __lsx_vsrli_w(ab, 4); + // 00000000 000aaabb bbbbcccc ccdddddd + __m128i composed = + __lsx_vbitsel_v(ab, cd, __lsx_vldi(-2545 /*0x00000FFF*/)); + // Store + __lsx_vst(composed, utf32_output, 0); + utf32_output += 3; // We wrote 3 32-bit characters. + return consumed; + } else { + // here we know that there is an error but we do not handle errors + return 12; + } } +/* end file src/lsx/lsx_convert_utf8_to_utf32.cpp */ +/* begin file src/lsx/lsx_convert_utf8_to_latin1.cpp */ +size_t convert_masked_utf8_to_latin1(const char *input, + uint64_t utf8_end_of_code_point_mask, + char *&latin1_output) { + // we use an approach where we try to process up to 12 input bytes. + // Why 12 input bytes and not 16? Because we are concerned with the size of + // the lookup tables. Also 12 is nicely divisible by two and three. + // + __m128i in = __lsx_vld(reinterpret_cast(input), 0); + + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; + // Optimization note: our main path below is load-latency dependent. Thus it + // is maybe beneficial to have fast paths that depend on branch prediction but + // have less latency. This results in more instructions but, potentially, also + // higher speeds. + + // We first try a few fast paths. + // The obvious first test is ASCII, which actually consumes the full 16. + if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xFFFF) { + // We process in chunks of 16 bytes + __lsx_vst(in, reinterpret_cast(latin1_output), 0); + latin1_output += 16; // We wrote 16 18-bit characters. + return 16; // We consumed 16 bytes. + } + /// We do not have a fast path available, or the fast path is unimportant, so + /// we fallback. + const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][0]; + + const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][1]; + // this indicates an invalid input: + if (idx >= 64) { + return consumed; + } + // Here we should have (idx < 64), if not, there is a bug in the validation or + // elsewhere. SIX (6) input code-code units this is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. Converts 6 + // 1-2 byte UTF-8 characters to 6 UTF-16 characters. This is a relatively easy + // scenario we process SIX (6) input code-code units. The max length in bytes + // of six code code units spanning between 1 and 2 bytes each is 12 bytes. + __m128i sh = __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx]), + 0); + // Shuffle + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 110aaaaa 10bbbbbb + sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); + __m128i perm = __lsx_vshuf_b(__lsx_vldi(0), in, sh); + // ascii mask + // 1 byte: 11111111 11111111 + // 2 byte: 00000000 00000000 + __m128i ascii_mask = __lsx_vslt_bu(perm, __lsx_vldi(0x80)); + // utf8 mask + // 1 byte: 00000000 00000000 + // 2 byte: 00111111 00111111 + __m128i utf8_mask = __lsx_vand_v(__lsx_vsle_bu(__lsx_vldi(0x80), perm), + __lsx_vldi(0b00111111)); + // mask + // 1 byte: 11111111 11111111 + // 2 byte: 00111111 00111111 + __m128i mask = __lsx_vor_v(utf8_mask, ascii_mask); + + __m128i composed = __lsx_vbitsel_v(__lsx_vsrli_h(perm, 2), perm, mask); + // writing 8 bytes even though we only care about the first 6 bytes. + __m128i latin1_packed = __lsx_vpickev_b(__lsx_vldi(0), composed); -template -simdutf_really_inline bool buf_block_reader::has_full_block() const { - return idx < lenminusstep; + uint64_t buffer[2]; + // __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); + __lsx_vst(latin1_packed, reinterpret_cast(buffer), 0); + std::memcpy(latin1_output, buffer, 6); + latin1_output += 6; // We wrote 6 bytes. + return consumed; } +/* end file src/lsx/lsx_convert_utf8_to_latin1.cpp */ -template -simdutf_really_inline const uint8_t * -buf_block_reader::full_block() const { - return &buf[idx]; +/* begin file src/lsx/lsx_convert_utf16_to_latin1.cpp */ +template +std::pair +lsx_convert_utf16_to_latin1(const char16_t *buf, size_t len, + char *latin1_output) { + const char16_t *end = buf + len; + while (buf + 16 <= end) { + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); + if (!match_system(big_endian)) { + in = lsx_swap_bytes(in); + in1 = lsx_swap_bytes(in1); + } + if (__lsx_bz_v(__lsx_vpickod_b(in1, in))) { + // 1. pack the bytes + __m128i latin1_packed = __lsx_vpickev_b(in1, in); + // 2. store (8 bytes) + __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); + // 3. adjust pointers + buf += 16; + latin1_output += 16; + } else { + return std::make_pair(nullptr, reinterpret_cast(latin1_output)); + } + } // while + return std::make_pair(buf, latin1_output); } -template -simdutf_really_inline size_t -buf_block_reader::get_remainder(uint8_t *dst) const { - if (len == idx) { - return 0; - } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, - STEP_SIZE); // std::memset STEP_SIZE because it is more efficient - // to write out 8 or 16 bytes at once. - std::memcpy(dst, buf + idx, len - idx); - return len - idx; +template +std::pair +lsx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, + char *latin1_output) { + const char16_t *start = buf; + const char16_t *end = buf + len; + while (buf + 16 <= end) { + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); + if (!match_system(big_endian)) { + in = lsx_swap_bytes(in); + in1 = lsx_swap_bytes(in1); + } + if (__lsx_bz_v(__lsx_vpickod_b(in1, in))) { + // 1. pack the bytes + __m128i latin1_packed = __lsx_vpickev_b(in1, in); + // 2. store (8 bytes) + __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); + // 3. adjust pointers + buf += 16; + latin1_output += 16; + } else { + // Let us do a scalar fallback. + for (int k = 0; k < 16; k++) { + uint16_t word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k]) + : buf[k]; + if (word <= 0xff) { + *latin1_output++ = char(word); + } else { + return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), + latin1_output); + } + } + } + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), + latin1_output); } +/* end file src/lsx/lsx_convert_utf16_to_latin1.cpp */ +/* begin file src/lsx/lsx_convert_utf16_to_utf8.cpp */ +/* + The vectorized algorithm works on single SSE register i.e., it + loads eight 16-bit code units. -template -simdutf_really_inline void buf_block_reader::advance() { - idx += STEP_SIZE; -} + We consider three cases: + 1. an input register contains no surrogates and each value + is in range 0x0000 .. 0x07ff. + 2. an input register contains no surrogates and values are + is in range 0x0000 .. 0xffff. + 3. an input register contains surrogates --- i.e. codepoints + can have 16 or 32 bits. -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/buf_block_reader.h */ -/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ -namespace simdutf { -namespace ppc64 { -namespace { -namespace utf8_validation { + Ad 1. -using namespace simd; + When values are less than 0x0800, it means that a 16-bit code unit + can be converted into: 1) single UTF8 byte (when it's an ASCII + char) or 2) two UTF8 bytes. -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + For this case we do only some shuffle to obtain these 2-byte + codes and finally compress the whole SSE register with a single + shuffle. - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, + Ad 2. - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, + When values fit in 16-bit code units, but are above 0x07ff, then + a single word may produce one, two or three UTF8 bytes. - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + We prepare data for all these three cases in two registers. + The first register contains lower two UTF8 bytes (used in all + cases), while the second one contains just the third byte for + the three-UTF8-bytes case. - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} + Finally these two registers are interleaved forming eight-element + array of 32-bit values. The array spans two SSE registers. + The bytes from the registers are compressed using two shuffles. -// -// Return nonzero if there are incomplete multibyte characters at the end of the -// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. -// -simdutf_really_inline simd8 is_incomplete(const simd8 input) { - // If the previous input's last 3 bytes match this, they're too short (they - // ended at EOF): - // ... 1111____ 111_____ 11______ - static const uint8_t max_array[32] = {255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 0b11110000u - 1, - 0b11100000u - 1, - 0b11000000u - 1}; - const simd8 max_value( - &max_array[sizeof(max_array) - sizeof(simd8)]); - return input.gt_bits(max_value); -} + We need 256-entry lookup table to get a compression pattern + and the number of output bytes in the compressed vector register. + Each entry occupies 17 bytes. -struct utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - // The last input we received - simd8 prev_input_block; - // Whether the last input we received was incomplete (used for ASCII fast - // path) - simd8 prev_incomplete; - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } + To summarize: + - We need two 256-entry tables that have 8704 bytes in total. +*/ +/* + Returns a pair: the first unprocessed byte from buf and utf8_output + A scalar routing should carry on the conversion of the tail. +*/ +template +std::pair +lsx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char16_t *end = buf + len; - // The only problem that can happen at EOF is that a multibyte character is - // too short or a byte value too large in the last bytes: check_special_cases - // only checks for bytes too large in the first of two bytes. - simdutf_really_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an - // ASCII block can't possibly finish them. - this->error |= this->prev_incomplete; - } + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 - simdutf_really_inline void check_next_input(const simd8x64 &input) { - if (simdutf_likely(is_ascii(input))) { - this->error |= this->prev_incomplete; - } else { - // you might think that a for-loop would work, but under Visual Studio, it - // is not good enough. - static_assert((simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + __m128i v_07ff = __lsx_vreplgr2vr_h(uint16_t(0x7ff)); + while (buf + 16 + safety_margin <= end) { + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + if (!match_system(big_endian)) { + in = lsx_swap_bytes(in); + } + if (__lsx_bz_v( + __lsx_vslt_hu(__lsx_vrepli_h(0x7F), in))) { // ASCII fast path!!!! + // It is common enough that we have sequences of 16 consecutive ASCII + // characters. + __m128i nextin = __lsx_vld(reinterpret_cast(buf), 16); + if (!match_system(big_endian)) { + nextin = lsx_swap_bytes(nextin); + } + if (__lsx_bz_v(__lsx_vslt_hu(__lsx_vrepli_h(0x7F), nextin))) { + // 1. pack the bytes + // obviously suboptimal. + __m128i utf8_packed = __lsx_vpickev_b(nextin, in); + // 2. store (16 bytes) + __lsx_vst(utf8_packed, utf8_output, 0); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } else { + // 1. pack the bytes + // obviously suboptimal. + __m128i utf8_packed = __lsx_vpickev_b(in, in); + // 2. store (8 bytes) + __lsx_vst(utf8_packed, utf8_output, 0); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + in = nextin; } - this->prev_incomplete = - is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; } - } - // do not forget to call check_eof! - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } + __m128i zero = __lsx_vldi(0); + if (__lsx_bz_v(__lsx_vslt_hu(v_07ff, in))) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + // t0 = [000a|aaaa|bbbb|bb00] + __m128i t0 = __lsx_vslli_h(in, 2); + // t1 = [000a|aaaa|0000|0000] + __m128i t1 = __lsx_vand_v(t0, __lsx_vldi(-2785 /*0x1f00*/)); + // t2 = [0000|0000|00bb|bbbb] + __m128i t2 = __lsx_vand_v(in, __lsx_vrepli_h(0x3f)); + // t3 = [000a|aaaa|00bb|bbbb] + __m128i t3 = __lsx_vor_v(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + __m128i v_c080 = __lsx_vreplgr2vr_h(uint16_t(0xc080)); + __m128i t4 = __lsx_vor_v(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + __m128i one_byte_bytemask = + __lsx_vsle_hu(in, __lsx_vrepli_h(0x7F /*0x007F*/)); + __m128i utf8_unpacked = __lsx_vbitsel_v(t4, in, one_byte_bytemask); + // 3. prepare bitmask for 8-bit lookup + uint32_t m2 = __lsx_vpickve2gr_bu(__lsx_vmskltz_h(one_byte_bytemask), 0); + // 4. pack the bytes + const uint8_t *row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lsx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle = __lsx_vld(row, 1); + __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); + // 5. store bytes + __lsx_vst(utf8_packed, utf8_output, 0); + // 6. adjust pointers + buf += 8; + utf8_output += row[0]; + continue; + } + __m128i surrogates_bytemask = + __lsx_vseq_h(__lsx_vand_v(in, __lsx_vldi(-2568 /*0xF800*/)), + __lsx_vldi(-2600 /*0xD800*/)); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (__lsx_bz_v(surrogates_bytemask)) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes -}; // struct utf8_checker -} // namespace utf8_validation + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. -using utf8_validation::utf8_checker; + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ -/* begin file src/generic/utf8_validation/utf8_validator.h */ -namespace simdutf { -namespace ppc64 { -namespace { -namespace utf8_validation { + We precompute byte 1 for case #3 and -- **conditionally** -- + precompute either byte 1 for case #2 or byte 2 for case #3. Note that + they differ by exactly one bit. -/** - * Validates that the string is actual UTF-8. - */ -template -bool generic_validate_utf8(const uint8_t *input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - return !c.errors(); -} + Finally from these two code units we build proper UTF-8 sequence, + taking into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + __m128i t0 = __lsx_vpickev_b(in, in); + t0 = __lsx_vilvl_b(t0, t0); -bool generic_validate_utf8(const char *input, size_t length) { - return generic_validate_utf8( - reinterpret_cast(input), length); -} + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|00cc|cccc] + __m128i v_3f7f = __lsx_vreplgr2vr_h(uint16_t(0x3F7F)); + __m128i t1 = __lsx_vand_v(t0, v_3f7f); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + __m128i t2 = __lsx_vor_v(t1, __lsx_vldi(-2688 /*0x8000*/)); -/** - * Validates that the string is actual UTF-8 and stops on errors. - */ -template -result generic_validate_utf8_with_errors(const uint8_t *input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - size_t count{0}; - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - if (c.errors()) { - if (count != 0) { - count--; - } // Sometimes the error is only detected in the next chunk - result res = scalar::utf8::rewind_and_validate_with_errors( - reinterpret_cast(input), - reinterpret_cast(input + count), length - count); - res.count += count; - return res; - } - reader.advance(); - count += 64; - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - if (c.errors()) { - if (count != 0) { - count--; - } // Sometimes the error is only detected in the next chunk - result res = scalar::utf8::rewind_and_validate_with_errors( - reinterpret_cast(input), - reinterpret_cast(input) + count, length - count); - res.count += count; - return res; - } else { - return result(error_code::SUCCESS, length); - } -} + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + __m128i s0 = __lsx_vsrli_h(in, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + __m128i s1 = __lsx_vslli_h(in, 2); + // s1: [aabb|bbbb|cccc|cc00] => [00bb|bbbb|0000|0000] + s1 = __lsx_vand_v(s1, __lsx_vldi(-2753 /*0x3F00*/)); -result generic_validate_utf8_with_errors(const char *input, size_t length) { - return generic_validate_utf8_with_errors( - reinterpret_cast(input), length); + // [00bb|bbbb|0000|aaaa] + __m128i s2 = __lsx_vor_v(s0, s1); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + __m128i v_c0e0 = __lsx_vreplgr2vr_h(uint16_t(0xC0E0)); + __m128i s3 = __lsx_vor_v(s2, v_c0e0); + __m128i one_or_two_bytes_bytemask = __lsx_vsle_hu(in, v_07ff); + __m128i m0 = __lsx_vandn_v(one_or_two_bytes_bytemask, + __lsx_vldi(-2752 /*0x4000*/)); + __m128i s4 = __lsx_vxor_v(s3, m0); + + // 4. expand code units 16-bit => 32-bit + __m128i out0 = __lsx_vilvl_h(s4, t2); + __m128i out1 = __lsx_vilvh_h(s4, t2); + + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + __m128i one_byte_bytemask = __lsx_vsle_hu(in, __lsx_vrepli_h(0x7F)); + + __m128i one_or_two_bytes_bytemask_low = + __lsx_vilvl_h(one_or_two_bytes_bytemask, zero); + __m128i one_or_two_bytes_bytemask_high = + __lsx_vilvh_h(one_or_two_bytes_bytemask, zero); + + __m128i one_byte_bytemask_low = + __lsx_vilvl_h(one_byte_bytemask, one_byte_bytemask); + __m128i one_byte_bytemask_high = + __lsx_vilvh_h(one_byte_bytemask, one_byte_bytemask); + + const uint32_t mask0 = __lsx_vpickve2gr_bu( + __lsx_vmskltz_h(__lsx_vor_v(one_or_two_bytes_bytemask_low, + one_byte_bytemask_low)), + 0); + const uint32_t mask1 = __lsx_vpickve2gr_bu( + __lsx_vmskltz_h(__lsx_vor_v(one_or_two_bytes_bytemask_high, + one_byte_bytemask_high)), + 0); + + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + __m128i shuffle0 = __lsx_vld(row0, 1); + __m128i utf8_0 = __lsx_vshuf_b(zero, out0, shuffle0); + + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_1 = __lsx_vshuf_b(zero, out1, shuffle1); + + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; + __lsx_vst(utf8_1, utf8_output, 0); + utf8_output += row1[0]; + + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k]) + : buf[k]; + if ((word & 0xFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xF800) != 0xD800) { + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k + 1]) + : buf[k + 1]; + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(nullptr, + reinterpret_cast(utf8_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + } + } + buf += k; + } + } // while + return std::make_pair(buf, reinterpret_cast(utf8_output)); } -template -bool generic_validate_ascii(const uint8_t *input, size_t length) { - buf_block_reader<64> reader(input, length); - uint8_t blocks[64]{}; - simd::simd8x64 running_or(blocks); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - running_or |= in; - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - running_or |= in; - return running_or.is_ascii(); -} +/* + Returns a pair: a result struct and utf8_output. + If there is an error, the count field of the result is the position of the + error. Otherwise, it is the position of the first unprocessed byte in buf + (even if finished). A scalar routing should carry on the conversion of the + tail if needed. +*/ +template +std::pair +lsx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, + char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char16_t *start = buf; + const char16_t *end = buf + len; -bool generic_validate_ascii(const char *input, size_t length) { - return generic_validate_ascii( - reinterpret_cast(input), length); -} + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + while (buf + 16 + safety_margin <= end) { + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + if (!match_system(big_endian)) { + in = lsx_swap_bytes(in); + } + if (__lsx_bz_v( + __lsx_vslt_hu(__lsx_vrepli_h(0x7F), in))) { // ASCII fast path!!!! + // It is common enough that we have sequences of 16 consecutive ASCII + // characters. + __m128i nextin = __lsx_vld(reinterpret_cast(buf), 16); + if (!match_system(big_endian)) { + nextin = lsx_swap_bytes(nextin); + } + if (__lsx_bz_v(__lsx_vslt_hu(__lsx_vrepli_h(0x7F), nextin))) { + // 1. pack the bytes + // obviously suboptimal. + __m128i utf8_packed = __lsx_vpickev_b(nextin, in); + // 2. store (16 bytes) + __lsx_vst(utf8_packed, utf8_output, 0); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! + } else { + // 1. pack the bytes + // obviously suboptimal. + __m128i utf8_packed = __lsx_vpickev_b(in, in); + // 2. store (8 bytes) + __lsx_vst(utf8_packed, utf8_output, 0); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + in = nextin; + } + } -template -result generic_validate_ascii_with_errors(const uint8_t *input, size_t length) { - buf_block_reader<64> reader(input, length); - size_t count{0}; - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - if (!in.is_ascii()) { - result res = scalar::ascii::validate_with_errors( - reinterpret_cast(input + count), length - count); - return result(res.error, count + res.count); + __m128i v_07ff = __lsx_vreplgr2vr_h(uint16_t(0x7ff)); + __m128i zero = __lsx_vldi(0); + if (__lsx_bz_v(__lsx_vslt_hu(v_07ff, in))) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + // t0 = [000a|aaaa|bbbb|bb00] + __m128i t0 = __lsx_vslli_h(in, 2); + // t1 = [000a|aaaa|0000|0000] + __m128i t1 = __lsx_vand_v(t0, __lsx_vldi(-2785 /*0x1f00*/)); + // t2 = [0000|0000|00bb|bbbb] + __m128i t2 = __lsx_vand_v(in, __lsx_vrepli_h(0x3f)); + // t3 = [000a|aaaa|00bb|bbbb] + __m128i t3 = __lsx_vor_v(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + __m128i v_c080 = __lsx_vreplgr2vr_h(uint16_t(0xc080)); + __m128i t4 = __lsx_vor_v(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + __m128i one_byte_bytemask = + __lsx_vsle_hu(in, __lsx_vrepli_h(0x7F /*0x007F*/)); + __m128i utf8_unpacked = __lsx_vbitsel_v(t4, in, one_byte_bytemask); + // 3. prepare bitmask for 8-bit lookup + uint32_t m2 = __lsx_vpickve2gr_bu(__lsx_vmskltz_h(one_byte_bytemask), 0); + // 4. pack the bytes + const uint8_t *row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lsx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle = __lsx_vld(row, 1); + __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); + // 5. store bytes + __lsx_vst(utf8_packed, utf8_output, 0); + // 6. adjust pointers + buf += 8; + utf8_output += row[0]; + continue; } - reader.advance(); + __m128i surrogates_bytemask = + __lsx_vseq_h(__lsx_vand_v(in, __lsx_vldi(-2568 /*0xF800*/)), + __lsx_vldi(-2600 /*0xD800*/)); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (__lsx_bz_v(surrogates_bytemask)) { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes - count += 64; - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - if (!in.is_ascii()) { - result res = scalar::ascii::validate_with_errors( - reinterpret_cast(input + count), length - count); - return result(res.error, count + res.count); - } else { - return result(error_code::SUCCESS, length); - } -} + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. -result generic_validate_ascii_with_errors(const char *input, size_t length) { - return generic_validate_ascii_with_errors( - reinterpret_cast(input), length); -} + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. -} // namespace utf8_validation -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/utf8_validation/utf8_validator.h */ -// transcoding from UTF-8 to UTF-16 -/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ + We precompute byte 1 for case #3 and -- **conditionally** -- + precompute either byte 1 for case #2 or byte 2 for case #3. Note that + they differ by exactly one bit. -namespace simdutf { -namespace ppc64 { -namespace { -namespace utf8_to_utf16 { -using namespace simd; + Finally from these two code units we build proper UTF-8 sequence, + taking into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + __m128i t0 = __lsx_vpickev_b(in, in); + t0 = __lsx_vilvl_b(t0, t0); -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|00cc|cccc] + __m128i v_3f7f = __lsx_vreplgr2vr_h(uint16_t(0x3F7F)); + __m128i t1 = __lsx_vand_v(t0, v_3f7f); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + __m128i t2 = __lsx_vor_v(t1, __lsx_vldi(-2688)); - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + __m128i s0 = __lsx_vsrli_h(in, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + __m128i s1 = __lsx_vslli_h(in, 2); + // s1: [aabb|bbbb|cccc|cc00] => [00bb|bbbb|0000|0000] + s1 = __lsx_vand_v(s1, __lsx_vldi(-2753 /*0x3F00*/)); - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, + // [00bb|bbbb|0000|aaaa] + __m128i s2 = __lsx_vor_v(s0, s1); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + __m128i v_c0e0 = __lsx_vreplgr2vr_h(uint16_t(0xC0E0)); + __m128i s3 = __lsx_vor_v(s2, v_c0e0); + __m128i one_or_two_bytes_bytemask = __lsx_vsle_hu(in, v_07ff); + __m128i m0 = __lsx_vandn_v(one_or_two_bytes_bytemask, + __lsx_vldi(-2752 /*0x4000*/)); + __m128i s4 = __lsx_vxor_v(s3, m0); - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, + // 4. expand code units 16-bit => 32-bit + __m128i out0 = __lsx_vilvl_h(s4, t2); + __m128i out1 = __lsx_vilvh_h(s4, t2); - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + __m128i one_byte_bytemask = __lsx_vsle_hu(in, __lsx_vrepli_h(0x7F)); + + __m128i one_or_two_bytes_bytemask_low = + __lsx_vilvl_h(one_or_two_bytes_bytemask, zero); + __m128i one_or_two_bytes_bytemask_high = + __lsx_vilvh_h(one_or_two_bytes_bytemask, zero); + + __m128i one_byte_bytemask_low = + __lsx_vilvl_h(one_byte_bytemask, one_byte_bytemask); + __m128i one_byte_bytemask_high = + __lsx_vilvh_h(one_byte_bytemask, one_byte_bytemask); + + const uint32_t mask0 = __lsx_vpickve2gr_bu( + __lsx_vmskltz_h(__lsx_vor_v(one_or_two_bytes_bytemask_low, + one_byte_bytemask_low)), + 0); + const uint32_t mask1 = __lsx_vpickve2gr_bu( + __lsx_vmskltz_h(__lsx_vor_v(one_or_two_bytes_bytemask_high, + one_byte_bytemask_high)), + 0); - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + __m128i shuffle0 = __lsx_vld(row0, 1); + __m128i utf8_0 = __lsx_vshuf_b(zero, out0, shuffle0); -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_1 = __lsx_vshuf_b(zero, out1, shuffle1); - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; + __lsx_vst(utf8_1, utf8_output, 0); + utf8_output += row1[0]; - template - simdutf_really_inline size_t convert(const char *in, size_t size, - char16_t *utf16_output) { - size_t pos = 0; - char16_t *start{utf16_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (utf8_continuation_mask & 1) { - return 0; // error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf16( - in + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = scalar::utf8_to_utf16::convert( - in + pos, size - pos, utf16_output); - if (howmany == 0) { - return 0; + for (; k < forward; k++) { + uint16_t word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k]) + : buf[k]; + if ((word & 0xFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xF800) != 0xD800) { + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k + 1]) + : buf[k + 1]; + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k - 1), + reinterpret_cast(utf8_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf8_output++ = char((value >> 18) | 0b11110000); + *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((value & 0b111111) | 0b10000000); + } } - utf16_output += howmany; + buf += k; } - return utf16_output - start; - } + } // while - template - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char16_t *utf16_output) { - size_t pos = 0; - char16_t *start{utf16_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf8_output)); +} +/* end file src/lsx/lsx_convert_utf16_to_utf8.cpp */ +/* begin file src/lsx/lsx_convert_utf16_to_utf32.cpp */ +template +std::pair +lsx_convert_utf16_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_out) { + uint32_t *utf32_output = reinterpret_cast(utf32_out); + const char16_t *end = buf + len; + + __m128i zero = __lsx_vldi(0); + __m128i v_f800 = __lsx_vldi(-2568); /*0xF800*/ + __m128i v_d800 = __lsx_vldi(-2600); /*0xD800*/ + + while (buf + 8 <= end) { + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + if (!match_system(big_endian)) { + in = lsx_swap_bytes(in); } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (errors() || (utf8_continuation_mask & 1)) { - // rewind_and_convert_with_errors will seek a potential error from - // in+pos onward, with the ability to go back up to pos bytes, and - // read size-pos bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - res.count += pos; - return res; - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf16( - in + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; + + __m128i surrogates_bytemask = + __lsx_vseq_h(__lsx_vand_v(in, v_f800), v_d800); + // It might seem like checking for surrogates_bitmask == 0xc000 could help. + // However, it is likely an uncommon occurrence. + if (__lsx_bz_v(surrogates_bytemask)) { + // case: no surrogate pairs, extend all 16-bit code units to 32-bit code + // units + __lsx_vst(__lsx_vilvl_h(zero, in), utf32_output, 0); + __lsx_vst(__lsx_vilvh_h(zero, in), utf32_output, 16); + utf32_output += 8; + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k]) + : buf[k]; + if ((word & 0xF800) != 0xD800) { + *utf32_output++ = char32_t(word); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k + 1]) + : buf[k + 1]; + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(nullptr, + reinterpret_cast(utf32_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. } + buf += k; } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - res.count += pos; - return res; + } // while + return std::make_pair(buf, reinterpret_cast(utf32_output)); +} + +/* + Returns a pair: a result struct and utf8_output. + If there is an error, the count field of the result is the position of the + error. Otherwise, it is the position of the first unprocessed byte in buf + (even if finished). A scalar routing should carry on the conversion of the + tail if needed. +*/ +template +std::pair +lsx_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, + char32_t *utf32_out) { + uint32_t *utf32_output = reinterpret_cast(utf32_out); + const char16_t *start = buf; + const char16_t *end = buf + len; + + __m128i zero = __lsx_vldi(0); + __m128i v_f800 = __lsx_vldi(-2568); /*0xF800*/ + __m128i v_d800 = __lsx_vldi(-2600); /*0xD800*/ + + while (buf + 8 <= end) { + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + if (!match_system(big_endian)) { + in = lsx_swap_bytes(in); } - if (pos < size) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - utf16_output += res.count; + + __m128i surrogates_bytemask = + __lsx_vseq_h(__lsx_vand_v(in, v_f800), v_d800); + if (__lsx_bz_v(surrogates_bytemask)) { + // case: no surrogate pairs, extend all 16-bit code units to 32-bit code + // units + __lsx_vst(__lsx_vilvl_h(zero, in), utf32_output, 0); + __lsx_vst(__lsx_vilvh_h(zero, in), utf32_output, 16); + utf32_output += 8; + buf += 8; + // surrogate pair(s) in a register + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint16_t word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k]) + : buf[k]; + if ((word & 0xF800) != 0xD800) { + *utf32_output++ = char32_t(word); + } else { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k + 1]) + : buf[k + 1]; + k++; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k - 1), + reinterpret_cast(utf32_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + } } + buf += k; } - return result(error_code::SUCCESS, utf16_output - start); - } + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf32_output)); +} +/* end file src/lsx/lsx_convert_utf16_to_utf32.cpp */ - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } +/* begin file src/lsx/lsx_convert_utf32_to_latin1.cpp */ +std::pair +lsx_convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) { + const char32_t *end = buf + len; + const v16u8 shuf_mask = {0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}; + __m128i v_ff = __lsx_vrepli_w(0xFF); -}; // struct utf8_checker -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ -/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ + while (buf + 16 <= end) { + __m128i in1 = __lsx_vld(reinterpret_cast(buf), 0); + __m128i in2 = __lsx_vld(reinterpret_cast(buf), 16); -namespace simdutf { -namespace ppc64 { -namespace { -namespace utf8_to_utf16 { + __m128i in12 = __lsx_vor_v(in1, in2); + if (__lsx_bz_v(__lsx_vslt_wu(v_ff, in12))) { + // 1. pack the bytes + __m128i latin1_packed = __lsx_vshuf_b(in2, in1, (__m128i)shuf_mask); + // 2. store (8 bytes) + __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); + // 3. adjust pointers + buf += 8; + latin1_output += 8; + } else { + return std::make_pair(nullptr, reinterpret_cast(latin1_output)); + } + } // while + return std::make_pair(buf, latin1_output); +} -using namespace simd; +std::pair +lsx_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) { + const char32_t *start = buf; + const char32_t *end = buf + len; -template -simdutf_warn_unused size_t convert_valid(const char *input, size_t size, - char16_t *utf16_output) noexcept { - // The implementation is not specific to haswell and should be moved to the - // generic directory. - size_t pos = 0; - char16_t *start{utf16_output}; - const size_t safety_margin = 16; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - // this loop could be unrolled further. For example, we could process the - // mask far more than 64 bytes. - simd8x64 in(reinterpret_cast(input + pos)); - if (in.is_ascii()) { - in.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; + const v16u8 shuf_mask = {0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}; + __m128i v_ff = __lsx_vrepli_w(0xFF); + + while (buf + 16 <= end) { + __m128i in1 = __lsx_vld(reinterpret_cast(buf), 0); + __m128i in2 = __lsx_vld(reinterpret_cast(buf), 16); + + __m128i in12 = __lsx_vor_v(in1, in2); + + if (__lsx_bz_v(__lsx_vslt_wu(v_ff, in12))) { + // 1. pack the bytes + __m128i latin1_packed = __lsx_vshuf_b(in2, in1, (__m128i)shuf_mask); + // 2. store (8 bytes) + __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); + // 3. adjust pointers + buf += 8; + latin1_output += 8; } else { - // Slow path. We hope that the compiler will recognize that this is a slow - // path. Anything that is not a continuation mask is a 'leading byte', - // that is, the start of a new code point. - uint64_t utf8_continuation_mask = in.lt(-65 + 1); - // -65 is 0b10111111 in two-complement's, so largest possible continuation - // byte - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - // The *start* of code points is not so useful, rather, we want the *end* - // of code points. - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times when using solely - // the slow/regular path, and at least four times if there are fast paths. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - // - // Thus we may allow convert_masked_utf8_to_utf16 to process - // more bytes at a time under a fast-path mode where 16 bytes - // are consumed at once (e.g., when encountering ASCII). - size_t consumed = convert_masked_utf8_to_utf16( - input + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; + // Let us do a scalar fallback. + for (int k = 0; k < 8; k++) { + uint32_t word = buf[k]; + if (word <= 0xff) { + *latin1_output++ = char(word); + } else { + return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), + latin1_output); + } } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. } - } - utf16_output += scalar::utf8_to_utf16::convert_valid( - input + pos, size - pos, utf16_output); - return utf16_output - start; + } // while + return std::make_pair(result(error_code::SUCCESS, buf - start), + latin1_output); } +/* end file src/lsx/lsx_convert_utf32_to_latin1.cpp */ +/* begin file src/lsx/lsx_convert_utf32_to_utf8.cpp */ +std::pair +lsx_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char32_t *end = buf + len; -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ -// transcoding from UTF-8 to UTF-32 -/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ + __m128i v_c080 = __lsx_vreplgr2vr_h(uint16_t(0xC080)); + __m128i v_07ff = __lsx_vreplgr2vr_h(uint16_t(0x7FF)); + __m128i v_dfff = __lsx_vreplgr2vr_h(uint16_t(0xDFFF)); + __m128i v_d800 = __lsx_vldi(-2600); /*0xD800*/ + __m128i forbidden_bytemask = __lsx_vldi(0x0); -namespace simdutf { -namespace ppc64 { -namespace { -namespace utf8_to_utf32 { -using namespace simd; + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + while (buf + 16 + safety_margin < end) { + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + __m128i nextin = __lsx_vld(reinterpret_cast(buf), 16); - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, + // Check if no bits set above 16th + if (__lsx_bz_v(__lsx_vpickod_h(in, nextin))) { + // Pack UTF-32 to UTF-16 safely (without surrogate pairs) + // Apply UTF-16 => UTF-8 routine (lsx_convert_utf16_to_utf8.cpp) + __m128i utf16_packed = __lsx_vpickev_h(nextin, in); - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, + if (__lsx_bz_v(__lsx_vslt_hu(__lsx_vrepli_h(0x7F), + utf16_packed))) { // ASCII fast path!!!! + // 1. pack the bytes + // obviously suboptimal. + __m128i utf8_packed = __lsx_vpickev_b(utf16_packed, utf16_packed); + // 2. store (8 bytes) + __lsx_vst(utf8_packed, utf8_output, 0); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + continue; // we are done for this round! + } + __m128i zero = __lsx_vldi(0); + if (__lsx_bz_v(__lsx_vslt_hu(v_07ff, utf16_packed))) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, + // t0 = [000a|aaaa|bbbb|bb00] + const __m128i t0 = __lsx_vslli_h(utf16_packed, 2); + // t1 = [000a|aaaa|0000|0000] + const __m128i t1 = __lsx_vand_v(t0, __lsx_vldi(-2785 /*0x1f00*/)); + // t2 = [0000|0000|00bb|bbbb] + const __m128i t2 = __lsx_vand_v(utf16_packed, __lsx_vrepli_h(0x3f)); + // t3 = [000a|aaaa|00bb|bbbb] + const __m128i t3 = __lsx_vor_v(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m128i t4 = __lsx_vor_v(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + __m128i one_byte_bytemask = + __lsx_vsle_hu(utf16_packed, __lsx_vrepli_h(0x7F /*0x007F*/)); + __m128i utf8_unpacked = + __lsx_vbitsel_v(t4, utf16_packed, one_byte_bytemask); + // 3. prepare bitmask for 8-bit lookup + uint32_t m2 = + __lsx_vpickve2gr_bu(__lsx_vmskltz_h(one_byte_bytemask), 0); + // 4. pack the bytes + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lsx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle = __lsx_vld(row, 1); + __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); + // 5. store bytes + __lsx_vst(utf8_packed, utf8_output, 0); - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + // 6. adjust pointers + buf += 8; + utf8_output += row[0]; + continue; + } else { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + forbidden_bytemask = __lsx_vor_v( + __lsx_vand_v( + __lsx_vsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lsx_vsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + forbidden_bytemask); + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - single + UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three + UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + __m128i t0 = __lsx_vpickev_b(utf16_packed, utf16_packed); + t0 = __lsx_vilvl_b(t0, t0); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + __m128i v_3f7f = __lsx_vreplgr2vr_h(uint16_t(0x3F7F)); + __m128i t1 = __lsx_vand_v(t0, v_3f7f); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + __m128i t2 = __lsx_vor_v(t1, __lsx_vldi(-2688 /*0x8000*/)); - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + __m128i s0 = __lsx_vsrli_h(utf16_packed, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + __m128i s1 = __lsx_vslli_h(utf16_packed, 2); + // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] + s1 = __lsx_vand_v(s1, __lsx_vldi(-2753 /*0x3F00*/)); + // [00bb|bbbb|0000|aaaa] + __m128i s2 = __lsx_vor_v(s0, s1); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + __m128i v_c0e0 = __lsx_vreplgr2vr_h(uint16_t(0xC0E0)); + __m128i s3 = __lsx_vor_v(s2, v_c0e0); + // __m128i v_07ff = vmovq_n_u16((uint16_t)0x07FF); + __m128i one_or_two_bytes_bytemask = __lsx_vsle_hu(utf16_packed, v_07ff); + __m128i m0 = __lsx_vandn_v(one_or_two_bytes_bytemask, + __lsx_vldi(-2752 /*0x4000*/)); + __m128i s4 = __lsx_vxor_v(s3, m0); -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; + // 4. expand code units 16-bit => 32-bit + __m128i out0 = __lsx_vilvl_h(s4, t2); + __m128i out1 = __lsx_vilvh_h(s4, t2); - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + __m128i one_byte_bytemask = + __lsx_vsle_hu(utf16_packed, __lsx_vrepli_h(0x7F)); + + __m128i one_or_two_bytes_bytemask_u16_to_u32_low = + __lsx_vilvl_h(one_or_two_bytes_bytemask, zero); + __m128i one_or_two_bytes_bytemask_u16_to_u32_high = + __lsx_vilvh_h(one_or_two_bytes_bytemask, zero); + + __m128i one_byte_bytemask_u16_to_u32_low = + __lsx_vilvl_h(one_byte_bytemask, one_byte_bytemask); + __m128i one_byte_bytemask_u16_to_u32_high = + __lsx_vilvh_h(one_byte_bytemask, one_byte_bytemask); + + const uint32_t mask0 = + __lsx_vpickve2gr_bu(__lsx_vmskltz_h(__lsx_vor_v( + one_or_two_bytes_bytemask_u16_to_u32_low, + one_byte_bytemask_u16_to_u32_low)), + 0); + const uint32_t mask1 = + __lsx_vpickve2gr_bu(__lsx_vmskltz_h(__lsx_vor_v( + one_or_two_bytes_bytemask_u16_to_u32_high, + one_byte_bytemask_u16_to_u32_high)), + 0); - simdutf_really_inline size_t convert(const char *in, size_t size, - char32_t *utf32_output) { - size_t pos = 0; - char32_t *start{utf32_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 words when calling convert_masked_utf8_to_utf32. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 16 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the fourth - // last leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (utf8_continuation_mask & 1) { - return 0; // we have an error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf32( - in + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + __m128i shuffle0 = __lsx_vld(row0, 1); + __m128i utf8_0 = __lsx_vshuf_b(zero, out0, shuffle0); + + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_1 = __lsx_vshuf_b(zero, out1, shuffle1); + + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; + __lsx_vst(utf8_1, utf8_output, 0); + utf8_output += row1[0]; + + buf += 8; } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = - scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output); - if (howmany == 0) { - return 0; + // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> + // will produce four UTF-8 bytes. + } else { + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + if (word > 0x10FFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } } - utf32_output += howmany; + buf += k; } - return utf32_output - start; + } // while + + // check for invalid input + if (__lsx_bnz_v(forbidden_bytemask)) { + return std::make_pair(nullptr, reinterpret_cast(utf8_output)); } + return std::make_pair(buf, reinterpret_cast(utf8_output)); +} - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char32_t *utf32_output) { - size_t pos = 0; - char32_t *start{utf32_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the fourth - // last leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; +std::pair +lsx_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, + char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const char32_t *start = buf; + const char32_t *end = buf + len; + + __m128i v_c080 = __lsx_vreplgr2vr_h(uint16_t(0xC080)); + __m128i v_07ff = __lsx_vreplgr2vr_h(uint16_t(0x7FF)); + __m128i v_dfff = __lsx_vreplgr2vr_h(uint16_t(0xDFFF)); + __m128i v_d800 = __lsx_vldi(-2600); /*0xD800*/ + __m128i forbidden_bytemask = __lsx_vldi(0x0); + const size_t safety_margin = + 12; // to avoid overruns, see issue + // https://github.com/simdutf/simdutf/issues/92 + + while (buf + 16 + safety_margin < end) { + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + __m128i nextin = __lsx_vld(reinterpret_cast(buf), 16); + + // Check if no bits set above 16th + if (__lsx_bz_v(__lsx_vpickod_h(in, nextin))) { + // Pack UTF-32 to UTF-16 safely (without surrogate pairs) + // Apply UTF-16 => UTF-8 routine (lsx_convert_utf16_to_utf8.cpp) + __m128i utf16_packed = __lsx_vpickev_h(nextin, in); + + if (__lsx_bz_v(__lsx_vslt_hu(__lsx_vrepli_h(0x7F), + utf16_packed))) { // ASCII fast path!!!! + // 1. pack the bytes + // obviously suboptimal. + __m128i utf8_packed = __lsx_vpickev_b(utf16_packed, utf16_packed); + // 2. store (8 bytes) + __lsx_vst(utf8_packed, utf8_output, 0); + // 3. adjust pointers + buf += 8; + utf8_output += 8; + continue; // we are done for this round! + } + __m128i zero = __lsx_vldi(0); + if (__lsx_bz_v(__lsx_vslt_hu(v_07ff, utf16_packed))) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 + + // t0 = [000a|aaaa|bbbb|bb00] + const __m128i t0 = __lsx_vslli_h(utf16_packed, 2); + // t1 = [000a|aaaa|0000|0000] + const __m128i t1 = __lsx_vand_v(t0, __lsx_vldi(-2785 /*0x1f00*/)); + // t2 = [0000|0000|00bb|bbbb] + const __m128i t2 = __lsx_vand_v(utf16_packed, __lsx_vrepli_h(0x3f)); + // t3 = [000a|aaaa|00bb|bbbb] + const __m128i t3 = __lsx_vor_v(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m128i t4 = __lsx_vor_v(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + __m128i one_byte_bytemask = + __lsx_vsle_hu(utf16_packed, __lsx_vrepli_h(0x7F /*0x007F*/)); + __m128i utf8_unpacked = + __lsx_vbitsel_v(t4, utf16_packed, one_byte_bytemask); + // 3. prepare bitmask for 8-bit lookup + uint32_t m2 = + __lsx_vpickve2gr_bu(__lsx_vmskltz_h(one_byte_bytemask), 0); + // 4. pack the bytes + const uint8_t *row = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lsx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle = __lsx_vld(row, 1); + __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); + // 5. store bytes + __lsx_vst(utf8_packed, utf8_output, 0); + + // 6. adjust pointers + buf += 8; + utf8_output += row[0]; + continue; } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (errors() || (utf8_continuation_mask & 1)) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - res.count += pos; - return res; - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf32( - in + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + forbidden_bytemask = __lsx_vor_v( + __lsx_vand_v( + __lsx_vsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lsx_vsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + forbidden_bytemask); + if (__lsx_bnz_v(forbidden_bytemask)) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + reinterpret_cast(utf8_output)); } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - res.count += pos; - return res; - } - if (pos < size) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - utf32_output += res.count; - } - } - return result(error_code::SUCCESS, utf32_output - start); - } + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - single + UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two + UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three + UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- precompute + either byte 1 for case #2 or byte 2 for case #3. Note that they + differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, taking + into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + __m128i t0 = __lsx_vpickev_b(utf16_packed, utf16_packed); + t0 = __lsx_vilvl_b(t0, t0); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + __m128i v_3f7f = __lsx_vreplgr2vr_h(uint16_t(0x3F7F)); + __m128i t1 = __lsx_vand_v(t0, v_3f7f); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + __m128i t2 = __lsx_vor_v(t1, __lsx_vldi(-2688 /*0x8000*/)); - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + __m128i s0 = __lsx_vsrli_h(utf16_packed, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + __m128i s1 = __lsx_vslli_h(utf16_packed, 2); + // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] + s1 = __lsx_vand_v(s1, __lsx_vldi(-2753 /*0x3F00*/)); + // [00bb|bbbb|0000|aaaa] + __m128i s2 = __lsx_vor_v(s0, s1); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + __m128i v_c0e0 = __lsx_vreplgr2vr_h(uint16_t(0xC0E0)); + __m128i s3 = __lsx_vor_v(s2, v_c0e0); + // __m128i v_07ff = vmovq_n_u16((uint16_t)0x07FF); + __m128i one_or_two_bytes_bytemask = __lsx_vsle_hu(utf16_packed, v_07ff); + __m128i m0 = __lsx_vandn_v(one_or_two_bytes_bytemask, + __lsx_vldi(-2752 /*0x4000*/)); + __m128i s4 = __lsx_vxor_v(s3, m0); -}; // struct utf8_checker -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ -/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ + // 4. expand code units 16-bit => 32-bit + __m128i out0 = __lsx_vilvl_h(s4, t2); + __m128i out1 = __lsx_vilvh_h(s4, t2); -namespace simdutf { -namespace ppc64 { -namespace { -namespace utf8_to_utf32 { + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + __m128i one_byte_bytemask = + __lsx_vsle_hu(utf16_packed, __lsx_vrepli_h(0x7F)); + + __m128i one_or_two_bytes_bytemask_u16_to_u32_low = + __lsx_vilvl_h(one_or_two_bytes_bytemask, zero); + __m128i one_or_two_bytes_bytemask_u16_to_u32_high = + __lsx_vilvh_h(one_or_two_bytes_bytemask, zero); + + __m128i one_byte_bytemask_u16_to_u32_low = + __lsx_vilvl_h(one_byte_bytemask, one_byte_bytemask); + __m128i one_byte_bytemask_u16_to_u32_high = + __lsx_vilvh_h(one_byte_bytemask, one_byte_bytemask); + + const uint32_t mask0 = + __lsx_vpickve2gr_bu(__lsx_vmskltz_h(__lsx_vor_v( + one_or_two_bytes_bytemask_u16_to_u32_low, + one_byte_bytemask_u16_to_u32_low)), + 0); + const uint32_t mask1 = + __lsx_vpickve2gr_bu(__lsx_vmskltz_h(__lsx_vor_v( + one_or_two_bytes_bytemask_u16_to_u32_high, + one_byte_bytemask_u16_to_u32_high)), + 0); -using namespace simd; + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; + __m128i shuffle0 = __lsx_vld(row0, 1); + __m128i utf8_0 = __lsx_vshuf_b(zero, out0, shuffle0); -simdutf_warn_unused size_t convert_valid(const char *input, size_t size, - char32_t *utf32_output) noexcept { - size_t pos = 0; - char32_t *start{utf32_output}; - const size_t safety_margin = 16; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 in(reinterpret_cast(input + pos)); - if (in.is_ascii()) { - in.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_1 = __lsx_vshuf_b(zero, out1, shuffle1); + + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; + __lsx_vst(utf8_1, utf8_output, 0); + utf8_output += row1[0]; + + buf += 8; + } + // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> + // will produce four UTF-8 bytes. } else { - // -65 is 0b10111111 in two-complement's, so largest possible continuation - // byte - uint64_t utf8_continuation_mask = in.lt(-65 + 1); - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - size_t max_starting_point = (pos + 64) - 12; - while (pos < max_starting_point) { - size_t consumed = convert_masked_utf8_to_utf32( - input + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. + size_t forward = 15; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFFFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k), + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + if (word > 0x10FFFF) { + return std::make_pair( + result(error_code::TOO_LARGE, buf - start + k), + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } } + buf += k; } - } - utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, - utf32_output); - return utf32_output - start; + } // while + + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf8_output)); } +/* end file src/lsx/lsx_convert_utf32_to_utf8.cpp */ +/* begin file src/lsx/lsx_convert_utf32_to_utf16.cpp */ +template +std::pair +lsx_convert_utf32_to_utf16(const char32_t *buf, size_t len, + char16_t *utf16_out) { + uint16_t *utf16_output = reinterpret_cast(utf16_out); + const char32_t *end = buf + len; -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ -// other functions -/* begin file src/generic/utf16.h */ -namespace simdutf { -namespace ppc64 { -namespace { -namespace utf16 { + __m128i forbidden_bytemask = __lsx_vrepli_h(0); + __m128i v_d800 = __lsx_vldi(-2600); /*0xD800*/ + __m128i v_dfff = __lsx_vreplgr2vr_h(uint16_t(0xdfff)); + while (buf + 8 <= end) { + __m128i in0 = __lsx_vld(reinterpret_cast(buf), 0); + __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); -template -simdutf_really_inline size_t count_code_points(const char16_t *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - for (; pos < size / 32 * 32; pos += 32) { - simd16x32 input(reinterpret_cast(in + pos)); - if (!match_system(big_endian)) { - input.swap_bytes(); - } - uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF); - count += count_ones(not_pair) / 2; - } - return count + - scalar::utf16::count_code_points(in + pos, size - pos); -} + // Check if no bits set above 16th + if (__lsx_bz_v(__lsx_vpickod_h(in1, in0))) { + __m128i utf16_packed = __lsx_vpickev_h(in1, in0); + forbidden_bytemask = __lsx_vor_v( + __lsx_vand_v( + __lsx_vsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lsx_vsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + forbidden_bytemask); -template -simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - // This algorithm could no doubt be improved! - for (; pos < size / 32 * 32; pos += 32) { - simd16x32 input(reinterpret_cast(in + pos)); - if (!match_system(big_endian)) { - input.swap_bytes(); + if (!match_system(big_endian)) { + utf16_packed = lsx_swap_bytes(utf16_packed); + } + __lsx_vst(utf16_packed, utf16_output, 0); + utf16_output += 8; + buf += 8; + } else { + size_t forward = 3; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf16_output)); + } + *utf16_output++ = !match_system(big_endian) + ? char16_t(word >> 8 | word << 8) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf16_output)); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if (!match_system(big_endian)) { + high_surrogate = + uint16_t(high_surrogate >> 8 | high_surrogate << 8); + low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + } + buf += k; } - uint64_t ascii_mask = input.lteq(0x7F); - uint64_t twobyte_mask = input.lteq(0x7FF); - uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF); + } - size_t ascii_count = count_ones(ascii_mask) / 2; - size_t twobyte_count = count_ones(twobyte_mask & ~ascii_mask) / 2; - size_t threebyte_count = count_ones(not_pair_mask & ~twobyte_mask) / 2; - size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2; - count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + - ascii_count; + // check for invalid input + if (__lsx_bnz_v(forbidden_bytemask)) { + return std::make_pair(nullptr, reinterpret_cast(utf16_output)); } - return count + scalar::utf16::utf8_length_from_utf16(in + pos, - size - pos); + return std::make_pair(buf, reinterpret_cast(utf16_output)); } template -simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, - size_t size) { - return count_code_points(in, size); -} +std::pair +lsx_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, + char16_t *utf16_out) { + uint16_t *utf16_output = reinterpret_cast(utf16_out); + const char32_t *start = buf; + const char32_t *end = buf + len; -simdutf_really_inline void -change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { - size_t pos = 0; + __m128i forbidden_bytemask = __lsx_vrepli_h(0); + __m128i v_d800 = __lsx_vldi(-2600); /*0xD800*/ + __m128i v_dfff = __lsx_vreplgr2vr_h(uint16_t(0xdfff)); - while (pos < size / 32 * 32) { - simd16x32 input(reinterpret_cast(in + pos)); - input.swap_bytes(); - input.store(reinterpret_cast(output)); - pos += 32; - output += 32; + while (buf + 8 <= end) { + __m128i in0 = __lsx_vld(reinterpret_cast(buf), 0); + __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); + // Check if no bits set above 16th + if (__lsx_bz_v(__lsx_vpickod_h(in1, in0))) { + __m128i utf16_packed = __lsx_vpickev_h(in1, in0); + + forbidden_bytemask = __lsx_vor_v( + __lsx_vand_v( + __lsx_vsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lsx_vsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + forbidden_bytemask); + if (__lsx_bnz_v(forbidden_bytemask)) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + reinterpret_cast(utf16_output)); + } + + if (!match_system(big_endian)) { + utf16_packed = lsx_swap_bytes(utf16_packed); + } + + __lsx_vst(utf16_packed, utf16_output, 0); + utf16_output += 8; + buf += 8; + } else { + size_t forward = 3; + size_t k = 0; + if (size_t(end - buf) < forward + 1) { + forward = size_t(end - buf - 1); + } + for (; k < forward; k++) { + uint32_t word = buf[k]; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair( + result(error_code::SURROGATE, buf - start + k), + reinterpret_cast(utf16_output)); + } + *utf16_output++ = !match_system(big_endian) + ? char16_t(word >> 8 | word << 8) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return std::make_pair( + result(error_code::TOO_LARGE, buf - start + k), + reinterpret_cast(utf16_output)); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if (!match_system(big_endian)) { + high_surrogate = + uint16_t(high_surrogate >> 8 | high_surrogate << 8); + low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + } + buf += k; + } } - scalar::utf16::change_endianness_utf16(in + pos, size - pos, output); + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf16_output)); } +/* end file src/lsx/lsx_convert_utf32_to_utf16.cpp */ +/* begin file src/lsx/lsx_base64.cpp */ +/** + * References and further reading: + * + * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the + * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. + * https://arxiv.org/abs/1910.05109 + * + * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 + * Instructions, ACM Transactions on the Web 12 (3), 2018. + * https://arxiv.org/abs/1704.00605 + * + * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. + * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, + * Request for Comments: 4648. + * + * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. + * http://www.alfredklomp.com/programming/sse-base64/. (2014). + * + * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD + * acceleration. https://github.com/aklomp/base64. (2014). + * + * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). + * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ + * + * Nick Kopp. 2013. Base64 Encoding on a GPU. + * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). + */ -} // namespace utf16 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/utf16.h */ -/* begin file src/generic/utf8.h */ +template +size_t encode_base64(char *dst, const char *src, size_t srclen, + base64_options options) { + // credit: Wojciech Muła + // SSE (lookup: pshufb improved unrolled) + const uint8_t *input = (const uint8_t *)src; + static const char *lookup_tbl = + isbase64url + ? "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" + : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + uint8_t *out = (uint8_t *)dst; -namespace simdutf { -namespace ppc64 { -namespace { -namespace utf8 { + v16u8 shuf; + __m128i v_fc0fc00, v_3f03f0, shift_r, shift_l, base64_tbl0, base64_tbl1, + base64_tbl2, base64_tbl3; + if (srclen >= 16) { + shuf = v16u8{1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10}; + v_fc0fc00 = __lsx_vreplgr2vr_w(uint32_t(0x0fc0fc00)); + v_3f03f0 = __lsx_vreplgr2vr_w(uint32_t(0x003f03f0)); + shift_r = __lsx_vreplgr2vr_w(uint32_t(0x0006000a)); + shift_l = __lsx_vreplgr2vr_w(uint32_t(0x00080004)); + base64_tbl0 = __lsx_vld(lookup_tbl, 0); + base64_tbl1 = __lsx_vld(lookup_tbl, 16); + base64_tbl2 = __lsx_vld(lookup_tbl, 32); + base64_tbl3 = __lsx_vld(lookup_tbl, 48); + } -using namespace simd; + size_t i = 0; + for (; i + 52 <= srclen; i += 48) { + __m128i in0 = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 0); + __m128i in1 = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 1); + __m128i in2 = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 2); + __m128i in3 = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 3); + + in0 = __lsx_vshuf_b(in0, in0, (__m128i)shuf); + in1 = __lsx_vshuf_b(in1, in1, (__m128i)shuf); + in2 = __lsx_vshuf_b(in2, in2, (__m128i)shuf); + in3 = __lsx_vshuf_b(in3, in3, (__m128i)shuf); + + __m128i t0_0 = __lsx_vand_v(in0, v_fc0fc00); + __m128i t0_1 = __lsx_vand_v(in1, v_fc0fc00); + __m128i t0_2 = __lsx_vand_v(in2, v_fc0fc00); + __m128i t0_3 = __lsx_vand_v(in3, v_fc0fc00); + + __m128i t1_0 = __lsx_vsrl_h(t0_0, shift_r); + __m128i t1_1 = __lsx_vsrl_h(t0_1, shift_r); + __m128i t1_2 = __lsx_vsrl_h(t0_2, shift_r); + __m128i t1_3 = __lsx_vsrl_h(t0_3, shift_r); + + __m128i t2_0 = __lsx_vand_v(in0, v_3f03f0); + __m128i t2_1 = __lsx_vand_v(in1, v_3f03f0); + __m128i t2_2 = __lsx_vand_v(in2, v_3f03f0); + __m128i t2_3 = __lsx_vand_v(in3, v_3f03f0); + + __m128i t3_0 = __lsx_vsll_h(t2_0, shift_l); + __m128i t3_1 = __lsx_vsll_h(t2_1, shift_l); + __m128i t3_2 = __lsx_vsll_h(t2_2, shift_l); + __m128i t3_3 = __lsx_vsll_h(t2_3, shift_l); + + __m128i input0 = __lsx_vor_v(t1_0, t3_0); + __m128i input0_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, input0); + __m128i input0_shuf1 = __lsx_vshuf_b(base64_tbl3, base64_tbl2, + __lsx_vsub_b(input0, __lsx_vldi(32))); + __m128i input0_mask = __lsx_vslei_bu(input0, 31); + __m128i input0_result = + __lsx_vbitsel_v(input0_shuf1, input0_shuf0, input0_mask); + __lsx_vst(input0_result, reinterpret_cast<__m128i *>(out), 0); + out += 16; -simdutf_really_inline size_t count_code_points(const char *in, size_t size) { - size_t pos = 0; - size_t count = 0; - for (; pos + 64 <= size; pos += 64) { - simd8x64 input(reinterpret_cast(in + pos)); - uint64_t utf8_continuation_mask = input.gt(-65); - count += count_ones(utf8_continuation_mask); + __m128i input1 = __lsx_vor_v(t1_1, t3_1); + __m128i input1_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, input1); + __m128i input1_shuf1 = __lsx_vshuf_b(base64_tbl3, base64_tbl2, + __lsx_vsub_b(input1, __lsx_vldi(32))); + __m128i input1_mask = __lsx_vslei_bu(input1, 31); + __m128i input1_result = + __lsx_vbitsel_v(input1_shuf1, input1_shuf0, input1_mask); + __lsx_vst(input1_result, reinterpret_cast<__m128i *>(out), 0); + out += 16; + + __m128i input2 = __lsx_vor_v(t1_2, t3_2); + __m128i input2_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, input2); + __m128i input2_shuf1 = __lsx_vshuf_b(base64_tbl3, base64_tbl2, + __lsx_vsub_b(input2, __lsx_vldi(32))); + __m128i input2_mask = __lsx_vslei_bu(input2, 31); + __m128i input2_result = + __lsx_vbitsel_v(input2_shuf1, input2_shuf0, input2_mask); + __lsx_vst(input2_result, reinterpret_cast<__m128i *>(out), 0); + out += 16; + + __m128i input3 = __lsx_vor_v(t1_3, t3_3); + __m128i input3_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, input3); + __m128i input3_shuf1 = __lsx_vshuf_b(base64_tbl3, base64_tbl2, + __lsx_vsub_b(input3, __lsx_vldi(32))); + __m128i input3_mask = __lsx_vslei_bu(input3, 31); + __m128i input3_result = + __lsx_vbitsel_v(input3_shuf1, input3_shuf0, input3_mask); + __lsx_vst(input3_result, reinterpret_cast<__m128i *>(out), 0); + out += 16; } - return count + scalar::utf8::count_code_points(in + pos, size - pos); -} + for (; i + 16 <= srclen; i += 12) { + + __m128i in = __lsx_vld(reinterpret_cast(input + i), 0); + + // bytes from groups A, B and C are needed in separate 32-bit lanes + // in = [DDDD|CCCC|BBBB|AAAA] + // + // an input triplet has layout + // [????????|ccdddddd|bbbbcccc|aaaaaabb] + // byte 3 byte 2 byte 1 byte 0 -- byte 3 comes from the next + // triplet + // + // shuffling changes the order of bytes: 1, 0, 2, 1 + // [bbbbcccc|ccdddddd|aaaaaabb|bbbbcccc] + // ^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^ + // processed bits + in = __lsx_vshuf_b(in, in, (__m128i)shuf); + + // unpacking + // t0 = [0000cccc|cc000000|aaaaaa00|00000000] + __m128i t0 = __lsx_vand_v(in, v_fc0fc00); + // t1 = [00000000|00cccccc|00000000|00aaaaaa] + // ((c >> 6), (a >> 10)) + __m128i t1 = __lsx_vsrl_h(t0, shift_r); -simdutf_really_inline size_t utf16_length_from_utf8(const char *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - // This algorithm could no doubt be improved! - for (; pos + 64 <= size; pos += 64) { - simd8x64 input(reinterpret_cast(in + pos)); - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - // We count one word for anything that is not a continuation (so - // leading bytes). - count += 64 - count_ones(utf8_continuation_mask); - int64_t utf8_4byte = input.gteq_unsigned(240); - count += count_ones(utf8_4byte); - } - return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); -} -} // namespace utf8 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/utf8.h */ + // t2 = [00000000|00dddddd|000000bb|bbbb0000] + __m128i t2 = __lsx_vand_v(in, v_3f03f0); + // t3 = [00dddddd|00000000|00bbbbbb|00000000] + // ((d << 8), (b << 4)) + __m128i t3 = __lsx_vsll_h(t2, shift_l); -// -// Implementation-specific overrides -// -namespace simdutf { -namespace ppc64 { + // res = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] = t1 | t3 + __m128i indices = __lsx_vor_v(t1, t3); -simdutf_warn_unused int -implementation::detect_encodings(const char *input, - size_t length) const noexcept { - // If there is a BOM, then we trust it. - auto bom_encoding = simdutf::BOM::check_bom(input, length); - if (bom_encoding != encoding_type::unspecified) { - return bom_encoding; - } - // todo: reimplement as a one-pass algorithm. - int out = 0; - if (validate_utf8(input, length)) { - out |= encoding_type::UTF8; - } - if ((length % 2) == 0) { - if (validate_utf16(reinterpret_cast(input), length / 2)) { - out |= encoding_type::UTF16_LE; - } - } - if ((length % 4) == 0) { - if (validate_utf32(reinterpret_cast(input), length / 4)) { - out |= encoding_type::UTF32_LE; - } - } + __m128i indices_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, indices); + __m128i indices_shuf1 = __lsx_vshuf_b( + base64_tbl3, base64_tbl2, __lsx_vsub_b(indices, __lsx_vldi(32))); + __m128i indices_mask = __lsx_vslei_bu(indices, 31); + __m128i indices_result = + __lsx_vbitsel_v(indices_shuf1, indices_shuf0, indices_mask); - return out; -} + __lsx_vst(indices_result, reinterpret_cast<__m128i *>(out), 0); + out += 16; + } -simdutf_warn_unused bool -implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return ppc64::utf8_validation::generic_validate_utf8(buf, len); + return i / 3 * 4 + scalar::base64::tail_encode_base64((char *)out, src + i, + srclen - i, options); } -simdutf_warn_unused result implementation::validate_utf8_with_errors( - const char *buf, size_t len) const noexcept { - return ppc64::utf8_validation::generic_validate_utf8_with_errors(buf, len); -} +static inline void compress(__m128i data, uint16_t mask, char *output) { + if (mask == 0) { + __lsx_vst(data, reinterpret_cast<__m128i *>(output), 0); + return; + } + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. -simdutf_warn_unused bool -implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return ppc64::utf8_validation::generic_validate_ascii(buf, len); -} + v2u64 shufmask = {tables::base64::thintable_epi8[mask1], + tables::base64::thintable_epi8[mask2]}; -simdutf_warn_unused result implementation::validate_ascii_with_errors( - const char *buf, size_t len) const noexcept { - return ppc64::utf8_validation::generic_validate_ascii_with_errors(buf, len); -} + // we increment by 0x08 the second half of the mask + v4u32 hi = {0, 0, 0x08080808, 0x08080808}; + __m128i shufmask1 = __lsx_vadd_b((__m128i)shufmask, (__m128i)hi); -simdutf_warn_unused bool -implementation::validate_utf16le(const char16_t *buf, - size_t len) const noexcept { - return scalar::utf16::validate(buf, len); -} + // this is the version "nearly pruned" + __m128i pruned = __lsx_vshuf_b(data, data, shufmask1); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = tables::base64::BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + __lsx_vld(reinterpret_cast( + tables::base64::pshufb_combine_table + pop1 * 8), + 0); + __m128i answer = __lsx_vshuf_b(pruned, pruned, compactmask); -simdutf_warn_unused bool -implementation::validate_utf16be(const char16_t *buf, - size_t len) const noexcept { - return scalar::utf16::validate(buf, len); + __lsx_vst(answer, reinterpret_cast<__m128i *>(output), 0); } -simdutf_warn_unused result implementation::validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept { - return scalar::utf16::validate_with_errors(buf, len); -} +struct block64 { + __m128i chunks[4]; +}; -simdutf_warn_unused result implementation::validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept { - return scalar::utf16::validate_with_errors(buf, len); -} +template +static inline uint16_t to_base64_mask(__m128i *src, bool *error) { + const v16u8 ascii_space_tbl = {0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x9, 0xa, 0x0, 0xc, 0xd, 0x0, 0x0}; + // credit: aqrit + /* + '0'(0x30)-'9'(0x39) => delta_values_index = 4 + 'A'(0x41)-'Z'(0x5a) => delta_values_index = 4/5/12(4+8) + 'a'(0x61)-'z'(0x7a) => delta_values_index = 6/7/14(6+8) + '+'(0x2b) => delta_values_index = 3 + '/'(0x2f) => delta_values_index = 2+8 = 10 + '-'(0x2d) => delta_values_index = 2+8 = 10 + '_'(0x5f) => delta_values_index = 5+8 = 13 + */ + v16u8 delta_asso = {0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, + 0x0, 0x0, 0x0, 0x0, 0x0, 0xF, 0x0, 0xF}; + v16i8 delta_values; + if (base64_url) { + delta_values = + v16i8{int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0x11), int8_t(0xC3), + int8_t(0xBF), int8_t(0xE0), int8_t(0xB9), int8_t(0xB9)}; + } else { + delta_values = + v16i8{int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0x10), int8_t(0xC3), + int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9)}; + } -simdutf_warn_unused result implementation::validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept { - return scalar::utf32::validate_with_errors(buf, len); -} + v16u8 check_asso; + if (base64_url) { + check_asso = v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x03, 0x07, 0x0B, 0x06, 0x0B, 0x12}; + } else { + check_asso = v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F}; + } -simdutf_warn_unused bool -implementation::validate_utf32(const char16_t *buf, size_t len) const noexcept { - return scalar::utf32::validate(buf, len); -} + v16i8 check_values; + if (base64_url) { + check_values = v16i8{int8_t(0x0), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD3), int8_t(0xA6), + int8_t(0xB5), int8_t(0x86), int8_t(0xD0), int8_t(0x80), + int8_t(0xB0), int8_t(0x80), int8_t(0x0), int8_t(0x0)}; + } else { + check_values = + v16i8{int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), + int8_t(0xB5), int8_t(0x86), int8_t(0xD1), int8_t(0x80), + int8_t(0xB1), int8_t(0x80), int8_t(0x91), int8_t(0x80)}; + } -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( - const char * /*buf*/, size_t /*len*/, - char16_t * /*utf16_output*/) const noexcept { - return 0; // stub -} + const __m128i shifted = __lsx_vsrli_b(*src, 3); + __m128i asso_index = __lsx_vand_v(*src, __lsx_vldi(0xF)); + const __m128i delta_hash = + __lsx_vavgr_bu(__lsx_vshuf_b((__m128i)delta_asso, (__m128i)delta_asso, + (__m128i)asso_index), + shifted); + const __m128i check_hash = + __lsx_vavgr_bu(__lsx_vshuf_b((__m128i)check_asso, (__m128i)check_asso, + (__m128i)asso_index), + shifted); -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( - const char * /*buf*/, size_t /*len*/, - char16_t * /*utf16_output*/) const noexcept { - return 0; // stub -} + const __m128i out = + __lsx_vsadd_b(__lsx_vshuf_b((__m128i)delta_values, (__m128i)delta_values, + (__m128i)delta_hash), + *src); + const __m128i chk = + __lsx_vsadd_b(__lsx_vshuf_b((__m128i)check_values, (__m128i)check_values, + (__m128i)check_hash), + *src); + unsigned int mask = __lsx_vpickve2gr_hu(__lsx_vmskltz_b(chk), 0); + if (mask) { + __m128i ascii_space = __lsx_vseq_b(__lsx_vshuf_b((__m128i)ascii_space_tbl, + (__m128i)ascii_space_tbl, + (__m128i)asso_index), + *src); + *error |= + (mask != __lsx_vpickve2gr_hu(__lsx_vmskltz_b((__m128i)ascii_space), 0)); + } -simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( - const char * /*buf*/, size_t /*len*/, - char16_t * /*utf16_output*/) const noexcept { - return result(error_code::OTHER, 0); // stub + *src = out; + return (uint16_t)mask; } -simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( - const char * /*buf*/, size_t /*len*/, - char16_t * /*utf16_output*/) const noexcept { - return result(error_code::OTHER, 0); // stub +template +static inline uint64_t to_base64_mask(block64 *b, bool *error) { + *error = 0; + uint64_t m0 = to_base64_mask(&b->chunks[0], error); + uint64_t m1 = to_base64_mask(&b->chunks[1], error); + uint64_t m2 = to_base64_mask(&b->chunks[2], error); + uint64_t m3 = to_base64_mask(&b->chunks[3], error); + return m0 | (m1 << 16) | (m2 << 32) | (m3 << 48); } -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( - const char * /*buf*/, size_t /*len*/, - char16_t * /*utf16_output*/) const noexcept { - return 0; // stub +static inline void copy_block(block64 *b, char *output) { + __lsx_vst(b->chunks[0], reinterpret_cast<__m128i *>(output), 0); + __lsx_vst(b->chunks[1], reinterpret_cast<__m128i *>(output), 16); + __lsx_vst(b->chunks[2], reinterpret_cast<__m128i *>(output), 32); + __lsx_vst(b->chunks[3], reinterpret_cast<__m128i *>(output), 48); } -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( - const char * /*buf*/, size_t /*len*/, - char16_t * /*utf16_output*/) const noexcept { - return 0; // stub +static inline uint64_t compress_block(block64 *b, uint64_t mask, char *output) { + uint64_t nmask = ~mask; + uint64_t count = + __lsx_vpickve2gr_d(__lsx_vpcnt_h(__lsx_vreplgr2vr_d(nmask)), 0); + uint16_t *count_ptr = (uint16_t *)&count; + compress(b->chunks[0], uint16_t(mask), output); + compress(b->chunks[1], uint16_t(mask >> 16), output + count_ptr[0]); + compress(b->chunks[2], uint16_t(mask >> 32), + output + count_ptr[0] + count_ptr[1]); + compress(b->chunks[3], uint16_t(mask >> 48), + output + count_ptr[0] + count_ptr[1] + count_ptr[2]); + return count_ones(nmask); } -simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( - const char * /*buf*/, size_t /*len*/, - char32_t * /*utf16_output*/) const noexcept { - return 0; // stub +// The caller of this function is responsible to ensure that there are 64 bytes +// available from reading at src. The data is read into a block64 structure. +static inline void load_block(block64 *b, const char *src) { + b->chunks[0] = __lsx_vld(reinterpret_cast(src), 0); + b->chunks[1] = __lsx_vld(reinterpret_cast(src), 16); + b->chunks[2] = __lsx_vld(reinterpret_cast(src), 32); + b->chunks[3] = __lsx_vld(reinterpret_cast(src), 48); } -simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( - const char * /*buf*/, size_t /*len*/, - char32_t * /*utf16_output*/) const noexcept { - return result(error_code::OTHER, 0); // stub +// The caller of this function is responsible to ensure that there are 128 bytes +// available from reading at src. The data is read into a block64 structure. +static inline void load_block(block64 *b, const char16_t *src) { + __m128i m1 = __lsx_vld(reinterpret_cast(src), 0); + __m128i m2 = __lsx_vld(reinterpret_cast(src), 16); + __m128i m3 = __lsx_vld(reinterpret_cast(src), 32); + __m128i m4 = __lsx_vld(reinterpret_cast(src), 48); + __m128i m5 = __lsx_vld(reinterpret_cast(src), 64); + __m128i m6 = __lsx_vld(reinterpret_cast(src), 80); + __m128i m7 = __lsx_vld(reinterpret_cast(src), 96); + __m128i m8 = __lsx_vld(reinterpret_cast(src), 112); + b->chunks[0] = __lsx_vssrlni_bu_h(m2, m1, 0); + b->chunks[1] = __lsx_vssrlni_bu_h(m4, m3, 0); + b->chunks[2] = __lsx_vssrlni_bu_h(m6, m5, 0); + b->chunks[3] = __lsx_vssrlni_bu_h(m8, m7, 0); } -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( - const char * /*buf*/, size_t /*len*/, - char32_t * /*utf16_output*/) const noexcept { - return 0; // stub -} +static inline void base64_decode(char *out, __m128i str) { + __m128i t0 = __lsx_vor_v( + __lsx_vslli_w(str, 26), + __lsx_vslli_w(__lsx_vand_v(str, __lsx_vldi(-1758 /*0x0000FF00*/)), 12)); + __m128i t1 = + __lsx_vsrli_w(__lsx_vand_v(str, __lsx_vldi(-3521 /*0x003F0000*/)), 2); + __m128i t2 = __lsx_vor_v(t0, t1); + __m128i t3 = __lsx_vor_v(t2, __lsx_vsrli_w(str, 16)); + const v16u8 pack_shuffle = {3, 2, 1, 7, 6, 5, 11, 10, + 9, 15, 14, 13, 0, 0, 0, 0}; + t3 = __lsx_vshuf_b(t3, t3, (__m128i)pack_shuffle); -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert(buf, len, - utf8_output); + // Store the output: + // we only need 12. + __lsx_vstelm_d(t3, out, 0, 0); + __lsx_vstelm_w(t3, out + 8, 0, 2); } - -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert(buf, len, utf8_output); +// decode 64 bytes and output 48 bytes +static inline void base64_decode_block(char *out, const char *src) { + base64_decode(out, __lsx_vld(reinterpret_cast(src), 0)); + base64_decode(out + 12, + __lsx_vld(reinterpret_cast(src), 16)); + base64_decode(out + 24, + __lsx_vld(reinterpret_cast(src), 32)); + base64_decode(out + 36, + __lsx_vld(reinterpret_cast(src), 48)); } - -simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert_with_errors( - buf, len, utf8_output); +static inline void base64_decode_block_safe(char *out, const char *src) { + base64_decode_block(out, src); } - -simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert_with_errors( - buf, len, utf8_output); +static inline void base64_decode_block(char *out, block64 *b) { + base64_decode(out, b->chunks[0]); + base64_decode(out + 12, b->chunks[1]); + base64_decode(out + 24, b->chunks[2]); + base64_decode(out + 36, b->chunks[3]); } - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert_valid(buf, len, - utf8_output); +static inline void base64_decode_block_safe(char *out, block64 *b) { + base64_decode_block(out, b); } -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert_valid(buf, len, - utf8_output); -} +template +full_result +compress_decode_base64(char *dst, const char_type *src, size_t srclen, + base64_options options, + last_chunk_handling_options last_chunk_options) { + const uint8_t *to_base64 = base64_url ? tables::base64::to_base64_url_value + : tables::base64::to_base64_value; + size_t equallocation = + srclen; // location of the first padding character if any + // skip trailing spaces + while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && + to_base64[uint8_t(src[srclen - 1])] == 64) { + srclen--; + } + size_t equalsigns = 0; + if (srclen > 0 && src[srclen - 1] == '=') { + equallocation = srclen - 1; + srclen--; + equalsigns = 1; + // skip trailing spaces + while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && + to_base64[uint8_t(src[srclen - 1])] == 64) { + srclen--; + } + if (srclen > 0 && src[srclen - 1] == '=') { + equallocation = srclen - 1; + srclen--; + equalsigns = 2; + } + } + if (srclen == 0) { + if (equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, 0, 0}; + } + const char_type *const srcinit = src; + const char *const dstinit = dst; + const char_type *const srcend = src + srclen; -simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf32_to_utf8::convert(buf, len, utf8_output); -} + constexpr size_t block_size = 10; + char buffer[block_size * 64]; + char *bufferptr = buffer; + if (srclen >= 64) { + const char_type *const srcend64 = src + srclen - 64; + while (src <= srcend64) { + block64 b; + load_block(&b, src); + src += 64; + bool error = false; + uint64_t badcharmask = to_base64_mask(&b, &error); + if (badcharmask) { + if (error) { + src -= 64; + while (src < srcend && scalar::base64::is_eight_byte(*src) && + to_base64[uint8_t(*src)] <= 64) { + src++; + } + if (src < srcend) { + // should never happen + } + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + } -simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf32_to_utf8::convert_with_errors(buf, len, utf8_output); -} + if (badcharmask != 0) { + // optimization opportunity: check for simple masks like those made of + // continuous 1s followed by continuous 0s. And masks containing a + // single bad character. + bufferptr += compress_block(&b, badcharmask, bufferptr); + } else { + // optimization opportunity: if bufferptr == buffer and mask == 0, we + // can avoid the call to compress_block and decode directly. + copy_block(&b, bufferptr); + bufferptr += 64; + } + if (bufferptr >= (block_size - 1) * 64 + buffer) { + for (size_t i = 0; i < (block_size - 1); i++) { + base64_decode_block(dst, buffer + i * 64); + dst += 48; + } + std::memcpy(buffer, buffer + (block_size - 1) * 64, + 64); // 64 might be too much + bufferptr -= (block_size - 1) * 64; + } + } + } + char *buffer_start = buffer; + // Optimization note: if this is almost full, then it is worth our + // time, otherwise, we should just decode directly. + int last_block = (int)((bufferptr - buffer_start) % 64); + if (last_block != 0 && srcend - src + last_block >= 64) { + while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { + uint8_t val = to_base64[uint8_t(*src)]; + *bufferptr = char(val); + if (!scalar::base64::is_eight_byte(*src) || val > 64) { + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + bufferptr += (val <= 63); + src++; + } + } -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf32_to_utf8::convert_valid(buf, len, utf8_output); -} + for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { + base64_decode_block(dst, buffer_start); + dst += 48; + } + if ((bufferptr - buffer_start) % 64 != 0) { + while (buffer_start + 4 < bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; + triple = scalar::utf32::swap_bytes(triple); + std::memcpy(dst, &triple, 4); -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert(buf, len, - utf16_output); -} + dst += 3; + buffer_start += 4; + } + if (buffer_start + 4 <= bufferptr) { + uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + + (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + + (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + + (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) + << 8; + triple = scalar::utf32::swap_bytes(triple); + std::memcpy(dst, &triple, 3); -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert(buf, len, - utf16_output); + dst += 3; + buffer_start += 4; + } + // we may have 1, 2 or 3 bytes left and we need to decode them so let us + // backtrack + int leftover = int(bufferptr - buffer_start); + while (leftover > 0) { + while (to_base64[uint8_t(*(src - 1))] == 64) { + src--; + } + src--; + leftover--; + } + } + if (src < srcend + equalsigns) { + full_result r = scalar::base64::base64_tail_decode( + dst, src, srcend - src, equalsigns, options, last_chunk_options); + r.input_count += size_t(src - srcinit); + if (r.error == error_code::INVALID_BASE64_CHARACTER || + r.error == error_code::BASE64_EXTRA_BITS) { + return r; + } else { + r.output_count += size_t(dst - dstinit); + } + if (last_chunk_options != stop_before_partial && + r.error == error_code::SUCCESS && equalsigns > 0) { + // additional checks + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + equalsigns != 4)) { + r.error = error_code::INVALID_BASE64_CHARACTER; + r.input_count = equallocation; + } + } + return r; + } + if (equalsigns > 0) { + if ((size_t(dst - dstinit) % 3 == 0) || + ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; + } + } + return {SUCCESS, srclen, size_t(dst - dstinit)}; } +/* end file src/lsx/lsx_base64.cpp */ -simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert_with_errors( - buf, len, utf16_output); -} +} // namespace +} // namespace lsx +} // namespace simdutf -simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert_with_errors( - buf, len, utf16_output); -} +/* begin file src/generic/buf_block_reader.h */ +namespace simdutf { +namespace lsx { +namespace { -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert_valid( - buf, len, utf16_output); -} +// Walks through a buffer in block-sized increments, loading the last part with +// spaces +template struct buf_block_reader { +public: + simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdutf_really_inline size_t block_index(); + simdutf_really_inline bool has_full_block() const; + simdutf_really_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 + * (in which case this function fills the buffer with spaces and returns 0. In + * particular, if len == STEP_SIZE there will be 0 full_blocks and 1 remainder + * block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdutf_really_inline size_t get_remainder(uint8_t *dst) const; + simdutf_really_inline void advance(); -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert_valid(buf, len, - utf16_output); -} +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert(buf, len, - utf32_output); +// Routines to print masks and text for debugging bitmask operations +simdutf_unused static char *format_input_text_64(const uint8_t *text) { + static char *buf = + reinterpret_cast(malloc(sizeof(simd8x64) + 1)); + for (size_t i = 0; i < sizeof(simd8x64); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; } -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert(buf, len, - utf32_output); +// Routines to print masks and text for debugging bitmask operations +simdutf_unused static char *format_input_text(const simd8x64 &in) { + static char *buf = + reinterpret_cast(malloc(sizeof(simd8x64) + 1)); + in.store(reinterpret_cast(buf)); + for (size_t i = 0; i < sizeof(simd8x64); i++) { + if (buf[i] < ' ') { + buf[i] = '_'; + } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; } -simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert_with_errors( - buf, len, utf32_output); +simdutf_unused static char *format_mask(uint64_t mask) { + static char *buf = reinterpret_cast(malloc(64 + 1)); + for (size_t i = 0; i < 64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; } -simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert_with_errors( - buf, len, utf32_output); -} +template +simdutf_really_inline +buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) + : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, + idx{0} {} -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert_valid( - buf, len, utf32_output); +template +simdutf_really_inline size_t buf_block_reader::block_index() { + return idx; } -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert_valid(buf, len, - utf32_output); +template +simdutf_really_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; } -void implementation::change_endianness_utf16(const char16_t *input, - size_t length, - char16_t *output) const noexcept { - scalar::utf16::change_endianness_utf16(input, length, output); +template +simdutf_really_inline const uint8_t * +buf_block_reader::full_block() const { + return &buf[idx]; } -simdutf_warn_unused size_t implementation::count_utf16le( - const char16_t *input, size_t length) const noexcept { - return scalar::utf16::count_code_points(input, length); +template +simdutf_really_inline size_t +buf_block_reader::get_remainder(uint8_t *dst) const { + if (len == idx) { + return 0; + } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, + STEP_SIZE); // std::memset STEP_SIZE because it is more efficient + // to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; } -simdutf_warn_unused size_t implementation::count_utf16be( - const char16_t *input, size_t length) const noexcept { - return scalar::utf16::count_code_points(input, length); +template +simdutf_really_inline void buf_block_reader::advance() { + idx += STEP_SIZE; } -simdutf_warn_unused size_t -implementation::count_utf8(const char *input, size_t length) const noexcept { - return utf8::count_code_points(input, length); -} +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/buf_block_reader.h */ +/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ +namespace simdutf { +namespace lsx { +namespace { +namespace utf8_validation { -simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return scalar::utf16::utf8_length_from_utf16(input, - length); -} +using namespace simd; -simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return scalar::utf16::utf8_length_from_utf16(input, length); -} +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ -simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return scalar::utf16::utf32_length_from_utf16(input, - length); -} + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, -simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return scalar::utf16::utf32_length_from_utf16(input, length); -} + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, -simdutf_warn_unused size_t implementation::utf16_length_from_utf8( - const char *input, size_t length) const noexcept { - return scalar::utf8::utf16_length_from_utf8(input, length); -} + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, -simdutf_warn_unused size_t implementation::utf8_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - return scalar::utf32::utf8_length_from_utf32(input, length); -} + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, -simdutf_warn_unused size_t implementation::utf16_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - return scalar::utf32::utf16_length_from_utf32(input, length); + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); } - -simdutf_warn_unused size_t implementation::utf32_length_from_utf8( - const char *input, size_t length) const noexcept { - return scalar::utf8::count_code_points(input, length); +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; } -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); +// +// Return nonzero if there are incomplete multibyte characters at the end of the +// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. +// +simdutf_really_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they + // ended at EOF): + // ... 1111____ 111_____ 11______ + static const uint8_t max_array[32] = {255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 0b11110000u - 1, + 0b11100000u - 1, + 0b11000000u - 1}; + const simd8 max_value( + &max_array[sizeof(max_array) - sizeof(simd8)]); + return input.gt_bits(max_value); } -simdutf_warn_unused result implementation::base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - // skip trailing spaces - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; +struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast + // path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); } - size_t equallocation = - length; // location of the first padding character if any - size_t equalsigns = 0; - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - length -= 1; - equalsigns++; - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - equalsigns++; - length -= 1; - } + + // The only problem that can happen at EOF is that a multibyte character is + // too short or a byte value too large in the last bytes: check_special_cases + // only checks for bytes too large in the first of two bytes. + simdutf_really_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an + // ASCII block can't possibly finish them. + this->error |= this->prev_incomplete; } - if (length == 0) { - if (equalsigns > 0) { - return {INVALID_BASE64_CHARACTER, equallocation}; + + simdutf_really_inline void check_next_input(const simd8x64 &input) { + if (simdutf_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it + // is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = + is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; } - return {SUCCESS, 0}; } - result r = scalar::base64::base64_tail_decode( - output, input, length, equalsigns, options, last_chunk_options); - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0) { - // additional checks - if ((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation}; - } + + // do not forget to call check_eof! + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); } - return r; + +}; // struct utf8_checker +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ +/* begin file src/generic/utf8_validation/utf8_validator.h */ +namespace simdutf { +namespace lsx { +namespace { +namespace utf8_validation { + +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t *input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return !c.errors(); } -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); +bool generic_validate_utf8(const char *input, size_t length) { + return generic_validate_utf8( + reinterpret_cast(input), length); } -simdutf_warn_unused result implementation::base64_to_binary( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - // skip trailing spaces - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - size_t equallocation = - length; // location of the first padding character if any - size_t equalsigns = 0; - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - length -= 1; - equalsigns++; - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - equalsigns++; - length -= 1; - } - } - if (length == 0) { - if (equalsigns > 0) { - return {INVALID_BASE64_CHARACTER, equallocation}; +/** + * Validates that the string is actual UTF-8 and stops on errors. + */ +template +result generic_validate_utf8_with_errors(const uint8_t *input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + size_t count{0}; + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + if (c.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(input), + reinterpret_cast(input + count), length - count); + res.count += count; + return res; } - return {SUCCESS, 0}; + reader.advance(); + count += 64; } - result r = scalar::base64::base64_tail_decode( - output, input, length, equalsigns, options, last_chunk_options); - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0) { - // additional checks - if ((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation}; - } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + if (c.errors()) { + if (count != 0) { + count--; + } // Sometimes the error is only detected in the next chunk + result res = scalar::utf8::rewind_and_validate_with_errors( + reinterpret_cast(input), + reinterpret_cast(input) + count, length - count); + res.count += count; + return res; + } else { + return result(error_code::SUCCESS, length); } - return r; } -simdutf_warn_unused size_t implementation::base64_length_from_binary( - size_t length, base64_options options) const noexcept { - return scalar::base64::base64_length_from_binary(length, options); +result generic_validate_utf8_with_errors(const char *input, size_t length) { + return generic_validate_utf8_with_errors( + reinterpret_cast(input), length); } -size_t implementation::binary_to_base64(const char *input, size_t length, - char *output, - base64_options options) const noexcept { - return scalar::base64::binary_to_base64(input, length, output, options); +template +bool generic_validate_ascii(const uint8_t *input, size_t length) { + buf_block_reader<64> reader(input, length); + uint8_t blocks[64]{}; + simd::simd8x64 running_or(blocks); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + running_or |= in; + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + running_or |= in; + return running_or.is_ascii(); } -} // namespace ppc64 -} // namespace simdutf - -/* begin file src/simdutf/ppc64/end.h */ -/* end file src/simdutf/ppc64/end.h */ -/* end file src/ppc64/implementation.cpp */ -#endif -#if SIMDUTF_IMPLEMENTATION_RVV -/* begin file src/rvv/implementation.cpp */ - - - +bool generic_validate_ascii(const char *input, size_t length) { + return generic_validate_ascii( + reinterpret_cast(input), length); +} -/* begin file src/simdutf/rvv/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "rvv" -// #define SIMDUTF_IMPLEMENTATION rvv +template +result generic_validate_ascii_with_errors(const uint8_t *input, size_t length) { + buf_block_reader<64> reader(input, length); + size_t count{0}; + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + if (!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); + } + reader.advance(); -#if SIMDUTF_CAN_ALWAYS_RUN_RVV -// nothing needed. -#else -SIMDUTF_TARGET_RVV -#endif -/* end file src/simdutf/rvv/begin.h */ -namespace simdutf { -namespace rvv { -namespace { -#ifndef SIMDUTF_RVV_H - #error "rvv.h must be included" -#endif + count += 64; + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + if (!in.is_ascii()) { + result res = scalar::ascii::validate_with_errors( + reinterpret_cast(input + count), length - count); + return result(res.error, count + res.count); + } else { + return result(error_code::SUCCESS, length); + } +} + +result generic_validate_ascii_with_errors(const char *input, size_t length) { + return generic_validate_ascii_with_errors( + reinterpret_cast(input), length); +} +} // namespace utf8_validation } // unnamed namespace -} // namespace rvv +} // namespace lsx } // namespace simdutf +/* end file src/generic/utf8_validation/utf8_validator.h */ + +// transcoding from UTF-8 to Latin 1 +/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ -// -// Implementation-specific overrides -// namespace simdutf { -namespace rvv { -/* begin file src/rvv/rvv_helpers.inl.cpp */ -template -simdutf_really_inline static size_t -rvv_utf32_store_utf16_m4(uint16_t *dst, vuint32m4_t utf32, size_t vl, - vbool4_t m4even) { - /* convert [000000000000aaaa|aaaaaabbbbbbbbbb] - * to [110111bbbbbbbbbb|110110aaaaaaaaaa] */ - vuint32m4_t sur = __riscv_vsub_vx_u32m4(utf32, 0x10000, vl); - sur = __riscv_vor_vv_u32m4(__riscv_vsll_vx_u32m4(sur, 16, vl), - __riscv_vsrl_vx_u32m4(sur, 10, vl), vl); - sur = __riscv_vand_vx_u32m4(sur, 0x3FF03FF, vl); - sur = __riscv_vor_vx_u32m4(sur, 0xDC00D800, vl); - /* merge 1 byte utf32 and 2 byte sur */ - vbool8_t m4 = __riscv_vmsgtu_vx_u32m4_b8(utf32, 0xFFFF, vl); - vuint16m4_t utf32_16 = __riscv_vreinterpret_v_u32m4_u16m4( - __riscv_vmerge_vvm_u32m4(utf32, sur, m4, vl)); - /* compress and store */ - vbool4_t mOut = __riscv_vmor_mm_b4( - __riscv_vmsne_vx_u16m4_b4(utf32_16, 0, vl * 2), m4even, vl * 2); - vuint16m4_t vout = __riscv_vcompress_vm_u16m4(utf32_16, mOut, vl * 2); - vl = __riscv_vcpop_m_b4(mOut, vl * 2); - __riscv_vse16_v_u16m4(dst, simdutf_byteflip(vout, vl), vl); - return vl; -}; -/* end file src/rvv/rvv_helpers.inl.cpp */ +namespace lsx { +namespace { +namespace utf8_to_latin1 { +using namespace simd; -/* begin file src/rvv/rvv_length_from.inl.cpp */ +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // For UTF-8 to Latin 1, we can allow any ASCII character, and any + // continuation byte, but the non-ASCII leading bytes must be 0b11000011 or + // 0b11000010 and nothing else. + // + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + constexpr const uint8_t FORBIDDEN = 0xff; -simdutf_warn_unused size_t -implementation::count_utf16le(const char16_t *src, size_t len) const noexcept { - return utf32_length_from_utf16le(src, len); -} + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + FORBIDDEN, + // 1110____ ________ + FORBIDDEN, + // 1111____ ________ + FORBIDDEN); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, -simdutf_warn_unused size_t -implementation::count_utf16be(const char16_t *src, size_t len) const noexcept { - return utf32_length_from_utf16be(src, len); -} + // ____0100 ________ + FORBIDDEN, + // ____0101 ________ + FORBIDDEN, + // ____011_ ________ + FORBIDDEN, FORBIDDEN, -simdutf_warn_unused size_t -implementation::count_utf8(const char *src, size_t len) const noexcept { - return utf32_length_from_utf8(src, len); -} + // ____1___ ________ + FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, + // ____1101 ________ + FORBIDDEN, FORBIDDEN, FORBIDDEN); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, -simdutf_warn_unused size_t implementation::latin1_length_from_utf8( - const char *src, size_t len) const noexcept { - return utf32_length_from_utf8(src, len); -} + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, -simdutf_warn_unused size_t -implementation::latin1_length_from_utf16(size_t len) const noexcept { - return len; + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); } -simdutf_warn_unused size_t -implementation::latin1_length_from_utf32(size_t len) const noexcept { - return len; -} +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; -simdutf_warn_unused size_t -implementation::utf16_length_from_latin1(size_t len) const noexcept { - return len; -} + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + this->error |= check_special_cases(input, prev1); + } -simdutf_warn_unused size_t -implementation::utf32_length_from_latin1(size_t len) const noexcept { - return len; -} + simdutf_really_inline size_t convert(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 16 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 16; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = + scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output); + if (howmany == 0) { + return 0; + } + latin1_output += howmany; + } + return latin1_output - start; + } -simdutf_warn_unused size_t implementation::utf32_length_from_utf8( - const char *src, size_t len) const noexcept { - size_t count = 0; - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e8m8(len); - vint8m8_t v = __riscv_vle8_v_i8m8((int8_t *)src, vl); - vbool1_t mask = __riscv_vmsgt_vx_i8m8_b1(v, -65, vl); - count += __riscv_vcpop_m_b1(mask, vl); + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from + // in+pos onward, with the ability to go back up to pos bytes, and + // read size-pos bytes forward. + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + res.count += pos; + return res; + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + res.count += pos; + return res; + } + if (pos < size) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + latin1_output += res.count; + } + } + return result(error_code::SUCCESS, latin1_output - start); } - return count; -} -template -simdutf_really_inline static size_t -rvv_utf32_length_from_utf16(const char16_t *src, size_t len) { - size_t count = 0; - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e16m8(len); - vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); - v = simdutf_byteflip(v, vl); - vbool2_t notHigh = - __riscv_vmor_mm_b2(__riscv_vmsgtu_vx_u16m8_b2(v, 0xDFFF, vl), - __riscv_vmsltu_vx_u16m8_b2(v, 0xDC00, vl), vl); - count += __riscv_vcpop_m_b2(notHigh, vl); + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); } - return count; -} -simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( - const char16_t *src, size_t len) const noexcept { - return rvv_utf32_length_from_utf16(src, len); -} +}; // struct utf8_checker +} // namespace utf8_to_latin1 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ +/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ -simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( - const char16_t *src, size_t len) const noexcept { - if (supports_zvbb()) - return rvv_utf32_length_from_utf16(src, len); - else - return rvv_utf32_length_from_utf16(src, len); -} +namespace simdutf { +namespace lsx { +namespace { +namespace utf8_to_latin1 { +using namespace simd; -simdutf_warn_unused size_t implementation::utf8_length_from_latin1( - const char *src, size_t len) const noexcept { - size_t count = len; - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e8m8(len); - vint8m8_t v = __riscv_vle8_v_i8m8((int8_t *)src, vl); - count += __riscv_vcpop_m_b1(__riscv_vmslt_vx_i8m8_b1(v, 0, vl), vl); +simdutf_really_inline size_t convert_valid(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last + // 16 bytes, and if the data is valid, then it is entirely safe because 16 + // UTF-8 bytes generate much more than 8 bytes. However, you cannot generally + // assume that you have valid UTF-8 input, so we are going to go back from the + // end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... } - return count; -} - -template -simdutf_really_inline static size_t -rvv_utf8_length_from_utf16(const char16_t *src, size_t len) { - size_t count = 0; - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e16m8(len); - vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); - v = simdutf_byteflip(v, vl); - vbool2_t m234 = __riscv_vmsgtu_vx_u16m8_b2(v, 0x7F, vl); - vbool2_t m34 = __riscv_vmsgtu_vx_u16m8_b2(v, 0x7FF, vl); - vbool2_t notSur = - __riscv_vmor_mm_b2(__riscv_vmsltu_vx_u16m8_b2(v, 0xD800, vl), - __riscv_vmsgtu_vx_u16m8_b2(v, 0xDFFF, vl), vl); - vbool2_t m3 = __riscv_vmand_mm_b2(m34, notSur, vl); - count += vl + __riscv_vcpop_m_b2(m234, vl) + __riscv_vcpop_m_b2(m3, vl); + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, it + // is not good enough. + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } } - return count; -} - -simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( - const char16_t *src, size_t len) const noexcept { - return rvv_utf8_length_from_utf16(src, len); + if (pos < size) { + size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, + latin1_output); + latin1_output += howmany; + } + return latin1_output - start; } -simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( - const char16_t *src, size_t len) const noexcept { - if (supports_zvbb()) - return rvv_utf8_length_from_utf16(src, len); - else - return rvv_utf8_length_from_utf16(src, len); -} +} // namespace utf8_to_latin1 +} // namespace +} // namespace lsx +} // namespace simdutf + // namespace simdutf +/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +// transcoding from UTF-8 to UTF-16 +/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ -simdutf_warn_unused size_t implementation::utf8_length_from_utf32( - const char32_t *src, size_t len) const noexcept { - size_t count = 0; - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e32m8(len); - vuint32m8_t v = __riscv_vle32_v_u32m8((uint32_t *)src, vl); - vbool4_t m234 = __riscv_vmsgtu_vx_u32m8_b4(v, 0x7F, vl); - vbool4_t m34 = __riscv_vmsgtu_vx_u32m8_b4(v, 0x7FF, vl); - vbool4_t m4 = __riscv_vmsgtu_vx_u32m8_b4(v, 0xFFFF, vl); - count += vl + __riscv_vcpop_m_b4(m234, vl) + __riscv_vcpop_m_b4(m34, vl) + - __riscv_vcpop_m_b4(m4, vl); - } - return count; -} +namespace simdutf { +namespace lsx { +namespace { +namespace utf8_to_utf16 { -simdutf_warn_unused size_t implementation::utf16_length_from_utf8( - const char *src, size_t len) const noexcept { - size_t count = 0; - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e8m8(len); - vint8m8_t v = __riscv_vle8_v_i8m8((int8_t *)src, vl); - vbool1_t m1234 = __riscv_vmsgt_vx_i8m8_b1(v, -65, vl); - vbool1_t m4 = __riscv_vmsgtu_vx_u8m8_b1(__riscv_vreinterpret_u8m8(v), - (uint8_t)0b11101111, vl); - count += __riscv_vcpop_m_b1(m1234, vl) + __riscv_vcpop_m_b1(m4, vl); - } - return count; -} +using namespace simd; -simdutf_warn_unused size_t implementation::utf16_length_from_utf32( - const char32_t *src, size_t len) const noexcept { - size_t count = 0; - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e32m8(len); - vuint32m8_t v = __riscv_vle32_v_u32m8((uint32_t *)src, vl); - vbool4_t m4 = __riscv_vmsgtu_vx_u32m8_b4(v, 0xFFFF, vl); - count += vl + __riscv_vcpop_m_b4(m4, vl); +template +simdutf_warn_unused size_t convert_valid(const char *input, size_t size, + char16_t *utf16_output) noexcept { + // The implementation is not specific to haswell and should be moved to the + // generic directory. + size_t pos = 0; + char16_t *start{utf16_output}; + const size_t safety_margin = 16; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + // this loop could be unrolled further. For example, we could process the + // mask far more than 64 bytes. + simd8x64 in(reinterpret_cast(input + pos)); + if (in.is_ascii()) { + in.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // Slow path. We hope that the compiler will recognize that this is a slow + // path. Anything that is not a continuation mask is a 'leading byte', + // that is, the start of a new code point. + uint64_t utf8_continuation_mask = in.lt(-65 + 1); + // -65 is 0b10111111 in two-complement's, so largest possible continuation + // byte + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + // The *start* of code points is not so useful, rather, we want the *end* + // of code points. + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times when using solely + // the slow/regular path, and at least four times if there are fast paths. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + // + // Thus we may allow convert_masked_utf8_to_utf16 to process + // more bytes at a time under a fast-path mode where 16 bytes + // are consumed at once (e.g., when encountering ASCII). + size_t consumed = convert_masked_utf8_to_utf16( + input + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } } - return count; + utf16_output += scalar::utf8_to_utf16::convert_valid( + input + pos, size - pos, utf16_output); + return utf16_output - start; } -/* end file src/rvv/rvv_length_from.inl.cpp */ -/* begin file src/rvv/rvv_validate.inl.cpp */ +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ +/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ -simdutf_warn_unused bool -implementation::validate_ascii(const char *src, size_t len) const noexcept { - size_t vlmax = __riscv_vsetvlmax_e8m8(); - vint8m8_t mask = __riscv_vmv_v_x_i8m8(0, vlmax); - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e8m8(len); - vint8m8_t v = __riscv_vle8_v_i8m8((int8_t *)src, vl); - mask = __riscv_vor_vv_i8m8_tu(mask, mask, v, vl); - } - return __riscv_vfirst_m_b1(__riscv_vmslt_vx_i8m8_b1(mask, 0, vlmax), vlmax) < - 0; -} +namespace simdutf { +namespace lsx { +namespace { +namespace utf8_to_utf16 { +using namespace simd; -simdutf_warn_unused result implementation::validate_ascii_with_errors( - const char *src, size_t len) const noexcept { - const char *beg = src; - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e8m8(len); - vint8m8_t v = __riscv_vle8_v_i8m8((int8_t *)src, vl); - long idx = __riscv_vfirst_m_b1(__riscv_vmslt_vx_i8m8_b1(v, 0, vl), vl); - if (idx >= 0) - return result(error_code::TOO_LARGE, src - beg + idx); - } - return result(error_code::SUCCESS, src - beg); -} +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ -/* Returns a close estimation of the number of valid UTF-8 bytes up to the - * first invalid one, but never overestimating. */ -simdutf_really_inline static size_t rvv_count_valid_utf8(const char *src, - size_t len) { - const char *beg = src; - if (len < 32) - return 0; + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, - /* validate first three bytes */ - { - size_t idx = 3; - while (idx < len && (src[idx] >> 6) == 0b10) - ++idx; - if (idx > 3 + 3 || !scalar::utf8::validate(src, idx)) - return 0; - } + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, - static const uint64_t err1m[] = {0x0202020202020202, 0x4915012180808080}; - static const uint64_t err2m[] = {0xCBCBCB8B8383A3E7, 0xCBCBDBCBCBCBCBCB}; - static const uint64_t err3m[] = {0x0101010101010101, 0X01010101BABAAEE6}; + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, - const vuint8m1_t err1tbl = - __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err1m, 2)); - const vuint8m1_t err2tbl = - __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err2m, 2)); - const vuint8m1_t err3tbl = - __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err3m, 2)); + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - size_t tail = 3; - size_t n = len - tail; + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); +} +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} - for (size_t vl; n > 0; n -= vl, src += vl) { - vl = __riscv_vsetvl_e8m4(n); - vuint8m4_t v0 = __riscv_vle8_v_u8m4((uint8_t const *)src, vl); +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; - uint8_t next0 = src[vl + 0]; - uint8_t next1 = src[vl + 1]; - uint8_t next2 = src[vl + 2]; + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } - /* fast path: ASCII */ - if (__riscv_vfirst_m_b2(__riscv_vmsgtu_vx_u8m4_b2(v0, 0b01111111, vl), vl) < - 0 && - (next0 | next1 | next2) < 0b10000000) - continue; + template + simdutf_really_inline size_t convert(const char *in, size_t size, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (utf8_continuation_mask & 1) { + return 0; // error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + return 0; + } + if (pos < size) { + size_t howmany = scalar::utf8_to_utf16::convert( + in + pos, size - pos, utf16_output); + if (howmany == 0) { + return 0; + } + utf16_output += howmany; + } + return utf16_output - start; + } - /* see "Validating UTF-8 In Less Than One Instruction Per Byte" - * https://arxiv.org/abs/2010.03090 */ - vuint8m4_t v1 = __riscv_vslide1down_vx_u8m4(v0, next0, vl); - vuint8m4_t v2 = __riscv_vslide1down_vx_u8m4(v1, next1, vl); - vuint8m4_t v3 = __riscv_vslide1down_vx_u8m4(v2, next2, vl); + template + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char16_t *utf16_output) { + size_t pos = 0; + char16_t *start{utf16_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (errors() || (utf8_continuation_mask & 1)) { + // rewind_and_convert_with_errors will seek a potential error from + // in+pos onward, with the ability to go back up to pos bytes, and + // read size-pos bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + res.count += pos; + return res; + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + res.count += pos; + return res; + } + if (pos < size) { + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + utf16_output += res.count; + } + } + return result(error_code::SUCCESS, utf16_output - start); + } - vuint8m4_t s1 = __riscv_vreinterpret_v_u16m4_u8m4(__riscv_vsrl_vx_u16m4( - __riscv_vreinterpret_v_u8m4_u16m4(v2), 4, __riscv_vsetvlmax_e16m4())); - vuint8m4_t s3 = __riscv_vreinterpret_v_u16m4_u8m4(__riscv_vsrl_vx_u16m4( - __riscv_vreinterpret_v_u8m4_u16m4(v3), 4, __riscv_vsetvlmax_e16m4())); + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); + } - vuint8m4_t idx2 = __riscv_vand_vx_u8m4(v2, 0xF, vl); - vuint8m4_t idx1 = __riscv_vand_vx_u8m4(s1, 0xF, vl); - vuint8m4_t idx3 = __riscv_vand_vx_u8m4(s3, 0xF, vl); +}; // struct utf8_checker +} // namespace utf8_to_utf16 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ +// transcoding from UTF-8 to UTF-32 +/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ - vuint8m4_t err1 = simdutf_vrgather_u8m1x4(err1tbl, idx1); - vuint8m4_t err2 = simdutf_vrgather_u8m1x4(err2tbl, idx2); - vuint8m4_t err3 = simdutf_vrgather_u8m1x4(err3tbl, idx3); - vint8m4_t errs = __riscv_vreinterpret_v_u8m4_i8m4( - __riscv_vand_vv_u8m4(__riscv_vand_vv_u8m4(err1, err2, vl), err3, vl)); +namespace simdutf { +namespace lsx { +namespace { +namespace utf8_to_utf32 { - vbool2_t is_3 = __riscv_vmsgtu_vx_u8m4_b2(v1, 0b11100000 - 1, vl); - vbool2_t is_4 = __riscv_vmsgtu_vx_u8m4_b2(v0, 0b11110000 - 1, vl); - vbool2_t is_34 = __riscv_vmor_mm_b2(is_3, is_4, vl); - vbool2_t err34 = - __riscv_vmxor_mm_b2(is_34, __riscv_vmslt_vx_i8m4_b2(errs, 0, vl), vl); - vbool2_t errm = - __riscv_vmor_mm_b2(__riscv_vmsgt_vx_i8m4_b2(errs, 0, vl), err34, vl); - if (__riscv_vfirst_m_b2(errm, vl) >= 0) - break; - } +using namespace simd; - /* we need to validate the last character */ - while (tail < len && (src[0] >> 6) == 0b10) - --src, ++tail; - return src - beg; +simdutf_warn_unused size_t convert_valid(const char *input, size_t size, + char32_t *utf32_output) noexcept { + size_t pos = 0; + char32_t *start{utf32_output}; + const size_t safety_margin = 16; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 in(reinterpret_cast(input + pos)); + if (in.is_ascii()) { + in.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // -65 is 0b10111111 in two-complement's, so largest possible continuation + // byte + uint64_t utf8_continuation_mask = in.lt(-65 + 1); + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + size_t max_starting_point = (pos + 64) - 12; + while (pos < max_starting_point) { + size_t consumed = convert_masked_utf8_to_utf32( + input + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + } + } + utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, + utf32_output); + return utf32_output - start; } -simdutf_warn_unused bool -implementation::validate_utf8(const char *src, size_t len) const noexcept { - size_t count = rvv_count_valid_utf8(src, len); - return scalar::utf8::validate(src + count, len - count); -} +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ -simdutf_warn_unused result implementation::validate_utf8_with_errors( - const char *src, size_t len) const noexcept { - size_t count = rvv_count_valid_utf8(src, len); - result res = scalar::utf8::validate_with_errors(src + count, len - count); - return result(res.error, count + res.count); -} +namespace simdutf { +namespace lsx { +namespace { +namespace utf8_to_utf32 { +using namespace simd; -simdutf_warn_unused bool -implementation::validate_utf16le(const char16_t *src, - size_t len) const noexcept { - return validate_utf16le_with_errors(src, len).error == error_code::SUCCESS; -} +simdutf_really_inline simd8 +check_special_cases(const simd8 input, const simd8 prev1) { + // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) + // Bit 1 = Too Long (ASCII followed by continuation) + // Bit 2 = Overlong 3-byte + // Bit 4 = Surrogate + // Bit 5 = Overlong 2-byte + // Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ -simdutf_warn_unused bool -implementation::validate_utf16be(const char16_t *src, - size_t len) const noexcept { - return validate_utf16be_with_errors(src, len).error == error_code::SUCCESS; -} + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + constexpr const uint8_t CARRY = + TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = + (prev1 & 0x0F) + .lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, CARRY, -template -simdutf_really_inline static result -rvv_validate_utf16_with_errors(const char16_t *src, size_t len) { - const char16_t *beg = src; - uint16_t last = 0; - for (size_t vl; len > 0; - len -= vl, src += vl, last = simdutf_byteflip(src[-1])) { - vl = __riscv_vsetvl_e16m8(len); - vuint16m8_t v1 = __riscv_vle16_v_u16m8((const uint16_t *)src, vl); - v1 = simdutf_byteflip(v1, vl); - vuint16m8_t v0 = __riscv_vslide1up_vx_u16m8(v1, last, vl); + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, - vbool2_t surhi = __riscv_vmseq_vx_u16m8_b2( - __riscv_vand_vx_u16m8(v0, 0xFC00, vl), 0xD800, vl); - vbool2_t surlo = __riscv_vmseq_vx_u16m8_b2( - __riscv_vand_vx_u16m8(v1, 0xFC00, vl), 0xDC00, vl); + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, - long idx = __riscv_vfirst_m_b2(__riscv_vmxor_mm_b2(surhi, surlo, vl), vl); - if (idx >= 0) { - last = idx > 0 ? simdutf_byteflip(src[idx - 1]) : last; - return result(error_code::SURROGATE, - src - beg + idx - (last - 0xD800u < 0x400u)); - break; - } - } - if (last - 0xD800u < 0x400u) { - return result(error_code::SURROGATE, - src - beg - 1); /* end on high surrogate */ - } else { - return result(error_code::SUCCESS, src - beg); - } -} + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | + OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, -simdutf_warn_unused result implementation::validate_utf16le_with_errors( - const char16_t *src, size_t len) const noexcept { - return rvv_validate_utf16_with_errors(src, len); + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); + return (byte_1_high & byte_1_low & byte_2_high); } - -simdutf_warn_unused result implementation::validate_utf16be_with_errors( - const char16_t *src, size_t len) const noexcept { - if (supports_zvbb()) - return rvv_validate_utf16_with_errors(src, len); - else - return rvv_validate_utf16_with_errors(src, len); +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; } -simdutf_warn_unused bool -implementation::validate_utf32(const char32_t *src, size_t len) const noexcept { - size_t vlmax = __riscv_vsetvlmax_e32m8(); - vuint32m8_t max = __riscv_vmv_v_x_u32m8(0x10FFFF, vlmax); - vuint32m8_t maxOff = __riscv_vmv_v_x_u32m8(0xFFFFF7FF, vlmax); - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e32m8(len); - vuint32m8_t v = __riscv_vle32_v_u32m8((uint32_t *)src, vl); - vuint32m8_t off = __riscv_vadd_vx_u32m8(v, 0xFFFF2000, vl); - max = __riscv_vmaxu_vv_u32m8_tu(max, max, v, vl); - maxOff = __riscv_vmaxu_vv_u32m8_tu(maxOff, maxOff, off, vl); +struct validating_transcoder { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + + validating_transcoder() : error(uint8_t(0)) {} + // + // Check whether the current bytes are valid UTF-8. + // + simdutf_really_inline void check_utf8_bytes(const simd8 input, + const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ + // lead bytes (2, 3, 4-byte leads become large positive numbers instead of + // small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); } - return __riscv_vfirst_m_b4( - __riscv_vmor_mm_b4( - __riscv_vmsne_vx_u32m8_b4(max, 0x10FFFF, vlmax), - __riscv_vmsne_vx_u32m8_b4(maxOff, 0xFFFFF7FF, vlmax), vlmax), - vlmax) < 0; -} -simdutf_warn_unused result implementation::validate_utf32_with_errors( - const char32_t *src, size_t len) const noexcept { - const char32_t *beg = src; - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e32m8(len); - vuint32m8_t v = __riscv_vle32_v_u32m8((uint32_t *)src, vl); - vuint32m8_t off = __riscv_vadd_vx_u32m8(v, 0xFFFF2000, vl); - long idx1 = - __riscv_vfirst_m_b4(__riscv_vmsgtu_vx_u32m8_b4(v, 0x10FFFF, vl), vl); - long idx2 = __riscv_vfirst_m_b4( - __riscv_vmsgtu_vx_u32m8_b4(off, 0xFFFFF7FF, vl), vl); - if (idx1 >= 0 && idx2 >= 0) { - if (idx1 <= idx2) { - return result(error_code::TOO_LARGE, src - beg + idx1); + simdutf_really_inline size_t convert(const char *in, size_t size, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 words when calling convert_masked_utf8_to_utf32. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 16 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the fourth + // last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; } else { - return result(error_code::SURROGATE, src - beg + idx2); + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (utf8_continuation_mask & 1) { + return 0; // we have an error + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. } } - if (idx1 >= 0) { - return result(error_code::TOO_LARGE, src - beg + idx1); + if (errors()) { + return 0; } - if (idx2 >= 0) { - return result(error_code::SURROGATE, src - beg + idx2); + if (pos < size) { + size_t howmany = + scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output); + if (howmany == 0) { + return 0; + } + utf32_output += howmany; } + return utf32_output - start; } - return result(error_code::SUCCESS, src - beg); -} -/* end file src/rvv/rvv_validate.inl.cpp */ - -/* begin file src/rvv/rvv_latin1_to.inl.cpp */ -simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( - const char *src, size_t len, char *dst) const noexcept { - char *beg = dst; - for (size_t vl, vlOut; len > 0; len -= vl, src += vl, dst += vlOut) { - vl = __riscv_vsetvl_e8m2(len); - vuint8m2_t v1 = __riscv_vle8_v_u8m2((uint8_t *)src, vl); - vbool4_t nascii = - __riscv_vmslt_vx_i8m2_b4(__riscv_vreinterpret_v_u8m2_i8m2(v1), 0, vl); - size_t cnt = __riscv_vcpop_m_b4(nascii, vl); - vlOut = vl + cnt; - if (cnt == 0) { - __riscv_vse8_v_u8m2((uint8_t *)dst, v1, vlOut); - continue; + simdutf_really_inline result convert_with_errors(const char *in, size_t size, + char32_t *utf32_output) { + size_t pos = 0; + char32_t *start{utf32_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the + // last 16 bytes, and if the data is valid, then it is entirely safe because + // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot + // generally assume that you have valid UTF-8 input, so we are going to go + // back from the end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); } - - vuint8m2_t v0 = - __riscv_vor_vx_u8m2(__riscv_vsrl_vx_u8m2(v1, 6, vl), 0b11000000, vl); - v1 = __riscv_vand_vx_u8m2_mu(nascii, v1, v1, 0b10111111, vl); - - vuint8m4_t wide = - __riscv_vreinterpret_v_u16m4_u8m4(__riscv_vwmaccu_vx_u16m4( - __riscv_vwaddu_vv_u16m4(v0, v1, vl), 0xFF, v1, vl)); - vbool2_t mask = __riscv_vmsgtu_vx_u8m4_b2( - __riscv_vsub_vx_u8m4(wide, 0b11000000, vl * 2), 1, vl * 2); - vuint8m4_t comp = __riscv_vcompress_vm_u8m4(wide, mask, vl * 2); - - __riscv_vse8_v_u8m4((uint8_t *)dst, comp, vlOut); - } - return dst - beg; -} - -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( - const char *src, size_t len, char16_t *dst) const noexcept { - char16_t *beg = dst; - for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { - vl = __riscv_vsetvl_e8m4(len); - vuint8m4_t v = __riscv_vle8_v_u8m4((uint8_t *)src, vl); - __riscv_vse16_v_u16m8((uint16_t *)dst, __riscv_vzext_vf2_u16m8(v, vl), vl); - } - return dst - beg; -} - -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( - const char *src, size_t len, char16_t *dst) const noexcept { - char16_t *beg = dst; - for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { - vl = __riscv_vsetvl_e8m4(len); - vuint8m4_t v = __riscv_vle8_v_u8m4((uint8_t *)src, vl); - __riscv_vse16_v_u16m8( - (uint16_t *)dst, - __riscv_vsll_vx_u16m8(__riscv_vzext_vf2_u16m8(v, vl), 8, vl), vl); + // If the input is long enough, then we have that margin-1 is the fourth + // last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, + // it is not good enough. + static_assert( + (simd8x64::NUM_CHUNKS == 2) || + (simd8x64::NUM_CHUNKS == 4), + "We support either two or four chunks per 64-byte block."); + auto zero = simd8{uint8_t(0)}; + if (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], zero); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (errors() || (utf8_continuation_mask & 1)) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + res.count += pos; + return res; + } + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (errors()) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + res.count += pos; + return res; + } + if (pos < size) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); + if (res.error) { // In case of error, we want the error position + res.count += pos; + return res; + } else { // In case of success, we want the number of word written + utf32_output += res.count; + } + } + return result(error_code::SUCCESS, utf32_output - start); } - return dst - beg; -} -simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( - const char *src, size_t len, char32_t *dst) const noexcept { - char32_t *beg = dst; - for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { - vl = __riscv_vsetvl_e8m2(len); - vuint8m2_t v = __riscv_vle8_v_u8m2((uint8_t *)src, vl); - __riscv_vse32_v_u32m8((uint32_t *)dst, __riscv_vzext_vf4_u32m8(v, vl), vl); + simdutf_really_inline bool errors() const { + return this->error.any_bits_set_anywhere(); } - return dst - beg; -} -/* end file src/rvv/rvv_latin1_to.inl.cpp */ -/* begin file src/rvv/rvv_utf16_to.inl.cpp */ -#include -template -simdutf_really_inline static result -rvv_utf16_to_latin1_with_errors(const char16_t *src, size_t len, char *dst) { - const char16_t *const beg = src; - for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { - vl = __riscv_vsetvl_e16m8(len); - vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); - v = simdutf_byteflip(v, vl); - long idx = __riscv_vfirst_m_b2(__riscv_vmsgtu_vx_u16m8_b2(v, 255, vl), vl); - if (idx >= 0) - return result(error_code::TOO_LARGE, src - beg + idx); - __riscv_vse8_v_u8m4((uint8_t *)dst, __riscv_vncvt_x_x_w_u8m4(v, vl), vl); - } - return result(error_code::SUCCESS, src - beg); -} +}; // struct utf8_checker +} // namespace utf8_to_utf32 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ -simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( - const char16_t *src, size_t len, char *dst) const noexcept { - result res = convert_utf16le_to_latin1_with_errors(src, len, dst); - return res.error == error_code::SUCCESS ? res.count : 0; -} -simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( - const char16_t *src, size_t len, char *dst) const noexcept { - result res = convert_utf16be_to_latin1_with_errors(src, len, dst); - return res.error == error_code::SUCCESS ? res.count : 0; -} +// other functions +/* begin file src/generic/utf8.h */ -simdutf_warn_unused result -implementation::convert_utf16le_to_latin1_with_errors( - const char16_t *src, size_t len, char *dst) const noexcept { - return rvv_utf16_to_latin1_with_errors(src, len, dst); -} +namespace simdutf { +namespace lsx { +namespace { +namespace utf8 { -simdutf_warn_unused result -implementation::convert_utf16be_to_latin1_with_errors( - const char16_t *src, size_t len, char *dst) const noexcept { - if (supports_zvbb()) - return rvv_utf16_to_latin1_with_errors(src, len, - dst); - else - return rvv_utf16_to_latin1_with_errors(src, len, dst); -} +using namespace simd; -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( - const char16_t *src, size_t len, char *dst) const noexcept { - const char16_t *const beg = src; - for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { - vl = __riscv_vsetvl_e16m8(len); - vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); - __riscv_vse8_v_u8m4((uint8_t *)dst, __riscv_vncvt_x_x_w_u8m4(v, vl), vl); +simdutf_really_inline size_t count_code_points(const char *in, size_t size) { + size_t pos = 0; + size_t count = 0; + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t utf8_continuation_mask = input.gt(-65); + count += count_ones(utf8_continuation_mask); } - return src - beg; + return count + scalar::utf8::count_code_points(in + pos, size - pos); } -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( - const char16_t *src, size_t len, char *dst) const noexcept { - const char16_t *const beg = src; - for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { - vl = __riscv_vsetvl_e16m8(len); - vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); - __riscv_vse8_v_u8m4((uint8_t *)dst, __riscv_vnsrl_wx_u8m4(v, 8, vl), vl); +simdutf_really_inline size_t utf16_length_from_utf8(const char *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + // We count one word for anything that is not a continuation (so + // leading bytes). + count += 64 - count_ones(utf8_continuation_mask); + int64_t utf8_4byte = input.gteq_unsigned(240); + count += count_ones(utf8_4byte); } - return src - beg; + return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); } +} // namespace utf8 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf8.h */ +/* begin file src/generic/utf16.h */ +namespace simdutf { +namespace lsx { +namespace { +namespace utf16 { -template -simdutf_really_inline static result -rvv_utf16_to_utf8_with_errors(const char16_t *src, size_t len, char *dst) { - size_t n = len; - const char16_t *srcBeg = src; - const char *dstBeg = dst; - size_t vl8m4 = __riscv_vsetvlmax_e8m4(); - vbool2_t m4mulp2 = __riscv_vmseq_vx_u8m4_b2( - __riscv_vand_vx_u8m4(__riscv_vid_v_u8m4(vl8m4), 3, vl8m4), 2, vl8m4); - - for (size_t vl, vlOut; n > 0;) { - vl = __riscv_vsetvl_e16m2(n); - - vuint16m2_t v = __riscv_vle16_v_u16m2((uint16_t const *)src, vl); - v = simdutf_byteflip(v, vl); - vbool8_t m234 = __riscv_vmsgtu_vx_u16m2_b8(v, 0x80 - 1, vl); - - if (__riscv_vfirst_m_b8(m234, vl) < 0) { /* 1 byte utf8 */ - vlOut = vl; - __riscv_vse8_v_u8m1((uint8_t *)dst, __riscv_vncvt_x_x_w_u8m1(v, vlOut), - vlOut); - n -= vl, src += vl, dst += vlOut; - continue; +template +simdutf_really_inline size_t count_code_points(const char16_t *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input(reinterpret_cast(in + pos)); + if (!match_system(big_endian)) { + input.swap_bytes(); } + uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF); + count += count_ones(not_pair) / 2; + } + return count + + scalar::utf16::count_code_points(in + pos, size - pos); +} - vbool8_t m34 = __riscv_vmsgtu_vx_u16m2_b8(v, 0x800 - 1, vl); - - if (__riscv_vfirst_m_b8(m34, vl) < 0) { /* 1/2 byte utf8 */ - /* 0: [ aaa|aabbbbbb] - * 1: [aabbbbbb| ] vsll 8 - * 2: [ | aaaaa] vsrl 6 - * 3: [00111111|00011111] - * 4: [ bbbbbb|000aaaaa] (1|2)&3 - * 5: [11000000|11000000] - * 6: [10bbbbbb|110aaaaa] 4|5 */ - vuint16m2_t twoByte = __riscv_vand_vx_u16m2( - __riscv_vor_vv_u16m2(__riscv_vsll_vx_u16m2(v, 8, vl), - __riscv_vsrl_vx_u16m2(v, 6, vl), vl), - 0b0011111100011111, vl); - vuint16m2_t vout16 = - __riscv_vor_vx_u16m2_mu(m234, v, twoByte, 0b1000000011000000, vl); - vuint8m2_t vout = __riscv_vreinterpret_v_u16m2_u8m2(vout16); - - /* Every high byte that is zero should be compressed - * low bytes should never be compressed, so we set them - * to all ones, and then create a non-zero bytes mask */ - vbool4_t mcomp = - __riscv_vmsne_vx_u8m2_b4(__riscv_vreinterpret_v_u16m2_u8m2( - __riscv_vor_vx_u16m2(vout16, 0xFF, vl)), - 0, vl * 2); - vlOut = __riscv_vcpop_m_b4(mcomp, vl * 2); - - vout = __riscv_vcompress_vm_u8m2(vout, mcomp, vl * 2); - __riscv_vse8_v_u8m2((uint8_t *)dst, vout, vlOut); - - n -= vl, src += vl, dst += vlOut; - continue; +template +simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! + for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input(reinterpret_cast(in + pos)); + if (!match_system(big_endian)) { + input.swap_bytes(); } + uint64_t ascii_mask = input.lteq(0x7F); + uint64_t twobyte_mask = input.lteq(0x7FF); + uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF); - vbool8_t sur = __riscv_vmseq_vx_u16m2_b8( - __riscv_vand_vx_u16m2(v, 0xF800, vl), 0xD800, vl); - long first = __riscv_vfirst_m_b8(sur, vl); - size_t tail = vl - first; - vl = first < 0 ? vl : first; - - if (vl > 0) { /* 1/2/3 byte utf8 */ - /* in: [aaaabbbb|bbcccccc] - * v1: [0bcccccc| ] vsll 8 - * v1: [10cccccc| ] vsll 8 & 0b00111111 | 0b10000000 - * v2: [ |110bbbbb] vsrl 6 & 0b00111111 | 0b11000000 - * v2: [ |10bbbbbb] vsrl 6 & 0b00111111 | 0b10000000 - * v3: [ |1110aaaa] vsrl 12 | 0b11100000 - * 1: [00000000|0bcccccc|00000000|00000000] => [0bcccccc] - * 2: [00000000|10cccccc|110bbbbb|00000000] => [110bbbbb] [10cccccc] - * 3: [00000000|10cccccc|10bbbbbb|1110aaaa] => [1110aaaa] [10bbbbbb] - * [10cccccc] - */ - vuint16m2_t v1, v2, v3, v12; - v1 = __riscv_vor_vx_u16m2_mu( - m234, v, __riscv_vand_vx_u16m2(v, 0b00111111, vl), 0b10000000, vl); - v1 = __riscv_vsll_vx_u16m2(v1, 8, vl); - - v2 = __riscv_vor_vx_u16m2( - __riscv_vand_vx_u16m2(__riscv_vsrl_vx_u16m2(v, 6, vl), 0b00111111, - vl), - 0b10000000, vl); - v2 = __riscv_vor_vx_u16m2_mu(__riscv_vmnot_m_b8(m34, vl), v2, v2, - 0b01000000, vl); - v3 = __riscv_vor_vx_u16m2(__riscv_vsrl_vx_u16m2(v, 12, vl), 0b11100000, - vl); - v12 = __riscv_vor_vv_u16m2_mu(m234, v1, v1, v2, vl); + size_t ascii_count = count_ones(ascii_mask) / 2; + size_t twobyte_count = count_ones(twobyte_mask & ~ascii_mask) / 2; + size_t threebyte_count = count_ones(not_pair_mask & ~twobyte_mask) / 2; + size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2; + count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + + ascii_count; + } + return count + scalar::utf16::utf8_length_from_utf16(in + pos, + size - pos); +} - vuint32m4_t w12 = __riscv_vwmulu_vx_u32m4(v12, 1 << 8, vl); - vuint32m4_t w123 = __riscv_vwaddu_wv_u32m4_mu(m34, w12, w12, v3, vl); - vuint8m4_t vout = __riscv_vreinterpret_v_u32m4_u8m4(w123); +template +simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, + size_t size) { + return count_code_points(in, size); +} - vbool2_t mcomp = __riscv_vmor_mm_b2( - m4mulp2, __riscv_vmsne_vx_u8m4_b2(vout, 0, vl * 4), vl * 4); - vlOut = __riscv_vcpop_m_b2(mcomp, vl * 4); +simdutf_really_inline void +change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { + size_t pos = 0; - vout = __riscv_vcompress_vm_u8m4(vout, mcomp, vl * 4); - __riscv_vse8_v_u8m4((uint8_t *)dst, vout, vlOut); + while (pos < size / 32 * 32) { + simd16x32 input(reinterpret_cast(in + pos)); + input.swap_bytes(); + input.store(reinterpret_cast(output)); + pos += 32; + output += 32; + } - n -= vl, src += vl, dst += vlOut; - } + scalar::utf16::change_endianness_utf16(in + pos, size - pos, output); +} - if (tail) - while (n) { - uint16_t word = simdutf_byteflip(src[0]); - if ((word & 0xFF80) == 0) { - break; - } else if ((word & 0xF800) == 0) { - break; - } else if ((word & 0xF800) != 0xD800) { - break; - } else { - // must be a surrogate pair - if (n <= 1) - return result(error_code::SURROGATE, src - srcBeg); - uint16_t diff = word - 0xD800; - if (diff > 0x3FF) - return result(error_code::SURROGATE, src - srcBeg); - uint16_t diff2 = simdutf_byteflip(src[1]) - 0xDC00; - if (diff2 > 0x3FF) - return result(error_code::SURROGATE, src - srcBeg); +} // namespace utf16 +} // unnamed namespace +} // namespace lsx +} // namespace simdutf +/* end file src/generic/utf16.h */ - uint32_t value = ((diff + 0x40) << 10) + diff2; +// +// Implementation-specific overrides +// +namespace simdutf { +namespace lsx { - // will generate four UTF-8 bytes - // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX - *dst++ = (char)((value >> 18) | 0b11110000); - *dst++ = (char)(((value >> 12) & 0b111111) | 0b10000000); - *dst++ = (char)(((value >> 6) & 0b111111) | 0b10000000); - *dst++ = (char)((value & 0b111111) | 0b10000000); - src += 2; - n -= 2; - } - } +simdutf_warn_unused int +implementation::detect_encodings(const char *input, + size_t length) const noexcept { + // If there is a BOM, then we trust it. + auto bom_encoding = simdutf::BOM::check_bom(input, length); + // todo: reimplement as a one-pass algorithm. + if (bom_encoding != encoding_type::unspecified) { + return bom_encoding; } - - return result(error_code::SUCCESS, dst - dstBeg); + int out = 0; + if (validate_utf8(input, length)) { + out |= encoding_type::UTF8; + } + if ((length % 2) == 0) { + if (validate_utf16le(reinterpret_cast(input), + length / 2)) { + out |= encoding_type::UTF16_LE; + } + } + if ((length % 4) == 0) { + if (validate_utf32(reinterpret_cast(input), length / 4)) { + out |= encoding_type::UTF32_LE; + } + } + return out; } -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( - const char16_t *src, size_t len, char *dst) const noexcept { - result res = convert_utf16le_to_utf8_with_errors(src, len, dst); - return res.error == error_code::SUCCESS ? res.count : 0; +simdutf_warn_unused bool +implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return lsx::utf8_validation::generic_validate_utf8(buf, len); } -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( - const char16_t *src, size_t len, char *dst) const noexcept { - result res = convert_utf16be_to_utf8_with_errors(src, len, dst); - return res.error == error_code::SUCCESS ? res.count : 0; +simdutf_warn_unused result implementation::validate_utf8_with_errors( + const char *buf, size_t len) const noexcept { + return lsx::utf8_validation::generic_validate_utf8_with_errors(buf, len); } -simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( - const char16_t *src, size_t len, char *dst) const noexcept { - return rvv_utf16_to_utf8_with_errors(src, len, dst); +simdutf_warn_unused bool +implementation::validate_ascii(const char *buf, size_t len) const noexcept { + return lsx::utf8_validation::generic_validate_ascii(buf, len); } -simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( - const char16_t *src, size_t len, char *dst) const noexcept { - if (supports_zvbb()) - return rvv_utf16_to_utf8_with_errors(src, len, dst); - else - return rvv_utf16_to_utf8_with_errors(src, len, dst); +simdutf_warn_unused result implementation::validate_ascii_with_errors( + const char *buf, size_t len) const noexcept { + return lsx::utf8_validation::generic_validate_ascii_with_errors(buf, len); } -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( - const char16_t *src, size_t len, char *dst) const noexcept { - return convert_utf16le_to_utf8(src, len, dst); +simdutf_warn_unused bool +implementation::validate_utf16le(const char16_t *buf, + size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid. protected the implementation from nullptr. + return true; + } + const char16_t *tail = lsx_validate_utf16(buf, len); + if (tail) { + return scalar::utf16::validate(tail, + len - (tail - buf)); + } else { + return false; + } } -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( - const char16_t *src, size_t len, char *dst) const noexcept { - return convert_utf16be_to_utf8(src, len, dst); +simdutf_warn_unused bool +implementation::validate_utf16be(const char16_t *buf, + size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid. protected the implementation from nullptr. + return true; + } + const char16_t *tail = lsx_validate_utf16(buf, len); + if (tail) { + return scalar::utf16::validate(tail, len - (tail - buf)); + } else { + return false; + } } -template -simdutf_really_inline static result -rvv_utf16_to_utf32_with_errors(const char16_t *src, size_t len, char32_t *dst) { - const char16_t *const srcBeg = src; - char32_t *const dstBeg = dst; - - constexpr const uint16_t ANY_SURROGATE_MASK = 0xf800; - constexpr const uint16_t ANY_SURROGATE_VALUE = 0xd800; - constexpr const uint16_t LO_SURROGATE_MASK = 0xfc00; - constexpr const uint16_t LO_SURROGATE_VALUE = 0xdc00; - constexpr const uint16_t HI_SURROGATE_MASK = 0xfc00; - constexpr const uint16_t HI_SURROGATE_VALUE = 0xd800; - - uint16_t last = 0; - while (len > 0) { - size_t vl = __riscv_vsetvl_e16m2(len); - vuint16m2_t v0 = __riscv_vle16_v_u16m2((uint16_t const *)src, vl); - v0 = simdutf_byteflip(v0, vl); - - { // check fast-path - const vuint16m2_t v = __riscv_vand_vx_u16m2(v0, ANY_SURROGATE_MASK, vl); - const vbool8_t any_surrogate = - __riscv_vmseq_vx_u16m2_b8(v, ANY_SURROGATE_VALUE, vl); - if (__riscv_vfirst_m_b8(any_surrogate, vl) < 0) { - /* no surrogates */ - __riscv_vse32_v_u32m4((uint32_t *)dst, __riscv_vzext_vf2_u32m4(v0, vl), - vl); - len -= vl; - src += vl; - dst += vl; - continue; - } - } - - if ((simdutf_byteflip(src[0]) & LO_SURROGATE_MASK) == - LO_SURROGATE_VALUE) { - return result(error_code::SURROGATE, src - srcBeg); - } - - // decode surrogates - vuint16m2_t v1 = __riscv_vslide1down_vx_u16m2(v0, 0, vl); - vl = __riscv_vsetvl_e16m2(vl - 1); - if (vl == 0) { - return result(error_code::SURROGATE, src - srcBeg); - } - - const vbool8_t surhi = __riscv_vmseq_vx_u16m2_b8( - __riscv_vand_vx_u16m2(v0, HI_SURROGATE_MASK, vl), HI_SURROGATE_VALUE, - vl); - const vbool8_t surlo = __riscv_vmseq_vx_u16m2_b8( - __riscv_vand_vx_u16m2(v1, LO_SURROGATE_MASK, vl), LO_SURROGATE_VALUE, - vl); - - // compress everything but lo surrogates - const vbool8_t compress = __riscv_vmsne_vx_u16m2_b8( - __riscv_vand_vx_u16m2(v0, LO_SURROGATE_MASK, vl), LO_SURROGATE_VALUE, - vl); - - { - const vbool8_t diff = __riscv_vmxor_mm_b8(surhi, surlo, vl); - const long idx = __riscv_vfirst_m_b8(diff, vl); - if (idx >= 0) { - uint16_t word = simdutf_byteflip(src[idx]); - if (word < 0xD800 || word > 0xDBFF) { - return result(error_code::SURROGATE, src - srcBeg + idx + 1); - } - return result(error_code::SURROGATE, src - srcBeg + idx); - } - } - - last = simdutf_byteflip(src[vl]); - vuint32m4_t utf32 = __riscv_vzext_vf2_u32m4(v0, vl); - - // v0 = 110110yyyyyyyyyy (0xd800 + yyyyyyyyyy) --- hi surrogate - // v1 = 110111xxxxxxxxxx (0xdc00 + xxxxxxxxxx) --- lo surrogate - - // t0 = u16( 0000_00yy_yyyy_yyyy) - const vuint32m4_t t0 = - __riscv_vzext_vf2_u32m4(__riscv_vand_vx_u16m2(v0, 0x03ff, vl), vl); - // t1 = u32(0000_0000_0000_yyyy_yyyy_yy00_0000_0000) - const vuint32m4_t t1 = __riscv_vsll_vx_u32m4(t0, 10, vl); - - // t2 = u32(0000_0000_0000_0000_0000_00xx_xxxx_xxxx) - const vuint32m4_t t2 = - __riscv_vzext_vf2_u32m4(__riscv_vand_vx_u16m2(v1, 0x03ff, vl), vl); - - // t3 = u32(0000_0000_0000_yyyy_yyyy_yyxx_xxxx_xxxx) - const vuint32m4_t t3 = __riscv_vor_vv_u32m4(t1, t2, vl); +simdutf_warn_unused result implementation::validate_utf16le_with_errors( + const char16_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + result res = lsx_validate_utf16_with_errors(buf, len); + if (res.count != len) { + result scalar_res = scalar::utf16::validate_with_errors( + buf + res.count, len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } +} - // t4 = utf32 from surrogate pairs - const vuint32m4_t t4 = __riscv_vadd_vx_u32m4(t3, 0x10000, vl); +simdutf_warn_unused result implementation::validate_utf16be_with_errors( + const char16_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + result res = lsx_validate_utf16_with_errors(buf, len); + if (res.count != len) { + result scalar_res = scalar::utf16::validate_with_errors( + buf + res.count, len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } +} - const vuint32m4_t result = __riscv_vmerge_vvm_u32m4(utf32, t4, surhi, vl); +simdutf_warn_unused bool +implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + // empty input is valid. protected the implementation from nullptr. + return true; + } + const char32_t *tail = lsx_validate_utf32le(buf, len); + if (tail) { + return scalar::utf32::validate(tail, len - (tail - buf)); + } else { + return false; + } +} - const vuint32m4_t comp = __riscv_vcompress_vm_u32m4(result, compress, vl); - const size_t vlOut = __riscv_vcpop_m_b8(compress, vl); - __riscv_vse32_v_u32m4((uint32_t *)dst, comp, vlOut); +simdutf_warn_unused result implementation::validate_utf32_with_errors( + const char32_t *buf, size_t len) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + result res = lsx_validate_utf32le_with_errors(buf, len); + if (res.count != len) { + result scalar_res = + scalar::utf32::validate_with_errors(buf + res.count, len - res.count); + return result(scalar_res.error, res.count + scalar_res.count); + } else { + return res; + } +} - len -= vl; - src += vl; - dst += vlOut; +simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( + const char *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + lsx_convert_latin1_to_utf8(buf, len, utf8_output); + size_t converted_chars = ret.second - utf8_output; - if ((last & LO_SURROGATE_MASK) == LO_SURROGATE_VALUE) { - // last item is lo surrogate and got already consumed - len -= 1; - src += 1; - } + if (ret.first != buf + len) { + const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; } - - return result(error_code::SUCCESS, dst - dstBeg); + return converted_chars; } -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( - const char16_t *src, size_t len, char32_t *dst) const noexcept { - result res = convert_utf16le_to_utf32_with_errors(src, len, dst); - return res.error == error_code::SUCCESS ? res.count : 0; +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + lsx_convert_latin1_to_utf16le(buf, len, utf16_output); + size_t converted_chars = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = + scalar::latin1_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + return converted_chars; } -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( - const char16_t *src, size_t len, char32_t *dst) const noexcept { - result res = convert_utf16be_to_utf32_with_errors(src, len, dst); - return res.error == error_code::SUCCESS ? res.count : 0; +simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + lsx_convert_latin1_to_utf16be(buf, len, utf16_output); + size_t converted_chars = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = + scalar::latin1_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + return converted_chars; } -simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( - const char16_t *src, size_t len, char32_t *dst) const noexcept { - return rvv_utf16_to_utf32_with_errors(src, len, dst); +simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + lsx_convert_latin1_to_utf32(buf, len, utf32_output); + size_t converted_chars = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + converted_chars += scalar_converted_chars; + } + return converted_chars; } -simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( - const char16_t *src, size_t len, char32_t *dst) const noexcept { - if (supports_zvbb()) - return rvv_utf16_to_utf32_with_errors(src, len, - dst); - else - return rvv_utf16_to_utf32_with_errors(src, len, dst); +simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + utf8_to_latin1::validating_transcoder converter; + return converter.convert(buf, len, latin1_output); } -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( - const char16_t *src, size_t len, char32_t *dst) const noexcept { - return convert_utf16le_to_utf32(src, len, dst); +simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( + const char *buf, size_t len, char *latin1_output) const noexcept { + utf8_to_latin1::validating_transcoder converter; + return converter.convert_with_errors(buf, len, latin1_output); } -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( - const char16_t *src, size_t len, char32_t *dst) const noexcept { - return convert_utf16be_to_utf32(src, len, dst); +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( + const char *buf, size_t len, char *latin1_output) const noexcept { + return lsx::utf8_to_latin1::convert_valid(buf, len, latin1_output); } -/* end file src/rvv/rvv_utf16_to.inl.cpp */ -/* begin file src/rvv/rvv_utf32_to.inl.cpp */ -simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( - const char32_t *src, size_t len, char *dst) const noexcept { - result res = convert_utf32_to_latin1_with_errors(src, len, dst); - return res.error == error_code::SUCCESS ? res.count : 0; +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert(buf, len, utf16_output); } -simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( - const char32_t *src, size_t len, char *dst) const noexcept { - const char32_t *const beg = src; - for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { - vl = __riscv_vsetvl_e32m8(len); - vuint32m8_t v = __riscv_vle32_v_u32m8((uint32_t *)src, vl); - long idx = __riscv_vfirst_m_b4(__riscv_vmsgtu_vx_u32m8_b4(v, 255, vl), vl); - if (idx >= 0) - return result(error_code::TOO_LARGE, src - beg + idx); - /* We don't use vcompress here, because its performance varies widely on - * current platforms. This might be worth reconsidering once there is more - * hardware available. */ - __riscv_vse8_v_u8m2( - (uint8_t *)dst, - __riscv_vncvt_x_x_w_u8m2(__riscv_vncvt_x_x_w_u16m4(v, vl), vl), vl); - } - return result(error_code::SUCCESS, src - beg); +simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert(buf, len, utf16_output); } -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( - const char32_t *src, size_t len, char *dst) const noexcept { - return convert_utf32_to_latin1(src, len, dst); +simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert_with_errors(buf, len, + utf16_output); } -simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( - const char32_t *src, size_t len, char *dst) const noexcept { - size_t n = len; - const char32_t *srcBeg = src; - const char *dstBeg = dst; - size_t vl8m4 = __riscv_vsetvlmax_e8m4(); - vbool2_t m4mulp2 = __riscv_vmseq_vx_u8m4_b2( - __riscv_vand_vx_u8m4(__riscv_vid_v_u8m4(vl8m4), 3, vl8m4), 2, vl8m4); - - for (size_t vl, vlOut; n > 0;) { - vl = __riscv_vsetvl_e32m4(n); +simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( + const char *buf, size_t len, char16_t *utf16_output) const noexcept { + utf8_to_utf16::validating_transcoder converter; + return converter.convert_with_errors(buf, len, utf16_output); +} - vuint32m4_t v = __riscv_vle32_v_u32m4((uint32_t const *)src, vl); - vbool8_t m234 = __riscv_vmsgtu_vx_u32m4_b8(v, 0x80 - 1, vl); - vuint16m2_t vn = __riscv_vncvt_x_x_w_u16m2(v, vl); +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( + const char *input, size_t size, char16_t *utf16_output) const noexcept { + return utf8_to_utf16::convert_valid(input, size, + utf16_output); +} - if (__riscv_vfirst_m_b8(m234, vl) < 0) { /* 1 byte utf8 */ - vlOut = vl; - __riscv_vse8_v_u8m1((uint8_t *)dst, __riscv_vncvt_x_x_w_u8m1(vn, vlOut), - vlOut); - n -= vl, src += vl, dst += vlOut; - continue; - } +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( + const char *input, size_t size, char16_t *utf16_output) const noexcept { + return utf8_to_utf16::convert_valid(input, size, + utf16_output); +} - vbool8_t m34 = __riscv_vmsgtu_vx_u32m4_b8(v, 0x800 - 1, vl); +simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + utf8_to_utf32::validating_transcoder converter; + return converter.convert(buf, len, utf32_output); +} - if (__riscv_vfirst_m_b8(m34, vl) < 0) { /* 1/2 byte utf8 */ - /* 0: [ aaa|aabbbbbb] - * 1: [aabbbbbb| ] vsll 8 - * 2: [ | aaaaa] vsrl 6 - * 3: [00111111|00111111] - * 4: [ bbbbbb|000aaaaa] (1|2)&3 - * 5: [10000000|11000000] - * 6: [10bbbbbb|110aaaaa] 4|5 */ - vuint16m2_t twoByte = __riscv_vand_vx_u16m2( - __riscv_vor_vv_u16m2(__riscv_vsll_vx_u16m2(vn, 8, vl), - __riscv_vsrl_vx_u16m2(vn, 6, vl), vl), - 0b0011111100111111, vl); - vuint16m2_t vout16 = - __riscv_vor_vx_u16m2_mu(m234, vn, twoByte, 0b1000000011000000, vl); - vuint8m2_t vout = __riscv_vreinterpret_v_u16m2_u8m2(vout16); +simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( + const char *buf, size_t len, char32_t *utf32_output) const noexcept { + utf8_to_utf32::validating_transcoder converter; + return converter.convert_with_errors(buf, len, utf32_output); +} - /* Every high byte that is zero should be compressed - * low bytes should never be compressed, so we set them - * to all ones, and then create a non-zero bytes mask */ - vbool4_t mcomp = - __riscv_vmsne_vx_u8m2_b4(__riscv_vreinterpret_v_u16m2_u8m2( - __riscv_vor_vx_u16m2(vout16, 0xFF, vl)), - 0, vl * 2); - vlOut = __riscv_vcpop_m_b4(mcomp, vl * 2); +simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( + const char *input, size_t size, char32_t *utf32_output) const noexcept { + return utf8_to_utf32::convert_valid(input, size, utf32_output); +} - vout = __riscv_vcompress_vm_u8m2(vout, mcomp, vl * 2); - __riscv_vse8_v_u8m2((uint8_t *)dst, vout, vlOut); +simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lsx_convert_utf16_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; - n -= vl, src += vl, dst += vlOut; - continue; - } - long idx1 = - __riscv_vfirst_m_b8(__riscv_vmsgtu_vx_u32m4_b8(v, 0x10FFFF, vl), vl); - vbool8_t sur = __riscv_vmseq_vx_u32m4_b8( - __riscv_vand_vx_u32m4(v, 0xFFFFF800, vl), 0xD800, vl); - long idx2 = __riscv_vfirst_m_b8(sur, vl); - if (idx1 >= 0 && idx2 >= 0) { - if (idx1 <= idx2) { - return result(error_code::TOO_LARGE, src - srcBeg + idx1); - } else { - return result(error_code::SURROGATE, src - srcBeg + idx2); - } - } - if (idx1 >= 0) { - return result(error_code::TOO_LARGE, src - srcBeg + idx1); - } - if (idx2 >= 0) { - return result(error_code::SURROGATE, src - srcBeg + idx2); + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} - vbool8_t m4 = __riscv_vmsgtu_vx_u32m4_b8(v, 0x10000 - 1, vl); - long first = __riscv_vfirst_m_b8(m4, vl); - size_t tail = vl - first; - vl = first < 0 ? vl : first; +simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lsx_convert_utf16_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; - if (vl > 0) { /* 1/2/3 byte utf8 */ - /* vn: [aaaabbbb|bbcccccc] - * v1: [0bcccccc| ] vsll 8 - * v1: [10cccccc| ] vsll 8 & 0b00111111 | 0b10000000 - * v2: [ |110bbbbb] vsrl 6 & 0b00111111 | 0b11000000 - * v2: [ |10bbbbbb] vsrl 6 & 0b00111111 | 0b10000000 - * v3: [ |1110aaaa] vsrl 12 | 0b11100000 - * 1: [00000000|0bcccccc|00000000|00000000] => [0bcccccc] - * 2: [00000000|10cccccc|110bbbbb|00000000] => [110bbbbb] [10cccccc] - * 3: [00000000|10cccccc|10bbbbbb|1110aaaa] => [1110aaaa] [10bbbbbb] - * [10cccccc] - */ - vuint16m2_t v1, v2, v3, v12; - v1 = __riscv_vor_vx_u16m2_mu( - m234, vn, __riscv_vand_vx_u16m2(vn, 0b00111111, vl), 0b10000000, vl); - v1 = __riscv_vsll_vx_u16m2(v1, 8, vl); + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} - v2 = __riscv_vor_vx_u16m2( - __riscv_vand_vx_u16m2(__riscv_vsrl_vx_u16m2(vn, 6, vl), 0b00111111, - vl), - 0b10000000, vl); - v2 = __riscv_vor_vx_u16m2_mu(__riscv_vmnot_m_b8(m34, vl), v2, v2, - 0b01000000, vl); - v3 = __riscv_vor_vx_u16m2(__riscv_vsrl_vx_u16m2(vn, 12, vl), 0b11100000, - vl); - v12 = __riscv_vor_vv_u16m2_mu(m234, v1, v1, v2, vl); +simdutf_warn_unused result +implementation::convert_utf16le_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lsx_convert_utf16_to_latin1_with_errors( + buf, len, latin1_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} - vuint32m4_t w12 = __riscv_vwmulu_vx_u32m4(v12, 1 << 8, vl); - vuint32m4_t w123 = __riscv_vwaddu_wv_u32m4_mu(m34, w12, w12, v3, vl); - vuint8m4_t vout = __riscv_vreinterpret_v_u32m4_u8m4(w123); +simdutf_warn_unused result +implementation::convert_utf16be_to_latin1_with_errors( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lsx_convert_utf16_to_latin1_with_errors(buf, len, + latin1_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} - vbool2_t mcomp = __riscv_vmor_mm_b2( - m4mulp2, __riscv_vmsne_vx_u8m4_b2(vout, 0, vl * 4), vl * 4); - vlOut = __riscv_vcpop_m_b2(mcomp, vl * 4); +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: implement a custom function. + return convert_utf16be_to_latin1(buf, len, latin1_output); +} - vout = __riscv_vcompress_vm_u8m4(vout, mcomp, vl * 4); - __riscv_vse8_v_u8m4((uint8_t *)dst, vout, vlOut); +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( + const char16_t *buf, size_t len, char *latin1_output) const noexcept { + // optimization opportunity: implement a custom function. + return convert_utf16le_to_latin1(buf, len, latin1_output); +} - n -= vl, src += vl, dst += vlOut; +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + lsx_convert_utf16_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; } - - if (tail) - while (n) { - uint32_t word = src[0]; - if (word < 0x10000) - break; - if (word > 0x10FFFF) - return result(error_code::TOO_LARGE, src - srcBeg); - *dst++ = (uint8_t)((word >> 18) | 0b11110000); - *dst++ = (uint8_t)(((word >> 12) & 0b111111) | 0b10000000); - *dst++ = (uint8_t)(((word >> 6) & 0b111111) | 0b10000000); - *dst++ = (uint8_t)((word & 0b111111) | 0b10000000); - ++src; - --n; - } + saved_bytes += scalar_saved_bytes; } - - return result(error_code::SUCCESS, dst - dstBeg); + return saved_bytes; } -simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( - const char32_t *src, size_t len, char *dst) const noexcept { - result res = convert_utf32_to_utf8_with_errors(src, len, dst); - return res.error == error_code::SUCCESS ? res.count : 0; +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + std::pair ret = + lsx_convert_utf16_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; } -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( - const char32_t *src, size_t len, char *dst) const noexcept { - return convert_utf32_to_utf8(src, len, dst); +simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + lsx_convert_utf16_to_utf8_with_errors(buf, len, + utf8_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; } -template -simdutf_really_inline static result -rvv_convert_utf32_to_utf16_with_errors(const char32_t *src, size_t len, - char16_t *dst) { - size_t vl8m2 = __riscv_vsetvlmax_e8m2(); - vbool4_t m4even = __riscv_vmseq_vx_u8m2_b4( - __riscv_vand_vx_u8m2(__riscv_vid_v_u8m2(vl8m2), 1, vl8m2), 0, vl8m2); - const char16_t *dstBeg = dst; - const char32_t *srcBeg = src; - for (size_t vl, vlOut; len > 0; len -= vl, src += vl, dst += vlOut) { - vl = __riscv_vsetvl_e32m4(len); - vuint32m4_t v = __riscv_vle32_v_u32m4((uint32_t *)src, vl); - vuint32m4_t off = __riscv_vadd_vx_u32m4(v, 0xFFFF2000, vl); - long idx1 = - __riscv_vfirst_m_b8(__riscv_vmsgtu_vx_u32m4_b8(v, 0x10FFFF, vl), vl); - long idx2 = __riscv_vfirst_m_b8( - __riscv_vmsgtu_vx_u32m4_b8(off, 0xFFFFF7FF, vl), vl); - if (idx1 >= 0 && idx2 >= 0) { - if (idx1 <= idx2) - return result(error_code::TOO_LARGE, src - srcBeg + idx1); - return result(error_code::SURROGATE, src - srcBeg + idx2); - } - if (idx1 >= 0) - return result(error_code::TOO_LARGE, src - srcBeg + idx1); - if (idx2 >= 0) - return result(error_code::SURROGATE, src - srcBeg + idx2); - long idx = - __riscv_vfirst_m_b8(__riscv_vmsgtu_vx_u32m4_b8(v, 0xFFFF, vl), vl); - if (idx < 0) { - vlOut = vl; - vuint16m2_t n = - simdutf_byteflip(__riscv_vncvt_x_x_w_u16m2(v, vlOut), vlOut); - __riscv_vse16_v_u16m2((uint16_t *)dst, n, vlOut); - continue; +simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + lsx_convert_utf16_to_utf8_with_errors(buf, len, + utf8_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; } - vlOut = rvv_utf32_store_utf16_m4((uint16_t *)dst, v, vl, m4even); } - return result(error_code::SUCCESS, dst - dstBeg); + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; } -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( - const char32_t *src, size_t len, char16_t *dst) const noexcept { - result res = convert_utf32_to_utf16le_with_errors(src, len, dst); - return res.error == error_code::SUCCESS ? res.count : 0; +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf16le_to_utf8(buf, len, utf8_output); } -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( - const char32_t *src, size_t len, char16_t *dst) const noexcept { - result res = convert_utf32_to_utf16be_with_errors(src, len, dst); - return res.error == error_code::SUCCESS ? res.count : 0; +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( + const char16_t *buf, size_t len, char *utf8_output) const noexcept { + return convert_utf16be_to_utf8(buf, len, utf8_output); } -simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( - const char32_t *src, size_t len, char16_t *dst) const noexcept { - return rvv_convert_utf32_to_utf16_with_errors( - src, len, dst); +simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + if (simdutf_unlikely(len == 0)) { + return 0; + } + std::pair ret = + lsx_convert_utf32_to_utf8(buf, len, utf8_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf8_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; } -simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( - const char32_t *src, size_t len, char16_t *dst) const noexcept { - if (supports_zvbb()) - return rvv_convert_utf32_to_utf16_with_errors( - src, len, dst); - else - return rvv_convert_utf32_to_utf16_with_errors(src, len, - dst); +simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + lsx_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); + if (ret.first.count != len) { + result scalar_res = scalar::utf32_to_utf8::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf8_output; // Set count to the number of 8-bit code units written + return ret.first; } -template -simdutf_really_inline static size_t -rvv_convert_valid_utf32_to_utf16(const char32_t *src, size_t len, - char16_t *dst) { - size_t vl8m2 = __riscv_vsetvlmax_e8m2(); - vbool4_t m4even = __riscv_vmseq_vx_u8m2_b4( - __riscv_vand_vx_u8m2(__riscv_vid_v_u8m2(vl8m2), 1, vl8m2), 0, vl8m2); - char16_t *dstBeg = dst; - for (size_t vl, vlOut; len > 0; len -= vl, src += vl, dst += vlOut) { - vl = __riscv_vsetvl_e32m4(len); - vuint32m4_t v = __riscv_vle32_v_u32m4((uint32_t *)src, vl); - if (__riscv_vfirst_m_b8(__riscv_vmsgtu_vx_u32m4_b8(v, 0xFFFF, vl), vl) < - 0) { - vlOut = vl; - vuint16m2_t n = - simdutf_byteflip(__riscv_vncvt_x_x_w_u16m2(v, vlOut), vlOut); - __riscv_vse16_v_u16m2((uint16_t *)dst, n, vlOut); - continue; +simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + lsx_convert_utf16_to_utf32(buf, len, utf32_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; } - vlOut = rvv_utf32_store_utf16_m4((uint16_t *)dst, v, vl, m4even); + saved_bytes += scalar_saved_bytes; } - return dst - dstBeg; + return saved_bytes; } -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( - const char32_t *src, size_t len, char16_t *dst) const noexcept { - return rvv_convert_valid_utf32_to_utf16(src, len, - dst); +simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + std::pair ret = + lsx_convert_utf16_to_utf32(buf, len, utf32_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf32_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf16_to_utf32::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; } -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( - const char32_t *src, size_t len, char16_t *dst) const noexcept { - if (supports_zvbb()) - return rvv_convert_valid_utf32_to_utf16(src, len, - dst); - else - return rvv_convert_valid_utf32_to_utf16(src, len, dst); +simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + lsx_convert_utf16_to_utf32_with_errors(buf, len, + utf32_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + utf32_output; // Set count to the number of 8-bit code units written + return ret.first; } -/* end file src/rvv/rvv_utf32_to.inl.cpp */ -/* begin file src/rvv/rvv_utf8_to.inl.cpp */ -template -simdutf_really_inline static size_t rvv_utf8_to_common(char const *src, - size_t len, Tdst *dst) { - static_assert(std::is_same() || - std::is_same(), - "invalid type"); - constexpr bool is16 = std::is_same(); - constexpr endianness endian = - bflip == simdutf_ByteFlip::NONE ? endianness::LITTLE : endianness::BIG; - const auto scalar = [](char const *in, size_t count, Tdst *out) { - return is16 ? scalar::utf8_to_utf16::convert(in, count, - (char16_t *)out) - : scalar::utf8_to_utf32::convert(in, count, (char32_t *)out); - }; - if (len < 32) - return scalar(src, len, dst); - - /* validate first three bytes */ - if (validate) { - size_t idx = 3; - while (idx < len && (src[idx] >> 6) == 0b10) - ++idx; - if (idx > 3 + 3 || !scalar::utf8::validate(src, idx)) - return 0; +simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + lsx_convert_utf16_to_utf32_with_errors(buf, len, + utf32_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = + scalar::utf16_to_utf32::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } } + ret.first.count = + ret.second - + utf32_output; // Set count to the number of 8-bit code units written + return ret.first; +} - size_t tail = 3; - size_t n = len - tail; - Tdst *beg = dst; - - static const uint64_t err1m[] = {0x0202020202020202, 0x4915012180808080}; - static const uint64_t err2m[] = {0xCBCBCB8B8383A3E7, 0xCBCBDBCBCBCBCBCB}; - static const uint64_t err3m[] = {0x0101010101010101, 0X01010101BABAAEE6}; - - const vuint8m1_t err1tbl = - __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err1m, 2)); - const vuint8m1_t err2tbl = - __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err2m, 2)); - const vuint8m1_t err3tbl = - __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err3m, 2)); - - size_t vl8m2 = __riscv_vsetvlmax_e8m2(); - vbool4_t m4even = __riscv_vmseq_vx_u8m2_b4( - __riscv_vand_vx_u8m2(__riscv_vid_v_u8m2(vl8m2), 1, vl8m2), 0, vl8m2); - - for (size_t vl, vlOut; n > 0; n -= vl, src += vl, dst += vlOut) { - vl = __riscv_vsetvl_e8m2(n); - - vuint8m2_t v0 = __riscv_vle8_v_u8m2((uint8_t const *)src, vl); - uint64_t max = __riscv_vmv_x_s_u8m1_u8( - __riscv_vredmaxu_vs_u8m2_u8m1(v0, __riscv_vmv_s_x_u8m1(0, vl), vl)); - - uint8_t next0 = src[vl + 0]; - uint8_t next1 = src[vl + 1]; - uint8_t next2 = src[vl + 2]; +simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lsx_convert_utf32_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; - /* fast path: ASCII */ - if ((max | next0 | next1 | next2) < 0b10000000) { - vlOut = vl; - if (is16) - __riscv_vse16_v_u16m4( - (uint16_t *)dst, - simdutf_byteflip(__riscv_vzext_vf2_u16m4(v0, vlOut), vlOut), - vlOut); - else - __riscv_vse32_v_u32m8((uint32_t *)dst, - __riscv_vzext_vf4_u32m8(v0, vlOut), vlOut); - continue; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} - /* see "Validating UTF-8 In Less Than One Instruction Per Byte" - * https://arxiv.org/abs/2010.03090 */ - vuint8m2_t v1 = __riscv_vslide1down_vx_u8m2(v0, next0, vl); - vuint8m2_t v2 = __riscv_vslide1down_vx_u8m2(v1, next1, vl); - vuint8m2_t v3 = __riscv_vslide1down_vx_u8m2(v2, next2, vl); - - if (validate) { - vuint8m2_t s1 = __riscv_vreinterpret_v_u16m2_u8m2(__riscv_vsrl_vx_u16m2( - __riscv_vreinterpret_v_u8m2_u16m2(v2), 4, __riscv_vsetvlmax_e16m2())); - vuint8m2_t s3 = __riscv_vreinterpret_v_u16m2_u8m2(__riscv_vsrl_vx_u16m2( - __riscv_vreinterpret_v_u8m2_u16m2(v3), 4, __riscv_vsetvlmax_e16m2())); - - vuint8m2_t idx2 = __riscv_vand_vx_u8m2(v2, 0xF, vl); - vuint8m2_t idx1 = __riscv_vand_vx_u8m2(s1, 0xF, vl); - vuint8m2_t idx3 = __riscv_vand_vx_u8m2(s3, 0xF, vl); - - vuint8m2_t err1 = simdutf_vrgather_u8m1x2(err1tbl, idx1); - vuint8m2_t err2 = simdutf_vrgather_u8m1x2(err2tbl, idx2); - vuint8m2_t err3 = simdutf_vrgather_u8m1x2(err3tbl, idx3); - vint8m2_t errs = __riscv_vreinterpret_v_u8m2_i8m2( - __riscv_vand_vv_u8m2(__riscv_vand_vv_u8m2(err1, err2, vl), err3, vl)); - - vbool4_t is_3 = __riscv_vmsgtu_vx_u8m2_b4(v1, 0b11100000 - 1, vl); - vbool4_t is_4 = __riscv_vmsgtu_vx_u8m2_b4(v0, 0b11110000 - 1, vl); - vbool4_t is_34 = __riscv_vmor_mm_b4(is_3, is_4, vl); - vbool4_t err34 = - __riscv_vmxor_mm_b4(is_34, __riscv_vmslt_vx_i8m2_b4(errs, 0, vl), vl); - vbool4_t errm = - __riscv_vmor_mm_b4(__riscv_vmsgt_vx_i8m2_b4(errs, 0, vl), err34, vl); - if (__riscv_vfirst_m_b4(errm, vl) >= 0) - return 0; +simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lsx_convert_utf32_to_latin1_with_errors(buf, len, latin1_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = scalar::utf32_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; } + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} - /* decoding */ - - /* mask of non continuation bytes */ - vbool4_t m = - __riscv_vmsgt_vx_i8m2_b4(__riscv_vreinterpret_v_u8m2_i8m2(v0), -65, vl); - vlOut = __riscv_vcpop_m_b4(m, vl); - - /* extract first and second bytes */ - vuint8m2_t b1 = __riscv_vcompress_vm_u8m2(v0, m, vl); - vuint8m2_t b2 = __riscv_vcompress_vm_u8m2(v1, m, vl); +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lsx_convert_utf32_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; - /* fast path: one and two byte */ - if (max < 0b11100000) { - b2 = __riscv_vand_vx_u8m2(b2, 0b00111111, vlOut); + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert_valid( + ret.first, len - (ret.first - buf), ret.second); + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} - vbool4_t m1 = __riscv_vmsgtu_vx_u8m2_b4(b1, 0b10111111, vlOut); - b1 = __riscv_vand_vx_u8m2_mu(m1, b1, b1, 63, vlOut); +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( + const char32_t *buf, size_t len, char *utf8_output) const noexcept { + // optimization opportunity: implement a custom function. + return convert_utf32_to_utf8(buf, len, utf8_output); +} - vuint16m4_t b12 = __riscv_vwmulu_vv_u16m4( - b1, - __riscv_vmerge_vxm_u8m2(__riscv_vmv_v_x_u8m2(1, vlOut), 1 << 6, m1, - vlOut), - vlOut); - b12 = __riscv_vwaddu_wv_u16m4_mu(m1, b12, b12, b2, vlOut); - if (is16) - __riscv_vse16_v_u16m4((uint16_t *)dst, - simdutf_byteflip(b12, vlOut), vlOut); - else - __riscv_vse32_v_u32m8((uint32_t *)dst, - __riscv_vzext_vf2_u32m8(b12, vlOut), vlOut); - continue; +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + lsx_convert_utf32_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf32_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; } + saved_bytes += scalar_saved_bytes; + } - /* fast path: one, two and three byte */ - if (max < 0b11110000) { - vuint8m2_t b3 = __riscv_vcompress_vm_u8m2(v2, m, vl); - - b2 = __riscv_vand_vx_u8m2(b2, 0b00111111, vlOut); - b3 = __riscv_vand_vx_u8m2(b3, 0b00111111, vlOut); - - vbool4_t m1 = __riscv_vmsgtu_vx_u8m2_b4(b1, 0b10111111, vlOut); - vbool4_t m3 = __riscv_vmsgtu_vx_u8m2_b4(b1, 0b11011111, vlOut); - - vuint8m2_t t1 = __riscv_vand_vx_u8m2_mu(m1, b1, b1, 63, vlOut); - b1 = __riscv_vand_vx_u8m2_mu(m3, t1, b1, 15, vlOut); + return saved_bytes; +} - vuint16m4_t b12 = __riscv_vwmulu_vv_u16m4( - b1, - __riscv_vmerge_vxm_u8m2(__riscv_vmv_v_x_u8m2(1, vlOut), 1 << 6, m1, - vlOut), - vlOut); - b12 = __riscv_vwaddu_wv_u16m4_mu(m1, b12, b12, b2, vlOut); - vuint16m4_t b123 = __riscv_vwaddu_wv_u16m4_mu( - m3, b12, __riscv_vsll_vx_u16m4_mu(m3, b12, b12, 6, vlOut), b3, vlOut); - if (is16) - __riscv_vse16_v_u16m4((uint16_t *)dst, - simdutf_byteflip(b123, vlOut), vlOut); - else - __riscv_vse32_v_u32m8((uint32_t *)dst, - __riscv_vzext_vf2_u32m8(b123, vlOut), vlOut); - continue; +simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + std::pair ret = + lsx_convert_utf32_to_utf16(buf, len, utf16_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - utf16_output; + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = + scalar::utf32_to_utf16::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} - /* extract third and fourth bytes */ - vuint8m2_t b3 = __riscv_vcompress_vm_u8m2(v2, m, vl); - vuint8m2_t b4 = __riscv_vcompress_vm_u8m2(v3, m, vl); - - /* remove prefix from leading bytes - * - * We could also use vrgather here, but it increases register pressure, - * and its performance varies widely on current platforms. It might be - * worth reconsidering, though, once there is more hardware available. - * Same goes for the __riscv_vsrl_vv_u32m4 correction step. - * - * We shift left and then right by the number of bytes in the prefix, - * which can be calculated as follows: - * x max(x-10, 0) - * 0xxx -> 0000-0111 -> sift by 0 or 1 -> 0 - * 10xx -> 1000-1011 -> don't care - * 110x -> 1100,1101 -> sift by 3 -> 2,3 - * 1110 -> 1110 -> sift by 4 -> 4 - * 1111 -> 1111 -> sift by 5 -> 5 - * - * vssubu.vx v, 10, (max(x-10, 0)) almost gives us what we want, we - * just need to manually detect and handle the one special case: - */ -#define SIMDUTF_RVV_UTF8_TO_COMMON_M1(idx) \ - vuint8m1_t c1 = __riscv_vget_v_u8m2_u8m1(b1, idx); \ - vuint8m1_t c2 = __riscv_vget_v_u8m2_u8m1(b2, idx); \ - vuint8m1_t c3 = __riscv_vget_v_u8m2_u8m1(b3, idx); \ - vuint8m1_t c4 = __riscv_vget_v_u8m2_u8m1(b4, idx); \ - /* remove prefix from trailing bytes */ \ - c2 = __riscv_vand_vx_u8m1(c2, 0b00111111, vlOut); \ - c3 = __riscv_vand_vx_u8m1(c3, 0b00111111, vlOut); \ - c4 = __riscv_vand_vx_u8m1(c4, 0b00111111, vlOut); \ - vuint8m1_t shift = __riscv_vsrl_vx_u8m1(c1, 4, vlOut); \ - shift = __riscv_vmerge_vxm_u8m1(__riscv_vssubu_vx_u8m1(shift, 10, vlOut), 3, \ - __riscv_vmseq_vx_u8m1_b8(shift, 12, vlOut), \ - vlOut); \ - c1 = __riscv_vsll_vv_u8m1(c1, shift, vlOut); \ - c1 = __riscv_vsrl_vv_u8m1(c1, shift, vlOut); \ - /* unconditionally widen and combine to c1234 */ \ - vuint16m2_t c34 = __riscv_vwaddu_wv_u16m2( \ - __riscv_vwmulu_vx_u16m2(c3, 1 << 6, vlOut), c4, vlOut); \ - vuint16m2_t c12 = __riscv_vwaddu_wv_u16m2( \ - __riscv_vwmulu_vx_u16m2(c1, 1 << 6, vlOut), c2, vlOut); \ - vuint32m4_t c1234 = __riscv_vwaddu_wv_u32m4( \ - __riscv_vwmulu_vx_u32m4(c12, 1 << 12, vlOut), c34, vlOut); \ - /* derive required right-shift amount from `shift` to reduce \ - * c1234 to the required number of bytes */ \ - c1234 = __riscv_vsrl_vv_u32m4( \ - c1234, \ - __riscv_vzext_vf4_u32m4( \ - __riscv_vmul_vx_u8m1( \ - __riscv_vrsub_vx_u8m1(__riscv_vssubu_vx_u8m1(shift, 2, vlOut), \ - 3, vlOut), \ - 6, vlOut), \ - vlOut), \ - vlOut); \ - /* store result in desired format */ \ - if (is16) \ - vlDst = rvv_utf32_store_utf16_m4((uint16_t *)dst, c1234, vlOut, \ - m4even); \ - else \ - vlDst = vlOut, __riscv_vse32_v_u32m4((uint32_t *)dst, c1234, vlOut); - - /* Unrolling this manually reduces register pressure and allows - * us to terminate early. */ - { - size_t vlOutm2 = vlOut, vlDst; - vlOut = __riscv_vsetvl_e8m1(vlOut); - SIMDUTF_RVV_UTF8_TO_COMMON_M1(0) - if (vlOutm2 == vlOut) { - vlOut = vlDst; - continue; - } - - dst += vlDst; - vlOut = vlOutm2 - vlOut; - } - { - size_t vlDst; - SIMDUTF_RVV_UTF8_TO_COMMON_M1(1) - vlOut = vlDst; +simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + lsx_convert_utf32_to_utf16_with_errors(buf, len, + utf16_output); + if (ret.first.count != len) { + result scalar_res = + scalar::utf32_to_utf16::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; } - -#undef SIMDUTF_RVV_UTF8_TO_COMMON_M1 } + ret.first.count = + ret.second - + utf16_output; // Set count to the number of 8-bit code units written + return ret.first; +} - /* validate the last character and reparse it + tail */ - if (len > tail) { - if ((src[0] >> 6) == 0b10) - --dst; - while ((src[0] >> 6) == 0b10 && tail < len) - --src, ++tail; - if (is16) { - /* go back one more, when on high surrogate */ - if (simdutf_byteflip((uint16_t)dst[-1]) >= 0xD800 && - simdutf_byteflip((uint16_t)dst[-1]) <= 0xDBFF) - --dst; +simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + // ret.first.count is always the position in the buffer, not the number of + // code units written even if finished + std::pair ret = + lsx_convert_utf32_to_utf16_with_errors(buf, len, + utf16_output); + if (ret.first.count != len) { + result scalar_res = + scalar::utf32_to_utf16::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; } } - size_t ret = scalar(src, tail, dst); - if (ret == 0) - return 0; - return (size_t)(dst - beg) + ret; + ret.first.count = + ret.second - + utf16_output; // Set count to the number of 8-bit code units written + return ret.first; } -simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( - const char *src, size_t len, char *dst) const noexcept { - const char *beg = dst; - uint8_t last = 0; - for (size_t vl, vlOut; len > 0; - len -= vl, src += vl, dst += vlOut, last = src[-1]) { - vl = __riscv_vsetvl_e8m2(len); - vuint8m2_t v1 = __riscv_vle8_v_u8m2((uint8_t *)src, vl); - // check which bytes are ASCII - vbool4_t ascii = __riscv_vmsltu_vx_u8m2_b4(v1, 0b10000000, vl); - // count ASCII bytes - vlOut = __riscv_vcpop_m_b4(ascii, vl); - // The original code would only enter the next block after this check: - // vbool4_t m = __riscv_vmsltu_vx_u8m2_b4(v1, 0b11000000, vl); - // vlOut = __riscv_vcpop_m_b4(m, vl); - // if (vlOut != vl || last > 0b01111111) {...}q - // So that everything is ASCII or continuation bytes, we just proceeded - // without any processing, going straight to __riscv_vse8_v_u8m2. - // But you need the __riscv_vslide1up_vx_u8m2 whenever there is a non-ASCII - // byte. - if (vlOut != vl) { // If not pure ASCII - // Non-ASCII characters - // We now want to mark the ascii and continuation bytes - vbool4_t m = __riscv_vmsltu_vx_u8m2_b4(v1, 0b11000000, vl); - // We count them, that's our new vlOut (output vector length) - vlOut = __riscv_vcpop_m_b4(m, vl); +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return convert_utf32_to_utf16le(buf, len, utf16_output); +} - vuint8m2_t v0 = __riscv_vslide1up_vx_u8m2(v1, last, vl); +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( + const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { + return convert_utf32_to_utf16be(buf, len, utf16_output); +} - vbool4_t leading0 = __riscv_vmsgtu_vx_u8m2_b4(v0, 0b10111111, vl); - vbool4_t trailing1 = __riscv_vmslt_vx_i8m2_b4( - __riscv_vreinterpret_v_u8m2_i8m2(v1), (uint8_t)0b11000000, vl); - // -62 i 0b11000010, so we check whether any of v0 is too big - vbool4_t tobig = __riscv_vmand_mm_b4( - leading0, - __riscv_vmsgtu_vx_u8m2_b4(__riscv_vxor_vx_u8m2(v0, (uint8_t)-62, vl), - 1, vl), - vl); - if (__riscv_vfirst_m_b4( - __riscv_vmor_mm_b4( - tobig, __riscv_vmxor_mm_b4(leading0, trailing1, vl), vl), - vl) >= 0) - return 0; +simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_utf16le_to_utf32(buf, len, utf32_output); +} - v1 = __riscv_vor_vx_u8m2_mu(__riscv_vmseq_vx_u8m2_b4(v0, 0b11000011, vl), - v1, v1, 0b01000000, vl); - v1 = __riscv_vcompress_vm_u8m2(v1, m, vl); - } else if (last >= 0b11000000) { // If last byte is a leading byte and we - // got only ASCII, error! - return 0; - } - __riscv_vse8_v_u8m2((uint8_t *)dst, v1, vlOut); - } - if (last > 0b10111111) - return 0; - return dst - beg; +simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( + const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { + return convert_utf16be_to_utf32(buf, len, utf32_output); } -simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( - const char *src, size_t len, char *dst) const noexcept { - size_t res = convert_utf8_to_latin1(src, len, dst); - if (res) - return result(error_code::SUCCESS, res); - return scalar::utf8_to_latin1::convert_with_errors(src, len, dst); +void implementation::change_endianness_utf16(const char16_t *input, + size_t length, + char16_t *output) const noexcept { + utf16::change_endianness_utf16(input, length, output); } -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( - const char *src, size_t len, char *dst) const noexcept { - const char *beg = dst; - uint8_t last = 0; - for (size_t vl, vlOut; len > 0; - len -= vl, src += vl, dst += vlOut, last = src[-1]) { - vl = __riscv_vsetvl_e8m2(len); - vuint8m2_t v1 = __riscv_vle8_v_u8m2((uint8_t *)src, vl); - vbool4_t ascii = __riscv_vmsltu_vx_u8m2_b4(v1, 0b10000000, vl); - vlOut = __riscv_vcpop_m_b4(ascii, vl); - if (vlOut != vl) { // If not pure ASCII - vbool4_t m = __riscv_vmsltu_vx_u8m2_b4(v1, 0b11000000, vl); - vlOut = __riscv_vcpop_m_b4(m, vl); - vuint8m2_t v0 = __riscv_vslide1up_vx_u8m2(v1, last, vl); - v1 = __riscv_vor_vx_u8m2_mu(__riscv_vmseq_vx_u8m2_b4(v0, 0b11000011, vl), - v1, v1, 0b01000000, vl); - v1 = __riscv_vcompress_vm_u8m2(v1, m, vl); - } - __riscv_vse8_v_u8m2((uint8_t *)dst, v1, vlOut); - } - return dst - beg; +simdutf_warn_unused size_t implementation::count_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::count_code_points(input, length); } -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( - const char *src, size_t len, char16_t *dst) const noexcept { - return rvv_utf8_to_common(src, len, - (uint16_t *)dst); +simdutf_warn_unused size_t implementation::count_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::count_code_points(input, length); } -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( - const char *src, size_t len, char16_t *dst) const noexcept { - if (supports_zvbb()) - return rvv_utf8_to_common( - src, len, (uint16_t *)dst); - else - return rvv_utf8_to_common(src, len, - (uint16_t *)dst); +simdutf_warn_unused size_t +implementation::count_utf8(const char *input, size_t length) const noexcept { + return utf8::count_code_points(input, length); } -simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( - const char *src, size_t len, char16_t *dst) const noexcept { - size_t res = convert_utf8_to_utf16le(src, len, dst); - if (res) - return result(error_code::SUCCESS, res); - return scalar::utf8_to_utf16::convert_with_errors( - src, len, dst); +simdutf_warn_unused size_t implementation::latin1_length_from_utf8( + const char *buf, size_t len) const noexcept { + return count_utf8(buf, len); } -simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( - const char *src, size_t len, char16_t *dst) const noexcept { - size_t res = convert_utf8_to_utf16be(src, len, dst); - if (res) - return result(error_code::SUCCESS, res); - return scalar::utf8_to_utf16::convert_with_errors(src, len, - dst); +simdutf_warn_unused size_t +implementation::latin1_length_from_utf16(size_t length) const noexcept { + return length; } -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( - const char *src, size_t len, char16_t *dst) const noexcept { - return rvv_utf8_to_common( - src, len, (uint16_t *)dst); +simdutf_warn_unused size_t +implementation::latin1_length_from_utf32(size_t length) const noexcept { + return length; } -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( - const char *src, size_t len, char16_t *dst) const noexcept { - if (supports_zvbb()) - return rvv_utf8_to_common( - src, len, (uint16_t *)dst); - else - return rvv_utf8_to_common( - src, len, (uint16_t *)dst); +simdutf_warn_unused size_t implementation::utf8_length_from_latin1( + const char *input, size_t length) const noexcept { + const uint8_t *data = reinterpret_cast(input); + const uint8_t *data_end = data + length; + uint64_t result = 0; + while (data + 16 < data_end) { + uint64_t two_bytes = 0; + __m128i input_vec = __lsx_vld(data, 0); + two_bytes = + __lsx_vpickve2gr_hu(__lsx_vpcnt_h(__lsx_vmskltz_b(input_vec)), 0); + result += 16 + two_bytes; + data += 16; + } + return result + scalar::latin1::utf8_length_from_latin1((const char *)data, + data_end - data); } -simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( - const char *src, size_t len, char32_t *dst) const noexcept { - return rvv_utf8_to_common(src, len, - (uint32_t *)dst); +simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::utf8_length_from_utf16(input, length); } -simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( - const char *src, size_t len, char32_t *dst) const noexcept { - size_t res = convert_utf8_to_utf32(src, len, dst); - if (res) - return result(error_code::SUCCESS, res); - return scalar::utf8_to_utf32::convert_with_errors(src, len, dst); +simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::utf8_length_from_utf16(input, length); } -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( - const char *src, size_t len, char32_t *dst) const noexcept { - return rvv_utf8_to_common( - src, len, (uint32_t *)dst); +simdutf_warn_unused size_t +implementation::utf16_length_from_latin1(size_t length) const noexcept { + return length; } -/* end file src/rvv/rvv_utf8_to.inl.cpp */ -simdutf_warn_unused int -implementation::detect_encodings(const char *input, - size_t length) const noexcept { - // If there is a BOM, then we trust it. - auto bom_encoding = simdutf::BOM::check_bom(input, length); - if (bom_encoding != encoding_type::unspecified) - return bom_encoding; - // todo: reimplement as a one-pass algorithm. - int out = 0; - if (validate_utf8(input, length)) - out |= encoding_type::UTF8; - if (length % 2 == 0) { - if (validate_utf16(reinterpret_cast(input), length / 2)) - out |= encoding_type::UTF16_LE; - } - if (length % 4 == 0) { - if (validate_utf32(reinterpret_cast(input), length / 4)) - out |= encoding_type::UTF32_LE; - } +simdutf_warn_unused size_t +implementation::utf32_length_from_latin1(size_t length) const noexcept { + return length; +} - return out; +simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( + const char16_t *input, size_t length) const noexcept { + return utf16::utf32_length_from_utf16(input, length); } -template -simdutf_really_inline static void -rvv_change_endianness_utf16(const char16_t *src, size_t len, char16_t *dst) { - for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { - vl = __riscv_vsetvl_e16m8(len); - vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); - __riscv_vse16_v_u16m8((uint16_t *)dst, simdutf_byteflip(v, vl), vl); +simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( + const char16_t *input, size_t length) const noexcept { + return utf16::utf32_length_from_utf16(input, length); +} + +simdutf_warn_unused size_t implementation::utf16_length_from_utf8( + const char *input, size_t length) const noexcept { + return utf8::utf16_length_from_utf8(input, length); +} + +simdutf_warn_unused size_t implementation::utf8_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + const __m128i v_80 = __lsx_vrepli_w(0x80); /*0x00000080*/ + const __m128i v_800 = __lsx_vldi(-3832); /*0x00000800*/ + const __m128i v_10000 = __lsx_vldi(-3583); /*0x00010000*/ + size_t pos = 0; + size_t count = 0; + for (; pos + 4 <= length; pos += 4) { + __m128i in = __lsx_vld(reinterpret_cast(input + pos), 0); + const __m128i ascii_bytes_bytemask = __lsx_vslt_w(in, v_80); + const __m128i one_two_bytes_bytemask = __lsx_vslt_w(in, v_800); + const __m128i two_bytes_bytemask = + __lsx_vxor_v(one_two_bytes_bytemask, ascii_bytes_bytemask); + const __m128i three_bytes_bytemask = + __lsx_vxor_v(__lsx_vslt_w(in, v_10000), one_two_bytes_bytemask); + + const uint32_t ascii_bytes_count = __lsx_vpickve2gr_bu( + __lsx_vpcnt_b(__lsx_vmskltz_w(ascii_bytes_bytemask)), 0); + const uint32_t two_bytes_count = __lsx_vpickve2gr_bu( + __lsx_vpcnt_b(__lsx_vmskltz_w(two_bytes_bytemask)), 0); + const uint32_t three_bytes_count = __lsx_vpickve2gr_bu( + __lsx_vpcnt_b(__lsx_vmskltz_w(three_bytes_bytemask)), 0); + + count += + 16 - 3 * ascii_bytes_count - 2 * two_bytes_count - three_bytes_count; + } + return count + + scalar::utf32::utf8_length_from_utf32(input + pos, length - pos); +} + +simdutf_warn_unused size_t implementation::utf16_length_from_utf32( + const char32_t *input, size_t length) const noexcept { + const __m128i v_ffff = __lsx_vldi(-2304); /*0x0000ffff*/ + size_t pos = 0; + size_t count = 0; + for (; pos + 4 <= length; pos += 4) { + __m128i in = __lsx_vld(reinterpret_cast(input + pos), 0); + const __m128i surrogate_bytemask = __lsx_vslt_wu(v_ffff, in); + size_t surrogate_count = __lsx_vpickve2gr_bu( + __lsx_vpcnt_b(__lsx_vmskltz_w(surrogate_bytemask)), 0); + count += 4 + surrogate_count; } + return count + + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos); } -void implementation::change_endianness_utf16(const char16_t *src, size_t len, - char16_t *dst) const noexcept { - if (supports_zvbb()) - return rvv_change_endianness_utf16(src, len, dst); - else - return rvv_change_endianness_utf16(src, len, dst); +simdutf_warn_unused size_t implementation::utf32_length_from_utf8( + const char *input, size_t length) const noexcept { + return utf8::count_code_points(input, length); } simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( @@ -34982,86 +49864,21 @@ simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( simdutf_warn_unused result implementation::base64_to_binary( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - size_t equallocation = - length; // location of the first padding character if any - size_t equalsigns = 0; - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - length -= 1; - equalsigns++; - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - equalsigns++; - length -= 1; - } - } - if (length == 0) { - if (equalsigns > 0) { - return {INVALID_BASE64_CHARACTER, equallocation}; - } - return {SUCCESS, 0}; - } - result r = scalar::base64::base64_tail_decode( - output, input, length, equalsigns, options, last_chunk_options); - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0) { - // additional checks - if ((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation}; - } - } - return r; + return (options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); } simdutf_warn_unused full_result implementation::base64_to_binary_details( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - size_t equallocation = - length; // location of the first padding character if any - size_t equalsigns = 0; - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - length -= 1; - equalsigns++; - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - equalsigns++; - length -= 1; - } - } - if (length == 0) { - if (equalsigns > 0) { - return {INVALID_BASE64_CHARACTER, equallocation, 0}; - } - return {SUCCESS, 0, 0}; - } - full_result r = scalar::base64::base64_tail_decode( - output, input, length, equalsigns, options, last_chunk_options); - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0) { - // additional checks - if ((r.output_count % 3 == 0) || - ((r.output_count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; - } - } - return r; + return (options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); } simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( @@ -35072,86 +49889,21 @@ simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( simdutf_warn_unused result implementation::base64_to_binary( const char16_t *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - size_t equallocation = - length; // location of the first padding character if any - auto equalsigns = 0; - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - length -= 1; - equalsigns++; - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - equalsigns++; - length -= 1; - } - } - if (length == 0) { - if (equalsigns > 0) { - return {INVALID_BASE64_CHARACTER, equallocation}; - } - return {SUCCESS, 0}; - } - result r = scalar::base64::base64_tail_decode( - output, input, length, equalsigns, options, last_chunk_options); - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0) { - // additional checks - if ((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation}; - } - } - return r; + return (options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); } simdutf_warn_unused full_result implementation::base64_to_binary_details( const char16_t *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const noexcept { - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - size_t equallocation = - length; // location of the first padding character if any - size_t equalsigns = 0; - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - length -= 1; - equalsigns++; - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - equalsigns++; - length -= 1; - } - } - if (length == 0) { - if (equalsigns > 0) { - return {INVALID_BASE64_CHARACTER, equallocation, 0}; - } - return {SUCCESS, 0, 0}; - } - full_result r = scalar::base64::base64_tail_decode( - output, input, length, equalsigns, options, last_chunk_options); - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0) { - // additional checks - if ((r.output_count % 3 == 0) || - ((r.output_count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; - } - } - return r; + return (options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); } simdutf_warn_unused size_t implementation::base64_length_from_binary( @@ -35162,148 +49914,148 @@ simdutf_warn_unused size_t implementation::base64_length_from_binary( size_t implementation::binary_to_base64(const char *input, size_t length, char *output, base64_options options) const noexcept { - return scalar::base64::tail_encode_base64(output, input, length, options); + if (options & base64_url) { + return encode_base64(output, input, length, options); + } else { + return encode_base64(output, input, length, options); + } } -} // namespace rvv +} // namespace lsx } // namespace simdutf -/* begin file src/simdutf/rvv/end.h */ -#if SIMDUTF_CAN_ALWAYS_RUN_RVV -// nothing needed. -#else -SIMDUTF_UNTARGET_REGION -#endif - -/* end file src/simdutf/rvv/end.h */ -/* end file src/rvv/implementation.cpp */ -#endif -#if SIMDUTF_IMPLEMENTATION_WESTMERE -/* begin file src/westmere/implementation.cpp */ -/* begin file src/simdutf/westmere/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "westmere" -// #define SIMDUTF_IMPLEMENTATION westmere - -#if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE -// nothing needed. -#else -SIMDUTF_TARGET_WESTMERE +/* begin file src/simdutf/lsx/end.h */ +/* end file src/simdutf/lsx/end.h */ +/* end file src/lsx/implementation.cpp */ #endif -/* end file src/simdutf/westmere/begin.h */ +#if SIMDUTF_IMPLEMENTATION_LASX +/* begin file src/lasx/implementation.cpp */ +/* begin file src/simdutf/lasx/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "lasx" +// #define SIMDUTF_IMPLEMENTATION lasx +/* end file src/simdutf/lasx/begin.h */ namespace simdutf { -namespace westmere { +namespace lasx { namespace { -#ifndef SIMDUTF_WESTMERE_H - #error "westmere.h must be included" +#ifndef SIMDUTF_LASX_H + #error "lasx.h must be included" #endif using namespace simd; +// convert vmskltz/vmskgez/vmsknz to +// simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes index +const uint8_t lasx_1_2_utf8_bytes_mask[] = { + 0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84, + 85, 2, 3, 6, 7, 18, 19, 22, 23, 66, 67, 70, 71, 82, 83, + 86, 87, 8, 9, 12, 13, 24, 25, 28, 29, 72, 73, 76, 77, 88, + 89, 92, 93, 10, 11, 14, 15, 26, 27, 30, 31, 74, 75, 78, 79, + 90, 91, 94, 95, 32, 33, 36, 37, 48, 49, 52, 53, 96, 97, 100, + 101, 112, 113, 116, 117, 34, 35, 38, 39, 50, 51, 54, 55, 98, 99, + 102, 103, 114, 115, 118, 119, 40, 41, 44, 45, 56, 57, 60, 61, 104, + 105, 108, 109, 120, 121, 124, 125, 42, 43, 46, 47, 58, 59, 62, 63, + 106, 107, 110, 111, 122, 123, 126, 127, 128, 129, 132, 133, 144, 145, 148, + 149, 192, 193, 196, 197, 208, 209, 212, 213, 130, 131, 134, 135, 146, 147, + 150, 151, 194, 195, 198, 199, 210, 211, 214, 215, 136, 137, 140, 141, 152, + 153, 156, 157, 200, 201, 204, 205, 216, 217, 220, 221, 138, 139, 142, 143, + 154, 155, 158, 159, 202, 203, 206, 207, 218, 219, 222, 223, 160, 161, 164, + 165, 176, 177, 180, 181, 224, 225, 228, 229, 240, 241, 244, 245, 162, 163, + 166, 167, 178, 179, 182, 183, 226, 227, 230, 231, 242, 243, 246, 247, 168, + 169, 172, 173, 184, 185, 188, 189, 232, 233, 236, 237, 248, 249, 252, 253, + 170, 171, 174, 175, 186, 187, 190, 191, 234, 235, 238, 239, 250, 251, 254, + 255}; + +simdutf_really_inline __m128i lsx_swap_bytes(__m128i vec) { + return __lsx_vshuf4i_b(vec, 0b10110001); +} +simdutf_really_inline __m256i lasx_swap_bytes(__m256i vec) { + return __lasx_xvshuf4i_b(vec, 0b10110001); +} + simdutf_really_inline bool is_ascii(const simd8x64 &input) { - return input.reduce_or().is_ascii(); + return input.is_ascii(); } simdutf_unused simdutf_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { - simd8 is_second_byte = - prev1.saturating_sub(0b11000000u - 1); // Only 11______ will be > 0 - simd8 is_third_byte = - prev2.saturating_sub(0b11100000u - 1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = - prev3.saturating_sub(0b11110000u - 1); // Only 1111____ will be > 0 - // Caller requires a bool (all 1's). All values resulting from the subtraction - // will be <= 64, so signed comparison is fine. - return simd8(is_second_byte | is_third_byte | is_fourth_byte) > - int8_t(0); + simd8 is_second_byte = prev1 >= uint8_t(0b11000000u); + simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); + simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); + // Use ^ instead of | for is_*_byte, because ^ is commutative, and the caller + // is using ^ as well. This will work fine because we only have to report + // errors for cases with 0-1 lead bytes. Multiple lead bytes implies 2 + // overlapping multibyte characters, and if that happens, there is guaranteed + // to be at least *one* lead byte that is part of only 1 other multibyte + // character. The error will be detected there. + return is_second_byte ^ is_third_byte ^ is_fourth_byte; } simdutf_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { - simd8 is_third_byte = - prev2.saturating_sub(0xe0u - 0x80); // Only 111_____ will be >= 0x80 - simd8 is_fourth_byte = - prev3.saturating_sub(0xf0u - 0x80); // Only 1111____ will be >= 0x80 - return simd8(is_third_byte | is_fourth_byte); + simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); + simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); + return is_third_byte ^ is_fourth_byte; } -/* begin file src/westmere/internal/loader.cpp */ -namespace internal { -namespace westmere { - -/* begin file src/westmere/internal/write_v_u16_11bits_to_utf8.cpp */ -/* - * reads a vector of uint16 values - * bits after 11th are ignored - * first 11 bits are encoded into utf8 - * !important! utf8_output must have at least 16 writable bytes - */ - -inline void write_v_u16_11bits_to_utf8(const __m128i v_u16, char *&utf8_output, - const __m128i one_byte_bytemask, - const uint16_t one_byte_bitmask) { - // 0b1100_0000_1000_0000 - const __m128i v_c080 = _mm_set1_epi16((int16_t)0xc080); - // 0b0001_1111_0000_0000 - const __m128i v_1f00 = _mm_set1_epi16((int16_t)0x1f00); - // 0b0000_0000_0011_1111 - const __m128i v_003f = _mm_set1_epi16((int16_t)0x003f); - - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - - // t0 = [000a|aaaa|bbbb|bb00] - const __m128i t0 = _mm_slli_epi16(v_u16, 2); - // t1 = [000a|aaaa|0000|0000] - const __m128i t1 = _mm_and_si128(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const __m128i t2 = _mm_and_si128(v_u16, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const __m128i t3 = _mm_or_si128(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const __m128i t4 = _mm_or_si128(t3, v_c080); - - // 2. merge ASCII and 2-byte codewords - const __m128i utf8_unpacked = _mm_blendv_epi8(t4, v_u16, one_byte_bytemask); - - // 3. prepare bitmask for 8-bit lookup - // one_byte_bitmask = hhggffeeddccbbaa -- the bits are doubled (h - MSB, a - // - LSB) - const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a - const uint16_t m1 = static_cast(m0 >> 7); // m1 = 00000000h0g0f0e0 - const uint8_t m2 = static_cast((m0 | m1) & 0xff); // m2 = hdgcfbea - // 4. pack the bytes - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; - const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); - const __m128i utf8_packed = _mm_shuffle_epi8(utf8_unpacked, shuffle); - - // 5. store bytes - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); +// common functions for utf8 conversions +simdutf_really_inline __m128i convert_utf8_3_byte_to_utf16(__m128i in) { + // Low half contains 10bbbbbb|10cccccc + // High half contains 1110aaaa|1110aaaa + const v16u8 sh = {2, 1, 5, 4, 8, 7, 11, 10, 0, 0, 3, 3, 6, 6, 9, 9}; + const v8u16 v0fff = {0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff}; + + __m128i perm = __lsx_vshuf_b(__lsx_vldi(0), in, (__m128i)sh); + // 1110aaaa => aaaa0000 + __m128i perm_high = __lsx_vslli_b(__lsx_vbsrl_v(perm, 8), 4); + // 10bbbbbb 10cccccc => 0010bbbb bbcccccc + __m128i composed = __lsx_vbitsel_v(__lsx_vsrli_h(perm, 2), /* perm >> 2*/ + perm, __lsx_vrepli_h(0x3f) /* 0x003f */); + // 0010bbbb bbcccccc => aaaabbbb bbcccccc + composed = __lsx_vbitsel_v(perm_high, composed, (__m128i)v0fff); - // 6. adjust pointers - utf8_output += row[0]; + return composed; } -inline void write_v_u16_11bits_to_utf8(const __m128i v_u16, char *&utf8_output, - const __m128i v_0000, - const __m128i v_ff80) { - // no bits set above 7th bit - const __m128i one_byte_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(v_u16, v_ff80), v_0000); - const uint16_t one_byte_bitmask = - static_cast(_mm_movemask_epi8(one_byte_bytemask)); - - write_v_u16_11bits_to_utf8(v_u16, utf8_output, one_byte_bytemask, - one_byte_bitmask); +simdutf_really_inline __m128i convert_utf8_2_byte_to_utf16(__m128i in) { + // 10bbbbb 110aaaaa => 00bbbbb 000aaaaa + __m128i composed = __lsx_vand_v(in, __lsx_vldi(0x3f)); + // 00bbbbbb 000aaaaa => 00000aaa aabbbbbb + composed = __lsx_vbitsel_v( + __lsx_vsrli_h(__lsx_vslli_h(composed, 8), 2), /* (aaaaa << 8) >> 2 */ + __lsx_vsrli_h(composed, 8), /* bbbbbb >> 8 */ + __lsx_vrepli_h(0x3f)); /* 0x003f */ + return composed; } -/* end file src/westmere/internal/write_v_u16_11bits_to_utf8.cpp */ -} // namespace westmere -} // namespace internal -/* end file src/westmere/internal/loader.cpp */ +simdutf_really_inline __m128i +convert_utf8_1_to_2_byte_to_utf16(__m128i in, size_t shufutf8_idx) { + // Converts 6 1-2 byte UTF-8 characters to 6 UTF-16 characters. + // This is a relatively easy scenario + // we process SIX (6) input code-code units. The max length in bytes of six + // code code units spanning between 1 and 2 bytes each is 12 bytes. + __m128i sh = + __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[shufutf8_idx]), + 0); + // Shuffle + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 110aaaaa 10bbbbbb + __m128i perm = __lsx_vshuf_b(__lsx_vldi(0), in, sh); + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 00000000 00bbbbbb + __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_h(0x7f)); // 6 or 7 bits + // 1 byte: 00000000 00000000 + // 2 byte: 00000aaa aa000000 + __m128i v1f00 = __lsx_vldi(-2785); // -2785(13bit) => 151f + __m128i composed = __lsx_vsrli_h(__lsx_vand_v(perm, v1f00), 2); // 5 bits + // Combine with a shift right accumulate + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 00000aaa aabbbbbb + composed = __lsx_vadd_h(ascii, composed); + return composed; +} -/* begin file src/westmere/sse_validate_utf16.cpp */ +/* begin file src/lasx/lasx_validate_utf16.cpp */ /* In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. @@ -35324,7 +50076,7 @@ inline void write_v_u16_11bits_to_utf8(const __m128i v_u16, char *&utf8_output, - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) - there must not be sole low surrogate nor high surrogate - We are going to build three bitmasks based on the 3rd nibble: + We're going to build three bitmasks based on the 3rd nibble: - V = valid word, - L = low surrogate (0xd800 .. 0xdbff) - H = high surrogate (0xdc00 .. 0xdfff) @@ -35351,7 +50103,7 @@ inline void write_v_u16_11bits_to_utf8(const __m128i v_u16, char *&utf8_output, - nullptr if an error was detected. */ template -const char16_t *sse_validate_utf16(const char16_t *input, size_t size) { +const char16_t *lasx_validate_utf16(const char16_t *input, size_t size) { const char16_t *end = input + size; const auto v_d8 = simd8::splat(0xd8); @@ -35359,29 +50111,26 @@ const char16_t *sse_validate_utf16(const char16_t *input, size_t size) { const auto v_fc = simd8::splat(0xfc); const auto v_dc = simd8::splat(0xdc); - while (input + simd16::SIZE * 2 < end) { + while (input + simd16::ELEMENTS * 2 < end) { // 0. Load data: since the validation takes into account only higher // byte of each word, we compress the two vectors into one which // consists only the higher bytes. auto in0 = simd16(input); - auto in1 = - simd16(input + simd16::SIZE / sizeof(char16_t)); + auto in1 = simd16(input + simd16::ELEMENTS); + if (big_endian) { in0 = in0.swap_bytes(); in1 = in1.swap_bytes(); } - const auto t0 = in0.shr<8>(); - const auto t1 = in1.shr<8>(); - - const auto in = simd16::pack(t0, t1); + const auto in = simd8(__lasx_xvpermi_d( + __lasx_xvssrlni_bu_h(in1.value, in0.value, 8), 0b11011000)); // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). const auto surrogates_wordmask = (in & v_f8) == v_d8; - const uint16_t surrogates_bitmask = - static_cast(surrogates_wordmask.to_bitmask()); - if (surrogates_bitmask == 0x0000) { - input += 16; + const uint32_t surrogates_bitmask = surrogates_wordmask.to_bitmask(); + if (surrogates_bitmask == 0x0) { + input += simd16::ELEMENTS * 2; } else { // 2. We have some surrogates that have to be distinguished: // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) @@ -35391,36 +50140,35 @@ const char16_t *sse_validate_utf16(const char16_t *input, size_t size) { // V - non-surrogate code units // V = not surrogates_wordmask - const uint16_t V = static_cast(~surrogates_bitmask); + const uint32_t V = ~surrogates_bitmask; // H - word-mask for high surrogates: the six highest bits are 0b1101'11 const auto vH = (in & v_fc) == v_dc; - const uint16_t H = static_cast(vH.to_bitmask()); + const uint32_t H = vH.to_bitmask(); // L - word mask for low surrogates // L = not H and surrogates_wordmask - const uint16_t L = static_cast(~H & surrogates_bitmask); + const uint32_t L = ~H & surrogates_bitmask; - const uint16_t a = static_cast( - L & (H >> 1)); // A low surrogate must be followed by high one. - // (A low surrogate placed in the 7th register's word - // is an exception we handle.) - const uint16_t b = static_cast( - a << 1); // Just mark that the opinput - startite fact is hold, - // thanks to that we have only two masks for valid case. - const uint16_t c = static_cast( - V | a | b); // Combine all the masks into the final one. + const uint32_t a = + L & (H >> 1); // A low surrogate must be followed by high one. + // (A low surrogate placed in the 7th register's word + // is an exception we handle.) + const uint32_t b = + a << 1; // Just mark that the opposite fact is hold, + // thanks to that we have only two masks for valid case. + const uint32_t c = V | a | b; // Combine all the masks into the final one. - if (c == 0xffff) { + if (c == 0xffffffff) { // The whole input register contains valid UTF-16, i.e., // either single code units or proper surrogate pairs. - input += 16; - } else if (c == 0x7fff) { - // The 15 lower code units of the input register contains valid UTF-16. - // The 15th word may be either a low or high surrogate. It the next + input += simd16::ELEMENTS * 2; + } else if (c == 0x7fffffff) { + // The 31 lower code units of the input register contains valid UTF-16. + // The 31 word may be either a low or high surrogate. It the next // iteration we 1) check if the low surrogate is followed by a high // one, 2) reject sole high surrogate. - input += 15; + input += simd16::ELEMENTS * 2 - 1; } else { return nullptr; } @@ -35431,8 +50179,8 @@ const char16_t *sse_validate_utf16(const char16_t *input, size_t size) { } template -const result sse_validate_utf16_with_errors(const char16_t *input, - size_t size) { +const result lasx_validate_utf16_with_errors(const char16_t *input, + size_t size) { if (simdutf_unlikely(size == 0)) { return result(error_code::SUCCESS, 0); } @@ -35444,30 +50192,25 @@ const result sse_validate_utf16_with_errors(const char16_t *input, const auto v_fc = simd8::splat(0xfc); const auto v_dc = simd8::splat(0xdc); - while (input + simd16::SIZE * 2 < end) { + while (input + simd16::ELEMENTS * 2 < end) { // 0. Load data: since the validation takes into account only higher // byte of each word, we compress the two vectors into one which // consists only the higher bytes. auto in0 = simd16(input); - auto in1 = - simd16(input + simd16::SIZE / sizeof(char16_t)); + auto in1 = simd16(input + simd16::ELEMENTS); if (big_endian) { in0 = in0.swap_bytes(); in1 = in1.swap_bytes(); } - - const auto t0 = in0.shr<8>(); - const auto t1 = in1.shr<8>(); - - const auto in = simd16::pack(t0, t1); + const auto in = simd8(__lasx_xvpermi_d( + __lasx_xvssrlni_bu_h(in1.value, in0.value, 8), 0b11011000)); // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). const auto surrogates_wordmask = (in & v_f8) == v_d8; - const uint16_t surrogates_bitmask = - static_cast(surrogates_wordmask.to_bitmask()); - if (surrogates_bitmask == 0x0000) { - input += 16; + const uint32_t surrogates_bitmask = surrogates_wordmask.to_bitmask(); + if (surrogates_bitmask == 0x0) { + input += simd16::ELEMENTS * 2; } else { // 2. We have some surrogates that have to be distinguished: // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) @@ -35477,36 +50220,35 @@ const result sse_validate_utf16_with_errors(const char16_t *input, // V - non-surrogate code units // V = not surrogates_wordmask - const uint16_t V = static_cast(~surrogates_bitmask); + const uint32_t V = ~surrogates_bitmask; // H - word-mask for high surrogates: the six highest bits are 0b1101'11 const auto vH = (in & v_fc) == v_dc; - const uint16_t H = static_cast(vH.to_bitmask()); + const uint32_t H = vH.to_bitmask(); // L - word mask for low surrogates // L = not H and surrogates_wordmask - const uint16_t L = static_cast(~H & surrogates_bitmask); + const uint32_t L = ~H & surrogates_bitmask; - const uint16_t a = static_cast( - L & (H >> 1)); // A low surrogate must be followed by high one. - // (A low surrogate placed in the 7th register's word - // is an exception we handle.) - const uint16_t b = static_cast( - a << 1); // Just mark that the opinput - startite fact is hold, - // thanks to that we have only two masks for valid case. - const uint16_t c = static_cast( - V | a | b); // Combine all the masks into the final one. + const uint32_t a = + L & (H >> 1); // A low surrogate must be followed by high one. + // (A low surrogate placed in the 7th register's word + // is an exception we handle.) + const uint32_t b = + a << 1; // Just mark that the opposite fact is hold, + // thanks to that we have only two masks for valid case. + const uint32_t c = V | a | b; // Combine all the masks into the final one. - if (c == 0xffff) { + if (c == 0xffffffff) { // The whole input register contains valid UTF-16, i.e., // either single code units or proper surrogate pairs. - input += 16; - } else if (c == 0x7fff) { - // The 15 lower code units of the input register contains valid UTF-16. - // The 15th word may be either a low or high surrogate. It the next + input += simd16::ELEMENTS * 2; + } else if (c == 0x7fffffff) { + // The 31 lower code units of the input register contains valid UTF-16. + // The 31 word may be either a low or high surrogate. It the next // iteration we 1) check if the low surrogate is followed by a high // one, 2) reject sole high surrogate. - input += 15; + input += simd16::ELEMENTS * 2 - 1; } else { return result(error_code::SURROGATE, input - start); } @@ -35515,200 +50257,289 @@ const result sse_validate_utf16_with_errors(const char16_t *input, return result(error_code::SUCCESS, input - start); } -/* end file src/westmere/sse_validate_utf16.cpp */ -/* begin file src/westmere/sse_validate_utf32le.cpp */ -/* Returns: - - pointer to the last unprocessed character (a scalar fallback should check - the rest); - - nullptr if an error was detected. -*/ -const char32_t *sse_validate_utf32le(const char32_t *input, size_t size) { +/* end file src/lasx/lasx_validate_utf16.cpp */ +/* begin file src/lasx/lasx_validate_utf32le.cpp */ + +const char32_t *lasx_validate_utf32le(const char32_t *input, size_t size) { const char32_t *end = input + size; - const __m128i standardmax = _mm_set1_epi32(0x10ffff); - const __m128i offset = _mm_set1_epi32(0xffff2000); - const __m128i standardoffsetmax = _mm_set1_epi32(0xfffff7ff); - __m128i currentmax = _mm_setzero_si128(); - __m128i currentoffsetmax = _mm_setzero_si128(); + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)input & 0x1F) && input < end) { + uint32_t word = *input++; + if (word > 0x10FFFF || (word >= 0xD800 && word <= 0xDFFF)) { + return nullptr; + } + } - while (input + 4 < end) { - const __m128i in = _mm_loadu_si128((__m128i *)input); - currentmax = _mm_max_epu32(in, currentmax); + __m256i offset = __lasx_xvreplgr2vr_w(uint32_t(0xffff2000)); + __m256i standardoffsetmax = __lasx_xvreplgr2vr_w(uint32_t(0xfffff7ff)); + __m256i standardmax = __lasx_xvldi(-2288); /*0x10ffff*/ + __m256i currentmax = __lasx_xvldi(0x0); + __m256i currentoffsetmax = __lasx_xvldi(0x0); + + while (input + 8 < end) { + __m256i in = __lasx_xvld(reinterpret_cast(input), 0); + currentmax = __lasx_xvmax_wu(in, currentmax); + // 0xD8__ + 0x2000 = 0xF8__ => 0xF8__ > 0xF7FF currentoffsetmax = - _mm_max_epu32(_mm_add_epi32(in, offset), currentoffsetmax); - input += 4; + __lasx_xvmax_wu(__lasx_xvadd_w(in, offset), currentoffsetmax); + input += 8; } - __m128i is_zero = - _mm_xor_si128(_mm_max_epu32(currentmax, standardmax), standardmax); - if (_mm_test_all_zeros(is_zero, is_zero) == 0) { + __m256i is_zero = + __lasx_xvxor_v(__lasx_xvmax_wu(currentmax, standardmax), standardmax); + if (__lasx_xbnz_v(is_zero)) { return nullptr; } - is_zero = _mm_xor_si128(_mm_max_epu32(currentoffsetmax, standardoffsetmax), - standardoffsetmax); - if (_mm_test_all_zeros(is_zero, is_zero) == 0) { + is_zero = __lasx_xvxor_v(__lasx_xvmax_wu(currentoffsetmax, standardoffsetmax), + standardoffsetmax); + if (__lasx_xbnz_v(is_zero)) { return nullptr; } - return input; } -const result sse_validate_utf32le_with_errors(const char32_t *input, - size_t size) { +const result lasx_validate_utf32le_with_errors(const char32_t *input, + size_t size) { const char32_t *start = input; const char32_t *end = input + size; - const __m128i standardmax = _mm_set1_epi32(0x10ffff); - const __m128i offset = _mm_set1_epi32(0xffff2000); - const __m128i standardoffsetmax = _mm_set1_epi32(0xfffff7ff); - __m128i currentmax = _mm_setzero_si128(); - __m128i currentoffsetmax = _mm_setzero_si128(); + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)input & 0x1F) && input < end) { + uint32_t word = *input; + if (word > 0x10FFFF) { + return result(error_code::TOO_LARGE, input - start); + } + if (word >= 0xD800 && word <= 0xDFFF) { + return result(error_code::SURROGATE, input - start); + } + input++; + } - while (input + 4 < end) { - const __m128i in = _mm_loadu_si128((__m128i *)input); - currentmax = _mm_max_epu32(in, currentmax); + __m256i offset = __lasx_xvreplgr2vr_w(uint32_t(0xffff2000)); + __m256i standardoffsetmax = __lasx_xvreplgr2vr_w(uint32_t(0xfffff7ff)); + __m256i standardmax = __lasx_xvldi(-2288); /*0x10ffff*/ + __m256i currentmax = __lasx_xvldi(0x0); + __m256i currentoffsetmax = __lasx_xvldi(0x0); + + while (input + 8 < end) { + __m256i in = __lasx_xvld(reinterpret_cast(input), 0); + currentmax = __lasx_xvmax_wu(in, currentmax); currentoffsetmax = - _mm_max_epu32(_mm_add_epi32(in, offset), currentoffsetmax); + __lasx_xvmax_wu(__lasx_xvadd_w(in, offset), currentoffsetmax); - __m128i is_zero = - _mm_xor_si128(_mm_max_epu32(currentmax, standardmax), standardmax); - if (_mm_test_all_zeros(is_zero, is_zero) == 0) { + __m256i is_zero = + __lasx_xvxor_v(__lasx_xvmax_wu(currentmax, standardmax), standardmax); + if (__lasx_xbnz_v(is_zero)) { return result(error_code::TOO_LARGE, input - start); } - - is_zero = _mm_xor_si128(_mm_max_epu32(currentoffsetmax, standardoffsetmax), - standardoffsetmax); - if (_mm_test_all_zeros(is_zero, is_zero) == 0) { + is_zero = + __lasx_xvxor_v(__lasx_xvmax_wu(currentoffsetmax, standardoffsetmax), + standardoffsetmax); + if (__lasx_xbnz_v(is_zero)) { return result(error_code::SURROGATE, input - start); } - input += 4; + input += 8; } return result(error_code::SUCCESS, input - start); } -/* end file src/westmere/sse_validate_utf32le.cpp */ - -/* begin file src/westmere/sse_convert_latin1_to_utf8.cpp */ -std::pair -sse_convert_latin1_to_utf8(const char *latin_input, - const size_t latin_input_length, char *utf8_output) { - const char *end = latin_input + latin_input_length; - - const __m128i v_0000 = _mm_setzero_si128(); - // 0b1000_0000 - const __m128i v_80 = _mm_set1_epi8((uint8_t)0x80); - // 0b1111_1111_1000_0000 - const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80); - - const __m128i latin_1_half_into_u16_byte_mask = - _mm_setr_epi8(0, '\x80', 1, '\x80', 2, '\x80', 3, '\x80', 4, '\x80', 5, - '\x80', 6, '\x80', 7, '\x80'); +/* end file src/lasx/lasx_validate_utf32le.cpp */ - const __m128i latin_2_half_into_u16_byte_mask = - _mm_setr_epi8(8, '\x80', 9, '\x80', 10, '\x80', 11, '\x80', 12, '\x80', - 13, '\x80', 14, '\x80', 15, '\x80'); +/* begin file src/lasx/lasx_convert_latin1_to_utf8.cpp */ +/* + Returns a pair: the first unprocessed byte from buf and utf8_output + A scalar routing should carry on the conversion of the tail. +*/ - // each latin1 takes 1-2 utf8 bytes - // slow path writes useful 8-15 bytes twice (eagerly writes 16 bytes and then - // adjust the pointer) so the last write can exceed the utf8_output size by - // 8-1 bytes by reserving 8 extra input bytes, we expect the output to have - // 8-16 bytes free - while (end - latin_input >= 16 + 8) { - // Load 16 Latin1 characters (16 bytes) into a 128-bit register - __m128i v_latin = _mm_loadu_si128((__m128i *)latin_input); +std::pair +lasx_convert_latin1_to_utf8(const char *latin1_input, size_t len, + char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); + const size_t safety_margin = 12; + const char *end = latin1_input + len - safety_margin; - if (_mm_testz_si128(v_latin, v_80)) { // ASCII fast path!!!! - _mm_storeu_si128((__m128i *)utf8_output, v_latin); - latin_input += 16; + // We always write 16 bytes, of which more than the first 8 bytes + // are valid. A safety margin of 8 is more than sufficient. + while (latin1_input + 16 <= end) { + __m128i in8 = __lsx_vld(reinterpret_cast(latin1_input), 0); + uint32_t ascii_mask = __lsx_vpickve2gr_wu(__lsx_vmskgez_b(in8), 0); + if (ascii_mask == 0xFFFF) { + __lsx_vst(in8, utf8_output, 0); utf8_output += 16; + latin1_input += 16; continue; } + // We just fallback on UTF-16 code. This could be optimized/simplified + // further. + __m256i in16 = __lasx_vext2xv_hu_bu(____m256i(in8)); + // 1. prepare 2-byte values + // input 8-bit word : [aabb|bbbb] x 16 + // expected output : [1100|00aa|10bb|bbbb] x 16 + // t0 = [0000|00aa|bbbb|bb00] + __m256i t0 = __lasx_xvslli_h(in16, 2); + // t1 = [0000|00aa|0000|0000] + __m256i t1 = __lasx_xvand_v(t0, __lasx_xvldi(-2785)); + // t3 = [0000|00aa|00bb|bbbb] + __m256i t2 = __lasx_xvbitsel_v(t1, in16, __lasx_xvrepli_h(0x3f)); + // t4 = [1100|00aa|10bb|bbbb] + __m256i t3 = __lasx_xvor_v(t2, __lasx_xvreplgr2vr_h(uint16_t(0xc080))); + // merge ASCII and 2-byte codewords + __m256i one_byte_bytemask = __lasx_xvsle_hu(in16, __lasx_xvrepli_h(0x7F)); + __m256i utf8_unpacked = __lasx_xvbitsel_v(t3, in16, one_byte_bytemask); + + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[(ascii_mask & 0xFF)]][0]; + __m128i shuffle0 = __lsx_vld(row0 + 1, 0); + __m128i utf8_unpacked_lo = lasx_extracti128_lo(utf8_unpacked); + __m128i utf8_packed0 = + __lsx_vshuf_b(utf8_unpacked_lo, utf8_unpacked_lo, shuffle0); + __lsx_vst(utf8_packed0, utf8_output, 0); + utf8_output += row0[0]; + + const uint8_t *row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[(ascii_mask >> 8)]][0]; + __m128i shuffle1 = __lsx_vld(row1 + 1, 0); + __m128i utf8_unpacked_hi = lasx_extracti128_hi(utf8_unpacked); + __m128i utf8_packed1 = + __lsx_vshuf_b(utf8_unpacked_hi, utf8_unpacked_hi, shuffle1); + __lsx_vst(utf8_packed1, utf8_output, 0); + utf8_output += row1[0]; - // assuming a/b are bytes and A/B are uint16 of the same value - // aaaa_aaaa_bbbb_bbbb -> AAAA_AAAA - __m128i v_u16_latin_1_half = - _mm_shuffle_epi8(v_latin, latin_1_half_into_u16_byte_mask); - // aaaa_aaaa_bbbb_bbbb -> BBBB_BBBB - __m128i v_u16_latin_2_half = - _mm_shuffle_epi8(v_latin, latin_2_half_into_u16_byte_mask); + latin1_input += 16; + } // while - internal::westmere::write_v_u16_11bits_to_utf8(v_u16_latin_1_half, - utf8_output, v_0000, v_ff80); - internal::westmere::write_v_u16_11bits_to_utf8(v_u16_latin_2_half, - utf8_output, v_0000, v_ff80); - latin_input += 16; + return std::make_pair(latin1_input, reinterpret_cast(utf8_output)); +} +/* end file src/lasx/lasx_convert_latin1_to_utf8.cpp */ +/* begin file src/lasx/lasx_convert_latin1_to_utf16.cpp */ +std::pair +lasx_convert_latin1_to_utf16le(const char *buf, size_t len, + char16_t *utf16_output) { + const char *end = buf + len; + + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)utf16_output & 0x1F) && buf < end) { + *utf16_output++ = uint8_t(*buf) & 0xFF; + buf++; } - if (end - latin_input >= 16) { - // Load 16 Latin1 characters (16 bytes) into a 128-bit register - __m128i v_latin = _mm_loadu_si128((__m128i *)latin_input); + while (buf + 32 <= end) { + __m256i in8 = __lasx_xvld(reinterpret_cast(buf), 0); - if (_mm_testz_si128(v_latin, v_80)) { // ASCII fast path!!!! - _mm_storeu_si128((__m128i *)utf8_output, v_latin); - latin_input += 16; - utf8_output += 16; - } else { - // assuming a/b are bytes and A/B are uint16 of the same value - // aaaa_aaaa_bbbb_bbbb -> AAAA_AAAA - __m128i v_u16_latin_1_half = - _mm_shuffle_epi8(v_latin, latin_1_half_into_u16_byte_mask); - internal::westmere::write_v_u16_11bits_to_utf8( - v_u16_latin_1_half, utf8_output, v_0000, v_ff80); - latin_input += 8; - } + __m256i inlow = __lasx_vext2xv_hu_bu(in8); + __m256i in8_high = __lasx_xvpermi_q(in8, in8, 0b00000001); + __m256i inhigh = __lasx_vext2xv_hu_bu(in8_high); + __lasx_xvst(inlow, reinterpret_cast(utf16_output), 0); + __lasx_xvst(inhigh, reinterpret_cast(utf16_output), 32); + + utf16_output += 32; + buf += 32; } - return std::make_pair(latin_input, utf8_output); + if (buf + 16 <= end) { + __m128i zero = __lsx_vldi(0); + __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); + + __m128i inlow = __lsx_vilvl_b(zero, in8); + __m128i inhigh = __lsx_vilvh_b(zero, in8); + __lsx_vst(inlow, reinterpret_cast(utf16_output), 0); + __lsx_vst(inhigh, reinterpret_cast(utf16_output), 16); + + utf16_output += 16; + buf += 16; + } + return std::make_pair(buf, utf16_output); } -/* end file src/westmere/sse_convert_latin1_to_utf8.cpp */ -/* begin file src/westmere/sse_convert_latin1_to_utf16.cpp */ -template + std::pair -sse_convert_latin1_to_utf16(const char *latin1_input, size_t len, - char16_t *utf16_output) { - size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16 - for (size_t i = 0; i < rounded_len; i += 16) { - // Load 16 Latin1 characters into a 128-bit register - __m128i in = - _mm_loadu_si128(reinterpret_cast(&latin1_input[i])); - __m128i out1 = big_endian ? _mm_unpacklo_epi8(_mm_setzero_si128(), in) - : _mm_unpacklo_epi8(in, _mm_setzero_si128()); - __m128i out2 = big_endian ? _mm_unpackhi_epi8(_mm_setzero_si128(), in) - : _mm_unpackhi_epi8(in, _mm_setzero_si128()); - // Zero extend each Latin1 character to 16-bit integers and store the - // results back to memory - _mm_storeu_si128(reinterpret_cast<__m128i *>(&utf16_output[i]), out1); - _mm_storeu_si128(reinterpret_cast<__m128i *>(&utf16_output[i + 8]), out2); +lasx_convert_latin1_to_utf16be(const char *buf, size_t len, + char16_t *utf16_output) { + const char *end = buf + len; + + while (((uint64_t)utf16_output & 0x1F) && buf < end) { + *utf16_output++ = (uint16_t(*buf++) << 8); } - // return pointers pointing to where we left off - return std::make_pair(latin1_input + rounded_len, utf16_output + rounded_len); + + __m256i zero = __lasx_xvldi(0); + while (buf + 32 <= end) { + __m256i in8 = __lasx_xvld(reinterpret_cast(buf), 0); + + __m256i in8_shuf = __lasx_xvpermi_d(in8, 0b11011000); + + __m256i inlow = __lasx_xvilvl_b(in8_shuf, zero); + __m256i inhigh = __lasx_xvilvh_b(in8_shuf, zero); + __lasx_xvst(inlow, reinterpret_cast(utf16_output), 0); + __lasx_xvst(inhigh, reinterpret_cast(utf16_output), 32); + utf16_output += 32; + buf += 32; + } + + if (buf + 16 <= end) { + __m128i zero_128 = __lsx_vldi(0); + __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); + + __m128i inlow = __lsx_vilvl_b(in8, zero_128); + __m128i inhigh = __lsx_vilvh_b(in8, zero_128); + __lsx_vst(inlow, reinterpret_cast(utf16_output), 0); + __lsx_vst(inhigh, reinterpret_cast(utf16_output), 16); + utf16_output += 16; + buf += 16; + } + + return std::make_pair(buf, utf16_output); } -/* end file src/westmere/sse_convert_latin1_to_utf16.cpp */ -/* begin file src/westmere/sse_convert_latin1_to_utf32.cpp */ +/* end file src/lasx/lasx_convert_latin1_to_utf16.cpp */ +/* begin file src/lasx/lasx_convert_latin1_to_utf32.cpp */ std::pair -sse_convert_latin1_to_utf32(const char *buf, size_t len, - char32_t *utf32_output) { +lasx_convert_latin1_to_utf32(const char *buf, size_t len, + char32_t *utf32_output) { const char *end = buf + len; - while (end - buf >= 16) { - // Load 16 Latin1 characters (16 bytes) into a 128-bit register - __m128i in = _mm_loadu_si128((__m128i *)buf); + // LASX requires 32-byte alignment, otherwise performance will be degraded + while (((uint64_t)utf32_output & 0x1F) && buf < end) { + *utf32_output++ = ((uint32_t)*buf) & 0xFF; + buf++; + } - // Shift input to process next 4 bytes - __m128i in_shifted1 = _mm_srli_si128(in, 4); - __m128i in_shifted2 = _mm_srli_si128(in, 8); - __m128i in_shifted3 = _mm_srli_si128(in, 12); + while (buf + 32 <= end) { + __m256i in8 = __lasx_xvld(reinterpret_cast(buf), 0); - // expand 8-bit to 32-bit unit - __m128i out1 = _mm_cvtepu8_epi32(in); - __m128i out2 = _mm_cvtepu8_epi32(in_shifted1); - __m128i out3 = _mm_cvtepu8_epi32(in_shifted2); - __m128i out4 = _mm_cvtepu8_epi32(in_shifted3); + __m256i in32_0 = __lasx_vext2xv_wu_bu(in8); + __lasx_xvst(in32_0, reinterpret_cast(utf32_output), 0); - _mm_storeu_si128((__m128i *)utf32_output, out1); - _mm_storeu_si128((__m128i *)(utf32_output + 4), out2); - _mm_storeu_si128((__m128i *)(utf32_output + 8), out3); - _mm_storeu_si128((__m128i *)(utf32_output + 12), out4); + __m256i in8_1 = __lasx_xvpermi_d(in8, 0b00000001); + __m256i in32_1 = __lasx_vext2xv_wu_bu(in8_1); + __lasx_xvst(in32_1, reinterpret_cast(utf32_output), 32); + + __m256i in8_2 = __lasx_xvpermi_d(in8, 0b00000010); + __m256i in32_2 = __lasx_vext2xv_wu_bu(in8_2); + __lasx_xvst(in32_2, reinterpret_cast(utf32_output), 64); + + __m256i in8_3 = __lasx_xvpermi_d(in8, 0b00000011); + __m256i in32_3 = __lasx_vext2xv_wu_bu(in8_3); + __lasx_xvst(in32_3, reinterpret_cast(utf32_output), 96); + + utf32_output += 32; + buf += 32; + } + + if (buf + 16 <= end) { + __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); + + __m128i zero = __lsx_vldi(0); + __m128i in16low = __lsx_vilvl_b(zero, in8); + __m128i in16high = __lsx_vilvh_b(zero, in8); + __m128i in32_0 = __lsx_vilvl_h(zero, in16low); + __m128i in32_1 = __lsx_vilvh_h(zero, in16low); + __m128i in32_2 = __lsx_vilvl_h(zero, in16high); + __m128i in32_3 = __lsx_vilvh_h(zero, in16high); + + __lsx_vst(in32_0, reinterpret_cast(utf32_output), 0); + __lsx_vst(in32_1, reinterpret_cast(utf32_output), 16); + __lsx_vst(in32_2, reinterpret_cast(utf32_output), 32); + __lsx_vst(in32_3, reinterpret_cast(utf32_output), 48); utf32_output += 16; buf += 16; @@ -35716,15 +50547,13 @@ sse_convert_latin1_to_utf32(const char *buf, size_t len, return std::make_pair(buf, utf32_output); } -/* end file src/westmere/sse_convert_latin1_to_utf32.cpp */ - -/* begin file src/westmere/sse_convert_utf8_to_utf16.cpp */ -// depends on "tables/utf8_to_utf16_tables.h" +/* end file src/lasx/lasx_convert_latin1_to_utf32.cpp */ -// Convert up to 12 bytes from utf8 to utf16 using a mask indicating the +/* begin file src/lasx/lasx_convert_utf8_to_utf16.cpp */ +// Convert up to 16 bytes from utf8 to utf16 using a mask indicating the // end of the code points. Only the least significant 12 bits of the mask // are accessed. -// It returns how many bytes were consumed (up to 12). +// It returns how many bytes were consumed (up to 16, usually 12). template size_t convert_masked_utf8_to_utf16(const char *input, uint64_t utf8_end_of_code_point_mask, @@ -35733,204 +50562,304 @@ size_t convert_masked_utf8_to_utf16(const char *input, // Why 12 input bytes and not 16? Because we are concerned with the size of // the lookup tables. Also 12 is nicely divisible by two and three. // + __m128i in = __lsx_vld(reinterpret_cast(input), 0); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; // // Optimization note: our main path below is load-latency dependent. Thus it // is maybe beneficial to have fast paths that depend on branch prediction but // have less latency. This results in more instructions but, potentially, also // higher speeds. - // + // We first try a few fast paths. - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - const __m128i in = _mm_loadu_si128((__m128i *)input); - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xfff; - if (utf8_end_of_code_point_mask == 0xfff) { - // We process the data in chunks of 12 bytes. - // Note: using 16 bytes is unsafe, see issue_ossfuzz_71218 - __m128i ascii_first = _mm_cvtepu8_epi16(in); - __m128i ascii_second = _mm_cvtepu8_epi16(_mm_srli_si128(in, 8)); - if (big_endian) { - ascii_first = _mm_shuffle_epi8(ascii_first, swap); - ascii_second = _mm_shuffle_epi8(ascii_second, swap); + // The obvious first test is ASCII, which actually consumes the full 16. + if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xFFFF) { + __m128i zero = __lsx_vldi(0); + if (match_system(big_endian)) { + __lsx_vst(__lsx_vilvl_b(zero, in), + reinterpret_cast(utf16_output), 0); + __lsx_vst(__lsx_vilvh_b(zero, in), + reinterpret_cast(utf16_output), 16); + } else { + __lsx_vst(__lsx_vilvl_b(in, zero), + reinterpret_cast(utf16_output), 0); + __lsx_vst(__lsx_vilvh_b(in, zero), + reinterpret_cast(utf16_output), 16); } - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf16_output), ascii_first); - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf16_output + 8), - ascii_second); - utf16_output += 12; // We wrote 12 16-bit characters. - return 12; // We consumed 12 bytes. + utf16_output += 16; // We wrote 16 16-bit characters. + return 16; // We consumed 16 bytes. } - if (((utf8_end_of_code_point_mask & 0xFFFF) == 0xaaaa)) { - // We want to take 8 2-byte UTF-8 code units and turn them into 8 2-byte - // UTF-16 code units. There is probably a more efficient sequence, but the - // following might do. - const __m128i sh = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); - __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); - if (big_endian) - composed = _mm_shuffle_epi8(composed, swap); - _mm_storeu_si128((__m128i *)utf16_output, composed); - utf16_output += 8; // We wrote 16 bytes, 8 code points. - return 16; + + // 3 byte sequences are the next most common, as seen in CJK, which has long + // sequences of these. + if (input_utf8_end_of_code_point_mask == 0x924) { + // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte + // UTF-16 code units. + __m128i composed = convert_utf8_3_byte_to_utf16(in); + // Byte swap if necessary + if (!match_system(big_endian)) { + composed = lsx_swap_bytes(composed); + } + + __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + utf16_output += 4; // We wrote 4 16-bit characters. + return 12; // We consumed 12 bytes. } - if (input_utf8_end_of_code_point_mask == 0x924) { - // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte - // UTF-16 code units. There is probably a more efficient sequence, but the - // following might do. - const __m128i sh = - _mm_setr_epi8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = - _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits - const __m128i middlebyte = - _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits - const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); - const __m128i highbyte = - _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits - const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); - const __m128i composed = - _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); - __m128i composed_repacked = _mm_packus_epi32(composed, composed); - if (big_endian) - composed_repacked = _mm_shuffle_epi8(composed_repacked, swap); - _mm_storeu_si128((__m128i *)utf16_output, composed_repacked); - utf16_output += 4; - return 12; + + // 2 byte sequences occur in short bursts in languages like Greek and Russian. + if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xAAAA) { + // We want to take 6 2-byte UTF-8 code units and turn them into 6 2-byte + // UTF-16 code units. + __m128i composed = convert_utf8_2_byte_to_utf16(in); + // Byte swap if necessary + if (!match_system(big_endian)) { + composed = lsx_swap_bytes(composed); + } + + __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + utf16_output += 8; // We wrote 6 16-bit characters. + return 16; // We consumed 12 bytes. } - /// We do not have a fast path available, so we fallback. - const uint8_t idx = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; - const uint8_t consumed = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; + /// We do not have a fast path available, or the fast path is unimportant, so + /// we fallback. + const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][0]; + + const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][1]; + const __m128i zero = __lsx_vldi(0); if (idx < 64) { // SIX (6) input code-code units - // this is a relatively easy scenario - // we process SIX (6) input code-code units. The max length in bytes of six - // code code units spanning between 1 and 2 bytes each is 12 bytes. On - // processors where pdep/pext is fast, we might be able to use a small - // lookup table. - const __m128i sh = - _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); - __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); - if (big_endian) - composed = _mm_shuffle_epi8(composed, swap); - _mm_storeu_si128((__m128i *)utf16_output, composed); - utf16_output += 6; // We wrote 12 bytes, 6 code points. + // Convert to UTF-16 + __m128i composed = convert_utf8_1_to_2_byte_to_utf16(in, idx); + // Byte swap if necessary + if (!match_system(big_endian)) { + composed = lsx_swap_bytes(composed); + } + // Store + __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + utf16_output += 6; // We wrote 6 16-bit characters. + return consumed; } else if (idx < 145) { // FOUR (4) input code-code units - const __m128i sh = - _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = - _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits - const __m128i middlebyte = - _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits - const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); - const __m128i highbyte = - _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits - const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); - const __m128i composed = - _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); - __m128i composed_repacked = _mm_packus_epi32(composed, composed); - if (big_endian) - composed_repacked = _mm_shuffle_epi8(composed_repacked, swap); - _mm_storeu_si128((__m128i *)utf16_output, composed_repacked); - utf16_output += 4; + // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing. + __m128i sh = __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx]), + 0); + // XXX: depending on the system scalar instructions might be faster. + // 1 byte: 00000000 00000000 0ccccccc + // 2 byte: 00000000 110bbbbb 10cccccc + // 3 byte: 1110aaaa 10bbbbbb 10cccccc + sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); + __m128i perm = __lsx_vshuf_b(zero, in, sh); + // 1 byte: 00000000 0ccccccc + // 2 byte: xx0bbbbb x0cccccc + // 3 byte: xxbbbbbb x0cccccc + __m128i lowperm = __lsx_vpickev_h(perm, perm); + // 1 byte: 00000000 00000000 + // 2 byte: 00000000 00000000 + // 3 byte: 00000000 1110aaaa + __m128i highperm = __lsx_vpickod_h(perm, perm); + // 3 byte: aaaa0000 00000000 + highperm = __lsx_vslli_h(highperm, 12); + // ASCII + // 1 byte: 00000000 0ccccccc + // 2+byte: 00000000 00cccccc + __m128i ascii = __lsx_vand_v(lowperm, __lsx_vrepli_h(0x7f)); + // 1 byte: 00000000 00000000 + // 2 byte: xx0bbbbb 00000000 + // 3 byte: xxbbbbbb 00000000 + __m128i middlebyte = __lsx_vand_v(lowperm, __lsx_vldi(-2561) /*0xFF00*/); + // 1 byte: 00000000 0ccccccc + // 2 byte: 0010bbbb bbcccccc + // 3 byte: 0010bbbb bbcccccc + __m128i composed = __lsx_vor_v(__lsx_vsrli_h(middlebyte, 2), ascii); + + __m128i v0fff = __lsx_vreplgr2vr_h(uint16_t(0xfff)); + // aaaabbbb bbcccccc + composed = __lsx_vbitsel_v(highperm, composed, v0fff); + + if (!match_system(big_endian)) { + composed = lsx_swap_bytes(composed); + } + + __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + utf16_output += 4; // We wrote 4 16-bit codepoints + return consumed; } else if (idx < 209) { - // TWO (2) input code-code units - ////////////// - // There might be garbage inputs where a leading byte mascarades as a - // four-byte leading byte (by being followed by 3 continuation byte), but is - // not greater than 0xf0. This could trigger a buffer overflow if we only - // counted leading bytes of the form 0xf0 as generating surrogate pairs, - // without further UTF-8 validation. Thus we must be careful to ensure that - // only leading bytes at least as large as 0xf0 generate surrogate pairs. We - // do as at the cost of an extra mask. - ///////////// - const __m128i sh = - _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi32(0x7f)); - const __m128i middlebyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); - const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); - __m128i middlehighbyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f0000)); - // correct for spurious high bit - const __m128i correct = - _mm_srli_epi32(_mm_and_si128(perm, _mm_set1_epi32(0x400000)), 1); - middlehighbyte = _mm_xor_si128(correct, middlehighbyte); - const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4); - // We deliberately carry the leading four bits in highbyte if they are - // present, we remove them later when computing hightenbits. - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0xff000000)); - const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6); - // When we need to generate a surrogate pair (leading byte > 0xF0), then - // the corresponding 32-bit value in 'composed' will be greater than - // > (0xff00000>>6) or > 0x3c00000. This can be used later to identify the - // location of the surrogate pairs. - const __m128i composed = - _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), - _mm_or_si128(highbyte_shifted, middlehighbyte_shifted)); - const __m128i composedminus = - _mm_sub_epi32(composed, _mm_set1_epi32(0x10000)); - const __m128i lowtenbits = - _mm_and_si128(composedminus, _mm_set1_epi32(0x3ff)); - // Notice the 0x3ff mask: - const __m128i hightenbits = - _mm_and_si128(_mm_srli_epi32(composedminus, 10), _mm_set1_epi32(0x3ff)); - const __m128i lowtenbitsadd = - _mm_add_epi32(lowtenbits, _mm_set1_epi32(0xDC00)); - const __m128i hightenbitsadd = - _mm_add_epi32(hightenbits, _mm_set1_epi32(0xD800)); - const __m128i lowtenbitsaddshifted = _mm_slli_epi32(lowtenbitsadd, 16); - __m128i surrogates = _mm_or_si128(hightenbitsadd, lowtenbitsaddshifted); - uint32_t basic_buffer[4]; - uint32_t basic_buffer_swap[4]; - if (big_endian) { - _mm_storeu_si128((__m128i *)basic_buffer_swap, - _mm_shuffle_epi8(composed, swap)); - surrogates = _mm_shuffle_epi8(surrogates, swap); + // THREE (3) input code-code units + if (input_utf8_end_of_code_point_mask == 0x888) { + // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte + // UTF-16 pairs. Generating surrogate pairs is a little tricky though, but + // it is easier when we can assume they are all pairs. This version does + // not use the LUT, but 4 byte sequences are less common and the overhead + // of the extra memory access is less important than the early branch + // overhead in shorter sequences. + + // Swap byte pairs + // 10dddddd 10cccccc|10bbbbbb 11110aaa + // 10cccccc 10dddddd|11110aaa 10bbbbbb + __m128i swap = lsx_swap_bytes(in); + // Shift left 2 bits + // cccccc00 dddddd00 xxxxxxxx bbbbbb00 + __m128i shift = __lsx_vslli_b(swap, 2); + // Create a magic number containing the low 2 bits of the trail surrogate + // and all the corrections needed to create the pair. UTF-8 4b prefix = + // -0x0000|0xF000 surrogate offset = -0x0000|0x0040 (0x10000 << 6) + // surrogate high = +0x0000|0xD800 + // surrogate low = +0xDC00|0x0000 + // ------------------------------- + // = +0xDC00|0xE7C0 + __m128i magic = __lsx_vreplgr2vr_w(uint32_t(0xDC00E7C0)); + // Generate unadjusted trail surrogate minus lowest 2 bits + // vec(0000FF00) = __lsx_vldi(-1758) + // xxxxxxxx xxxxxxxx|11110aaa bbbbbb00 + __m128i trail = + __lsx_vbitsel_v(shift, swap, __lsx_vldi(-1758 /*0000FF00*/)); + // Insert low 2 bits of trail surrogate to magic number for later + // 11011100 00000000 11100111 110000cc + __m128i magic_with_low_2 = __lsx_vor_v(__lsx_vsrli_w(shift, 30), magic); + + // Generate lead surrogate + // xxxxcccc ccdddddd|xxxxxxxx xxxxxxxx + // 000000cc ccdddddd|xxxxxxxx xxxxxxxx + __m128i lead = __lsx_vbitsel_v( + __lsx_vsrli_h(__lsx_vand_v(shift, __lsx_vldi(0x3F)), 4), swap, + __lsx_vrepli_h(0x3f /* 0x003f*/)); + + // Blend pairs + // __lsx_vldi(-1741) => vec(0x0000FFFF) + // 000000cc ccdddddd|11110aaa bbbbbb00 + __m128i blend = + __lsx_vbitsel_v(lead, trail, __lsx_vldi(-1741) /* (0x0000FFFF)*4 */); + + // Add magic number to finish the result + // 110111CC CCDDDDDD|110110AA BBBBBBCC + __m128i composed = __lsx_vadd_h(blend, magic_with_low_2); + // Byte swap if necessary + if (!match_system(big_endian)) { + composed = lsx_swap_bytes(composed); + } + __lsx_vst(composed, reinterpret_cast(utf16_output), 0); + utf16_output += 6; // We 3 32-bit surrogate pairs. + return 12; // We consumed 12 bytes. } - _mm_storeu_si128((__m128i *)basic_buffer, composed); - uint32_t surrogate_buffer[4]; - _mm_storeu_si128((__m128i *)surrogate_buffer, surrogates); + // 3 1-4 byte sequences + __m128i sh = __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx]), + 0); + // 1 byte: 00000000 00000000 00000000 0ddddddd + // 3 byte: 00000000 00000000 110ccccc 10dddddd + // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd + // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd + sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); + __m128i perm = __lsx_vshuf_b(zero, in, sh); + // added to fix issue https://github.com/simdutf/simdutf/issues/514 + // We only want to write 2 * 16-bit code units when that is actually what we + // have. Unfortunately, we cannot trust the input. So it is possible to get + // 0xff as an input byte and it should not result in a surrogate pair. We + // need to check for that. + uint32_t permbuffer[4]; + __lsx_vst(perm, permbuffer, 0); + // Mask the low and middle bytes + // 00000000 00000000 00000000 0ddddddd + __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_w(0x7f)); + // Because the surrogates need more work, the high surrogate is computed + // first. + __m128i middlehigh = __lsx_vslli_w(perm, 2); + // 00000000 00000000 00cccccc 00000000 + __m128i middlebyte = __lsx_vand_v(perm, __lsx_vldi(-3777) /* 0x00003F00 */); + // Start assembling the sequence. Since the 4th byte is in the same position + // as it would be in a surrogate and there is no dependency, shift left + // instead of right. 3 byte: 00000000 10bbbbxx xxxxxxxx xxxxxxxx 4 byte: + // 11110aaa bbbbbbxx xxxxxxxx xxxxxxxx + __m128i ab = + __lsx_vbitsel_v(middlehigh, perm, __lsx_vldi(-1656) /*0xFF000000*/); + // Top 16 bits contains the high ten bits of the surrogate pair before + // correction 3 byte: 00000000 10bbbbcc|cccc0000 00000000 4 byte: 11110aaa + // bbbbbbcc|cccc0000 00000000 - high 10 bits correct w/o correction + __m128i v_fffc0000 = __lsx_vreplgr2vr_w(uint32_t(0xFFFC0000)); + __m128i abc = __lsx_vbitsel_v(__lsx_vslli_w(middlebyte, 4), ab, v_fffc0000); + // Combine the low 6 or 7 bits by a shift right accumulate + // 3 byte: 00000000 00000010|bbbbcccc ccdddddd - low 16 bits correct + // 4 byte: 00000011 110aaabb|bbbbcccc ccdddddd - low 10 bits correct w/o + // correction + __m128i composed = __lsx_vor_v(ascii, __lsx_vsrli_w(abc, 6)); + // After this is for surrogates + // Blend the low and high surrogates + // 4 byte: 11110aaa bbbbbbcc|bbbbcccc ccdddddd + __m128i mixed = + __lsx_vbitsel_v(abc, composed, __lsx_vldi(-1741) /*0x0000FFFF*/); + // Clear the upper 6 bits of the low surrogate. Don't clear the upper bits + // yet as 0x10000 was not subtracted from the codepoint yet. 4 byte: + // 11110aaa bbbbbbcc|000000cc ccdddddd + __m128i v_ffff03ff = __lsx_vreplgr2vr_w(uint32_t(0xFFFF03FF)); + __m128i masked_pair = __lsx_vand_v(mixed, v_ffff03ff); + // Correct the remaining UTF-8 prefix, surrogate offset, and add the + // surrogate prefixes in one magic 16-bit addition. similar magic number but + // without the continue byte adjust and halfword swapped UTF-8 4b prefix = + // -0xF000|0x0000 surrogate offset = -0x0040|0x0000 (0x10000 << 6) + // surrogate high = +0xD800|0x0000 + // surrogate low = +0x0000|0xDC00 + // ----------------------------------- + // = +0xE7C0|0xDC00 + __m128i magic = __lsx_vreplgr2vr_w(uint32_t(0xE7C0DC00)); + // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD - surrogate pair complete + __m128i surrogates = __lsx_vadd_w(masked_pair, magic); + // If the high bit is 1 (s32 less than zero), this needs a surrogate pair + __m128i is_pair = __lsx_vslt_w(perm, zero); + // Select either the 4 byte surrogate pair or the 2 byte solo codepoint + // 3 byte: 0xxxxxxx xxxxxxxx|bbbbcccc ccdddddd + // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD + __m128i selected = __lsx_vbitsel_v(composed, surrogates, is_pair); + // Byte swap if necessary + if (!match_system(big_endian)) { + selected = lsx_swap_bytes(selected); + } + // Attempting to shuffle and store would be complex, just scalarize. + uint32_t buffer_tmp[4]; + __lsx_vst(selected, buffer_tmp, 0); + // Test for the top bit of the surrogate mask. Remove due to issue 514 + // const uint32_t SURROGATE_MASK = match_system(big_endian) ? 0x80000000 : + // 0x00800000; for (size_t i = 0; i < 3; i++) { - if (basic_buffer[i] > 0x3c00000) { - utf16_output[0] = uint16_t(surrogate_buffer[i] & 0xffff); - utf16_output[1] = uint16_t(surrogate_buffer[i] >> 16); + // Surrogate + // Used to be if (buffer[i] & SURROGATE_MASK) { + // See discussion above. + // patch for issue https://github.com/simdutf/simdutf/issues/514 + if ((permbuffer[i] & 0xf8000000) == 0xf0000000) { + utf16_output[0] = uint16_t(buffer_tmp[i] >> 16); + utf16_output[1] = uint16_t(buffer_tmp[i] & 0xFFFF); utf16_output += 2; } else { - utf16_output[0] = big_endian ? uint16_t(basic_buffer_swap[i]) - : uint16_t(basic_buffer[i]); + utf16_output[0] = uint16_t(buffer_tmp[i] & 0xFFFF); utf16_output++; } } + return consumed; } else { // here we know that there is an error but we do not handle errors + return 12; } - return consumed; } -/* end file src/westmere/sse_convert_utf8_to_utf16.cpp */ -/* begin file src/westmere/sse_convert_utf8_to_utf32.cpp */ -// depends on "tables/utf8_to_utf16_tables.h" - +/* end file src/lasx/lasx_convert_utf8_to_utf16.cpp */ +/* begin file src/lasx/lasx_convert_utf8_to_utf32.cpp */ // Convert up to 12 bytes from utf8 to utf32 using a mask indicating the // end of the code points. Only the least significant 12 bits of the mask // are accessed. // It returns how many bytes were consumed (up to 12). size_t convert_masked_utf8_to_utf32(const char *input, uint64_t utf8_end_of_code_point_mask, - char32_t *&utf32_output) { + char32_t *&utf32_out) { // we use an approach where we try to process up to 12 input bytes. // Why 12 input bytes and not 16? Because we are concerned with the size of // the lookup tables. Also 12 is nicely divisible by two and three. // + uint32_t *&utf32_output = reinterpret_cast(utf32_out); + __m128i in = __lsx_vld(reinterpret_cast(input), 0); + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xFFF; // // Optimization note: our main path below is load-latency dependent. Thus it // is maybe beneficial to have fast paths that depend on branch prediction but @@ -35938,135 +50867,179 @@ size_t convert_masked_utf8_to_utf32(const char *input, // higher speeds. // // We first try a few fast paths. - const __m128i in = _mm_loadu_si128((__m128i *)input); - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xfff; - if (utf8_end_of_code_point_mask == 0xfff) { - // We process the data in chunks of 12 bytes. - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), - _mm_cvtepu8_epi32(in)); - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 4), - _mm_cvtepu8_epi32(_mm_srli_si128(in, 4))); - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 8), - _mm_cvtepu8_epi32(_mm_srli_si128(in, 8))); - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 12), - _mm_cvtepu8_epi32(_mm_srli_si128(in, 12))); - utf32_output += 12; // We wrote 12 32-bit characters. - return 12; // We consumed 12 bytes. - } - if (((utf8_end_of_code_point_mask & 0xffff) == 0xaaaa)) { - // We want to take 8 2-byte UTF-8 code units and turn them into 8 4-byte - // UTF-32 code units. There is probably a more efficient sequence, but the - // following might do. - const __m128i sh = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); - const __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), - _mm_cvtepu16_epi32(composed)); - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 4), - _mm_cvtepu16_epi32(_mm_srli_si128(composed, 8))); - utf32_output += 8; // We wrote 32 bytes, 8 code points. - return 16; + if ((utf8_end_of_code_point_mask & 0xffff) == 0xffff) { + // We process in chunks of 16 bytes. + // use fast implementation in src/simdutf/arm64/simd.h + // Ideally the compiler can keep the tables in registers. + __m128i zero = __lsx_vldi(0); + __m128i in16low = __lsx_vilvl_b(zero, in); + __m128i in16high = __lsx_vilvh_b(zero, in); + __m128i in32_0 = __lsx_vilvl_h(zero, in16low); + __m128i in32_1 = __lsx_vilvh_h(zero, in16low); + __m128i in32_2 = __lsx_vilvl_h(zero, in16high); + __m128i in32_3 = __lsx_vilvh_h(zero, in16high); + + __lsx_vst(in32_0, reinterpret_cast(utf32_output), 0); + __lsx_vst(in32_1, reinterpret_cast(utf32_output), 16); + __lsx_vst(in32_2, reinterpret_cast(utf32_output), 32); + __lsx_vst(in32_3, reinterpret_cast(utf32_output), 48); + + utf32_output += 16; // We wrote 16 32-bit characters. + return 16; // We consumed 16 bytes. } + __m128i zero = __lsx_vldi(0); if (input_utf8_end_of_code_point_mask == 0x924) { // We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte - // UTF-32 code units. There is probably a more efficient sequence, but the - // following might do. - const __m128i sh = - _mm_setr_epi8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = - _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits - const __m128i middlebyte = - _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits - const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); - const __m128i highbyte = - _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits - const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); - const __m128i composed = - _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); - _mm_storeu_si128((__m128i *)utf32_output, composed); - utf32_output += 4; - return 12; + // UTF-32 code units. Convert to UTF-16 + __m128i composed_utf16 = convert_utf8_3_byte_to_utf16(in); + __m128i utf32_low = __lsx_vilvl_h(zero, composed_utf16); + + __lsx_vst(utf32_low, reinterpret_cast(utf32_output), 0); + utf32_output += 4; // We wrote 4 32-bit characters. + return 12; // We consumed 12 bytes. } - /// We do not have a fast path available, so we fallback. + // 2 byte sequences occur in short bursts in languages like Greek and Russian. + if (input_utf8_end_of_code_point_mask == 0xaaa) { + // We want to take 6 2-byte UTF-8 code units and turn them into 6 4-byte + // UTF-32 code units. Convert to UTF-16 + __m128i composed_utf16 = convert_utf8_2_byte_to_utf16(in); + + __m128i utf32_low = __lsx_vilvl_h(zero, composed_utf16); + __m128i utf32_high = __lsx_vilvh_h(zero, composed_utf16); + + __lsx_vst(utf32_low, reinterpret_cast(utf32_output), 0); + __lsx_vst(utf32_high, reinterpret_cast(utf32_output), 16); + utf32_output += 6; + return 12; // We consumed 12 bytes. + } + // Either no fast path or an unimportant fast path. + + const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][0]; + const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][1]; - const uint8_t idx = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; - const uint8_t consumed = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; if (idx < 64) { // SIX (6) input code-code units - // this is a relatively easy scenario - // we process SIX (6) input code-code units. The max length in bytes of six - // code code units spanning between 1 and 2 bytes each is 12 bytes. On - // processors where pdep/pext is fast, we might be able to use a small - // lookup table. - const __m128i sh = - _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); - const __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), - _mm_cvtepu16_epi32(composed)); - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 4), - _mm_cvtepu16_epi32(_mm_srli_si128(composed, 8))); - utf32_output += 6; // We wrote 12 bytes, 6 code points. + // Convert to UTF-16 + __m128i composed_utf16 = convert_utf8_1_to_2_byte_to_utf16(in, idx); + __m128i utf32_low = __lsx_vilvl_h(zero, composed_utf16); + __m128i utf32_high = __lsx_vilvh_h(zero, composed_utf16); + + __lsx_vst(utf32_low, reinterpret_cast(utf32_output), 0); + __lsx_vst(utf32_high, reinterpret_cast(utf32_output), 16); + utf32_output += 6; + return consumed; } else if (idx < 145) { // FOUR (4) input code-code units - const __m128i sh = - _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = - _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits - const __m128i middlebyte = - _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits - const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); - const __m128i highbyte = - _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits - const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); - const __m128i composed = - _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); - _mm_storeu_si128((__m128i *)utf32_output, composed); - utf32_output += 4; + // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing. + __m128i sh = __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx]), + 0); + // Shuffle + // 1 byte: 00000000 00000000 0ccccccc + // 2 byte: 00000000 110bbbbb 10cccccc + // 3 byte: 1110aaaa 10bbbbbb 10cccccc + sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); + __m128i perm = __lsx_vshuf_b(zero, in, sh); + // Split + // 00000000 00000000 0ccccccc + __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_w(0x7F)); // 6 or 7 bits + // Note: unmasked + // xxxxxxxx aaaaxxxx xxxxxxxx + __m128i high = + __lsx_vsrli_w(__lsx_vand_v(perm, __lsx_vldi(0xf)), 4); // 4 bits + // Use 16 bit bic instead of and. + // The top bits will be corrected later in the bsl + // 00000000 10bbbbbb 00000000 + __m128i middle = + __lsx_vand_v(perm, __lsx_vldi(-1758 /*0x0000FF00*/)); // 5 or 6 bits + // Combine low and middle with shift right accumulate + // 00000000 00xxbbbb bbcccccc + __m128i lowmid = __lsx_vor_v(ascii, __lsx_vsrli_w(middle, 2)); + // Insert top 4 bits from high byte with bitwise select + // 00000000 aaaabbbb bbcccccc + __m128i composed = + __lsx_vbitsel_v(lowmid, high, __lsx_vldi(-3600 /*0x0000F000*/)); + __lsx_vst(composed, utf32_output, 0); + utf32_output += 4; // We wrote 4 32-bit characters. + return consumed; } else if (idx < 209) { - // TWO (2) input code-code units - const __m128i sh = - _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi32(0x7f)); - const __m128i middlebyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); - const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); - __m128i middlehighbyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f0000)); - // correct for spurious high bit - const __m128i correct = - _mm_srli_epi32(_mm_and_si128(perm, _mm_set1_epi32(0x400000)), 1); - middlehighbyte = _mm_xor_si128(correct, middlehighbyte); - const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4); - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0x07000000)); - const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6); - const __m128i composed = - _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), - _mm_or_si128(highbyte_shifted, middlehighbyte_shifted)); - _mm_storeu_si128((__m128i *)utf32_output, composed); - utf32_output += 3; + // THREE (3) input code-code units + if (input_utf8_end_of_code_point_mask == 0x888) { + // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte + // UTF-32 code units. This uses the same method as the fixed 3 byte + // version, reversing and shift left insert. However, there is no need for + // a shuffle mask now, just rev16 and rev32. + // + // This version does not use the LUT, but 4 byte sequences are less common + // and the overhead of the extra memory access is less important than the + // early branch overhead in shorter sequences, so it comes last. + + // Swap pairs of bytes + // 10dddddd|10cccccc|10bbbbbb|11110aaa + // 10cccccc 10dddddd|11110aaa 10bbbbbb + __m128i swap = lsx_swap_bytes(in); + // Shift left and insert + // xxxxcccc ccdddddd|xxxxxxxa aabbbbbb + __m128i merge1 = __lsx_vbitsel_v(__lsx_vsrli_h(swap, 2), swap, + __lsx_vrepli_h(0x3f /*0x003F*/)); + // Shift insert again + // xxxxxxxx xxxaaabb bbbbcccc ccdddddd + __m128i merge2 = + __lsx_vbitsel_v(__lsx_vslli_w(merge1, 12), /* merge1 << 12 */ + __lsx_vsrli_w(merge1, 16), /* merge1 >> 16 */ + __lsx_vldi(-2545)); /*0x00000FFF*/ + // Clear the garbage + // 00000000 000aaabb bbbbcccc ccdddddd + __m128i composed = __lsx_vand_v(merge2, __lsx_vldi(-2273 /*0x1FFFFF*/)); + // Store + __lsx_vst(composed, utf32_output, 0); + utf32_output += 3; // We wrote 3 32-bit characters. + return 12; // We consumed 12 bytes. + } + // Unlike UTF-16, doing a fast codepath doesn't have nearly as much benefit + // due to surrogates no longer being involved. + __m128i sh = __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx]), + 0); + // 1 byte: 00000000 00000000 00000000 0ddddddd + // 2 byte: 00000000 00000000 110ccccc 10dddddd + // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd + // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd + sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); + __m128i perm = __lsx_vshuf_b(zero, in, sh); + + // Ascii + __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_w(0x7F)); + __m128i middle = __lsx_vand_v(perm, __lsx_vldi(-3777 /*0x00003f00*/)); + // 00000000 00000000 0000cccc ccdddddd + __m128i cd = + __lsx_vbitsel_v(__lsx_vsrli_w(middle, 2), ascii, __lsx_vrepli_w(0x3f)); + + __m128i correction = __lsx_vand_v(perm, __lsx_vldi(-3520 /*0x00400000*/)); + __m128i corrected = __lsx_vadd_b(perm, __lsx_vsrli_w(correction, 1)); + // Insert twice + // 00000000 000aaabb bbbbxxxx xxxxxxxx + __m128i corrected_srli2 = + __lsx_vsrli_w(__lsx_vand_v(corrected, __lsx_vrepli_b(0x7)), 2); + __m128i ab = + __lsx_vbitsel_v(corrected_srli2, corrected, __lsx_vrepli_h(0x3f)); + ab = __lsx_vsrli_w(ab, 4); + // 00000000 000aaabb bbbbcccc ccdddddd + __m128i composed = + __lsx_vbitsel_v(ab, cd, __lsx_vldi(-2545 /*0x00000FFF*/)); + // Store + __lsx_vst(composed, utf32_output, 0); + utf32_output += 3; // We wrote 3 32-bit characters. + return consumed; } else { // here we know that there is an error but we do not handle errors + return 12; } - return consumed; } -/* end file src/westmere/sse_convert_utf8_to_utf32.cpp */ -/* begin file src/westmere/sse_convert_utf8_to_latin1.cpp */ -// depends on "tables/utf8_to_utf16_tables.h" - -// Convert up to 12 bytes from utf8 to latin1 using a mask indicating the -// end of the code points. Only the least significant 12 bits of the mask -// are accessed. -// It returns how many bytes were consumed (up to 12). +/* end file src/lasx/lasx_convert_utf8_to_utf32.cpp */ +/* begin file src/lasx/lasx_convert_utf8_to_latin1.cpp */ size_t convert_masked_utf8_to_latin1(const char *input, uint64_t utf8_end_of_code_point_mask, char *&latin1_output) { @@ -36074,27 +51047,30 @@ size_t convert_masked_utf8_to_latin1(const char *input, // Why 12 input bytes and not 16? Because we are concerned with the size of // the lookup tables. Also 12 is nicely divisible by two and three. // - // + __m128i in = __lsx_vld(reinterpret_cast(input), 0); + + const uint16_t input_utf8_end_of_code_point_mask = + utf8_end_of_code_point_mask & 0xfff; // Optimization note: our main path below is load-latency dependent. Thus it // is maybe beneficial to have fast paths that depend on branch prediction but // have less latency. This results in more instructions but, potentially, also // higher speeds. - // - const __m128i in = _mm_loadu_si128((__m128i *)input); - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & - 0xfff; // we are only processing 12 bytes in case it is not all ASCII - if (utf8_end_of_code_point_mask == 0xfff) { - // We process the data in chunks of 12 bytes. - _mm_storeu_si128(reinterpret_cast<__m128i *>(latin1_output), in); - latin1_output += 12; // We wrote 12 characters. - return 12; // We consumed 12 bytes. + + // We first try a few fast paths. + // The obvious first test is ASCII, which actually consumes the full 16. + if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xFFFF) { + // We process in chunks of 16 bytes + __lsx_vst(in, reinterpret_cast(latin1_output), 0); + latin1_output += 16; // We wrote 16 18-bit characters. + return 16; // We consumed 16 bytes. } - /// We do not have a fast path available, so we fallback. - const uint8_t idx = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; - const uint8_t consumed = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; + /// We do not have a fast path available, or the fast path is unimportant, so + /// we fallback. + const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][0]; + + const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex + [input_utf8_end_of_code_point_mask][1]; // this indicates an invalid input: if (idx >= 64) { return consumed; @@ -36102,50 +51078,63 @@ size_t convert_masked_utf8_to_latin1(const char *input, // Here we should have (idx < 64), if not, there is a bug in the validation or // elsewhere. SIX (6) input code-code units this is a relatively easy scenario // we process SIX (6) input code-code units. The max length in bytes of six - // code code units spanning between 1 and 2 bytes each is 12 bytes. On - // processors where pdep/pext is fast, we might be able to use a small lookup - // table. - const __m128i sh = - _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); - __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); - const __m128i latin1_packed = _mm_packus_epi16(composed, composed); + // code code units spanning between 1 and 2 bytes each is 12 bytes. Converts 6 + // 1-2 byte UTF-8 characters to 6 UTF-16 characters. This is a relatively easy + // scenario we process SIX (6) input code-code units. The max length in bytes + // of six code code units spanning between 1 and 2 bytes each is 12 bytes. + __m128i sh = __lsx_vld(reinterpret_cast( + simdutf::tables::utf8_to_utf16::shufutf8[idx]), + 0); + // Shuffle + // 1 byte: 00000000 0bbbbbbb + // 2 byte: 110aaaaa 10bbbbbb + sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); + __m128i perm = __lsx_vshuf_b(__lsx_vldi(0), in, sh); + // ascii mask + // 1 byte: 11111111 11111111 + // 2 byte: 00000000 00000000 + __m128i ascii_mask = __lsx_vslt_bu(perm, __lsx_vldi(0x80)); + // utf8 mask + // 1 byte: 00000000 00000000 + // 2 byte: 00111111 00111111 + __m128i utf8_mask = __lsx_vand_v(__lsx_vsle_bu(__lsx_vldi(0x80), perm), + __lsx_vldi(0b00111111)); + // mask + // 1 byte: 11111111 11111111 + // 2 byte: 00111111 00111111 + __m128i mask = __lsx_vor_v(utf8_mask, ascii_mask); + + __m128i composed = __lsx_vbitsel_v(__lsx_vsrli_h(perm, 2), perm, mask); // writing 8 bytes even though we only care about the first 6 bytes. - // performance note: it would be faster to use _mm_storeu_si128, we should - // investigate. - _mm_storel_epi64((__m128i *)latin1_output, latin1_packed); + __m128i latin1_packed = __lsx_vpickev_b(__lsx_vldi(0), composed); + + __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); latin1_output += 6; // We wrote 6 bytes. return consumed; } -/* end file src/westmere/sse_convert_utf8_to_latin1.cpp */ +/* end file src/lasx/lasx_convert_utf8_to_latin1.cpp */ -/* begin file src/westmere/sse_convert_utf16_to_latin1.cpp */ +/* begin file src/lasx/lasx_convert_utf16_to_latin1.cpp */ template std::pair -sse_convert_utf16_to_latin1(const char16_t *buf, size_t len, - char *latin1_output) { +lasx_convert_utf16_to_latin1(const char16_t *buf, size_t len, + char *latin1_output) { const char16_t *end = buf + len; - while (end - buf >= 8) { - // Load 8 UTF-16 characters into 128-bit SSE register - __m128i in = _mm_loadu_si128(reinterpret_cast(buf)); - + while (buf + 16 <= end) { + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); if (!match_system(big_endian)) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - in = _mm_shuffle_epi8(in, swap); + in = lsx_swap_bytes(in); + in1 = lsx_swap_bytes(in1); } - - __m128i high_byte_mask = _mm_set1_epi16((int16_t)0xFF00); - if (_mm_testz_si128(in, high_byte_mask)) { - // Pack 16-bit characters into 8-bit and store in latin1_output - __m128i latin1_packed = _mm_packus_epi16(in, in); - _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output), - latin1_packed); - // Adjust pointers for next iteration - buf += 8; - latin1_output += 8; + if (__lsx_bz_v(__lsx_vpickod_b(in1, in))) { + // 1. pack the bytes + __m128i latin1_packed = __lsx_vpickev_b(in1, in); + // 2. store (8 bytes) + __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); + // 3. adjust pointers + buf += 16; + latin1_output += 16; } else { return std::make_pair(nullptr, reinterpret_cast(latin1_output)); } @@ -36155,29 +51144,28 @@ sse_convert_utf16_to_latin1(const char16_t *buf, size_t len, template std::pair -sse_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, - char *latin1_output) { +lasx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, + char *latin1_output) { const char16_t *start = buf; const char16_t *end = buf + len; - while (end - buf >= 8) { - __m128i in = _mm_loadu_si128(reinterpret_cast(buf)); - + while (buf + 16 <= end) { + __m128i in = __lsx_vld(reinterpret_cast(buf), 0); + __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); if (!match_system(big_endian)) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - in = _mm_shuffle_epi8(in, swap); + in = lsx_swap_bytes(in); + in1 = lsx_swap_bytes(in1); } - - __m128i high_byte_mask = _mm_set1_epi16((int16_t)0xFF00); - if (_mm_testz_si128(in, high_byte_mask)) { - __m128i latin1_packed = _mm_packus_epi16(in, in); - _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output), - latin1_packed); - buf += 8; - latin1_output += 8; + if (__lsx_bz_v(__lsx_vpickod_b(in1, in))) { + // 1. pack the bytes + __m128i latin1_packed = __lsx_vpickev_b(in1, in); + // 2. store (8 bytes) + __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); + // 3. adjust pointers + buf += 16; + latin1_output += 16; } else { - // Fallback to scalar code for handling errors - for (int k = 0; k < 8; k++) { + // Let us do a scalar fallback. + for (int k = 0; k < 16; k++) { uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k]; @@ -36188,16 +51176,15 @@ sse_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, latin1_output); } } - buf += 8; } } // while return std::make_pair(result(error_code::SUCCESS, buf - start), latin1_output); } -/* end file src/westmere/sse_convert_utf16_to_latin1.cpp */ -/* begin file src/westmere/sse_convert_utf16_to_utf8.cpp */ +/* end file src/lasx/lasx_convert_utf16_to_latin1.cpp */ +/* begin file src/lasx/lasx_convert_utf16_to_utf8.cpp */ /* - The vectorized algorithm works on single SSE register i.e., it + The vectorized algorithm works on single LASX register i.e., it loads eight 16-bit code units. We consider three cases: @@ -36211,11 +51198,11 @@ sse_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, Ad 1. When values are less than 0x0800, it means that a 16-bit code unit - can be converted into: 1) single UTF8 byte (when it is an ASCII + can be converted into: 1) single UTF8 byte (when it's an ASCII char) or 2) two UTF8 bytes. For this case we do only some shuffle to obtain these 2-byte - codes and finally compress the whole SSE register with a single + codes and finally compress the whole LASX register with a single shuffle. We need 256-entry lookup table to get a compression pattern @@ -36233,7 +51220,7 @@ sse_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, the three-UTF8-bytes case. Finally these two registers are interleaved forming eight-element - array of 32-bit values. The array spans two SSE registers. + array of 32-bit values. The array spans two LASX registers. The bytes from the registers are compressed using two shuffles. We need 256-entry lookup table to get a compression pattern @@ -36244,187 +51231,210 @@ sse_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, To summarize: - We need two 256-entry tables that have 8704 bytes in total. */ - /* Returns a pair: the first unprocessed byte from buf and utf8_output A scalar routing should carry on the conversion of the tail. */ + template std::pair -sse_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_output) { - +lasx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); const char16_t *end = buf + len; - const __m128i v_0000 = _mm_setzero_si128(); - const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800); - const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800); const size_t safety_margin = 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m128i in = _mm_loadu_si128((__m128i *)buf); - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - in = _mm_shuffle_epi8(in, swap); + __m256i v_07ff = __lasx_xvreplgr2vr_h(uint16_t(0x7ff)); + __m256i zero = __lasx_xvldi(0); + __m128i zero_128 = __lsx_vldi(0); + while (buf + 16 + safety_margin <= end) { + __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); + if (!match_system(big_endian)) { + in = lasx_swap_bytes(in); } - // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes - const __m128i v_ff80 = _mm_set1_epi16((int16_t)0xff80); - if (_mm_testz_si128(in, v_ff80)) { // ASCII fast path!!!! - __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - nextin = _mm_shuffle_epi8(nextin, swap); - } - if (!_mm_testz_si128(nextin, v_ff80)) { - // 1. pack the bytes - // obviously suboptimal. - const __m128i utf8_packed = _mm_packus_epi16(in, in); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - in = nextin; - } else { - // 1. pack the bytes - // obviously suboptimal. - const __m128i utf8_packed = _mm_packus_epi16(in, nextin); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } + if (__lasx_xbnz_h(__lasx_xvslt_hu( + in, __lasx_xvrepli_h(0x7F)))) { // ASCII fast path!!!! + // 1. pack the bytes + __m256i utf8_packed = + __lasx_xvpermi_d(__lasx_xvpickev_b(in, in), 0b00001000); + // 2. store (16 bytes) + __lsx_vst(lasx_extracti128_lo(utf8_packed), utf8_output, 0); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! } - // no bits set above 7th bit - const __m128i one_byte_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in, v_ff80), v_0000); - const uint16_t one_byte_bitmask = - static_cast(_mm_movemask_epi8(one_byte_bytemask)); + if (__lasx_xbz_v(__lasx_xvslt_hu(v_07ff, in))) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 16 + // expected output : [110a|aaaa|10bb|bbbb] x 16 + // t0 = [000a|aaaa|bbbb|bb00] + __m256i t0 = __lasx_xvslli_h(in, 2); + // t1 = [000a|aaaa|0000|0000] + __m256i t1 = __lasx_xvand_v(t0, __lasx_xvldi(-2785 /*0x1f00*/)); + // t2 = [0000|0000|00bb|bbbb] + __m256i t2 = __lasx_xvand_v(in, __lasx_xvrepli_h(0x3f)); + // t3 = [000a|aaaa|00bb|bbbb] + __m256i t3 = __lasx_xvor_v(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + __m256i v_c080 = __lasx_xvreplgr2vr_h(uint16_t(0xc080)); + __m256i t4 = __lasx_xvor_v(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + __m256i one_byte_bytemask = + __lasx_xvsle_hu(in, __lasx_xvrepli_h(0x7F /*0x007F*/)); + __m256i utf8_unpacked = __lasx_xvbitsel_v(t4, in, one_byte_bytemask); + // 3. prepare bitmask for 8-bit lookup + __m256i mask = __lasx_xvmskltz_h(one_byte_bytemask); + uint32_t m1 = __lasx_xvpickve2gr_wu(mask, 0); + uint32_t m2 = __lasx_xvpickve2gr_wu(mask, 4); + // 4. pack the bytes + const uint8_t *row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m1]][0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_packed1 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(utf8_unpacked), shuffle1); + + const uint8_t *row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_packed2 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(utf8_unpacked), shuffle2); + // 5. store bytes + __lsx_vst(utf8_packed1, utf8_output, 0); + utf8_output += row1[0]; - // no bits set above 11th bit - const __m128i one_or_two_bytes_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_0000); - const uint16_t one_or_two_bytes_bitmask = - static_cast(_mm_movemask_epi8(one_or_two_bytes_bytemask)); + __lsx_vst(utf8_packed2, utf8_output, 0); + utf8_output += row2[0]; - if (one_or_two_bytes_bitmask == 0xffff) { - internal::westmere::write_v_u16_11bits_to_utf8( - in, utf8_output, one_byte_bytemask, one_byte_bitmask); - buf += 8; + buf += 16; continue; } - - // 1. Check if there are any surrogate word in the input chunk. - // We have also deal with situation when there is a surrogate word - // at the end of a chunk. - const __m128i surrogates_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_d800); - - // bitmask = 0x0000 if there are no surrogates - // = 0xc000 if the last word is a surrogate - const uint16_t surrogates_bitmask = - static_cast(_mm_movemask_epi8(surrogates_bytemask)); + __m256i surrogates_bytemask = + __lasx_xvseq_h(__lasx_xvand_v(in, __lasx_xvldi(-2568 /*0xF800*/)), + __lasx_xvldi(-2600 /*0xD800*/)); // It might seem like checking for surrogates_bitmask == 0xc000 could help. // However, it is likely an uncommon occurrence. - if (surrogates_bitmask == 0x0000) { + if (__lasx_xbz_v(surrogates_bytemask)) { // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606, - 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. + We precompute byte 1 for case #3 and -- **conditionally** -- + precompute either byte 1 for case #2 or byte 2 for case #3. Note that + they differ by exactly one bit. - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ + Finally from these two code units we build proper UTF-8 sequence, + taking into account the case (i.e, the number of bytes to write). + */ /** * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: * t2 => [0ccc|cccc] [10cc|cccc] * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) */ -#define simdutf_vec(x) _mm_set1_epi16(static_cast(x)) // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const __m128i t0 = _mm_shuffle_epi8(in, dup_even); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111)); + __m256i t0 = __lasx_xvpickev_b(in, in); + t0 = __lasx_xvilvl_b(t0, t0); + + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|00cc|cccc] + __m256i v_3f7f = __lasx_xvreplgr2vr_h(uint16_t(0x3F7F)); + __m256i t1 = __lasx_xvand_v(t0, v_3f7f); // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const __m128i t2 = _mm_or_si128(t1, simdutf_vec(0b1000000000000000)); + __m256i t2 = __lasx_xvor_v(t1, __lasx_xvldi(-2688)); - // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] - const __m128i s0 = _mm_srli_epi16(in, 4); - // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] - const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100)); - // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] - const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140)); - // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000)); - const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, - simdutf_vec(0b0100000000000000)); - const __m128i s4 = _mm_xor_si128(s3, m0); -#undef simdutf_vec + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + __m256i s0 = __lasx_xvsrli_h(in, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + __m256i s1 = __lasx_xvslli_h(in, 2); + // s1: [aabb|bbbb|cccc|cc00] => [00bb|bbbb|0000|0000] + s1 = __lasx_xvand_v(s1, __lasx_xvldi(-2753 /*0x3F00*/)); + + // [00bb|bbbb|0000|aaaa] + __m256i s2 = __lasx_xvor_v(s0, s1); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + __m256i v_c0e0 = __lasx_xvreplgr2vr_h(uint16_t(0xC0E0)); + __m256i s3 = __lasx_xvor_v(s2, v_c0e0); + __m256i one_or_two_bytes_bytemask = __lasx_xvsle_hu(in, v_07ff); + __m256i m0 = __lasx_xvandn_v(one_or_two_bytes_bytemask, + __lasx_xvldi(-2752 /*0x4000*/)); + __m256i s4 = __lasx_xvxor_v(s3, m0); // 4. expand code units 16-bit => 32-bit - const __m128i out0 = _mm_unpacklo_epi16(t2, s4); - const __m128i out1 = _mm_unpackhi_epi16(t2, s4); + __m256i out0 = __lasx_xvilvl_h(s4, t2); + __m256i out1 = __lasx_xvilvh_h(s4, t2); // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint16_t mask = - (one_byte_bitmask & 0x5555) | (one_or_two_bytes_bitmask & 0xaaaa); - if (mask == 0) { - // We only have three-byte code units. Use fast path. - const __m128i shuffle = _mm_setr_epi8(2, 3, 1, 6, 7, 5, 10, 11, 9, 14, - 15, 13, -1, -1, -1, -1); - const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle); - const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle); - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += 12; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += 12; - buf += 8; - continue; - } - const uint8_t mask0 = uint8_t(mask); - + __m256i one_byte_bytemask = __lasx_xvsle_hu(in, __lasx_xvrepli_h(0x7F)); + __m256i one_byte_bytemask_low = + __lasx_xvilvl_h(one_byte_bytemask, one_byte_bytemask); + __m256i one_byte_bytemask_high = + __lasx_xvilvh_h(one_byte_bytemask, one_byte_bytemask); + + __m256i one_or_two_bytes_bytemask_low = + __lasx_xvilvl_h(one_or_two_bytes_bytemask, zero); + __m256i one_or_two_bytes_bytemask_high = + __lasx_xvilvh_h(one_or_two_bytes_bytemask, zero); + + __m256i mask0 = __lasx_xvmskltz_h( + __lasx_xvor_v(one_or_two_bytes_bytemask_low, one_byte_bytemask_low)); + __m256i mask1 = __lasx_xvmskltz_h(__lasx_xvor_v( + one_or_two_bytes_bytemask_high, one_byte_bytemask_high)); + + uint32_t mask = __lasx_xvpickve2gr_wu(mask0, 0); const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); - const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0); - - const uint8_t mask1 = static_cast(mask >> 8); + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle0 = __lsx_vld(row0, 1); + __m128i utf8_0 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out0), shuffle0); + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; + mask = __lasx_xvpickve2gr_wu(mask1, 0); const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); - const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1); - - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += row0[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_1 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out1), shuffle1); + __lsx_vst(utf8_1, utf8_output, 0); utf8_output += row1[0]; - buf += 8; + mask = __lasx_xvpickve2gr_wu(mask0, 4); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_2 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out0), shuffle2); + __lsx_vst(utf8_2, utf8_output, 0); + utf8_output += row2[0]; + + mask = __lasx_xvpickve2gr_wu(mask1, 4); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle3 = __lsx_vld(row3, 1); + __m128i utf8_3 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out1), shuffle3); + __lsx_vst(utf8_3, utf8_output, 0); + utf8_output += row3[0]; + + buf += 16; // surrogate pair(s) in a register } else { // Let us do a scalar fallback. @@ -36436,7 +51446,9 @@ sse_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_output) { forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k]; + uint16_t word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k]) + : buf[k]; if ((word & 0xFF80) == 0) { *utf8_output++ = char(word); } else if ((word & 0xF800) == 0) { @@ -36449,12 +51461,14 @@ sse_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_output) { } else { // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = - big_endian ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1]; + uint16_t next_word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k + 1]) + : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); if ((diff | diff2) > 0x3FF) { - return std::make_pair(nullptr, utf8_output); + return std::make_pair(nullptr, + reinterpret_cast(utf8_output)); } uint32_t value = (diff << 10) + diff2 + 0x10000; *utf8_output++ = char((value >> 18) | 0b11110000); @@ -36466,8 +51480,7 @@ sse_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_output) { buf += k; } } // while - - return std::make_pair(buf, utf8_output); + return std::make_pair(buf, reinterpret_cast(utf8_output)); } /* @@ -36479,181 +51492,205 @@ sse_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_output) { */ template std::pair -sse_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, - char *utf8_output) { +lasx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, + char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); const char16_t *start = buf; const char16_t *end = buf + len; - const __m128i v_0000 = _mm_setzero_si128(); - const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800); - const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800); const size_t safety_margin = 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m128i in = _mm_loadu_si128((__m128i *)buf); - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - in = _mm_shuffle_epi8(in, swap); + __m256i v_07ff = __lasx_xvreplgr2vr_h(uint16_t(0x7ff)); + __m256i zero = __lasx_xvldi(0); + __m128i zero_128 = __lsx_vldi(0); + while (buf + 16 + safety_margin <= end) { + __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); + if (!match_system(big_endian)) { + in = lasx_swap_bytes(in); } - // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes - const __m128i v_ff80 = _mm_set1_epi16((int16_t)0xff80); - if (_mm_testz_si128(in, v_ff80)) { // ASCII fast path!!!! - __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - nextin = _mm_shuffle_epi8(nextin, swap); - } - if (!_mm_testz_si128(nextin, v_ff80)) { - // 1. pack the bytes - // obviously suboptimal. - const __m128i utf8_packed = _mm_packus_epi16(in, in); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - in = nextin; - } else { - // 1. pack the bytes - // obviously suboptimal. - const __m128i utf8_packed = _mm_packus_epi16(in, nextin); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } + if (__lasx_xbnz_h(__lasx_xvslt_hu( + in, __lasx_xvrepli_h(0x7F)))) { // ASCII fast path!!!! + // 1. pack the bytes + __m256i utf8_packed = + __lasx_xvpermi_d(__lasx_xvpickev_b(in, in), 0b00001000); + // 2. store (16 bytes) + __lsx_vst(lasx_extracti128_lo(utf8_packed), utf8_output, 0); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! } - // no bits set above 7th bit - const __m128i one_byte_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in, v_ff80), v_0000); - const uint16_t one_byte_bitmask = - static_cast(_mm_movemask_epi8(one_byte_bytemask)); + if (__lasx_xbz_v(__lasx_xvslt_hu(v_07ff, in))) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 16 + // expected output : [110a|aaaa|10bb|bbbb] x 16 + // t0 = [000a|aaaa|bbbb|bb00] + __m256i t0 = __lasx_xvslli_h(in, 2); + // t1 = [000a|aaaa|0000|0000] + __m256i t1 = __lasx_xvand_v(t0, __lasx_xvldi(-2785 /*0x1f00*/)); + // t2 = [0000|0000|00bb|bbbb] + __m256i t2 = __lasx_xvand_v(in, __lasx_xvrepli_h(0x3f)); + // t3 = [000a|aaaa|00bb|bbbb] + __m256i t3 = __lasx_xvor_v(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + __m256i v_c080 = __lasx_xvreplgr2vr_h(uint16_t(0xc080)); + __m256i t4 = __lasx_xvor_v(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + __m256i one_byte_bytemask = + __lasx_xvsle_hu(in, __lasx_xvrepli_h(0x7F /*0x007F*/)); + __m256i utf8_unpacked = __lasx_xvbitsel_v(t4, in, one_byte_bytemask); + // 3. prepare bitmask for 8-bit lookup + __m256i mask = __lasx_xvmskltz_h(one_byte_bytemask); + uint32_t m1 = __lasx_xvpickve2gr_wu(mask, 0); + uint32_t m2 = __lasx_xvpickve2gr_wu(mask, 4); + // 4. pack the bytes + const uint8_t *row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m1]][0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_packed1 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(utf8_unpacked), shuffle1); + + const uint8_t *row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_packed2 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(utf8_unpacked), shuffle2); + // 5. store bytes + __lsx_vst(utf8_packed1, utf8_output, 0); + utf8_output += row1[0]; - // no bits set above 11th bit - const __m128i one_or_two_bytes_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_0000); - const uint16_t one_or_two_bytes_bitmask = - static_cast(_mm_movemask_epi8(one_or_two_bytes_bytemask)); + __lsx_vst(utf8_packed2, utf8_output, 0); + utf8_output += row2[0]; - if (one_or_two_bytes_bitmask == 0xffff) { - internal::westmere::write_v_u16_11bits_to_utf8( - in, utf8_output, one_byte_bytemask, one_byte_bitmask); - buf += 8; + buf += 16; continue; } - - // 1. Check if there are any surrogate word in the input chunk. - // We have also deal with situation when there is a surrogate word - // at the end of a chunk. - const __m128i surrogates_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_d800); - - // bitmask = 0x0000 if there are no surrogates - // = 0xc000 if the last word is a surrogate - const uint16_t surrogates_bitmask = - static_cast(_mm_movemask_epi8(surrogates_bytemask)); + __m256i surrogates_bytemask = + __lasx_xvseq_h(__lasx_xvand_v(in, __lasx_xvldi(-2568 /*0xF800*/)), + __lasx_xvldi(-2600 /*0xD800*/)); // It might seem like checking for surrogates_bitmask == 0xc000 could help. // However, it is likely an uncommon occurrence. - if (surrogates_bitmask == 0x0000) { + if (__lasx_xbz_v(surrogates_bytemask)) { // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606, - 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. + We precompute byte 1 for case #1 and the common byte for cases #2 & #3 + in register t2. - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. + We precompute byte 1 for case #3 and -- **conditionally** -- + precompute either byte 1 for case #2 or byte 2 for case #3. Note that + they differ by exactly one bit. - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ + Finally from these two code units we build proper UTF-8 sequence, + taking into account the case (i.e, the number of bytes to write). + */ /** * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: * t2 => [0ccc|cccc] [10cc|cccc] * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) */ -#define simdutf_vec(x) _mm_set1_epi16(static_cast(x)) // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const __m128i t0 = _mm_shuffle_epi8(in, dup_even); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111)); + __m256i t0 = __lasx_xvpickev_b(in, in); + t0 = __lasx_xvilvl_b(t0, t0); + + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|00cc|cccc] + __m256i v_3f7f = __lasx_xvreplgr2vr_h(uint16_t(0x3F7F)); + __m256i t1 = __lasx_xvand_v(t0, v_3f7f); // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const __m128i t2 = _mm_or_si128(t1, simdutf_vec(0b1000000000000000)); + __m256i t2 = __lasx_xvor_v(t1, __lasx_xvldi(-2688)); - // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] - const __m128i s0 = _mm_srli_epi16(in, 4); - // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] - const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100)); - // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] - const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140)); - // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000)); - const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, - simdutf_vec(0b0100000000000000)); - const __m128i s4 = _mm_xor_si128(s3, m0); -#undef simdutf_vec + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + __m256i s0 = __lasx_xvsrli_h(in, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + __m256i s1 = __lasx_xvslli_h(in, 2); + // s1: [aabb|bbbb|cccc|cc00] => [00bb|bbbb|0000|0000] + s1 = __lasx_xvand_v(s1, __lasx_xvldi(-2753 /*0x3F00*/)); + + // [00bb|bbbb|0000|aaaa] + __m256i s2 = __lasx_xvor_v(s0, s1); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + __m256i v_c0e0 = __lasx_xvreplgr2vr_h(uint16_t(0xC0E0)); + __m256i s3 = __lasx_xvor_v(s2, v_c0e0); + __m256i one_or_two_bytes_bytemask = __lasx_xvsle_hu(in, v_07ff); + __m256i m0 = __lasx_xvandn_v(one_or_two_bytes_bytemask, + __lasx_xvldi(-2752 /*0x4000*/)); + __m256i s4 = __lasx_xvxor_v(s3, m0); // 4. expand code units 16-bit => 32-bit - const __m128i out0 = _mm_unpacklo_epi16(t2, s4); - const __m128i out1 = _mm_unpackhi_epi16(t2, s4); + __m256i out0 = __lasx_xvilvl_h(s4, t2); + __m256i out1 = __lasx_xvilvh_h(s4, t2); // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint16_t mask = - (one_byte_bitmask & 0x5555) | (one_or_two_bytes_bitmask & 0xaaaa); - if (mask == 0) { - // We only have three-byte code units. Use fast path. - const __m128i shuffle = _mm_setr_epi8(2, 3, 1, 6, 7, 5, 10, 11, 9, 14, - 15, 13, -1, -1, -1, -1); - const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle); - const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle); - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += 12; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += 12; - buf += 8; - continue; - } - const uint8_t mask0 = uint8_t(mask); - + __m256i one_byte_bytemask = __lasx_xvsle_hu(in, __lasx_xvrepli_h(0x7F)); + __m256i one_byte_bytemask_low = + __lasx_xvilvl_h(one_byte_bytemask, one_byte_bytemask); + __m256i one_byte_bytemask_high = + __lasx_xvilvh_h(one_byte_bytemask, one_byte_bytemask); + + __m256i one_or_two_bytes_bytemask_low = + __lasx_xvilvl_h(one_or_two_bytes_bytemask, zero); + __m256i one_or_two_bytes_bytemask_high = + __lasx_xvilvh_h(one_or_two_bytes_bytemask, zero); + + __m256i mask0 = __lasx_xvmskltz_h( + __lasx_xvor_v(one_or_two_bytes_bytemask_low, one_byte_bytemask_low)); + __m256i mask1 = __lasx_xvmskltz_h(__lasx_xvor_v( + one_or_two_bytes_bytemask_high, one_byte_bytemask_high)); + + uint32_t mask = __lasx_xvpickve2gr_wu(mask0, 0); const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); - const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0); - - const uint8_t mask1 = static_cast(mask >> 8); + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle0 = __lsx_vld(row0, 1); + __m128i utf8_0 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out0), shuffle0); + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; + mask = __lasx_xvpickve2gr_wu(mask1, 0); const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); - const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1); - - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += row0[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_1 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out1), shuffle1); + __lsx_vst(utf8_1, utf8_output, 0); utf8_output += row1[0]; - buf += 8; + mask = __lasx_xvpickve2gr_wu(mask0, 4); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_2 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out0), shuffle2); + __lsx_vst(utf8_2, utf8_output, 0); + utf8_output += row2[0]; + + mask = __lasx_xvpickve2gr_wu(mask1, 4); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle3 = __lsx_vld(row3, 1); + __m128i utf8_3 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out1), shuffle3); + __lsx_vst(utf8_3, utf8_output, 0); + utf8_output += row3[0]; + + buf += 16; // surrogate pair(s) in a register } else { // Let us do a scalar fallback. @@ -36665,7 +51702,9 @@ sse_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k]; + uint16_t word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k]) + : buf[k]; if ((word & 0xFF80) == 0) { *utf8_output++ = char(word); } else if ((word & 0xF800) == 0) { @@ -36678,14 +51717,15 @@ sse_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, } else { // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = - big_endian ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1]; + uint16_t next_word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k + 1]) + : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); if ((diff | diff2) > 0x3FF) { return std::make_pair( result(error_code::SURROGATE, buf - start + k - 1), - utf8_output); + reinterpret_cast(utf8_output)); } uint32_t value = (diff << 10) + diff2 + 0x10000; *utf8_output++ = char((value >> 18) | 0b11110000); @@ -36698,101 +51738,67 @@ sse_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, } } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); -} -/* end file src/westmere/sse_convert_utf16_to_utf8.cpp */ -/* begin file src/westmere/sse_convert_utf16_to_utf32.cpp */ -/* - The vectorized algorithm works on single SSE register i.e., it - loads eight 16-bit code units. - - We consider three cases: - 1. an input register contains no surrogates and each value - is in range 0x0000 .. 0x07ff. - 2. an input register contains no surrogates and values are - is in range 0x0000 .. 0xffff. - 3. an input register contains surrogates --- i.e. codepoints - can have 16 or 32 bits. - - Ad 1. - - When values are less than 0x0800, it means that a 16-bit code unit - can be converted into: 1) single UTF8 byte (when it's an ASCII - char) or 2) two UTF8 bytes. - - For this case we do only some shuffle to obtain these 2-byte - codes and finally compress the whole SSE register with a single - shuffle. - - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. - - Ad 2. - - When values fit in 16-bit code units, but are above 0x07ff, then - a single word may produce one, two or three UTF8 bytes. - - We prepare data for all these three cases in two registers. - The first register contains lower two UTF8 bytes (used in all - cases), while the second one contains just the third byte for - the three-UTF8-bytes case. - - Finally these two registers are interleaved forming eight-element - array of 32-bit values. The array spans two SSE registers. - The bytes from the registers are compressed using two shuffles. - - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. - - - To summarize: - - We need two 256-entry tables that have 8704 bytes in total. -*/ - -/* - Returns a pair: the first unprocessed byte from buf and utf8_output - A scalar routing should carry on the conversion of the tail. -*/ + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf8_output)); +} +/* end file src/lasx/lasx_convert_utf16_to_utf8.cpp */ +/* begin file src/lasx/lasx_convert_utf16_to_utf32.cpp */ template std::pair -sse_convert_utf16_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_output) { +lasx_convert_utf16_to_utf32(const char16_t *buf, size_t len, + char32_t *utf32_out) { + uint32_t *utf32_output = reinterpret_cast(utf32_out); const char16_t *end = buf + len; - const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800); - const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800); + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)utf32_output & 0x1f) && buf < end) { + uint16_t word = + !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[0]) : buf[0]; + if ((word & 0xF800) != 0xD800) { + *utf32_output++ = char32_t(word); + buf++; + } else { + if (buf + 1 >= end) { + return std::make_pair(nullptr, + reinterpret_cast(utf32_output)); + } + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[1]) + : buf[1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(nullptr, + reinterpret_cast(utf32_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + buf += 2; + } + } - while (end - buf >= 8) { - __m128i in = _mm_loadu_si128((__m128i *)buf); + __m256i v_f800 = __lasx_xvldi(-2568); /*0xF800*/ + __m256i v_d800 = __lasx_xvldi(-2600); /*0xD800*/ - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - in = _mm_shuffle_epi8(in, swap); + while (buf + 16 <= end) { + __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); + if (!match_system(big_endian)) { + in = lasx_swap_bytes(in); } - // 1. Check if there are any surrogate word in the input chunk. - // We have also deal with situation when there is a surrogate word - // at the end of a chunk. - const __m128i surrogates_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_d800); - - // bitmask = 0x0000 if there are no surrogates - // = 0xc000 if the last word is a surrogate - const uint16_t surrogates_bitmask = - static_cast(_mm_movemask_epi8(surrogates_bytemask)); + __m256i surrogates_bytemask = + __lasx_xvseq_h(__lasx_xvand_v(in, v_f800), v_d800); // It might seem like checking for surrogates_bitmask == 0xc000 could help. // However, it is likely an uncommon occurrence. - if (surrogates_bitmask == 0x0000) { - // case: no surrogate pair, extend 16-bit code units to 32-bit code units - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), - _mm_cvtepu16_epi32(in)); - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 4), - _mm_cvtepu16_epi32(_mm_srli_si128(in, 8))); - utf32_output += 8; - buf += 8; + if (__lasx_xbz_v(surrogates_bytemask)) { + // case: no surrogate pairs, extend all 16-bit code units to 32-bit code + // units + __m256i in_hi = __lasx_xvpermi_q(in, in, 0b00000001); + __lasx_xvst(__lasx_vext2xv_wu_hu(in), utf32_output, 0); + __lasx_xvst(__lasx_vext2xv_wu_hu(in_hi), utf32_output, 32); + utf32_output += 16; + buf += 16; // surrogate pair(s) in a register } else { // Let us do a scalar fallback. @@ -36804,18 +51810,22 @@ sse_convert_utf16_to_utf32(const char16_t *buf, size_t len, forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k]; + uint16_t word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k]) + : buf[k]; if ((word & 0xF800) != 0xD800) { *utf32_output++ = char32_t(word); } else { // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = - big_endian ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1]; + uint16_t next_word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k + 1]) + : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); if ((diff | diff2) > 0x3FF) { - return std::make_pair(nullptr, utf32_output); + return std::make_pair(nullptr, + reinterpret_cast(utf32_output)); } uint32_t value = (diff << 10) + diff2 + 0x10000; *utf32_output++ = char32_t(value); @@ -36824,7 +51834,7 @@ sse_convert_utf16_to_utf32(const char16_t *buf, size_t len, buf += k; } } // while - return std::make_pair(buf, utf32_output); + return std::make_pair(buf, reinterpret_cast(utf32_output)); } /* @@ -36836,43 +51846,59 @@ sse_convert_utf16_to_utf32(const char16_t *buf, size_t len, */ template std::pair -sse_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, - char32_t *utf32_output) { +lasx_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, + char32_t *utf32_out) { + uint32_t *utf32_output = reinterpret_cast(utf32_out); const char16_t *start = buf; const char16_t *end = buf + len; - const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800); - const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800); - - while (end - buf >= 8) { - __m128i in = _mm_loadu_si128((__m128i *)buf); - - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - in = _mm_shuffle_epi8(in, swap); + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)utf32_output & 0x1f) && buf < end) { + uint16_t word = + !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[0]) : buf[0]; + if ((word & 0xF800) != 0xD800) { + *utf32_output++ = char32_t(word); + buf++; + } else if (buf + 1 < end) { + // must be a surrogate pair + uint16_t diff = uint16_t(word - 0xD800); + uint16_t next_word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[1]) + : buf[1]; + uint16_t diff2 = uint16_t(next_word - 0xDC00); + if ((diff | diff2) > 0x3FF) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + reinterpret_cast(utf32_output)); + } + uint32_t value = (diff << 10) + diff2 + 0x10000; + *utf32_output++ = char32_t(value); + buf += 2; + } else { + return std::make_pair(result(error_code::SURROGATE, buf - start), + reinterpret_cast(utf32_output)); } + } - // 1. Check if there are any surrogate word in the input chunk. - // We have also deal with situation when there is a surrogate word - // at the end of a chunk. - const __m128i surrogates_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_d800); + __m256i v_f800 = __lasx_xvldi(-2568); /*0xF800*/ + __m256i v_d800 = __lasx_xvldi(-2600); /*0xD800*/ + while (buf + 16 <= end) { + __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); + if (!match_system(big_endian)) { + in = lasx_swap_bytes(in); + } - // bitmask = 0x0000 if there are no surrogates - // = 0xc000 if the last word is a surrogate - const uint16_t surrogates_bitmask = - static_cast(_mm_movemask_epi8(surrogates_bytemask)); + __m256i surrogates_bytemask = + __lasx_xvseq_h(__lasx_xvand_v(in, v_f800), v_d800); // It might seem like checking for surrogates_bitmask == 0xc000 could help. // However, it is likely an uncommon occurrence. - if (surrogates_bitmask == 0x0000) { - // case: no surrogate pair, extend 16-bit code units to 32-bit code units - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), - _mm_cvtepu16_epi32(in)); - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 4), - _mm_cvtepu16_epi32(_mm_srli_si128(in, 8))); - utf32_output += 8; - buf += 8; + if (__lasx_xbz_v(surrogates_bytemask)) { + // case: no surrogate pairs, extend all 16-bit code units to 32-bit code + // units + __m256i in_hi = __lasx_xvpermi_q(in, in, 0b00000001); + __lasx_xvst(__lasx_vext2xv_wu_hu(in), utf32_output, 0); + __lasx_xvst(__lasx_vext2xv_wu_hu(in_hi), utf32_output, 32); + utf32_output += 16; + buf += 16; // surrogate pair(s) in a register } else { // Let us do a scalar fallback. @@ -36884,20 +51910,23 @@ sse_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, forward = size_t(end - buf - 1); } for (; k < forward; k++) { - uint16_t word = big_endian ? scalar::utf16::swap_bytes(buf[k]) : buf[k]; + uint16_t word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k]) + : buf[k]; if ((word & 0xF800) != 0xD800) { *utf32_output++ = char32_t(word); } else { // must be a surrogate pair uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = - big_endian ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1]; + uint16_t next_word = !match_system(big_endian) + ? scalar::utf16::swap_bytes(buf[k + 1]) + : buf[k + 1]; k++; uint16_t diff2 = uint16_t(next_word - 0xDC00); if ((diff | diff2) > 0x3FF) { return std::make_pair( result(error_code::SURROGATE, buf - start + k - 1), - utf32_output); + reinterpret_cast(utf32_output)); } uint32_t value = (diff << 10) + diff2 + 0x10000; *utf32_output++ = char32_t(value); @@ -36906,379 +51935,337 @@ sse_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, buf += k; } } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), utf32_output); + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf32_output)); } -/* end file src/westmere/sse_convert_utf16_to_utf32.cpp */ +/* end file src/lasx/lasx_convert_utf16_to_utf32.cpp */ -/* begin file src/westmere/sse_convert_utf32_to_latin1.cpp */ +/* begin file src/lasx/lasx_convert_utf32_to_latin1.cpp */ std::pair -sse_convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) { - const size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16 - - __m128i high_bytes_mask = _mm_set1_epi32(0xFFFFFF00); - __m128i shufmask = - _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0); - - for (size_t i = 0; i < rounded_len; i += 16) { - __m128i in1 = _mm_loadu_si128((__m128i *)buf); - __m128i in2 = _mm_loadu_si128((__m128i *)(buf + 4)); - __m128i in3 = _mm_loadu_si128((__m128i *)(buf + 8)); - __m128i in4 = _mm_loadu_si128((__m128i *)(buf + 12)); +lasx_convert_utf32_to_latin1(const char32_t *buf, size_t len, + char *latin1_output) { + const char32_t *end = buf + len; + const __m256i shuf_mask = ____m256i( + (__m128i)v16u8{0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}); + __m256i v_ff = __lasx_xvrepli_w(0xFF); - __m128i check_combined = _mm_or_si128(in1, in2); - check_combined = _mm_or_si128(check_combined, in3); - check_combined = _mm_or_si128(check_combined, in4); + while (buf + 16 <= end) { + __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 0); + __m256i in2 = __lasx_xvld(reinterpret_cast(buf), 32); - if (!_mm_testz_si128(check_combined, high_bytes_mask)) { - return std::make_pair(nullptr, latin1_output); + __m256i in12 = __lasx_xvor_v(in1, in2); + if (__lasx_xbz_v(__lasx_xvslt_wu(v_ff, in12))) { + // 1. pack the bytes + __m256i latin1_packed_tmp = __lasx_xvshuf_b(in2, in1, shuf_mask); + latin1_packed_tmp = __lasx_xvpermi_d(latin1_packed_tmp, 0b00001000); + __m128i latin1_packed = lasx_extracti128_lo(latin1_packed_tmp); + latin1_packed = __lsx_vpermi_w(latin1_packed, latin1_packed, 0b11011000); + // 2. store (8 bytes) + __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); + // 3. adjust pointers + buf += 16; + latin1_output += 16; + } else { + return std::make_pair(nullptr, reinterpret_cast(latin1_output)); } - __m128i pack1 = _mm_unpacklo_epi32(_mm_shuffle_epi8(in1, shufmask), - _mm_shuffle_epi8(in2, shufmask)); - __m128i pack2 = _mm_unpacklo_epi32(_mm_shuffle_epi8(in3, shufmask), - _mm_shuffle_epi8(in4, shufmask)); - __m128i pack = _mm_unpacklo_epi64(pack1, pack2); - _mm_storeu_si128((__m128i *)latin1_output, pack); - latin1_output += 16; - buf += 16; - } - + } // while return std::make_pair(buf, latin1_output); } std::pair -sse_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) { +lasx_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, + char *latin1_output) { const char32_t *start = buf; - const size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16 - - __m128i high_bytes_mask = _mm_set1_epi32(0xFFFFFF00); - __m128i shufmask = - _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0); + const char32_t *end = buf + len; - for (size_t i = 0; i < rounded_len; i += 16) { - __m128i in1 = _mm_loadu_si128((__m128i *)buf); - __m128i in2 = _mm_loadu_si128((__m128i *)(buf + 4)); - __m128i in3 = _mm_loadu_si128((__m128i *)(buf + 8)); - __m128i in4 = _mm_loadu_si128((__m128i *)(buf + 12)); + const __m256i shuf_mask = ____m256i( + (__m128i)v16u8{0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}); + __m256i v_ff = __lasx_xvrepli_w(0xFF); - __m128i check_combined = _mm_or_si128(in1, in2); - check_combined = _mm_or_si128(check_combined, in3); - check_combined = _mm_or_si128(check_combined, in4); + while (buf + 16 <= end) { + __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 0); + __m256i in2 = __lasx_xvld(reinterpret_cast(buf), 32); - if (!_mm_testz_si128(check_combined, high_bytes_mask)) { - // Fallback to scalar code for handling errors + __m256i in12 = __lasx_xvor_v(in1, in2); + if (__lasx_xbz_v(__lasx_xvslt_wu(v_ff, in12))) { + // 1. pack the bytes + __m256i latin1_packed_tmp = __lasx_xvshuf_b(in2, in1, shuf_mask); + latin1_packed_tmp = __lasx_xvpermi_d(latin1_packed_tmp, 0b00001000); + __m128i latin1_packed = lasx_extracti128_lo(latin1_packed_tmp); + latin1_packed = __lsx_vpermi_w(latin1_packed, latin1_packed, 0b11011000); + // 2. store (8 bytes) + __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); + // 3. adjust pointers + buf += 16; + latin1_output += 16; + } else { + // Let us do a scalar fallback. for (int k = 0; k < 16; k++) { - char32_t codepoint = buf[k]; - if (codepoint <= 0xff) { - *latin1_output++ = char(codepoint); + uint32_t word = buf[k]; + if (word <= 0xff) { + *latin1_output++ = char(word); } else { return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), latin1_output); } } - buf += 16; - continue; } - __m128i pack1 = _mm_unpacklo_epi32(_mm_shuffle_epi8(in1, shufmask), - _mm_shuffle_epi8(in2, shufmask)); - __m128i pack2 = _mm_unpacklo_epi32(_mm_shuffle_epi8(in3, shufmask), - _mm_shuffle_epi8(in4, shufmask)); - __m128i pack = _mm_unpacklo_epi64(pack1, pack2); - _mm_storeu_si128((__m128i *)latin1_output, pack); - latin1_output += 16; - buf += 16; - } - + } // while return std::make_pair(result(error_code::SUCCESS, buf - start), latin1_output); } -/* end file src/westmere/sse_convert_utf32_to_latin1.cpp */ -/* begin file src/westmere/sse_convert_utf32_to_utf8.cpp */ +/* end file src/lasx/lasx_convert_utf32_to_latin1.cpp */ +/* begin file src/lasx/lasx_convert_utf32_to_utf8.cpp */ std::pair -sse_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_output) { +lasx_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); const char32_t *end = buf + len; - const __m128i v_0000 = _mm_setzero_si128(); //__m128 = 128 bits - const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800); // 1111 1000 0000 - // 0000 - const __m128i v_c080 = _mm_set1_epi16((uint16_t)0xc080); // 1100 0000 1000 - // 0000 - const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80); // 1111 1111 1000 - // 0000 - const __m128i v_ffff0000 = _mm_set1_epi32( - (uint32_t)0xffff0000); // 1111 1111 1111 1111 0000 0000 0000 0000 - const __m128i v_7fffffff = _mm_set1_epi32( - (uint32_t)0x7fffffff); // 0111 1111 1111 1111 1111 1111 1111 1111 - __m128i running_max = _mm_setzero_si128(); - __m128i forbidden_bytemask = _mm_setzero_si128(); + // load addr align 32 + while (((uint64_t)buf & 0x1F) && buf < end) { + uint32_t word = *buf; + if ((word & 0xFFFFFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + if (word > 0x10FFFF) { + return std::make_pair(nullptr, reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + buf++; + } + + __m256i v_c080 = __lasx_xvreplgr2vr_h(uint16_t(0xC080)); + __m256i v_07ff = __lasx_xvreplgr2vr_h(uint16_t(0x7FF)); + __m256i v_dfff = __lasx_xvreplgr2vr_h(uint16_t(0xDFFF)); + __m256i v_d800 = __lasx_xvldi(-2600); /*0xD800*/ + __m256i zero = __lasx_xvldi(0); + __m128i zero_128 = __lsx_vldi(0); + __m256i forbidden_bytemask = __lasx_xvldi(0x0); + const size_t safety_margin = 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 - while (end - buf >= - std::ptrdiff_t( - 16 + safety_margin)) { // buf is a char32_t pointer, each char32_t - // has 4 bytes or 32 bits, thus buf + 16 * - // char_32t = 512 bits = 64 bytes - // We load two 16 bytes registers for a total of 32 bytes or 16 characters. - __m128i in = _mm_loadu_si128((__m128i *)buf); - __m128i nextin = _mm_loadu_si128( - (__m128i *)buf + 1); // These two values can hold only 8 UTF32 chars - running_max = _mm_max_epu32( - _mm_max_epu32(in, running_max), // take element-wise max char32_t from - // in and running_max vector - nextin); // and take element-wise max element from nextin and - // running_max vector - - // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned - // saturation - __m128i in_16 = _mm_packus_epi32( - _mm_and_si128(in, v_7fffffff), - _mm_and_si128( - nextin, - v_7fffffff)); // in this context pack the two __m128 into a single - // By ensuring the highest bit is set to 0(&v_7fffffff), we are making sure - // all values are interpreted as non-negative, or specifically, the values - // are within the range of valid Unicode code points. remember : having - // leading byte 0 means a positive number by the two complements system. - // Unicode is well beneath the range where you'll start getting issues so - // that's OK. - - // Try to apply UTF-16 => UTF-8 from ./sse_convert_utf16_to_utf8.cpp + while (buf + 16 + safety_margin < end) { + __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); + __m256i nextin = __lasx_xvld(reinterpret_cast(buf), 32); - // Check for ASCII fast path + // Check if no bits set above 16th + if (__lasx_xbz_v(__lasx_xvpickod_h(in, nextin))) { + // Pack UTF-32 to UTF-16 safely (without surrogate pairs) + // Apply UTF-16 => UTF-8 routine (lasx_convert_utf16_to_utf8.cpp) + __m256i utf16_packed = + __lasx_xvpermi_d(__lasx_xvpickev_h(nextin, in), 0b11011000); - // ASCII fast path!!!! - // We eagerly load another 32 bytes, hoping that they will be ASCII too. - // The intuition is that we try to collect 16 ASCII characters which - // requires a total of 64 bytes of input. If we fail, we just pass thirdin - // and fourthin as our new inputs. - if (_mm_testz_si128(in_16, v_ff80)) { // if the first two blocks are ASCII - __m128i thirdin = _mm_loadu_si128((__m128i *)buf + 2); - __m128i fourthin = _mm_loadu_si128((__m128i *)buf + 3); - running_max = _mm_max_epu32( - _mm_max_epu32(thirdin, running_max), - fourthin); // take the running max of all 4 vectors thus far - __m128i nextin_16 = _mm_packus_epi32( - _mm_and_si128(thirdin, v_7fffffff), - _mm_and_si128(fourthin, - v_7fffffff)); // pack into 1 vector, now you have two - if (!_mm_testz_si128( - nextin_16, - v_ff80)) { // checks if the second packed vector is ASCII, if not: + if (__lasx_xbz_v(__lasx_xvslt_hu(__lasx_xvrepli_h(0x7F), + utf16_packed))) { // ASCII fast path!!!! // 1. pack the bytes // obviously suboptimal. - const __m128i utf8_packed = _mm_packus_epi16( - in_16, in_16); // creates two copy of in_16 in 1 vector - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, - utf8_packed); // put them into the output - // 3. adjust pointers - buf += 8; // the char32_t buffer pointer goes up 8 char32_t chars* 32 - // bits = 256 bits - utf8_output += - 8; // same with output, e.g. lift the first two blocks alone. - // Proceed with next input - in_16 = nextin_16; - // We need to update in and nextin because they are used later. - in = thirdin; - nextin = fourthin; - } else { - // 1. pack the bytes - const __m128i utf8_packed = _mm_packus_epi16(in_16, nextin_16); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); + __m256i utf8_packed = __lasx_xvpermi_d( + __lasx_xvpickev_b(utf16_packed, utf16_packed), 0b00001000); + // 2. store (8 bytes) + __lsx_vst(lasx_extracti128_lo(utf8_packed), utf8_output, 0); // 3. adjust pointers buf += 16; utf8_output += 16; continue; // we are done for this round! } - } - - // no bits set above 7th bit -- find out all the ASCII characters - const __m128i one_byte_bytemask = - _mm_cmpeq_epi16( // this takes four bytes at a time and compares: - _mm_and_si128(in_16, v_ff80), // the vector that get only the first - // 9 bits of each 16-bit/2-byte units - v_0000 // - ); // they should be all zero if they are ASCII. E.g. ASCII in UTF32 is - // of format 0000 0000 0000 0XXX XXXX - // _mm_cmpeq_epi16 should now return a 1111 1111 1111 1111 for equals, and - // 0000 0000 0000 0000 if not for each 16-bit/2-byte units - const uint16_t one_byte_bitmask = static_cast(_mm_movemask_epi8( - one_byte_bytemask)); // collect the MSB from previous vector and put - // them into uint16_t mas - - // no bits set above 11th bit - const __m128i one_or_two_bytes_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_0000); - const uint16_t one_or_two_bytes_bitmask = - static_cast(_mm_movemask_epi8(one_or_two_bytes_bytemask)); - - if (one_or_two_bytes_bitmask == 0xffff) { - // case: all code units either produce 1 or 2 UTF-8 bytes (at least one - // produces 2 bytes) - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const __m128i v_1f00 = - _mm_set1_epi16((int16_t)0x1f00); // 0001 1111 0000 0000 - const __m128i v_003f = - _mm_set1_epi16((int16_t)0x003f); // 0000 0000 0011 1111 - - // t0 = [000a|aaaa|bbbb|bb00] - const __m128i t0 = _mm_slli_epi16(in_16, 2); // shift packed vector by two - // t1 = [000a|aaaa|0000|0000] - const __m128i t1 = - _mm_and_si128(t0, v_1f00); // potentital first utf8 byte - // t2 = [0000|0000|00bb|bbbb] - const __m128i t2 = - _mm_and_si128(in_16, v_003f); // potential second utf8 byte - // t3 = [000a|aaaa|00bb|bbbb] - const __m128i t3 = - _mm_or_si128(t1, t2); // first and second potential utf8 byte together - // t4 = [110a|aaaa|10bb|bbbb] - const __m128i t4 = _mm_or_si128( - t3, - v_c080); // t3 | 1100 0000 1000 0000 = full potential 2-byte utf8 unit - - // 2. merge ASCII and 2-byte codewords - const __m128i utf8_unpacked = - _mm_blendv_epi8(t4, in_16, one_byte_bytemask); - - // 3. prepare bitmask for 8-bit lookup - // one_byte_bitmask = hhggffeeddccbbaa -- the bits are doubled (h - - // MSB, a - LSB) - const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a - const uint16_t m1 = - static_cast(m0 >> 7); // m1 = 00000000h0g0f0e0 - const uint8_t m2 = - static_cast((m0 | m1) & 0xff); // m2 = hdgcfbea - // 4. pack the bytes - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; - const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); - const __m128i utf8_packed = _mm_shuffle_epi8(utf8_unpacked, shuffle); - - // 5. store bytes - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - - // 6. adjust pointers - buf += 8; - utf8_output += row[0]; - continue; - } - - // Check for overflow in packing - - const __m128i saturation_bytemask = _mm_cmpeq_epi32( - _mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000); - const uint32_t saturation_bitmask = - static_cast(_mm_movemask_epi8(saturation_bytemask)); - if (saturation_bitmask == 0xffff) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800); - forbidden_bytemask = - _mm_or_si128(forbidden_bytemask, - _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_d800)); - - const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606, - 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); - - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - - two UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) _mm_set1_epi16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const __m128i t0 = _mm_shuffle_epi8(in_16, dup_even); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const __m128i t2 = _mm_or_si128(t1, simdutf_vec(0b1000000000000000)); + if (__lasx_xbz_v(__lasx_xvslt_hu(v_07ff, utf16_packed))) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 - // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] - const __m128i s0 = _mm_srli_epi16(in_16, 4); - // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] - const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100)); - // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] - const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140)); - // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000)); - const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, - simdutf_vec(0b0100000000000000)); - const __m128i s4 = _mm_xor_si128(s3, m0); -#undef simdutf_vec + // t0 = [000a|aaaa|bbbb|bb00] + const __m256i t0 = __lasx_xvslli_h(utf16_packed, 2); + // t1 = [000a|aaaa|0000|0000] + const __m256i t1 = __lasx_xvand_v(t0, __lasx_xvldi(-2785 /*0x1f00*/)); + // t2 = [0000|0000|00bb|bbbb] + const __m256i t2 = __lasx_xvand_v(utf16_packed, __lasx_xvrepli_h(0x3f)); + // t3 = [000a|aaaa|00bb|bbbb] + const __m256i t3 = __lasx_xvor_v(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m256i t4 = __lasx_xvor_v(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + __m256i one_byte_bytemask = + __lasx_xvsle_hu(utf16_packed, __lasx_xvrepli_h(0x7F /*0x007F*/)); + __m256i utf8_unpacked = + __lasx_xvbitsel_v(t4, utf16_packed, one_byte_bytemask); + // 3. prepare bitmask for 8-bit lookup + __m256i mask = __lasx_xvmskltz_h(one_byte_bytemask); + uint32_t m1 = __lasx_xvpickve2gr_wu(mask, 0); + uint32_t m2 = __lasx_xvpickve2gr_wu(mask, 4); + // 4. pack the bytes + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m1]][0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_packed1 = __lsx_vshuf_b( + zero_128, lasx_extracti128_lo(utf8_unpacked), shuffle1); + + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_packed2 = __lsx_vshuf_b( + zero_128, lasx_extracti128_hi(utf8_unpacked), shuffle2); + // 5. store bytes + __lsx_vst(utf8_packed1, utf8_output, 0); + utf8_output += row1[0]; - // 4. expand code units 16-bit => 32-bit - const __m128i out0 = _mm_unpacklo_epi16(t2, s4); - const __m128i out1 = _mm_unpackhi_epi16(t2, s4); + __lsx_vst(utf8_packed2, utf8_output, 0); + utf8_output += row2[0]; - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint16_t mask = - (one_byte_bitmask & 0x5555) | (one_or_two_bytes_bitmask & 0xaaaa); - if (mask == 0) { - // We only have three-byte code units. Use fast path. - const __m128i shuffle = _mm_setr_epi8(2, 3, 1, 6, 7, 5, 10, 11, 9, 14, - 15, 13, -1, -1, -1, -1); - const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle); - const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle); - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += 12; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += 12; - buf += 8; + buf += 16; continue; - } - const uint8_t mask0 = uint8_t(mask); + } else { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + forbidden_bytemask = __lasx_xvor_v( + __lasx_xvand_v( + __lasx_xvsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lasx_xvsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + forbidden_bytemask); + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & + #3 in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- + precompute either byte 1 for case #2 or byte 2 for case #3. Note that + they differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, + taking into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + __m256i t0 = __lasx_xvpickev_b(utf16_packed, utf16_packed); + t0 = __lasx_xvilvl_b(t0, t0); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + __m256i v_3f7f = __lasx_xvreplgr2vr_h(uint16_t(0x3F7F)); + __m256i t1 = __lasx_xvand_v(t0, v_3f7f); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + __m256i t2 = __lasx_xvor_v(t1, __lasx_xvldi(-2688 /*0x8000*/)); - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); - const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0); + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + __m256i s0 = __lasx_xvsrli_h(utf16_packed, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + __m256i s1 = __lasx_xvslli_h(utf16_packed, 2); + // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] + s1 = __lasx_xvand_v(s1, __lasx_xvldi(-2753 /*0x3F00*/)); + // [00bb|bbbb|0000|aaaa] + __m256i s2 = __lasx_xvor_v(s0, s1); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + __m256i v_c0e0 = __lasx_xvreplgr2vr_h(uint16_t(0xC0E0)); + __m256i s3 = __lasx_xvor_v(s2, v_c0e0); + // __m256i v_07ff = vmovq_n_u16((uint16_t)0x07FF); + __m256i one_or_two_bytes_bytemask = + __lasx_xvsle_hu(utf16_packed, v_07ff); + __m256i m0 = __lasx_xvandn_v(one_or_two_bytes_bytemask, + __lasx_xvldi(-2752 /*0x4000*/)); + __m256i s4 = __lasx_xvxor_v(s3, m0); - const uint8_t mask1 = static_cast(mask >> 8); + // 4. expand code units 16-bit => 32-bit + __m256i out0 = __lasx_xvilvl_h(s4, t2); + __m256i out1 = __lasx_xvilvh_h(s4, t2); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); - const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1); + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + __m256i one_byte_bytemask = + __lasx_xvsle_hu(utf16_packed, __lasx_xvrepli_h(0x7F)); + + __m256i one_or_two_bytes_bytemask_u16_to_u32_low = + __lasx_xvilvl_h(one_or_two_bytes_bytemask, zero); + __m256i one_or_two_bytes_bytemask_u16_to_u32_high = + __lasx_xvilvh_h(one_or_two_bytes_bytemask, zero); + + __m256i one_byte_bytemask_u16_to_u32_low = + __lasx_xvilvl_h(one_byte_bytemask, one_byte_bytemask); + __m256i one_byte_bytemask_u16_to_u32_high = + __lasx_xvilvh_h(one_byte_bytemask, one_byte_bytemask); + + __m256i mask0 = __lasx_xvmskltz_h( + __lasx_xvor_v(one_or_two_bytes_bytemask_u16_to_u32_low, + one_byte_bytemask_u16_to_u32_low)); + __m256i mask1 = __lasx_xvmskltz_h( + __lasx_xvor_v(one_or_two_bytes_bytemask_u16_to_u32_high, + one_byte_bytemask_u16_to_u32_high)); + + uint32_t mask = __lasx_xvpickve2gr_wu(mask0, 0); + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle0 = __lsx_vld(row0, 1); + __m128i utf8_0 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out0), shuffle0); + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += row0[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += row1[0]; + mask = __lasx_xvpickve2gr_wu(mask1, 0); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_1 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out1), shuffle1); + __lsx_vst(utf8_1, utf8_output, 0); + utf8_output += row1[0]; - buf += 8; + mask = __lasx_xvpickve2gr_wu(mask0, 4); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_2 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out0), shuffle2); + __lsx_vst(utf8_2, utf8_output, 0); + utf8_output += row2[0]; + + mask = __lasx_xvpickve2gr_wu(mask1, 4); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle3 = __lsx_vld(row3, 1); + __m128i utf8_3 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out1), shuffle3); + __lsx_vst(utf8_3, utf8_output, 0); + utf8_output += row3[0]; + + buf += 16; + } + // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> + // will produce four UTF-8 bytes. } else { - // case: at least one 32-bit word produce a surrogate pair in UTF-16 <=> - // will produce four UTF-8 bytes Let us do a scalar fallback. It may seem - // wasteful to use scalar code, but being efficient with SIMD in the - // presence of surrogate pairs may require non-trivial tables. + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. size_t forward = 15; size_t k = 0; if (size_t(end - buf) < forward + 1) { @@ -37293,14 +52280,16 @@ sse_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_output) { *utf8_output++ = char((word & 0b111111) | 0b10000000); } else if ((word & 0xFFFF0000) == 0) { if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(nullptr, utf8_output); + return std::make_pair(nullptr, + reinterpret_cast(utf8_output)); } *utf8_output++ = char((word >> 12) | 0b11100000); *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); *utf8_output++ = char((word & 0b111111) | 0b10000000); } else { if (word > 0x10FFFF) { - return std::make_pair(nullptr, utf8_output); + return std::make_pair(nullptr, + reinterpret_cast(utf8_output)); } *utf8_output++ = char((word >> 18) | 0b11110000); *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); @@ -37313,242 +52302,269 @@ sse_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_output) { } // while // check for invalid input - const __m128i v_10ffff = _mm_set1_epi32((uint32_t)0x10ffff); - if (static_cast(_mm_movemask_epi8(_mm_cmpeq_epi32( - _mm_max_epu32(running_max, v_10ffff), v_10ffff))) != 0xffff) { - return std::make_pair(nullptr, utf8_output); - } - - if (static_cast(_mm_movemask_epi8(forbidden_bytemask)) != 0) { - return std::make_pair(nullptr, utf8_output); + if (__lasx_xbnz_v(forbidden_bytemask)) { + return std::make_pair(nullptr, reinterpret_cast(utf8_output)); } - - return std::make_pair(buf, utf8_output); + return std::make_pair(buf, reinterpret_cast(utf8_output)); } std::pair -sse_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, - char *utf8_output) { - const char32_t *end = buf + len; +lasx_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, + char *utf8_out) { + uint8_t *utf8_output = reinterpret_cast(utf8_out); const char32_t *start = buf; + const char32_t *end = buf + len; - const __m128i v_0000 = _mm_setzero_si128(); - const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800); - const __m128i v_c080 = _mm_set1_epi16((uint16_t)0xc080); - const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80); - const __m128i v_ffff0000 = _mm_set1_epi32((uint32_t)0xffff0000); - const __m128i v_7fffffff = _mm_set1_epi32((uint32_t)0x7fffffff); - const __m128i v_10ffff = _mm_set1_epi32((uint32_t)0x10ffff); + // load addr align 32 + while (((uint64_t)buf & 0x1F) && buf < end) { + uint32_t word = *buf; + if ((word & 0xFFFFFF80) == 0) { + *utf8_output++ = char(word); + } else if ((word & 0xFFFFF800) == 0) { + *utf8_output++ = char((word >> 6) | 0b11000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else if ((word & 0xFFFF0000) == 0) { + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 12) | 0b11100000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } else { + if (word > 0x10FFFF) { + return std::make_pair(result(error_code::TOO_LARGE, buf - start), + reinterpret_cast(utf8_output)); + } + *utf8_output++ = char((word >> 18) | 0b11110000); + *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); + *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); + *utf8_output++ = char((word & 0b111111) | 0b10000000); + } + buf++; + } + __m256i v_c080 = __lasx_xvreplgr2vr_h(uint16_t(0xC080)); + __m256i v_07ff = __lasx_xvreplgr2vr_h(uint16_t(0x7FF)); + __m256i v_dfff = __lasx_xvreplgr2vr_h(uint16_t(0xDFFF)); + __m256i v_d800 = __lasx_xvldi(-2600); /*0xD800*/ + __m256i zero = __lasx_xvldi(0); + __m128i zero_128 = __lsx_vldi(0); + __m256i forbidden_bytemask = __lasx_xvldi(0x0); const size_t safety_margin = 12; // to avoid overruns, see issue // https://github.com/simdutf/simdutf/issues/92 - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - // We load two 16 bytes registers for a total of 32 bytes or 8 characters. - __m128i in = _mm_loadu_si128((__m128i *)buf); - __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); - // Check for too large input - __m128i max_input = _mm_max_epu32(_mm_max_epu32(in, nextin), v_10ffff); - if (static_cast(_mm_movemask_epi8( - _mm_cmpeq_epi32(max_input, v_10ffff))) != 0xffff) { - return std::make_pair(result(error_code::TOO_LARGE, buf - start), - utf8_output); - } - - // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned - // saturation - __m128i in_16 = _mm_packus_epi32(_mm_and_si128(in, v_7fffffff), - _mm_and_si128(nextin, v_7fffffff)); - - // Try to apply UTF-16 => UTF-8 from ./sse_convert_utf16_to_utf8.cpp - - // Check for ASCII fast path - if (_mm_testz_si128(in_16, v_ff80)) { // ASCII fast path!!!! - // 1. pack the bytes - // obviously suboptimal. - const __m128i utf8_packed = _mm_packus_epi16(in_16, in_16); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - continue; - } - - // no bits set above 7th bit - const __m128i one_byte_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in_16, v_ff80), v_0000); - const uint16_t one_byte_bitmask = - static_cast(_mm_movemask_epi8(one_byte_bytemask)); - - // no bits set above 11th bit - const __m128i one_or_two_bytes_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_0000); - const uint16_t one_or_two_bytes_bitmask = - static_cast(_mm_movemask_epi8(one_or_two_bytes_bytemask)); - - if (one_or_two_bytes_bitmask == 0xffff) { - // case: all code units either produce 1 or 2 UTF-8 bytes (at least one - // produces 2 bytes) - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const __m128i v_1f00 = _mm_set1_epi16((int16_t)0x1f00); - const __m128i v_003f = _mm_set1_epi16((int16_t)0x003f); - - // t0 = [000a|aaaa|bbbb|bb00] - const __m128i t0 = _mm_slli_epi16(in_16, 2); - // t1 = [000a|aaaa|0000|0000] - const __m128i t1 = _mm_and_si128(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const __m128i t2 = _mm_and_si128(in_16, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const __m128i t3 = _mm_or_si128(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const __m128i t4 = _mm_or_si128(t3, v_c080); - - // 2. merge ASCII and 2-byte codewords - const __m128i utf8_unpacked = - _mm_blendv_epi8(t4, in_16, one_byte_bytemask); - - // 3. prepare bitmask for 8-bit lookup - // one_byte_bitmask = hhggffeeddccbbaa -- the bits are doubled (h - - // MSB, a - LSB) - const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a - const uint16_t m1 = - static_cast(m0 >> 7); // m1 = 00000000h0g0f0e0 - const uint8_t m2 = - static_cast((m0 | m1) & 0xff); // m2 = hdgcfbea - // 4. pack the bytes - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; - const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); - const __m128i utf8_packed = _mm_shuffle_epi8(utf8_unpacked, shuffle); - - // 5. store bytes - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - - // 6. adjust pointers - buf += 8; - utf8_output += row[0]; - continue; - } - - // Check for overflow in packing - const __m128i saturation_bytemask = _mm_cmpeq_epi32( - _mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000); - const uint32_t saturation_bitmask = - static_cast(_mm_movemask_epi8(saturation_bytemask)); + while (buf + 16 + safety_margin < end) { + __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); + __m256i nextin = __lasx_xvld(reinterpret_cast(buf), 32); - if (saturation_bitmask == 0xffff) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + // Check if no bits set above 16th + if (__lasx_xbz_v(__lasx_xvpickod_h(in, nextin))) { + // Pack UTF-32 to UTF-16 safely (without surrogate pairs) + // Apply UTF-16 => UTF-8 routine (lasx_convert_utf16_to_utf8.cpp) + __m256i utf16_packed = + __lasx_xvpermi_d(__lasx_xvpickev_h(nextin, in), 0b11011000); - // Check for illegal surrogate code units - const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800); - const __m128i forbidden_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_d800); - if (static_cast(_mm_movemask_epi8(forbidden_bytemask)) != 0) { - return std::make_pair(result(error_code::SURROGATE, buf - start), - utf8_output); + if (__lasx_xbz_v(__lasx_xvslt_hu(__lasx_xvrepli_h(0x7F), + utf16_packed))) { // ASCII fast path!!!! + // 1. pack the bytes + // obviously suboptimal. + __m256i utf8_packed = __lasx_xvpermi_d( + __lasx_xvpickev_b(utf16_packed, utf16_packed), 0b00001000); + // 2. store (8 bytes) + __lsx_vst(lasx_extracti128_lo(utf8_packed), utf8_output, 0); + // 3. adjust pointers + buf += 16; + utf8_output += 16; + continue; // we are done for this round! } - const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606, - 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); - - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - - two UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) _mm_set1_epi16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const __m128i t0 = _mm_shuffle_epi8(in_16, dup_even); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const __m128i t2 = _mm_or_si128(t1, simdutf_vec(0b1000000000000000)); + if (__lasx_xbz_v(__lasx_xvslt_hu(v_07ff, utf16_packed))) { + // 1. prepare 2-byte values + // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 + // expected output : [110a|aaaa|10bb|bbbb] x 8 - // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] - const __m128i s0 = _mm_srli_epi16(in_16, 4); - // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] - const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100)); - // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] - const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140)); - // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000)); - const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, - simdutf_vec(0b0100000000000000)); - const __m128i s4 = _mm_xor_si128(s3, m0); -#undef simdutf_vec + // t0 = [000a|aaaa|bbbb|bb00] + const __m256i t0 = __lasx_xvslli_h(utf16_packed, 2); + // t1 = [000a|aaaa|0000|0000] + const __m256i t1 = __lasx_xvand_v(t0, __lasx_xvldi(-2785 /*0x1f00*/)); + // t2 = [0000|0000|00bb|bbbb] + const __m256i t2 = __lasx_xvand_v(utf16_packed, __lasx_xvrepli_h(0x3f)); + // t3 = [000a|aaaa|00bb|bbbb] + const __m256i t3 = __lasx_xvor_v(t1, t2); + // t4 = [110a|aaaa|10bb|bbbb] + const __m256i t4 = __lasx_xvor_v(t3, v_c080); + // 2. merge ASCII and 2-byte codewords + __m256i one_byte_bytemask = + __lasx_xvsle_hu(utf16_packed, __lasx_xvrepli_h(0x7F /*0x007F*/)); + __m256i utf8_unpacked = + __lasx_xvbitsel_v(t4, utf16_packed, one_byte_bytemask); + // 3. prepare bitmask for 8-bit lookup + __m256i mask = __lasx_xvmskltz_h(one_byte_bytemask); + uint32_t m1 = __lasx_xvpickve2gr_wu(mask, 0); + uint32_t m2 = __lasx_xvpickve2gr_wu(mask, 4); + // 4. pack the bytes + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m1]][0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_packed1 = __lsx_vshuf_b( + zero_128, lasx_extracti128_lo(utf8_unpacked), shuffle1); + + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes + [lasx_1_2_utf8_bytes_mask[m2]][0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_packed2 = __lsx_vshuf_b( + zero_128, lasx_extracti128_hi(utf8_unpacked), shuffle2); + // 5. store bytes + __lsx_vst(utf8_packed1, utf8_output, 0); + utf8_output += row1[0]; - // 4. expand code units 16-bit => 32-bit - const __m128i out0 = _mm_unpacklo_epi16(t2, s4); - const __m128i out1 = _mm_unpackhi_epi16(t2, s4); + __lsx_vst(utf8_packed2, utf8_output, 0); + utf8_output += row2[0]; - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint16_t mask = - (one_byte_bitmask & 0x5555) | (one_or_two_bytes_bitmask & 0xaaaa); - if (mask == 0) { - // We only have three-byte code units. Use fast path. - const __m128i shuffle = _mm_setr_epi8(2, 3, 1, 6, 7, 5, 10, 11, 9, 14, - 15, 13, -1, -1, -1, -1); - const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle); - const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle); - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += 12; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += 12; - buf += 8; + buf += 16; continue; - } - const uint8_t mask0 = uint8_t(mask); + } else { + // case: code units from register produce either 1, 2 or 3 UTF-8 bytes + forbidden_bytemask = __lasx_xvor_v( + __lasx_xvand_v( + __lasx_xvsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lasx_xvsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + forbidden_bytemask); + if (__lasx_xbnz_v(forbidden_bytemask)) { + return std::make_pair(result(error_code::SURROGATE, buf - start), + reinterpret_cast(utf8_output)); + } + /* In this branch we handle three cases: + 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - + single UFT-8 byte + 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - + two UTF-8 bytes + 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - + three UTF-8 bytes + + We expand the input word (16-bit) into two code units (32-bit), thus + we have room for four bytes. However, we need five distinct bit + layouts. Note that the last byte in cases #2 and #3 is the same. + + We precompute byte 1 for case #1 and the common byte for cases #2 & + #3 in register t2. + + We precompute byte 1 for case #3 and -- **conditionally** -- + precompute either byte 1 for case #2 or byte 2 for case #3. Note that + they differ by exactly one bit. + + Finally from these two code units we build proper UTF-8 sequence, + taking into account the case (i.e, the number of bytes to write). + */ + /** + * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: + * t2 => [0ccc|cccc] [10cc|cccc] + * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) + */ + // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] + __m256i t0 = __lasx_xvpickev_b(utf16_packed, utf16_packed); + t0 = __lasx_xvilvl_b(t0, t0); + // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] + __m256i v_3f7f = __lasx_xvreplgr2vr_h(uint16_t(0x3F7F)); + __m256i t1 = __lasx_xvand_v(t0, v_3f7f); + // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] + __m256i t2 = __lasx_xvor_v(t1, __lasx_xvldi(-2688 /*0x8000*/)); - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); - const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0); + // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] + __m256i s0 = __lasx_xvsrli_h(utf16_packed, 12); + // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] + __m256i s1 = __lasx_xvslli_h(utf16_packed, 2); + // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] + s1 = __lasx_xvand_v(s1, __lasx_xvldi(-2753 /*0x3F00*/)); + // [00bb|bbbb|0000|aaaa] + __m256i s2 = __lasx_xvor_v(s0, s1); + // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] + __m256i v_c0e0 = __lasx_xvreplgr2vr_h(uint16_t(0xC0E0)); + __m256i s3 = __lasx_xvor_v(s2, v_c0e0); + // __m256i v_07ff = vmovq_n_u16((uint16_t)0x07FF); + __m256i one_or_two_bytes_bytemask = + __lasx_xvsle_hu(utf16_packed, v_07ff); + __m256i m0 = __lasx_xvandn_v(one_or_two_bytes_bytemask, + __lasx_xvldi(-2752 /*0x4000*/)); + __m256i s4 = __lasx_xvxor_v(s3, m0); - const uint8_t mask1 = static_cast(mask >> 8); + // 4. expand code units 16-bit => 32-bit + __m256i out0 = __lasx_xvilvl_h(s4, t2); + __m256i out1 = __lasx_xvilvh_h(s4, t2); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); - const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1); + // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle + __m256i one_byte_bytemask = + __lasx_xvsle_hu(utf16_packed, __lasx_xvrepli_h(0x7F)); + + __m256i one_or_two_bytes_bytemask_u16_to_u32_low = + __lasx_xvilvl_h(one_or_two_bytes_bytemask, zero); + __m256i one_or_two_bytes_bytemask_u16_to_u32_high = + __lasx_xvilvh_h(one_or_two_bytes_bytemask, zero); + + __m256i one_byte_bytemask_u16_to_u32_low = + __lasx_xvilvl_h(one_byte_bytemask, one_byte_bytemask); + __m256i one_byte_bytemask_u16_to_u32_high = + __lasx_xvilvh_h(one_byte_bytemask, one_byte_bytemask); + + __m256i mask0 = __lasx_xvmskltz_h( + __lasx_xvor_v(one_or_two_bytes_bytemask_u16_to_u32_low, + one_byte_bytemask_u16_to_u32_low)); + __m256i mask1 = __lasx_xvmskltz_h( + __lasx_xvor_v(one_or_two_bytes_bytemask_u16_to_u32_high, + one_byte_bytemask_u16_to_u32_high)); + + uint32_t mask = __lasx_xvpickve2gr_wu(mask0, 0); + const uint8_t *row0 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle0 = __lsx_vld(row0, 1); + __m128i utf8_0 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out0), shuffle0); + __lsx_vst(utf8_0, utf8_output, 0); + utf8_output += row0[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += row0[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += row1[0]; + mask = __lasx_xvpickve2gr_wu(mask1, 0); + const uint8_t *row1 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle1 = __lsx_vld(row1, 1); + __m128i utf8_1 = + __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out1), shuffle1); + __lsx_vst(utf8_1, utf8_output, 0); + utf8_output += row1[0]; - buf += 8; + mask = __lasx_xvpickve2gr_wu(mask0, 4); + const uint8_t *row2 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle2 = __lsx_vld(row2, 1); + __m128i utf8_2 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out0), shuffle2); + __lsx_vst(utf8_2, utf8_output, 0); + utf8_output += row2[0]; + + mask = __lasx_xvpickve2gr_wu(mask1, 4); + const uint8_t *row3 = + &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] + [0]; + __m128i shuffle3 = __lsx_vld(row3, 1); + __m128i utf8_3 = + __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out1), shuffle3); + __lsx_vst(utf8_3, utf8_output, 0); + utf8_output += row3[0]; + + buf += 16; + } + // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> + // will produce four UTF-8 bytes. } else { - // case: at least one 32-bit word produce a surrogate pair in UTF-16 <=> - // will produce four UTF-8 bytes Let us do a scalar fallback. It may seem - // wasteful to use scalar code, but being efficient with SIMD in the - // presence of surrogate pairs may require non-trivial tables. + // Let us do a scalar fallback. + // It may seem wasteful to use scalar code, but being efficient with SIMD + // in the presence of surrogate pairs may require non-trivial tables. size_t forward = 15; size_t k = 0; if (size_t(end - buf) < forward + 1) { @@ -37564,7 +52580,8 @@ sse_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, } else if ((word & 0xFFFF0000) == 0) { if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair( - result(error_code::SURROGATE, buf - start + k), utf8_output); + result(error_code::SURROGATE, buf - start + k), + reinterpret_cast(utf8_output)); } *utf8_output++ = char((word >> 12) | 0b11100000); *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); @@ -37572,7 +52589,8 @@ sse_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, } else { if (word > 0x10FFFF) { return std::make_pair( - result(error_code::TOO_LARGE, buf - start + k), utf8_output); + result(error_code::TOO_LARGE, buf - start + k), + reinterpret_cast(utf8_output)); } *utf8_output++ = char((word >> 18) | 0b11110000); *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); @@ -37583,51 +52601,76 @@ sse_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, buf += k; } } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); + + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf8_output)); } -/* end file src/westmere/sse_convert_utf32_to_utf8.cpp */ -/* begin file src/westmere/sse_convert_utf32_to_utf16.cpp */ +/* end file src/lasx/lasx_convert_utf32_to_utf8.cpp */ +/* begin file src/lasx/lasx_convert_utf32_to_utf16.cpp */ template std::pair -sse_convert_utf32_to_utf16(const char32_t *buf, size_t len, - char16_t *utf16_output) { - +lasx_convert_utf32_to_utf16(const char32_t *buf, size_t len, + char16_t *utf16_out) { + uint16_t *utf16_output = reinterpret_cast(utf16_out); const char32_t *end = buf + len; - const __m128i v_0000 = _mm_setzero_si128(); - const __m128i v_ffff0000 = _mm_set1_epi32((int32_t)0xffff0000); - __m128i forbidden_bytemask = _mm_setzero_si128(); + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)utf16_output & 0x1F) && buf < end) { + uint32_t word = *buf++; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf16_output)); + } + *utf16_output++ = !match_system(big_endian) + ? char16_t(word >> 8 | word << 8) + : char16_t(word); + // buf++; + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return std::make_pair(nullptr, + reinterpret_cast(utf16_output)); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if (!match_system(big_endian)) { + high_surrogate = uint16_t(high_surrogate >> 8 | high_surrogate << 8); + low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + // buf++; + } + } - while (end - buf >= 8) { - __m128i in = _mm_loadu_si128((__m128i *)buf); - __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); - const __m128i saturation_bytemask = _mm_cmpeq_epi32( - _mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000); - const uint32_t saturation_bitmask = - static_cast(_mm_movemask_epi8(saturation_bytemask)); + __m256i forbidden_bytemask = __lasx_xvrepli_h(0); + __m256i v_d800 = __lasx_xvldi(-2600); /*0xD800*/ + __m256i v_dfff = __lasx_xvreplgr2vr_h(uint16_t(0xdfff)); + while (buf + 16 <= end) { + __m256i in0 = __lasx_xvld(reinterpret_cast(buf), 0); + __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 32); // Check if no bits set above 16th - if (saturation_bitmask == 0xffff) { - // Pack UTF-32 to UTF-16 - __m128i utf16_packed = _mm_packus_epi32(in, nextin); - - const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800); - const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800); - forbidden_bytemask = _mm_or_si128( - forbidden_bytemask, - _mm_cmpeq_epi16(_mm_and_si128(utf16_packed, v_f800), v_d800)); + if (__lasx_xbz_v(__lasx_xvpickod_h(in1, in0))) { + __m256i utf16_packed = + __lasx_xvpermi_d(__lasx_xvpickev_h(in1, in0), 0b11011000); + forbidden_bytemask = __lasx_xvor_v( + __lasx_xvand_v( + __lasx_xvsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lasx_xvsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + forbidden_bytemask); - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - utf16_packed = _mm_shuffle_epi8(utf16_packed, swap); + if (!match_system(big_endian)) { + utf16_packed = lasx_swap_bytes(utf16_packed); } - - _mm_storeu_si128((__m128i *)utf16_output, utf16_packed); - utf16_output += 8; - buf += 8; + __lasx_xvst(utf16_packed, utf16_output, 0); + utf16_output += 16; + buf += 16; } else { - size_t forward = 7; + size_t forward = 15; size_t k = 0; if (size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1); @@ -37637,25 +52680,25 @@ sse_convert_utf32_to_utf16(const char32_t *buf, size_t len, if ((word & 0xFFFF0000) == 0) { // will not generate a surrogate pair if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(nullptr, utf16_output); + return std::make_pair(nullptr, + reinterpret_cast(utf16_output)); } - *utf16_output++ = - big_endian - ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) - : char16_t(word); + *utf16_output++ = !match_system(big_endian) + ? char16_t(word >> 8 | word << 8) + : char16_t(word); } else { // will generate a surrogate pair if (word > 0x10FFFF) { - return std::make_pair(nullptr, utf16_output); + return std::make_pair(nullptr, + reinterpret_cast(utf16_output)); } word -= 0x10000; uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (big_endian) { + if (!match_system(big_endian)) { high_surrogate = - uint16_t((high_surrogate >> 8) | (high_surrogate << 8)); - low_surrogate = - uint16_t((low_surrogate >> 8) | (low_surrogate << 8)); + uint16_t(high_surrogate >> 8 | high_surrogate << 8); + low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); } *utf16_output++ = char16_t(high_surrogate); *utf16_output++ = char16_t(low_surrogate); @@ -37666,56 +52709,80 @@ sse_convert_utf32_to_utf16(const char32_t *buf, size_t len, } // check for invalid input - if (static_cast(_mm_movemask_epi8(forbidden_bytemask)) != 0) { - return std::make_pair(nullptr, utf16_output); + if (__lasx_xbnz_v(forbidden_bytemask)) { + return std::make_pair(nullptr, reinterpret_cast(utf16_output)); } - - return std::make_pair(buf, utf16_output); + return std::make_pair(buf, reinterpret_cast(utf16_output)); } template std::pair -sse_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, - char16_t *utf16_output) { +lasx_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, + char16_t *utf16_out) { + uint16_t *utf16_output = reinterpret_cast(utf16_out); const char32_t *start = buf; const char32_t *end = buf + len; - const __m128i v_0000 = _mm_setzero_si128(); - const __m128i v_ffff0000 = _mm_set1_epi32((int32_t)0xffff0000); + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)utf16_output & 0x1F) && buf < end) { + uint32_t word = *buf++; + if ((word & 0xFFFF0000) == 0) { + // will not generate a surrogate pair + if (word >= 0xD800 && word <= 0xDFFF) { + return std::make_pair(result(error_code::SURROGATE, buf - start - 1), + reinterpret_cast(utf16_output)); + } + *utf16_output++ = !match_system(big_endian) + ? char16_t(word >> 8 | word << 8) + : char16_t(word); + } else { + // will generate a surrogate pair + if (word > 0x10FFFF) { + return std::make_pair(result(error_code::TOO_LARGE, buf - start - 1), + reinterpret_cast(utf16_output)); + } + word -= 0x10000; + uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); + uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); + if (!match_system(big_endian)) { + high_surrogate = uint16_t(high_surrogate >> 8 | high_surrogate << 8); + low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); + } + *utf16_output++ = char16_t(high_surrogate); + *utf16_output++ = char16_t(low_surrogate); + } + } - while (end - buf >= 8) { - __m128i in = _mm_loadu_si128((__m128i *)buf); - __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); - const __m128i saturation_bytemask = _mm_cmpeq_epi32( - _mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000); - const uint32_t saturation_bitmask = - static_cast(_mm_movemask_epi8(saturation_bytemask)); + __m256i forbidden_bytemask = __lasx_xvrepli_h(0); + __m256i v_d800 = __lasx_xvldi(-2600); /*0xD800*/ + __m256i v_dfff = __lasx_xvreplgr2vr_h(uint16_t(0xdfff)); + while (buf + 16 <= end) { + __m256i in0 = __lasx_xvld(reinterpret_cast(buf), 0); + __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 32); // Check if no bits set above 16th - if (saturation_bitmask == 0xffff) { - // Pack UTF-32 to UTF-16 - __m128i utf16_packed = _mm_packus_epi32(in, nextin); - - const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800); - const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800); - const __m128i forbidden_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(utf16_packed, v_f800), v_d800); - if (static_cast(_mm_movemask_epi8(forbidden_bytemask)) != 0) { + if (__lasx_xbz_v(__lasx_xvpickod_h(in1, in0))) { + __m256i utf16_packed = + __lasx_xvpermi_d(__lasx_xvpickev_h(in1, in0), 0b11011000); + forbidden_bytemask = __lasx_xvor_v( + __lasx_xvand_v( + __lasx_xvsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff + __lasx_xvsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 + forbidden_bytemask); + if (__lasx_xbnz_v(forbidden_bytemask)) { return std::make_pair(result(error_code::SURROGATE, buf - start), - utf16_output); + reinterpret_cast(utf16_output)); } - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - utf16_packed = _mm_shuffle_epi8(utf16_packed, swap); + if (!match_system(big_endian)) { + utf16_packed = lasx_swap_bytes(utf16_packed); } - _mm_storeu_si128((__m128i *)utf16_output, utf16_packed); - utf16_output += 8; - buf += 8; + __lasx_xvst(utf16_packed, utf16_output, 0); + utf16_output += 16; + buf += 16; } else { - size_t forward = 7; + size_t forward = 15; size_t k = 0; if (size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1); @@ -37726,26 +52793,26 @@ sse_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, // will not generate a surrogate pair if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair( - result(error_code::SURROGATE, buf - start + k), utf16_output); + result(error_code::SURROGATE, buf - start + k), + reinterpret_cast(utf16_output)); } - *utf16_output++ = - big_endian - ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) - : char16_t(word); + *utf16_output++ = !match_system(big_endian) + ? char16_t(word >> 8 | word << 8) + : char16_t(word); } else { // will generate a surrogate pair if (word > 0x10FFFF) { return std::make_pair( - result(error_code::TOO_LARGE, buf - start + k), utf16_output); + result(error_code::TOO_LARGE, buf - start + k), + reinterpret_cast(utf16_output)); } word -= 0x10000; uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (big_endian) { + if (!match_system(big_endian)) { high_surrogate = - uint16_t((high_surrogate >> 8) | (high_surrogate << 8)); - low_surrogate = - uint16_t((low_surrogate >> 8) | (low_surrogate << 8)); + uint16_t(high_surrogate >> 8 | high_surrogate << 8); + low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); } *utf16_output++ = char16_t(high_surrogate); *utf16_output++ = char16_t(low_surrogate); @@ -37755,10 +52822,11 @@ sse_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, } } - return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output); + return std::make_pair(result(error_code::SUCCESS, buf - start), + reinterpret_cast(utf16_output)); } -/* end file src/westmere/sse_convert_utf32_to_utf16.cpp */ -/* begin file src/westmere/sse_base64.cpp */ +/* end file src/lasx/lasx_convert_utf32_to_utf16.cpp */ +/* begin file src/lasx/lasx_base64.cpp */ /** * References and further reading: * @@ -37786,36 +52854,6 @@ sse_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, * Nick Kopp. 2013. Base64 Encoding on a GPU. * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). */ -template __m128i lookup_pshufb_improved(const __m128i input) { - // credit: Wojciech Muła - // reduce 0..51 -> 0 - // 52..61 -> 1 .. 10 - // 62 -> 11 - // 63 -> 12 - __m128i result = _mm_subs_epu8(input, _mm_set1_epi8(51)); - - // distinguish between ranges 0..25 and 26..51: - // 0 .. 25 -> remains 0 - // 26 .. 51 -> becomes 13 - const __m128i less = _mm_cmpgt_epi8(_mm_set1_epi8(26), input); - result = _mm_or_si128(result, _mm_and_si128(less, _mm_set1_epi8(13))); - - __m128i shift_LUT; - if (base64_url) { - shift_LUT = _mm_setr_epi8('a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, - '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, - '0' - 52, '-' - 62, '_' - 63, 'A', 0, 0); - } else { - shift_LUT = _mm_setr_epi8('a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, - '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, - '0' - 52, '+' - 62, '/' - 63, 'A', 0, 0); - } - - // read shift - result = _mm_shuffle_epi8(shift_LUT, result); - - return _mm_add_epi8(result, input); -} template size_t encode_base64(char *dst, const char *src, size_t srclen, @@ -37823,71 +52861,124 @@ size_t encode_base64(char *dst, const char *src, size_t srclen, // credit: Wojciech Muła // SSE (lookup: pshufb improved unrolled) const uint8_t *input = (const uint8_t *)src; - + static const char *lookup_tbl = + isbase64url + ? "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" + : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; uint8_t *out = (uint8_t *)dst; - const __m128i shuf = - _mm_set_epi8(10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1); + v32u8 shuf; + __m256i v_fc0fc00, v_3f03f0, shift_r, shift_l, base64_tbl0, base64_tbl1, + base64_tbl2, base64_tbl3; + if (srclen >= 28) { + shuf = v32u8{1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10, + 1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10}; + + v_fc0fc00 = __lasx_xvreplgr2vr_w(uint32_t(0x0fc0fc00)); + v_3f03f0 = __lasx_xvreplgr2vr_w(uint32_t(0x003f03f0)); + shift_r = __lasx_xvreplgr2vr_w(uint32_t(0x0006000a)); + shift_l = __lasx_xvreplgr2vr_w(uint32_t(0x00080004)); + base64_tbl0 = ____m256i(__lsx_vld(lookup_tbl, 0)); + base64_tbl1 = ____m256i(__lsx_vld(lookup_tbl, 16)); + base64_tbl2 = ____m256i(__lsx_vld(lookup_tbl, 32)); + base64_tbl3 = ____m256i(__lsx_vld(lookup_tbl, 48)); + } size_t i = 0; - for (; i + 52 <= srclen; i += 48) { - __m128i in0 = _mm_loadu_si128( - reinterpret_cast(input + i + 4 * 3 * 0)); - __m128i in1 = _mm_loadu_si128( - reinterpret_cast(input + i + 4 * 3 * 1)); - __m128i in2 = _mm_loadu_si128( - reinterpret_cast(input + i + 4 * 3 * 2)); - __m128i in3 = _mm_loadu_si128( - reinterpret_cast(input + i + 4 * 3 * 3)); - - in0 = _mm_shuffle_epi8(in0, shuf); - in1 = _mm_shuffle_epi8(in1, shuf); - in2 = _mm_shuffle_epi8(in2, shuf); - in3 = _mm_shuffle_epi8(in3, shuf); - - const __m128i t0_0 = _mm_and_si128(in0, _mm_set1_epi32(0x0fc0fc00)); - const __m128i t0_1 = _mm_and_si128(in1, _mm_set1_epi32(0x0fc0fc00)); - const __m128i t0_2 = _mm_and_si128(in2, _mm_set1_epi32(0x0fc0fc00)); - const __m128i t0_3 = _mm_and_si128(in3, _mm_set1_epi32(0x0fc0fc00)); - - const __m128i t1_0 = _mm_mulhi_epu16(t0_0, _mm_set1_epi32(0x04000040)); - const __m128i t1_1 = _mm_mulhi_epu16(t0_1, _mm_set1_epi32(0x04000040)); - const __m128i t1_2 = _mm_mulhi_epu16(t0_2, _mm_set1_epi32(0x04000040)); - const __m128i t1_3 = _mm_mulhi_epu16(t0_3, _mm_set1_epi32(0x04000040)); - - const __m128i t2_0 = _mm_and_si128(in0, _mm_set1_epi32(0x003f03f0)); - const __m128i t2_1 = _mm_and_si128(in1, _mm_set1_epi32(0x003f03f0)); - const __m128i t2_2 = _mm_and_si128(in2, _mm_set1_epi32(0x003f03f0)); - const __m128i t2_3 = _mm_and_si128(in3, _mm_set1_epi32(0x003f03f0)); - - const __m128i t3_0 = _mm_mullo_epi16(t2_0, _mm_set1_epi32(0x01000010)); - const __m128i t3_1 = _mm_mullo_epi16(t2_1, _mm_set1_epi32(0x01000010)); - const __m128i t3_2 = _mm_mullo_epi16(t2_2, _mm_set1_epi32(0x01000010)); - const __m128i t3_3 = _mm_mullo_epi16(t2_3, _mm_set1_epi32(0x01000010)); - - const __m128i input0 = _mm_or_si128(t1_0, t3_0); - const __m128i input1 = _mm_or_si128(t1_1, t3_1); - const __m128i input2 = _mm_or_si128(t1_2, t3_2); - const __m128i input3 = _mm_or_si128(t1_3, t3_3); - - _mm_storeu_si128(reinterpret_cast<__m128i *>(out), - lookup_pshufb_improved(input0)); - out += 16; + for (; i + 100 <= srclen; i += 96) { + __m128i in0_lo = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 0); + __m128i in0_hi = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 1); + __m128i in1_lo = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 2); + __m128i in1_hi = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 3); + __m128i in2_lo = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 4); + __m128i in2_hi = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 5); + __m128i in3_lo = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 6); + __m128i in3_hi = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 7); + + __m256i in0 = lasx_set_q(in0_hi, in0_lo); + __m256i in1 = lasx_set_q(in1_hi, in1_lo); + __m256i in2 = lasx_set_q(in2_hi, in2_lo); + __m256i in3 = lasx_set_q(in3_hi, in3_lo); + + in0 = __lasx_xvshuf_b(in0, in0, (__m256i)shuf); + in1 = __lasx_xvshuf_b(in1, in1, (__m256i)shuf); + in2 = __lasx_xvshuf_b(in2, in2, (__m256i)shuf); + in3 = __lasx_xvshuf_b(in3, in3, (__m256i)shuf); + + __m256i t0_0 = __lasx_xvand_v(in0, v_fc0fc00); + __m256i t0_1 = __lasx_xvand_v(in1, v_fc0fc00); + __m256i t0_2 = __lasx_xvand_v(in2, v_fc0fc00); + __m256i t0_3 = __lasx_xvand_v(in3, v_fc0fc00); + + __m256i t1_0 = __lasx_xvsrl_h(t0_0, shift_r); + __m256i t1_1 = __lasx_xvsrl_h(t0_1, shift_r); + __m256i t1_2 = __lasx_xvsrl_h(t0_2, shift_r); + __m256i t1_3 = __lasx_xvsrl_h(t0_3, shift_r); + + __m256i t2_0 = __lasx_xvand_v(in0, v_3f03f0); + __m256i t2_1 = __lasx_xvand_v(in1, v_3f03f0); + __m256i t2_2 = __lasx_xvand_v(in2, v_3f03f0); + __m256i t2_3 = __lasx_xvand_v(in3, v_3f03f0); + + __m256i t3_0 = __lasx_xvsll_h(t2_0, shift_l); + __m256i t3_1 = __lasx_xvsll_h(t2_1, shift_l); + __m256i t3_2 = __lasx_xvsll_h(t2_2, shift_l); + __m256i t3_3 = __lasx_xvsll_h(t2_3, shift_l); + + __m256i input0 = __lasx_xvor_v(t1_0, t3_0); + __m256i input0_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, input0); + __m256i input0_shuf1 = __lasx_xvshuf_b( + base64_tbl3, base64_tbl2, __lasx_xvsub_b(input0, __lasx_xvldi(32))); + __m256i input0_mask = __lasx_xvslei_bu(input0, 31); + __m256i input0_result = + __lasx_xvbitsel_v(input0_shuf1, input0_shuf0, input0_mask); + __lasx_xvst(input0_result, reinterpret_cast<__m256i *>(out), 0); + out += 32; - _mm_storeu_si128(reinterpret_cast<__m128i *>(out), - lookup_pshufb_improved(input1)); - out += 16; + __m256i input1 = __lasx_xvor_v(t1_1, t3_1); + __m256i input1_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, input1); + __m256i input1_shuf1 = __lasx_xvshuf_b( + base64_tbl3, base64_tbl2, __lasx_xvsub_b(input1, __lasx_xvldi(32))); + __m256i input1_mask = __lasx_xvslei_bu(input1, 31); + __m256i input1_result = + __lasx_xvbitsel_v(input1_shuf1, input1_shuf0, input1_mask); + __lasx_xvst(input1_result, reinterpret_cast<__m256i *>(out), 0); + out += 32; - _mm_storeu_si128(reinterpret_cast<__m128i *>(out), - lookup_pshufb_improved(input2)); - out += 16; + __m256i input2 = __lasx_xvor_v(t1_2, t3_2); + __m256i input2_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, input2); + __m256i input2_shuf1 = __lasx_xvshuf_b( + base64_tbl3, base64_tbl2, __lasx_xvsub_b(input2, __lasx_xvldi(32))); + __m256i input2_mask = __lasx_xvslei_bu(input2, 31); + __m256i input2_result = + __lasx_xvbitsel_v(input2_shuf1, input2_shuf0, input2_mask); + __lasx_xvst(input2_result, reinterpret_cast<__m256i *>(out), 0); + out += 32; - _mm_storeu_si128(reinterpret_cast<__m128i *>(out), - lookup_pshufb_improved(input3)); - out += 16; + __m256i input3 = __lasx_xvor_v(t1_3, t3_3); + __m256i input3_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, input3); + __m256i input3_shuf1 = __lasx_xvshuf_b( + base64_tbl3, base64_tbl2, __lasx_xvsub_b(input3, __lasx_xvldi(32))); + __m256i input3_mask = __lasx_xvslei_bu(input3, 31); + __m256i input3_result = + __lasx_xvbitsel_v(input3_shuf1, input3_shuf0, input3_mask); + __lasx_xvst(input3_result, reinterpret_cast<__m256i *>(out), 0); + out += 32; } - for (; i + 16 <= srclen; i += 12) { + for (; i + 28 <= srclen; i += 24) { - __m128i in = _mm_loadu_si128(reinterpret_cast(input + i)); + __m128i in_lo = __lsx_vld(reinterpret_cast(input + i), 0); + __m128i in_hi = + __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 1); + + __m256i in = lasx_set_q(in_hi, in_lo); // bytes from groups A, B and C are needed in separate 32-bit lanes // in = [DDDD|CCCC|BBBB|AAAA] @@ -37901,40 +52992,43 @@ size_t encode_base64(char *dst, const char *src, size_t srclen, // [bbbbcccc|ccdddddd|aaaaaabb|bbbbcccc] // ^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^ // processed bits - in = _mm_shuffle_epi8(in, shuf); + in = __lasx_xvshuf_b(in, in, (__m256i)shuf); // unpacking - // t0 = [0000cccc|cc000000|aaaaaa00|00000000] - const __m128i t0 = _mm_and_si128(in, _mm_set1_epi32(0x0fc0fc00)); + __m256i t0 = __lasx_xvand_v(in, v_fc0fc00); // t1 = [00000000|00cccccc|00000000|00aaaaaa] - // (c * (1 << 10), a * (1 << 6)) >> 16 (note: an unsigned - // multiplication) - const __m128i t1 = _mm_mulhi_epu16(t0, _mm_set1_epi32(0x04000040)); + // ((c >> 6), (a >> 10)) + __m256i t1 = __lasx_xvsrl_h(t0, shift_r); // t2 = [00000000|00dddddd|000000bb|bbbb0000] - const __m128i t2 = _mm_and_si128(in, _mm_set1_epi32(0x003f03f0)); - // t3 = [00dddddd|00000000|00bbbbbb|00000000]( - // (d * (1 << 8), b * (1 << 4)) - const __m128i t3 = _mm_mullo_epi16(t2, _mm_set1_epi32(0x01000010)); + __m256i t2 = __lasx_xvand_v(in, v_3f03f0); + // t3 = [00dddddd|00000000|00bbbbbb|00000000] + // ((d << 8), (b << 4)) + __m256i t3 = __lasx_xvsll_h(t2, shift_l); // res = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] = t1 | t3 - const __m128i indices = _mm_or_si128(t1, t3); - - _mm_storeu_si128(reinterpret_cast<__m128i *>(out), - lookup_pshufb_improved(indices)); - out += 16; + __m256i indices = __lasx_xvor_v(t1, t3); + + __m256i indices_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, indices); + __m256i indices_shuf1 = __lasx_xvshuf_b( + base64_tbl3, base64_tbl2, __lasx_xvsub_b(indices, __lasx_xvldi(32))); + __m256i indices_mask = __lasx_xvslei_bu(indices, 31); + __m256i indices_result = + __lasx_xvbitsel_v(indices_shuf1, indices_shuf0, indices_mask); + __lasx_xvst(indices_result, reinterpret_cast<__m256i *>(out), 0); + out += 32; } return i / 3 * 4 + scalar::base64::tail_encode_base64((char *)out, src + i, srclen - i, options); } + static inline void compress(__m128i data, uint16_t mask, char *output) { if (mask == 0) { - _mm_storeu_si128(reinterpret_cast<__m128i *>(output), data); + __lsx_vst(data, reinterpret_cast<__m128i *>(output), 0); return; } - // this particular implementation was inspired by work done by @animetosho // we do it in two steps, first 8 bytes and then second 8 bytes uint8_t mask1 = uint8_t(mask); // least significant 8 bits @@ -37943,13 +53037,15 @@ static inline void compress(__m128i data, uint16_t mask, char *output) { // thintable_epi8[mask2] into a 128-bit register, using only // two instructions on most compilers. - __m128i shufmask = _mm_set_epi64x(tables::base64::thintable_epi8[mask2], - tables::base64::thintable_epi8[mask1]); + v2u64 shufmask = {tables::base64::thintable_epi8[mask1], + tables::base64::thintable_epi8[mask2]}; + // we increment by 0x08 the second half of the mask - shufmask = - _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + const v4u32 hi = {0, 0, 0x08080808, 0x08080808}; + __m128i shufmask1 = __lsx_vadd_b((__m128i)shufmask, (__m128i)hi); + // this is the version "nearly pruned" - __m128i pruned = _mm_shuffle_epi8(data, shufmask); + __m128i pruned = __lsx_vshuf_b(data, data, shufmask1); // we still need to put the two halves together. // we compute the popcount of the first half: int pop1 = tables::base64::BitsSetTable256mul2[mask1]; @@ -37957,88 +53053,96 @@ static inline void compress(__m128i data, uint16_t mask, char *output) { // only the first pop1 bytes from the first 8 bytes, and then // it fills in with the bytes from the second 8 bytes + some filling // at the end. - __m128i compactmask = _mm_loadu_si128(reinterpret_cast( - tables::base64::pshufb_combine_table + pop1 * 8)); - __m128i answer = _mm_shuffle_epi8(pruned, compactmask); - _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + __m128i compactmask = + __lsx_vld(reinterpret_cast( + tables::base64::pshufb_combine_table + pop1 * 8), + 0); + __m128i answer = __lsx_vshuf_b(pruned, pruned, compactmask); + + __lsx_vst(answer, reinterpret_cast<__m128i *>(output), 0); } struct block64 { - __m128i chunks[4]; + __m256i chunks[2]; }; template -static inline uint16_t to_base64_mask(__m128i *src, bool *error) { - const __m128i ascii_space_tbl = - _mm_setr_epi8(0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9, 0xa, 0x0, - 0xc, 0xd, 0x0, 0x0); +static inline uint32_t to_base64_mask(__m256i *src, bool *error) { + __m256i ascii_space_tbl = + ____m256i((__m128i)v16u8{0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x9, 0xa, 0x0, 0xc, 0xd, 0x0, 0x0}); // credit: aqrit - __m128i delta_asso; + __m256i delta_asso = + ____m256i((__m128i)v16u8{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, 0x0, + 0x0, 0x0, 0x0, 0xF, 0x0, 0xF}); + __m256i delta_values; if (base64_url) { - delta_asso = _mm_setr_epi8(0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, 0x0, - 0x0, 0x0, 0x0, 0xF, 0x0, 0xF); + delta_values = ____m256i( + (__m128i)v16i8{int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0x11), int8_t(0xC3), + int8_t(0xBF), int8_t(0xE0), int8_t(0xB9), int8_t(0xB9)}); } else { - - delta_asso = _mm_setr_epi8(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F); + delta_values = ____m256i( + (__m128i)v16i8{int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), + int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), + int8_t(0xB9), int8_t(0x00), int8_t(0x10), int8_t(0xC3), + int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9)}); } - __m128i delta_values; - if (base64_url) { - delta_values = _mm_setr_epi8(0x0, 0x0, 0x0, 0x13, 0x4, uint8_t(0xBF), - uint8_t(0xBF), uint8_t(0xB9), uint8_t(0xB9), - 0x0, 0x11, uint8_t(0xC3), uint8_t(0xBF), - uint8_t(0xE0), uint8_t(0xB9), uint8_t(0xB9)); - } else { - delta_values = - _mm_setr_epi8(int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), - int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), - int8_t(0xB9), int8_t(0x00), int8_t(0x10), int8_t(0xC3), - int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9)); - } - __m128i check_asso; + __m256i check_asso; if (base64_url) { - check_asso = _mm_setr_epi8(0xD, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, - 0x3, 0x7, 0xB, 0xE, 0xB, 0x6); + check_asso = ____m256i((__m128i)v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, + 0x0B, 0x06, 0x0B, 0x12}); } else { - - check_asso = _mm_setr_epi8(0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F); + check_asso = ____m256i((__m128i)v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, + 0x0B, 0x0B, 0x0B, 0x0F}); } - __m128i check_values; + + __m256i check_values; if (base64_url) { - check_values = _mm_setr_epi8(uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), - uint8_t(0x80), uint8_t(0xCF), uint8_t(0xBF), - uint8_t(0xB6), uint8_t(0xA6), uint8_t(0xB5), - uint8_t(0xA1), 0x0, uint8_t(0x80), 0x0, - uint8_t(0x80), 0x0, uint8_t(0x80)); + check_values = ____m256i( + (__m128i)v16i8{int8_t(0x0), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD3), int8_t(0xA6), + int8_t(0xB5), int8_t(0x86), int8_t(0xD0), int8_t(0x80), + int8_t(0xB0), int8_t(0x80), int8_t(0x0), int8_t(0x0)}); } else { - - check_values = - _mm_setr_epi8(int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), - int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), - int8_t(0xB5), int8_t(0x86), int8_t(0xD1), int8_t(0x80), - int8_t(0xB1), int8_t(0x80), int8_t(0x91), int8_t(0x80)); - } - const __m128i shifted = _mm_srli_epi32(*src, 3); - - const __m128i delta_hash = - _mm_avg_epu8(_mm_shuffle_epi8(delta_asso, *src), shifted); - const __m128i check_hash = - _mm_avg_epu8(_mm_shuffle_epi8(check_asso, *src), shifted); - - const __m128i out = - _mm_adds_epi8(_mm_shuffle_epi8(delta_values, delta_hash), *src); - const __m128i chk = - _mm_adds_epi8(_mm_shuffle_epi8(check_values, check_hash), *src); - const int mask = _mm_movemask_epi8(chk); + check_values = ____m256i( + (__m128i)v16i8{int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), + int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), + int8_t(0xB5), int8_t(0x86), int8_t(0xD1), int8_t(0x80), + int8_t(0xB1), int8_t(0x80), int8_t(0x91), int8_t(0x80)}); + } + + __m256i shifted = __lasx_xvsrli_b(*src, 3); + __m256i asso_index = __lasx_xvand_v(*src, __lasx_xvldi(0xF)); + __m256i delta_hash = __lasx_xvavgr_bu( + __lasx_xvshuf_b(delta_asso, delta_asso, asso_index), shifted); + __m256i check_hash = __lasx_xvavgr_bu( + __lasx_xvshuf_b(check_asso, check_asso, asso_index), shifted); + + __m256i out = __lasx_xvsadd_b( + __lasx_xvshuf_b(delta_values, delta_values, delta_hash), *src); + __m256i chk = __lasx_xvsadd_b( + __lasx_xvshuf_b(check_values, check_values, check_hash), *src); + __m256i chk_ltz = __lasx_xvmskltz_b(chk); + unsigned int mask = __lasx_xvpickve2gr_wu(chk_ltz, 0); + mask = mask | (__lsx_vpickve2gr_hu(lasx_extracti128_hi(chk_ltz), 0) << 16); if (mask) { - __m128i ascii_space = - _mm_cmpeq_epi8(_mm_shuffle_epi8(ascii_space_tbl, *src), *src); - *error |= (mask != _mm_movemask_epi8(ascii_space)); + __m256i ascii_space = __lasx_xvseq_b( + __lasx_xvshuf_b(ascii_space_tbl, ascii_space_tbl, asso_index), *src); + __m256i ascii_space_ltz = __lasx_xvmskltz_b(ascii_space); + unsigned int ascii_space_mask = __lasx_xvpickve2gr_wu(ascii_space_ltz, 0); + ascii_space_mask = + ascii_space_mask | + (__lsx_vpickve2gr_hu(lasx_extracti128_hi(ascii_space_ltz), 0) << 16); + *error |= (mask != ascii_space_mask); } + *src = out; - return (uint16_t)mask; + return (uint32_t)mask; } template @@ -38046,103 +53150,88 @@ static inline uint64_t to_base64_mask(block64 *b, bool *error) { *error = 0; uint64_t m0 = to_base64_mask(&b->chunks[0], error); uint64_t m1 = to_base64_mask(&b->chunks[1], error); - uint64_t m2 = to_base64_mask(&b->chunks[2], error); - uint64_t m3 = to_base64_mask(&b->chunks[3], error); - return m0 | (m1 << 16) | (m2 << 32) | (m3 << 48); + return m0 | (m1 << 32); } static inline void copy_block(block64 *b, char *output) { - _mm_storeu_si128(reinterpret_cast<__m128i *>(output), b->chunks[0]); - _mm_storeu_si128(reinterpret_cast<__m128i *>(output + 16), b->chunks[1]); - _mm_storeu_si128(reinterpret_cast<__m128i *>(output + 32), b->chunks[2]); - _mm_storeu_si128(reinterpret_cast<__m128i *>(output + 48), b->chunks[3]); + __lasx_xvst(b->chunks[0], reinterpret_cast<__m256i *>(output), 0); + __lasx_xvst(b->chunks[1], reinterpret_cast<__m256i *>(output), 32); } static inline uint64_t compress_block(block64 *b, uint64_t mask, char *output) { uint64_t nmask = ~mask; - compress(b->chunks[0], uint16_t(mask), output); - compress(b->chunks[1], uint16_t(mask >> 16), - output + _mm_popcnt_u64(nmask & 0xFFFF)); - compress(b->chunks[2], uint16_t(mask >> 32), - output + _mm_popcnt_u64(nmask & 0xFFFFFFFF)); - compress(b->chunks[3], uint16_t(mask >> 48), - output + _mm_popcnt_u64(nmask & 0xFFFFFFFFFFFFULL)); - return _mm_popcnt_u64(nmask); + uint64_t count = + __lsx_vpickve2gr_d(__lsx_vpcnt_h(__lsx_vreplgr2vr_d(nmask)), 0); + uint16_t *count_ptr = (uint16_t *)&count; + compress(lasx_extracti128_lo(b->chunks[0]), uint16_t(mask), output); + compress(lasx_extracti128_hi(b->chunks[0]), uint16_t(mask >> 16), + output + count_ptr[0]); + compress(lasx_extracti128_lo(b->chunks[1]), uint16_t(mask >> 32), + output + count_ptr[0] + count_ptr[1]); + compress(lasx_extracti128_hi(b->chunks[1]), uint16_t(mask >> 48), + output + count_ptr[0] + count_ptr[1] + count_ptr[2]); + return count_ones(nmask); } // The caller of this function is responsible to ensure that there are 64 bytes // available from reading at src. The data is read into a block64 structure. static inline void load_block(block64 *b, const char *src) { - b->chunks[0] = _mm_loadu_si128(reinterpret_cast(src)); - b->chunks[1] = _mm_loadu_si128(reinterpret_cast(src + 16)); - b->chunks[2] = _mm_loadu_si128(reinterpret_cast(src + 32)); - b->chunks[3] = _mm_loadu_si128(reinterpret_cast(src + 48)); + b->chunks[0] = __lasx_xvld(reinterpret_cast(src), 0); + b->chunks[1] = __lasx_xvld(reinterpret_cast(src), 32); } // The caller of this function is responsible to ensure that there are 128 bytes // available from reading at src. The data is read into a block64 structure. static inline void load_block(block64 *b, const char16_t *src) { - __m128i m1 = _mm_loadu_si128(reinterpret_cast(src)); - __m128i m2 = _mm_loadu_si128(reinterpret_cast(src + 8)); - __m128i m3 = _mm_loadu_si128(reinterpret_cast(src + 16)); - __m128i m4 = _mm_loadu_si128(reinterpret_cast(src + 24)); - __m128i m5 = _mm_loadu_si128(reinterpret_cast(src + 32)); - __m128i m6 = _mm_loadu_si128(reinterpret_cast(src + 40)); - __m128i m7 = _mm_loadu_si128(reinterpret_cast(src + 48)); - __m128i m8 = _mm_loadu_si128(reinterpret_cast(src + 56)); - b->chunks[0] = _mm_packus_epi16(m1, m2); - b->chunks[1] = _mm_packus_epi16(m3, m4); - b->chunks[2] = _mm_packus_epi16(m5, m6); - b->chunks[3] = _mm_packus_epi16(m7, m8); + __m256i m1 = __lasx_xvld(reinterpret_cast(src), 0); + __m256i m2 = __lasx_xvld(reinterpret_cast(src), 32); + __m256i m3 = __lasx_xvld(reinterpret_cast(src), 64); + __m256i m4 = __lasx_xvld(reinterpret_cast(src), 96); + b->chunks[0] = __lasx_xvpermi_d(__lasx_xvssrlni_bu_h(m2, m1, 0), 0b11011000); + b->chunks[1] = __lasx_xvpermi_d(__lasx_xvssrlni_bu_h(m4, m3, 0), 0b11011000); } -static inline void base64_decode(char *out, __m128i str) { - // credit: aqrit - - const __m128i pack_shuffle = - _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1); +static inline void base64_decode(char *out, __m256i str) { + __m256i t0 = __lasx_xvor_v( + __lasx_xvslli_w(str, 26), + __lasx_xvslli_w(__lasx_xvand_v(str, __lasx_xvldi(-1758 /*0x0000FF00*/)), + 12)); + __m256i t1 = __lasx_xvsrli_w( + __lasx_xvand_v(str, __lasx_xvldi(-3521 /*0x003F0000*/)), 2); + __m256i t2 = __lasx_xvor_v(t0, t1); + __m256i t3 = __lasx_xvor_v(t2, __lasx_xvsrli_w(str, 16)); + __m256i pack_shuffle = ____m256i( + (__m128i)v16u8{3, 2, 1, 7, 6, 5, 11, 10, 9, 15, 14, 13, 0, 0, 0, 0}); + t3 = __lasx_xvshuf_b(t3, t3, (__m256i)pack_shuffle); - const __m128i t0 = _mm_maddubs_epi16(str, _mm_set1_epi32(0x01400140)); - const __m128i t1 = _mm_madd_epi16(t0, _mm_set1_epi32(0x00011000)); - const __m128i t2 = _mm_shuffle_epi8(t1, pack_shuffle); // Store the output: - // this writes 16 bytes, but we only need 12. - _mm_storeu_si128((__m128i *)out, t2); + __lsx_vst(lasx_extracti128_lo(t3), out, 0); + __lsx_vst(lasx_extracti128_hi(t3), out, 12); } // decode 64 bytes and output 48 bytes static inline void base64_decode_block(char *out, const char *src) { - base64_decode(out, _mm_loadu_si128(reinterpret_cast(src))); - base64_decode(out + 12, - _mm_loadu_si128(reinterpret_cast(src + 16))); + base64_decode(out, __lasx_xvld(reinterpret_cast(src), 0)); base64_decode(out + 24, - _mm_loadu_si128(reinterpret_cast(src + 32))); - base64_decode(out + 36, - _mm_loadu_si128(reinterpret_cast(src + 48))); + __lasx_xvld(reinterpret_cast(src), 32)); } + static inline void base64_decode_block_safe(char *out, const char *src) { - base64_decode(out, _mm_loadu_si128(reinterpret_cast(src))); - base64_decode(out + 12, - _mm_loadu_si128(reinterpret_cast(src + 16))); - base64_decode(out + 24, - _mm_loadu_si128(reinterpret_cast(src + 32))); - char buffer[16]; + base64_decode(out, __lasx_xvld(reinterpret_cast(src), 0)); + char buffer[32]; base64_decode(buffer, - _mm_loadu_si128(reinterpret_cast(src + 48))); - std::memcpy(out + 36, buffer, 12); + __lasx_xvld(reinterpret_cast(src), 32)); + std::memcpy(out + 24, buffer, 24); } + static inline void base64_decode_block(char *out, block64 *b) { base64_decode(out, b->chunks[0]); - base64_decode(out + 12, b->chunks[1]); - base64_decode(out + 24, b->chunks[2]); - base64_decode(out + 36, b->chunks[3]); + base64_decode(out + 24, b->chunks[1]); } static inline void base64_decode_block_safe(char *out, block64 *b) { base64_decode(out, b->chunks[0]); - base64_decode(out + 12, b->chunks[1]); - base64_decode(out + 24, b->chunks[2]); - char buffer[16]; - base64_decode(buffer, b->chunks[3]); - std::memcpy(out + 36, buffer, 12); + char buffer[32]; + base64_decode(buffer, b->chunks[1]); + std::memcpy(out + 24, buffer, 24); } template @@ -38189,7 +53278,7 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, const chartype *const srcend = src + srclen; constexpr size_t block_size = 6; - static_assert(block_size >= 2, "block should of size 2 or more"); + static_assert(block_size >= 2, "block_size must be at least two"); char buffer[block_size * 64]; char *bufferptr = buffer; if (srclen >= 64) { @@ -38248,6 +53337,7 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, // time, otherwise, we should just decode directly. int last_block = (int)((bufferptr - buffer_start) % 64); if (last_block != 0 && srcend - src + last_block >= 64) { + while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { uint8_t val = to_base64[uint8_t(*src)]; *bufferptr = char(val); @@ -38307,9 +53397,9 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, if (src < srcend + equalsigns) { full_result r = scalar::base64::base64_tail_decode( dst, src, srcend - src, equalsigns, options, last_chunk_options); + r.input_count += size_t(src - srcinit); if (r.error == error_code::INVALID_BASE64_CHARACTER || r.error == error_code::BASE64_EXTRA_BITS) { - r.input_count += size_t(src - srcinit); return r; } else { r.output_count += size_t(dst - dstinit); @@ -38333,15 +53423,15 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, } return {SUCCESS, srclen, size_t(dst - dstinit)}; } -/* end file src/westmere/sse_base64.cpp */ +/* end file src/lasx/lasx_base64.cpp */ -} // unnamed namespace -} // namespace westmere +} // namespace +} // namespace lasx } // namespace simdutf /* begin file src/generic/buf_block_reader.h */ namespace simdutf { -namespace westmere { +namespace lasx { namespace { // Walks through a buffer in block-sized increments, loading the last part with @@ -38447,12 +53537,12 @@ simdutf_really_inline void buf_block_reader::advance() { } } // unnamed namespace -} // namespace westmere +} // namespace lasx } // namespace simdutf /* end file src/generic/buf_block_reader.h */ /* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ namespace simdutf { -namespace westmere { +namespace lasx { namespace { namespace utf8_validation { @@ -38672,12 +53762,12 @@ struct utf8_checker { using utf8_validation::utf8_checker; } // unnamed namespace -} // namespace westmere +} // namespace lasx } // namespace simdutf /* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ /* begin file src/generic/utf8_validation/utf8_validator.h */ namespace simdutf { -namespace westmere { +namespace lasx { namespace { namespace utf8_validation { @@ -38806,103 +53896,31 @@ result generic_validate_ascii_with_errors(const uint8_t *input, size_t length) { } result generic_validate_ascii_with_errors(const char *input, size_t length) { - return generic_validate_ascii_with_errors( - reinterpret_cast(input), length); -} - -} // namespace utf8_validation -} // unnamed namespace -} // namespace westmere -} // namespace simdutf -/* end file src/generic/utf8_validation/utf8_validator.h */ -// transcoding from UTF-8 to UTF-16 -/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ - -namespace simdutf { -namespace westmere { -namespace { -namespace utf8_to_utf16 { - -using namespace simd; - -template -simdutf_warn_unused size_t convert_valid(const char *input, size_t size, - char16_t *utf16_output) noexcept { - // The implementation is not specific to haswell and should be moved to the - // generic directory. - size_t pos = 0; - char16_t *start{utf16_output}; - const size_t safety_margin = 16; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - // this loop could be unrolled further. For example, we could process the - // mask far more than 64 bytes. - simd8x64 in(reinterpret_cast(input + pos)); - if (in.is_ascii()) { - in.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // Slow path. We hope that the compiler will recognize that this is a slow - // path. Anything that is not a continuation mask is a 'leading byte', - // that is, the start of a new code point. - uint64_t utf8_continuation_mask = in.lt(-65 + 1); - // -65 is 0b10111111 in two-complement's, so largest possible continuation - // byte - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - // The *start* of code points is not so useful, rather, we want the *end* - // of code points. - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times when using solely - // the slow/regular path, and at least four times if there are fast paths. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - // - // Thus we may allow convert_masked_utf8_to_utf16 to process - // more bytes at a time under a fast-path mode where 16 bytes - // are consumed at once (e.g., when encountering ASCII). - size_t consumed = convert_masked_utf8_to_utf16( - input + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - utf16_output += scalar::utf8_to_utf16::convert_valid( - input + pos, size - pos, utf16_output); - return utf16_output - start; + return generic_validate_ascii_with_errors( + reinterpret_cast(input), length); } -} // namespace utf8_to_utf16 +} // namespace utf8_validation } // unnamed namespace -} // namespace westmere +} // namespace lasx } // namespace simdutf -/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ -/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ +/* end file src/generic/utf8_validation/utf8_validator.h */ + +// transcoding from UTF-8 to Latin 1 +/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ namespace simdutf { -namespace westmere { +namespace lasx { namespace { -namespace utf8_to_utf16 { +namespace utf8_to_latin1 { using namespace simd; simdutf_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { + // For UTF-8 to Latin 1, we can allow any ASCII character, and any + // continuation byte, but the non-ASCII leading bytes must be 0b11000011 or + // 0b11000010 and nothing else. + // // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) // Bit 1 = Too Long (ASCII followed by continuation) // Bit 2 = Overlong 3-byte @@ -38929,6 +53947,7 @@ check_special_cases(const simd8 input, const simd8 prev1) { // 1111011_ 1000____ // 11111___ 1000____ constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ + constexpr const uint8_t FORBIDDEN = 0xff; const simd8 byte_1_high = prev1.shr<4>().lookup_16( // 0_______ ________ @@ -38939,11 +53958,11 @@ check_special_cases(const simd8 input, const simd8 prev1) { // 1100____ ________ TOO_SHORT | OVERLONG_2, // 1101____ ________ - TOO_SHORT, + FORBIDDEN, // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, + FORBIDDEN, // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); + FORBIDDEN); constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . const simd8 byte_1_low = @@ -38957,23 +53976,16 @@ check_special_cases(const simd8 input, const simd8 prev1) { CARRY, CARRY, // ____0100 ________ - CARRY | TOO_LARGE, + FORBIDDEN, // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, + FORBIDDEN, // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, + FORBIDDEN, FORBIDDEN, // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, + FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); + FORBIDDEN, FORBIDDEN, FORBIDDEN); const simd8 byte_2_high = input.shr<4>().lookup_16( // ________ 0_______ TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, @@ -38992,17 +54004,6 @@ check_special_cases(const simd8 input, const simd8 prev1) { TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); return (byte_1_high & byte_1_low & byte_2_high); } -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} struct validating_transcoder { // If this is nonzero, there has been a UTF-8 error. @@ -39018,25 +54019,24 @@ struct validating_transcoder { // lead bytes (2, 3, 4-byte leads become large positive numbers instead of // small negative numbers) simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); + this->error |= check_special_cases(input, prev1); } - template simdutf_really_inline size_t convert(const char *in, size_t size, - char16_t *utf16_output) { + char *latin1_output) { size_t pos = 0; - char16_t *start{utf16_output}; + char *start{latin1_output}; // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the // last 16 bytes, and if the data is valid, then it is entirely safe because // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. + // back from the end counting 16 leading bytes, to give us a good margin. size_t leading_byte = 0; size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); + for (; margin > 0 && leading_byte < 16; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... } // If the input is long enough, then we have that margin-1 is the eight last // leading byte. @@ -39044,8 +54044,8 @@ struct validating_transcoder { while (pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if (input.is_ascii()) { - input.store_ascii_as_utf16(utf16_output); - utf16_output += 64; + input.store((int8_t *)latin1_output); + latin1_output += 64; pos += 64; } else { // you might think that a for-loop would work, but under Visual Studio, @@ -39064,10 +54064,9 @@ struct validating_transcoder { this->check_utf8_bytes(input.chunks[2], input.chunks[1]); this->check_utf8_bytes(input.chunks[3], input.chunks[2]); } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (utf8_continuation_mask & 1) { - return 0; // error - } + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. uint64_t utf8_leading_mask = ~utf8_continuation_mask; uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; // We process in blocks of up to 12 bytes except possibly @@ -39085,8 +54084,8 @@ struct validating_transcoder { // for this section of the code. Hence, there is a limit // to how much we can further increase this latency before // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf16( - in + pos, utf8_end_of_code_point_mask, utf16_output); + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); pos += consumed; utf8_end_of_code_point_mask >>= consumed; } @@ -39100,23 +54099,22 @@ struct validating_transcoder { return 0; } if (pos < size) { - size_t howmany = scalar::utf8_to_utf16::convert( - in + pos, size - pos, utf16_output); + size_t howmany = + scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output); if (howmany == 0) { return 0; } - utf16_output += howmany; + latin1_output += howmany; } - return utf16_output - start; + return latin1_output - start; } - template simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char16_t *utf16_output) { + char *latin1_output) { size_t pos = 0; - char16_t *start{utf16_output}; + char *start{latin1_output}; // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the + // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the // last 16 bytes, and if the data is valid, then it is entirely safe because // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot // generally assume that you have valid UTF-8 input, so we are going to go @@ -39132,8 +54130,8 @@ struct validating_transcoder { while (pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if (input.is_ascii()) { - input.store_ascii_as_utf16(utf16_output); - utf16_output += 64; + input.store((int8_t *)latin1_output); + latin1_output += 64; pos += 64; } else { // you might think that a for-loop would work, but under Visual Studio, @@ -39152,17 +54150,16 @@ struct validating_transcoder { this->check_utf8_bytes(input.chunks[2], input.chunks[1]); this->check_utf8_bytes(input.chunks[3], input.chunks[2]); } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (errors() || (utf8_continuation_mask & 1)) { + if (errors()) { // rewind_and_convert_with_errors will seek a potential error from // in+pos onward, with the ability to go back up to pos bytes, and // read size-pos bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); res.count += pos; return res; } + uint64_t utf8_continuation_mask = input.lt(-65 + 1); uint64_t utf8_leading_mask = ~utf8_continuation_mask; uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; // We process in blocks of up to 12 bytes except possibly @@ -39180,8 +54177,8 @@ struct validating_transcoder { // for this section of the code. Hence, there is a limit // to how much we can further increase this latency before // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf16( - in + pos, utf8_end_of_code_point_mask, utf16_output); + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); pos += consumed; utf8_end_of_code_point_mask >>= consumed; } @@ -39195,9 +54192,8 @@ struct validating_transcoder { // rewind_and_convert_with_errors will seek a potential error from in+pos // onward, with the ability to go back up to pos bytes, and read size-pos // bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); res.count += pos; return res; } @@ -39205,17 +54201,16 @@ struct validating_transcoder { // rewind_and_convert_with_errors will seek a potential error from in+pos // onward, with the ability to go back up to pos bytes, and read size-pos // bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); + result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( + pos, in + pos, size - pos, latin1_output); if (res.error) { // In case of error, we want the error position res.count += pos; return res; } else { // In case of success, we want the number of word written - utf16_output += res.count; + latin1_output += res.count; } } - return result(error_code::SUCCESS, utf16_output - start); + return result(error_code::SUCCESS, latin1_output - start); } simdutf_really_inline bool errors() const { @@ -39223,63 +54218,176 @@ struct validating_transcoder { } }; // struct utf8_checker -} // namespace utf8_to_utf16 +} // namespace utf8_to_latin1 } // unnamed namespace -} // namespace westmere +} // namespace lasx } // namespace simdutf -/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ -// transcoding from UTF-8 to UTF-32 -/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ +/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ namespace simdutf { -namespace westmere { +namespace lasx { namespace { -namespace utf8_to_utf32 { +namespace utf8_to_latin1 { +using namespace simd; + +simdutf_really_inline size_t convert_valid(const char *in, size_t size, + char *latin1_output) { + size_t pos = 0; + char *start{latin1_output}; + // In the worst case, we have the haswell kernel which can cause an overflow + // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last + // 16 bytes, and if the data is valid, then it is entirely safe because 16 + // UTF-8 bytes generate much more than 8 bytes. However, you cannot generally + // assume that you have valid UTF-8 input, so we are going to go back from the + // end counting 8 leading bytes, to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > + -65); // twos complement of -65 is 1011 1111 ... + } + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 input(reinterpret_cast(in + pos)); + if (input.is_ascii()) { + input.store((int8_t *)latin1_output); + latin1_output += 64; + pos += 64; + } else { + // you might think that a for-loop would work, but under Visual Studio, it + // is not good enough. + uint64_t utf8_continuation_mask = + input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in + // this case, we also have ASCII to account for. + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. + size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times. + while (pos < max_starting_point) { + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + size_t consumed = convert_masked_utf8_to_latin1( + in + pos, utf8_end_of_code_point_mask, latin1_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. + } + } + if (pos < size) { + size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, + latin1_output); + latin1_output += howmany; + } + return latin1_output - start; +} + +} // namespace utf8_to_latin1 +} // namespace +} // namespace lasx +} // namespace simdutf + // namespace simdutf +/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +// transcoding from UTF-8 to UTF-16 +/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ + +namespace simdutf { +namespace lasx { +namespace { +namespace utf8_to_utf16 { using namespace simd; +template simdutf_warn_unused size_t convert_valid(const char *input, size_t size, - char32_t *utf32_output) noexcept { + char16_t *utf16_output) noexcept { + // The implementation is not specific to haswell and should be moved to the + // generic directory. size_t pos = 0; - char32_t *start{utf32_output}; + char16_t *start{utf16_output}; const size_t safety_margin = 16; // to avoid overruns! while (pos + 64 + safety_margin <= size) { + // this loop could be unrolled further. For example, we could process the + // mask far more than 64 bytes. simd8x64 in(reinterpret_cast(input + pos)); if (in.is_ascii()) { - in.store_ascii_as_utf32(utf32_output); - utf32_output += 64; + in.store_ascii_as_utf16(utf16_output); + utf16_output += 64; pos += 64; } else { + // Slow path. We hope that the compiler will recognize that this is a slow + // path. Anything that is not a continuation mask is a 'leading byte', + // that is, the start of a new code point. + uint64_t utf8_continuation_mask = in.lt(-65 + 1); // -65 is 0b10111111 in two-complement's, so largest possible continuation // byte - uint64_t utf8_continuation_mask = in.lt(-65 + 1); uint64_t utf8_leading_mask = ~utf8_continuation_mask; + // The *start* of code points is not so useful, rather, we want the *end* + // of code points. uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + // We process in blocks of up to 12 bytes except possibly + // for fast paths which may process up to 16 bytes. For the + // slow path to work, we should have at least 12 input bytes left. size_t max_starting_point = (pos + 64) - 12; + // Next loop is going to run at least five times when using solely + // the slow/regular path, and at least four times if there are fast paths. while (pos < max_starting_point) { - size_t consumed = convert_masked_utf8_to_utf32( - input + pos, utf8_end_of_code_point_mask, utf32_output); + // Performance note: our ability to compute 'consumed' and + // then shift and recompute is critical. If there is a + // latency of, say, 4 cycles on getting 'consumed', then + // the inner loop might have a total latency of about 6 cycles. + // Yet we process between 6 to 12 inputs bytes, thus we get + // a speed limit between 1 cycle/byte and 0.5 cycle/byte + // for this section of the code. Hence, there is a limit + // to how much we can further increase this latency before + // it seriously harms performance. + // + // Thus we may allow convert_masked_utf8_to_utf16 to process + // more bytes at a time under a fast-path mode where 16 bytes + // are consumed at once (e.g., when encountering ASCII). + size_t consumed = convert_masked_utf8_to_utf16( + input + pos, utf8_end_of_code_point_mask, utf16_output); pos += consumed; utf8_end_of_code_point_mask >>= consumed; } + // At this point there may remain between 0 and 12 bytes in the + // 64-byte block. These bytes will be processed again. So we have an + // 80% efficiency (in the worst case). In practice we expect an + // 85% to 90% efficiency. } } - utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, - utf32_output); - return utf32_output - start; + utf16_output += scalar::utf8_to_utf16::convert_valid( + input + pos, size - pos, utf16_output); + return utf16_output - start; } -} // namespace utf8_to_utf32 +} // namespace utf8_to_utf16 } // unnamed namespace -} // namespace westmere +} // namespace lasx } // namespace simdutf -/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ -/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ +/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ +/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ namespace simdutf { -namespace westmere { +namespace lasx { namespace { -namespace utf8_to_utf32 { +namespace utf8_to_utf16 { using namespace simd; simdutf_really_inline simd8 @@ -39403,29 +54511,30 @@ struct validating_transcoder { this->error |= check_multibyte_lengths(input, prev_input, sc); } + template simdutf_really_inline size_t convert(const char *in, size_t size, - char32_t *utf32_output) { + char16_t *utf16_output) { size_t pos = 0; - char32_t *start{utf32_output}; + char16_t *start{utf16_output}; // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 words when calling convert_masked_utf8_to_utf32. If you skip the + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the // last 16 bytes, and if the data is valid, then it is entirely safe because // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 16 leading bytes, to give us a good margin. + // back from the end counting 8 leading bytes, to give us a good margin. size_t leading_byte = 0; size_t margin = size; for (; margin > 0 && leading_byte < 8; margin--) { leading_byte += (int8_t(in[margin - 1]) > -65); } - // If the input is long enough, then we have that margin-1 is the fourth - // last leading byte. + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. const size_t safety_margin = size - margin + 1; // to avoid overruns! while (pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if (input.is_ascii()) { - input.store_ascii_as_utf32(utf32_output); - utf32_output += 64; + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; pos += 64; } else { // you might think that a for-loop would work, but under Visual Studio, @@ -39446,7 +54555,7 @@ struct validating_transcoder { } uint64_t utf8_continuation_mask = input.lt(-65 + 1); if (utf8_continuation_mask & 1) { - return 0; // we have an error + return 0; // error } uint64_t utf8_leading_mask = ~utf8_continuation_mask; uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; @@ -39465,8 +54574,8 @@ struct validating_transcoder { // for this section of the code. Hence, there is a limit // to how much we can further increase this latency before // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf32( - in + pos, utf8_end_of_code_point_mask, utf32_output); + size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); pos += consumed; utf8_end_of_code_point_mask >>= consumed; } @@ -39480,22 +54589,23 @@ struct validating_transcoder { return 0; } if (pos < size) { - size_t howmany = - scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output); + size_t howmany = scalar::utf8_to_utf16::convert( + in + pos, size - pos, utf16_output); if (howmany == 0) { return 0; } - utf32_output += howmany; + utf16_output += howmany; } - return utf32_output - start; + return utf16_output - start; } + template simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char32_t *utf32_output) { + char16_t *utf16_output) { size_t pos = 0; - char32_t *start{utf32_output}; + char16_t *start{utf16_output}; // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the + // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the // last 16 bytes, and if the data is valid, then it is entirely safe because // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot // generally assume that you have valid UTF-8 input, so we are going to go @@ -39505,14 +54615,14 @@ struct validating_transcoder { for (; margin > 0 && leading_byte < 8; margin--) { leading_byte += (int8_t(in[margin - 1]) > -65); } - // If the input is long enough, then we have that margin-1 is the fourth - // last leading byte. + // If the input is long enough, then we have that margin-1 is the eight last + // leading byte. const size_t safety_margin = size - margin + 1; // to avoid overruns! while (pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if (input.is_ascii()) { - input.store_ascii_as_utf32(utf32_output); - utf32_output += 64; + input.store_ascii_as_utf16(utf16_output); + utf16_output += 64; pos += 64; } else { // you might think that a for-loop would work, but under Visual Studio, @@ -39533,8 +54643,12 @@ struct validating_transcoder { } uint64_t utf8_continuation_mask = input.lt(-65 + 1); if (errors() || (utf8_continuation_mask & 1)) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); + // rewind_and_convert_with_errors will seek a potential error from + // in+pos onward, with the ability to go back up to pos bytes, and + // read size-pos bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); res.count += pos; return res; } @@ -39555,8 +54669,8 @@ struct validating_transcoder { // for this section of the code. Hence, there is a limit // to how much we can further increase this latency before // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf32( - in + pos, utf8_end_of_code_point_mask, utf32_output); + size_t consumed = convert_masked_utf8_to_utf16( + in + pos, utf8_end_of_code_point_mask, utf16_output); pos += consumed; utf8_end_of_code_point_mask >>= consumed; } @@ -39567,22 +54681,30 @@ struct validating_transcoder { } } if (errors()) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); res.count += pos; return res; } if (pos < size) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); + // rewind_and_convert_with_errors will seek a potential error from in+pos + // onward, with the ability to go back up to pos bytes, and read size-pos + // bytes forward. + result res = + scalar::utf8_to_utf16::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf16_output); if (res.error) { // In case of error, we want the error position res.count += pos; return res; } else { // In case of success, we want the number of word written - utf32_output += res.count; + utf16_output += res.count; } } - return result(error_code::SUCCESS, utf32_output - start); + return result(error_code::SUCCESS, utf16_output - start); } simdutf_really_inline bool errors() const { @@ -39590,143 +54712,67 @@ struct validating_transcoder { } }; // struct utf8_checker -} // namespace utf8_to_utf32 +} // namespace utf8_to_utf16 } // unnamed namespace -} // namespace westmere +} // namespace lasx } // namespace simdutf -/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ -// other functions -/* begin file src/generic/utf8.h */ +/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ +// transcoding from UTF-8 to UTF-32 +/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ namespace simdutf { -namespace westmere { +namespace lasx { namespace { -namespace utf8 { +namespace utf8_to_utf32 { using namespace simd; -simdutf_really_inline size_t count_code_points(const char *in, size_t size) { - size_t pos = 0; - size_t count = 0; - for (; pos + 64 <= size; pos += 64) { - simd8x64 input(reinterpret_cast(in + pos)); - uint64_t utf8_continuation_mask = input.gt(-65); - count += count_ones(utf8_continuation_mask); - } - return count + scalar::utf8::count_code_points(in + pos, size - pos); -} - -simdutf_really_inline size_t utf16_length_from_utf8(const char *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - // This algorithm could no doubt be improved! - for (; pos + 64 <= size; pos += 64) { - simd8x64 input(reinterpret_cast(in + pos)); - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - // We count one word for anything that is not a continuation (so - // leading bytes). - count += 64 - count_ones(utf8_continuation_mask); - int64_t utf8_4byte = input.gteq_unsigned(240); - count += count_ones(utf8_4byte); - } - return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); -} -} // namespace utf8 -} // unnamed namespace -} // namespace westmere -} // namespace simdutf -/* end file src/generic/utf8.h */ -/* begin file src/generic/utf16.h */ -namespace simdutf { -namespace westmere { -namespace { -namespace utf16 { - -template -simdutf_really_inline size_t count_code_points(const char16_t *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - for (; pos < size / 32 * 32; pos += 32) { - simd16x32 input(reinterpret_cast(in + pos)); - if (!match_system(big_endian)) { - input.swap_bytes(); - } - uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF); - count += count_ones(not_pair) / 2; - } - return count + - scalar::utf16::count_code_points(in + pos, size - pos); -} - -template -simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, - size_t size) { +simdutf_warn_unused size_t convert_valid(const char *input, size_t size, + char32_t *utf32_output) noexcept { size_t pos = 0; - size_t count = 0; - // This algorithm could no doubt be improved! - for (; pos < size / 32 * 32; pos += 32) { - simd16x32 input(reinterpret_cast(in + pos)); - if (!match_system(big_endian)) { - input.swap_bytes(); + char32_t *start{utf32_output}; + const size_t safety_margin = 16; // to avoid overruns! + while (pos + 64 + safety_margin <= size) { + simd8x64 in(reinterpret_cast(input + pos)); + if (in.is_ascii()) { + in.store_ascii_as_utf32(utf32_output); + utf32_output += 64; + pos += 64; + } else { + // -65 is 0b10111111 in two-complement's, so largest possible continuation + // byte + uint64_t utf8_continuation_mask = in.lt(-65 + 1); + uint64_t utf8_leading_mask = ~utf8_continuation_mask; + uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; + size_t max_starting_point = (pos + 64) - 12; + while (pos < max_starting_point) { + size_t consumed = convert_masked_utf8_to_utf32( + input + pos, utf8_end_of_code_point_mask, utf32_output); + pos += consumed; + utf8_end_of_code_point_mask >>= consumed; + } } - uint64_t ascii_mask = input.lteq(0x7F); - uint64_t twobyte_mask = input.lteq(0x7FF); - uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF); - - size_t ascii_count = count_ones(ascii_mask) / 2; - size_t twobyte_count = count_ones(twobyte_mask & ~ascii_mask) / 2; - size_t threebyte_count = count_ones(not_pair_mask & ~twobyte_mask) / 2; - size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2; - count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + - ascii_count; - } - return count + scalar::utf16::utf8_length_from_utf16(in + pos, - size - pos); -} - -template -simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, - size_t size) { - return count_code_points(in, size); -} - -simdutf_really_inline void -change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { - size_t pos = 0; - - while (pos < size / 32 * 32) { - simd16x32 input(reinterpret_cast(in + pos)); - input.swap_bytes(); - input.store(reinterpret_cast(output)); - pos += 32; - output += 32; } - - scalar::utf16::change_endianness_utf16(in + pos, size - pos, output); + utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, + utf32_output); + return utf32_output - start; } -} // namespace utf16 +} // namespace utf8_to_utf32 } // unnamed namespace -} // namespace westmere +} // namespace lasx } // namespace simdutf -/* end file src/generic/utf16.h */ -// transcoding from UTF-8 to Latin 1 -/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ +/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ +/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ namespace simdutf { -namespace westmere { +namespace lasx { namespace { -namespace utf8_to_latin1 { +namespace utf8_to_utf32 { using namespace simd; simdutf_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { - // For UTF-8 to Latin 1, we can allow any ASCII character, and any - // continuation byte, but the non-ASCII leading bytes must be 0b11000011 or - // 0b11000010 and nothing else. - // // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) // Bit 1 = Too Long (ASCII followed by continuation) // Bit 2 = Overlong 3-byte @@ -39753,7 +54799,6 @@ check_special_cases(const simd8 input, const simd8 prev1) { // 1111011_ 1000____ // 11111___ 1000____ constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - constexpr const uint8_t FORBIDDEN = 0xff; const simd8 byte_1_high = prev1.shr<4>().lookup_16( // 0_______ ________ @@ -39764,11 +54809,11 @@ check_special_cases(const simd8 input, const simd8 prev1) { // 1100____ ________ TOO_SHORT | OVERLONG_2, // 1101____ ________ - FORBIDDEN, + TOO_SHORT, // 1110____ ________ - FORBIDDEN, + TOO_SHORT | OVERLONG_3 | SURROGATE, // 1111____ ________ - FORBIDDEN); + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . const simd8 byte_1_low = @@ -39782,16 +54827,23 @@ check_special_cases(const simd8 input, const simd8 prev1) { CARRY, CARRY, // ____0100 ________ - FORBIDDEN, + CARRY | TOO_LARGE, // ____0101 ________ - FORBIDDEN, + CARRY | TOO_LARGE | TOO_LARGE_1000, // ____011_ ________ - FORBIDDEN, FORBIDDEN, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, // ____1___ ________ - FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, // ____1101 ________ - FORBIDDEN, FORBIDDEN, FORBIDDEN); + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000); const simd8 byte_2_high = input.shr<4>().lookup_16( // ________ 0_______ TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, @@ -39810,6 +54862,17 @@ check_special_cases(const simd8 input, const simd8 prev1) { TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); return (byte_1_high & byte_1_low & byte_2_high); } +simdutf_really_inline simd8 +check_multibyte_lengths(const simd8 input, + const simd8 prev_input, + const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = + simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; +} struct validating_transcoder { // If this is nonzero, there has been a UTF-8 error. @@ -39825,33 +54888,33 @@ struct validating_transcoder { // lead bytes (2, 3, 4-byte leads become large positive numbers instead of // small negative numbers) simd8 prev1 = input.prev<1>(prev_input); - this->error |= check_special_cases(input, prev1); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); } simdutf_really_inline size_t convert(const char *in, size_t size, - char *latin1_output) { + char32_t *utf32_output) { size_t pos = 0; - char *start{latin1_output}; + char32_t *start{utf32_output}; // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the + // of 8 words when calling convert_masked_utf8_to_utf32. If you skip the // last 16 bytes, and if the data is valid, then it is entirely safe because // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot // generally assume that you have valid UTF-8 input, so we are going to go // back from the end counting 16 leading bytes, to give us a good margin. size_t leading_byte = 0; size_t margin = size; - for (; margin > 0 && leading_byte < 16; margin--) { - leading_byte += (int8_t(in[margin - 1]) > - -65); // twos complement of -65 is 1011 1111 ... + for (; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin - 1]) > -65); } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. + // If the input is long enough, then we have that margin-1 is the fourth + // last leading byte. const size_t safety_margin = size - margin + 1; // to avoid overruns! while (pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; pos += 64; } else { // you might think that a for-loop would work, but under Visual Studio, @@ -39870,9 +54933,10 @@ struct validating_transcoder { this->check_utf8_bytes(input.chunks[2], input.chunks[1]); this->check_utf8_bytes(input.chunks[3], input.chunks[2]); } - uint64_t utf8_continuation_mask = - input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in - // this case, we also have ASCII to account for. + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (utf8_continuation_mask & 1) { + return 0; // we have an error + } uint64_t utf8_leading_mask = ~utf8_continuation_mask; uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; // We process in blocks of up to 12 bytes except possibly @@ -39890,8 +54954,8 @@ struct validating_transcoder { // for this section of the code. Hence, there is a limit // to how much we can further increase this latency before // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); pos += consumed; utf8_end_of_code_point_mask >>= consumed; } @@ -39906,21 +54970,21 @@ struct validating_transcoder { } if (pos < size) { size_t howmany = - scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output); + scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output); if (howmany == 0) { return 0; } - latin1_output += howmany; + utf32_output += howmany; } - return latin1_output - start; + return utf32_output - start; } simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char *latin1_output) { + char32_t *utf32_output) { size_t pos = 0; - char *start{latin1_output}; + char32_t *start{utf32_output}; // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the + // of 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the // last 16 bytes, and if the data is valid, then it is entirely safe because // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot // generally assume that you have valid UTF-8 input, so we are going to go @@ -39930,14 +54994,14 @@ struct validating_transcoder { for (; margin > 0 && leading_byte < 8; margin--) { leading_byte += (int8_t(in[margin - 1]) > -65); } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. + // If the input is long enough, then we have that margin-1 is the fourth + // last leading byte. const size_t safety_margin = size - margin + 1; // to avoid overruns! while (pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; + input.store_ascii_as_utf32(utf32_output); + utf32_output += 64; pos += 64; } else { // you might think that a for-loop would work, but under Visual Studio, @@ -39956,16 +55020,13 @@ struct validating_transcoder { this->check_utf8_bytes(input.chunks[2], input.chunks[1]); this->check_utf8_bytes(input.chunks[3], input.chunks[2]); } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from - // in+pos onward, with the ability to go back up to pos bytes, and - // read size-pos bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + if (errors() || (utf8_continuation_mask & 1)) { + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); res.count += pos; return res; } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); uint64_t utf8_leading_mask = ~utf8_continuation_mask; uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; // We process in blocks of up to 12 bytes except possibly @@ -39983,8 +55044,8 @@ struct validating_transcoder { // for this section of the code. Hence, there is a limit // to how much we can further increase this latency before // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); + size_t consumed = convert_masked_utf8_to_utf32( + in + pos, utf8_end_of_code_point_mask, utf32_output); pos += consumed; utf8_end_of_code_point_mask >>= consumed; } @@ -39995,28 +55056,22 @@ struct validating_transcoder { } } if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); res.count += pos; return res; } if (pos < size) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); + result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( + pos, in + pos, size - pos, utf32_output); if (res.error) { // In case of error, we want the error position res.count += pos; return res; } else { // In case of success, we want the number of word written - latin1_output += res.count; + utf32_output += res.count; } } - return result(error_code::SUCCESS, latin1_output - start); + return result(error_code::SUCCESS, utf32_output - start); } simdutf_really_inline bool errors() const { @@ -40024,99 +55079,136 @@ struct validating_transcoder { } }; // struct utf8_checker -} // namespace utf8_to_latin1 +} // namespace utf8_to_utf32 } // unnamed namespace -} // namespace westmere +} // namespace lasx } // namespace simdutf -/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ -/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ + + +// other functions +/* begin file src/generic/utf8.h */ namespace simdutf { -namespace westmere { +namespace lasx { namespace { -namespace utf8_to_latin1 { +namespace utf8 { + using namespace simd; -simdutf_really_inline size_t convert_valid(const char *in, size_t size, - char *latin1_output) { +simdutf_really_inline size_t count_code_points(const char *in, size_t size) { size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last - // 16 bytes, and if the data is valid, then it is entirely safe because 16 - // UTF-8 bytes generate much more than 8 bytes. However, you cannot generally - // assume that you have valid UTF-8 input, so we are going to go back from the - // end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > - -65); // twos complement of -65 is 1011 1111 ... + size_t count = 0; + for (; pos + 64 <= size; pos += 64) { + simd8x64 input(reinterpret_cast(in + pos)); + uint64_t utf8_continuation_mask = input.gt(-65); + count += count_ones(utf8_continuation_mask); } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { + return count + scalar::utf8::count_code_points(in + pos, size - pos); +} + +simdutf_really_inline size_t utf16_length_from_utf8(const char *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! + for (; pos + 64 <= size; pos += 64) { simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, it - // is not good enough. - uint64_t utf8_continuation_mask = - input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in - // this case, we also have ASCII to account for. - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. + uint64_t utf8_continuation_mask = input.lt(-65 + 1); + // We count one word for anything that is not a continuation (so + // leading bytes). + count += 64 - count_ones(utf8_continuation_mask); + int64_t utf8_4byte = input.gteq_unsigned(240); + count += count_ones(utf8_4byte); + } + return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); +} +} // namespace utf8 +} // unnamed namespace +} // namespace lasx +} // namespace simdutf +/* end file src/generic/utf8.h */ +/* begin file src/generic/utf16.h */ +namespace simdutf { +namespace lasx { +namespace { +namespace utf16 { + +template +simdutf_really_inline size_t count_code_points(const char16_t *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input(reinterpret_cast(in + pos)); + if (!match_system(big_endian)) { + input.swap_bytes(); } + uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF); + count += count_ones(not_pair) / 2; } - if (pos < size) { - size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, - latin1_output); - latin1_output += howmany; + return count + + scalar::utf16::count_code_points(in + pos, size - pos); +} + +template +simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, + size_t size) { + size_t pos = 0; + size_t count = 0; + // This algorithm could no doubt be improved! + for (; pos < size / 32 * 32; pos += 32) { + simd16x32 input(reinterpret_cast(in + pos)); + if (!match_system(big_endian)) { + input.swap_bytes(); + } + uint64_t ascii_mask = input.lteq(0x7F); + uint64_t twobyte_mask = input.lteq(0x7FF); + uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF); + + size_t ascii_count = count_ones(ascii_mask) / 2; + size_t twobyte_count = count_ones(twobyte_mask & ~ascii_mask) / 2; + size_t threebyte_count = count_ones(not_pair_mask & ~twobyte_mask) / 2; + size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2; + count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + + ascii_count; } - return latin1_output - start; + return count + scalar::utf16::utf8_length_from_utf16(in + pos, + size - pos); } -} // namespace utf8_to_latin1 -} // namespace -} // namespace westmere +template +simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, + size_t size) { + return count_code_points(in, size); +} + +simdutf_really_inline void +change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { + size_t pos = 0; + + while (pos < size / 32 * 32) { + simd16x32 input(reinterpret_cast(in + pos)); + input.swap_bytes(); + input.store(reinterpret_cast(output)); + pos += 32; + output += 32; + } + + scalar::utf16::change_endianness_utf16(in + pos, size - pos, output); +} + +} // namespace utf16 +} // unnamed namespace +} // namespace lasx } // namespace simdutf - // namespace simdutf -/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ +/* end file src/generic/utf16.h */ // // Implementation-specific overrides // - namespace simdutf { -namespace westmere { +namespace lasx { simdutf_warn_unused int implementation::detect_encodings(const char *input, @@ -40147,34 +55239,32 @@ implementation::detect_encodings(const char *input, simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return westmere::utf8_validation::generic_validate_utf8(buf, len); + return lasx::utf8_validation::generic_validate_utf8(buf, len); } simdutf_warn_unused result implementation::validate_utf8_with_errors( const char *buf, size_t len) const noexcept { - return westmere::utf8_validation::generic_validate_utf8_with_errors(buf, len); + return lasx::utf8_validation::generic_validate_utf8_with_errors(buf, len); } simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return westmere::utf8_validation::generic_validate_ascii(buf, len); + return lasx::utf8_validation::generic_validate_ascii(buf, len); } simdutf_warn_unused result implementation::validate_ascii_with_errors( const char *buf, size_t len) const noexcept { - return westmere::utf8_validation::generic_validate_ascii_with_errors(buf, - len); + return lasx::utf8_validation::generic_validate_ascii_with_errors(buf, len); } simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept { if (simdutf_unlikely(len == 0)) { - // empty input is valid UTF-16. protect the implementation from - // handling nullptr + // empty input is valid. protected the implementation from nullptr. return true; } - const char16_t *tail = sse_validate_utf16(buf, len); + const char16_t *tail = lasx_validate_utf16(buf, len); if (tail) { return scalar::utf16::validate(tail, len - (tail - buf)); @@ -40187,11 +55277,10 @@ simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept { if (simdutf_unlikely(len == 0)) { - // empty input is valid UTF-16. protect the implementation from - // handling nullptr + // empty input is valid. protected the implementation from nullptr. return true; } - const char16_t *tail = sse_validate_utf16(buf, len); + const char16_t *tail = lasx_validate_utf16(buf, len); if (tail) { return scalar::utf16::validate(tail, len - (tail - buf)); } else { @@ -40201,7 +55290,10 @@ implementation::validate_utf16be(const char16_t *buf, simdutf_warn_unused result implementation::validate_utf16le_with_errors( const char16_t *buf, size_t len) const noexcept { - result res = sse_validate_utf16_with_errors(buf, len); + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + result res = lasx_validate_utf16_with_errors(buf, len); if (res.count != len) { result scalar_res = scalar::utf16::validate_with_errors( buf + res.count, len - res.count); @@ -40213,7 +55305,10 @@ simdutf_warn_unused result implementation::validate_utf16le_with_errors( simdutf_warn_unused result implementation::validate_utf16be_with_errors( const char16_t *buf, size_t len) const noexcept { - result res = sse_validate_utf16_with_errors(buf, len); + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } + result res = lasx_validate_utf16_with_errors(buf, len); if (res.count != len) { result scalar_res = scalar::utf16::validate_with_errors( buf + res.count, len - res.count); @@ -40226,11 +55321,10 @@ simdutf_warn_unused result implementation::validate_utf16be_with_errors( simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { if (simdutf_unlikely(len == 0)) { - // empty input is valid UTF-32. protect the implementation from - // handling nullptr + // empty input is valid. protected the implementation from nullptr. return true; } - const char32_t *tail = sse_validate_utf32le(buf, len); + const char32_t *tail = lasx_validate_utf32le(buf, len); if (tail) { return scalar::utf32::validate(tail, len - (tail - buf)); } else { @@ -40240,12 +55334,10 @@ implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { simdutf_warn_unused result implementation::validate_utf32_with_errors( const char32_t *buf, size_t len) const noexcept { - if (len == 0) { - // empty input is valid UTF-32. protect the implementation from - // handling nullptr + if (simdutf_unlikely(len == 0)) { return result(error_code::SUCCESS, 0); } - result res = sse_validate_utf32le_with_errors(buf, len); + result res = lasx_validate_utf32le_with_errors(buf, len); if (res.count != len) { result scalar_res = scalar::utf32::validate_with_errors(buf + res.count, len - res.count); @@ -40257,9 +55349,8 @@ simdutf_warn_unused result implementation::validate_utf32_with_errors( simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( const char *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - sse_convert_latin1_to_utf8(buf, len, utf8_output); + lasx_convert_latin1_to_utf8(buf, len, utf8_output); size_t converted_chars = ret.second - utf8_output; if (ret.first != buf + len) { @@ -40267,25 +55358,18 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( ret.first, len - (ret.first - buf), ret.second); converted_chars += scalar_converted_chars; } - return converted_chars; } simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( const char *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = - sse_convert_latin1_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } + lasx_convert_latin1_to_utf16le(buf, len, utf16_output); size_t converted_chars = ret.second - utf16_output; if (ret.first != buf + len) { const size_t scalar_converted_chars = scalar::latin1_to_utf16::convert( ret.first, len - (ret.first - buf), ret.second); - if (scalar_converted_chars == 0) { - return 0; - } converted_chars += scalar_converted_chars; } return converted_chars; @@ -40294,18 +55378,12 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( const char *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = - sse_convert_latin1_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } + lasx_convert_latin1_to_utf16be(buf, len, utf16_output); size_t converted_chars = ret.second - utf16_output; if (ret.first != buf + len) { const size_t scalar_converted_chars = scalar::latin1_to_utf16::convert( ret.first, len - (ret.first - buf), ret.second); - if (scalar_converted_chars == 0) { - return 0; - } converted_chars += scalar_converted_chars; } return converted_chars; @@ -40314,17 +55392,11 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( const char *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = - sse_convert_latin1_to_utf32(buf, len, utf32_output); - if (ret.first == nullptr) { - return 0; - } + lasx_convert_latin1_to_utf32(buf, len, utf32_output); size_t converted_chars = ret.second - utf32_output; if (ret.first != buf + len) { const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert( ret.first, len - (ret.first - buf), ret.second); - if (scalar_converted_chars == 0) { - return 0; - } converted_chars += scalar_converted_chars; } return converted_chars; @@ -40332,19 +55404,117 @@ simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( const char *buf, size_t len, char *latin1_output) const noexcept { + size_t pos = 0; + char *output_start{latin1_output}; + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)latin1_output & 0x1F) && pos < len) { + if (buf[pos] & 0x80) { + if (pos + 1 >= len) + return 0; + if ((buf[pos] & 0b11100000) == 0b11000000) { + if ((buf[pos + 1] & 0b11000000) != 0b10000000) + return 0; + uint32_t code_point = + (buf[pos] & 0b00011111) << 6 | (buf[pos + 1] & 0b00111111); + if (code_point < 0x80 || 0xFF < code_point) { + return 0; + } + *latin1_output++ = char(code_point); + pos += 2; + } else { + return 0; + } + } else { + *latin1_output++ = char(buf[pos]); + pos++; + } + } + size_t convert_size = latin1_output - output_start; + if (pos == len) + return convert_size; utf8_to_latin1::validating_transcoder converter; - return converter.convert(buf, len, latin1_output); + size_t convert_result = + converter.convert(buf + pos, len - pos, latin1_output); + return convert_result ? convert_size + convert_result : 0; } simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( const char *buf, size_t len, char *latin1_output) const noexcept { + size_t pos = 0; + char *output_start{latin1_output}; + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)latin1_output & 0x1F) && pos < len) { + if (buf[pos] & 0x80) { + if ((buf[pos] & 0b11100000) == 0b11000000) { + if (pos + 1 >= len) + return result(error_code::TOO_SHORT, pos); + if ((buf[pos + 1] & 0b11000000) != 0b10000000) + return result(error_code::TOO_SHORT, pos); + uint32_t code_point = + (buf[pos] & 0b00011111) << 6 | (buf[pos + 1] & 0b00111111); + if (code_point < 0x80) + return result(error_code::OVERLONG, pos); + if (0xFF < code_point) + return result(error_code::TOO_LARGE, pos); + *latin1_output++ = char(code_point); + pos += 2; + } else if ((buf[pos] & 0b11110000) == 0b11100000) { + return result(error_code::TOO_LARGE, pos); + } else if ((buf[pos] & 0b11111000) == 0b11110000) { + return result(error_code::TOO_LARGE, pos); + } else { + if ((buf[pos] & 0b11000000) == 0b10000000) { + return result(error_code::TOO_LONG, pos); + } + return result(error_code::HEADER_BITS, pos); + } + } else { + *latin1_output++ = char(buf[pos]); + pos++; + } + } + size_t convert_size = latin1_output - output_start; + if (pos == len) + return result(error_code::SUCCESS, convert_size); + utf8_to_latin1::validating_transcoder converter; - return converter.convert_with_errors(buf, len, latin1_output); + result res = + converter.convert_with_errors(buf + pos, len - pos, latin1_output); + return res.error ? result(res.error, res.count + pos) + : result(res.error, res.count + convert_size); } simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( const char *buf, size_t len, char *latin1_output) const noexcept { - return westmere::utf8_to_latin1::convert_valid(buf, len, latin1_output); + size_t pos = 0; + char *output_start{latin1_output}; + // Performance degradation when memory address is not 32-byte aligned + while (((uint64_t)latin1_output & 0x1F) && pos < len) { + if (buf[pos] & 0x80) { + if (pos + 1 >= len) + break; + if ((buf[pos] & 0b11100000) == 0b11000000) { + if ((buf[pos + 1] & 0b11000000) != 0b10000000) + return 0; + uint32_t code_point = + (buf[pos] & 0b00011111) << 6 | (buf[pos + 1] & 0b00111111); + *latin1_output++ = char(code_point); + pos += 2; + } else { + return 0; + } + } else { + *latin1_output++ = char(buf[pos]); + pos++; + } + } + size_t convert_size = latin1_output - output_start; + if (pos == len) + return convert_size; + + size_t convert_result = + lasx::utf8_to_latin1::convert_valid(buf + pos, len - pos, latin1_output); + return convert_result ? convert_size + convert_result : 0; } simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( @@ -40404,7 +55574,7 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( const char16_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - sse_convert_utf16_to_latin1(buf, len, latin1_output); + lasx_convert_utf16_to_latin1(buf, len, latin1_output); if (ret.first == nullptr) { return 0; } @@ -40425,7 +55595,7 @@ simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( const char16_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - sse_convert_utf16_to_latin1(buf, len, latin1_output); + lasx_convert_utf16_to_latin1(buf, len, latin1_output); if (ret.first == nullptr) { return 0; } @@ -40447,7 +55617,7 @@ simdutf_warn_unused result implementation::convert_utf16le_to_latin1_with_errors( const char16_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - sse_convert_utf16_to_latin1_with_errors( + lasx_convert_utf16_to_latin1_with_errors( buf, len, latin1_output); if (ret.first.error) { return ret.first; @@ -40474,8 +55644,8 @@ simdutf_warn_unused result implementation::convert_utf16be_to_latin1_with_errors( const char16_t *buf, size_t len, char *latin1_output) const noexcept { std::pair ret = - sse_convert_utf16_to_latin1_with_errors(buf, len, - latin1_output); + lasx_convert_utf16_to_latin1_with_errors(buf, len, + latin1_output); if (ret.first.error) { return ret.first; } // Can return directly since scalar fallback already found correct @@ -40499,20 +55669,20 @@ implementation::convert_utf16be_to_latin1_with_errors( simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: we could provide an optimized function. + // optimization opportunity: implement a custom function. return convert_utf16be_to_latin1(buf, len, latin1_output); } simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: we could provide an optimized function. + // optimization opportunity: implement a custom function. return convert_utf16le_to_latin1(buf, len, latin1_output); } simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = - sse_convert_utf16_to_utf8(buf, len, utf8_output); + lasx_convert_utf16_to_utf8(buf, len, utf8_output); if (ret.first == nullptr) { return 0; } @@ -40532,7 +55702,7 @@ simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( const char16_t *buf, size_t len, char *utf8_output) const noexcept { std::pair ret = - sse_convert_utf16_to_utf8(buf, len, utf8_output); + lasx_convert_utf16_to_utf8(buf, len, utf8_output); if (ret.first == nullptr) { return 0; } @@ -40554,8 +55724,8 @@ simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - westmere::sse_convert_utf16_to_utf8_with_errors( - buf, len, utf8_output); + lasx_convert_utf16_to_utf8_with_errors(buf, len, + utf8_output); if (ret.first.error) { return ret.first; } // Can return directly since scalar fallback already found correct @@ -40582,8 +55752,8 @@ simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - westmere::sse_convert_utf16_to_utf8_with_errors( - buf, len, utf8_output); + lasx_convert_utf16_to_utf8_with_errors(buf, len, + utf8_output); if (ret.first.error) { return ret.first; } // Can return directly since scalar fallback already found correct @@ -40615,59 +55785,13 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( return convert_utf16be_to_utf8(buf, len, utf8_output); } -simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - sse_convert_utf32_to_latin1(buf, len, latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; - // if (ret.first != buf + len) { - if (ret.first < buf + len) { - const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - westmere::sse_convert_utf32_to_latin1_with_errors(buf, len, - latin1_output); - if (ret.first.count != len) { - result scalar_res = scalar::utf32_to_latin1::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - latin1_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: we could provide an optimized function. - return convert_utf32_to_latin1(buf, len, latin1_output); -} - simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( const char32_t *buf, size_t len, char *utf8_output) const noexcept { + if (simdutf_unlikely(len == 0)) { + return 0; + } std::pair ret = - sse_convert_utf32_to_utf8(buf, len, utf8_output); + lasx_convert_utf32_to_utf8(buf, len, utf8_output); if (ret.first == nullptr) { return 0; } @@ -40685,10 +55809,13 @@ simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( const char32_t *buf, size_t len, char *utf8_output) const noexcept { + if (simdutf_unlikely(len == 0)) { + return result(error_code::SUCCESS, 0); + } // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - westmere::sse_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); + lasx_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); if (ret.first.count != len) { result scalar_res = scalar::utf32_to_utf8::convert_with_errors( buf + ret.first.count, len - ret.first.count, ret.second); @@ -40708,7 +55835,7 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = - sse_convert_utf16_to_utf32(buf, len, utf32_output); + lasx_convert_utf16_to_utf32(buf, len, utf32_output); if (ret.first == nullptr) { return 0; } @@ -40728,7 +55855,7 @@ simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { std::pair ret = - sse_convert_utf16_to_utf32(buf, len, utf32_output); + lasx_convert_utf16_to_utf32(buf, len, utf32_output); if (ret.first == nullptr) { return 0; } @@ -40750,8 +55877,8 @@ simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - westmere::sse_convert_utf16_to_utf32_with_errors( - buf, len, utf32_output); + lasx_convert_utf16_to_utf32_with_errors(buf, len, + utf32_output); if (ret.first.error) { return ret.first; } // Can return directly since scalar fallback already found correct @@ -40778,8 +55905,8 @@ simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - westmere::sse_convert_utf16_to_utf32_with_errors( - buf, len, utf32_output); + lasx_convert_utf16_to_utf32_with_errors(buf, len, + utf32_output); if (ret.first.error) { return ret.first; } // Can return directly since scalar fallback already found correct @@ -40801,15 +55928,77 @@ simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( return ret.first; } +simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lasx_convert_utf32_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert( + ret.first, len - (ret.first - buf), ret.second); + if (scalar_saved_bytes == 0) { + return 0; + } + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + +simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lasx_convert_utf32_to_latin1_with_errors(buf, len, latin1_output); + if (ret.first.error) { + return ret.first; + } // Can return directly since scalar fallback already found correct + // ret.first.count + if (ret.first.count != len) { // All good so far, but not finished + result scalar_res = scalar::utf32_to_latin1::convert_with_errors( + buf + ret.first.count, len - ret.first.count, ret.second); + if (scalar_res.error) { + scalar_res.count += ret.first.count; + return scalar_res; + } else { + ret.second += scalar_res.count; + } + } + ret.first.count = + ret.second - + latin1_output; // Set count to the number of 8-bit code units written + return ret.first; +} + +simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( + const char32_t *buf, size_t len, char *latin1_output) const noexcept { + std::pair ret = + lasx_convert_utf32_to_latin1(buf, len, latin1_output); + if (ret.first == nullptr) { + return 0; + } + size_t saved_bytes = ret.second - latin1_output; + + if (ret.first != buf + len) { + const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert_valid( + ret.first, len - (ret.first - buf), ret.second); + saved_bytes += scalar_saved_bytes; + } + return saved_bytes; +} + simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( const char32_t *buf, size_t len, char *utf8_output) const noexcept { + // optimization opportunity: implement a custom function. return convert_utf32_to_utf8(buf, len, utf8_output); } simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = - sse_convert_utf32_to_utf16(buf, len, utf16_output); + lasx_convert_utf32_to_utf16(buf, len, utf16_output); if (ret.first == nullptr) { return 0; } @@ -40823,13 +56012,14 @@ simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( } saved_bytes += scalar_saved_bytes; } + return saved_bytes; } simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { std::pair ret = - sse_convert_utf32_to_utf16(buf, len, utf16_output); + lasx_convert_utf32_to_utf16(buf, len, utf16_output); if (ret.first == nullptr) { return 0; } @@ -40851,8 +56041,8 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - westmere::sse_convert_utf32_to_utf16_with_errors( - buf, len, utf16_output); + lasx_convert_utf32_to_utf16_with_errors(buf, len, + utf16_output); if (ret.first.count != len) { result scalar_res = scalar::utf32_to_utf16::convert_with_errors( @@ -40875,8 +56065,8 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( // ret.first.count is always the position in the buffer, not the number of // code units written even if finished std::pair ret = - westmere::sse_convert_utf32_to_utf16_with_errors( - buf, len, utf16_output); + lasx_convert_utf32_to_utf16_with_errors(buf, len, + utf16_output); if (ret.first.count != len) { result scalar_res = scalar::utf32_to_utf16::convert_with_errors( @@ -40932,7 +56122,23 @@ simdutf_warn_unused size_t implementation::count_utf16be( simdutf_warn_unused size_t implementation::count_utf8(const char *input, size_t length) const noexcept { - return utf8::count_code_points(input, length); + size_t pos = 0; + size_t count = 0; + // Performance degradation when memory address is not 32-byte aligned + while ((((uint64_t)input + pos) & 0x1F && pos < length)) { + if (input[pos++] > -65) { + count++; + } + } + __m256i v_bf = __lasx_xvldi(0xBF); // 0b10111111 + for (; pos + 32 <= length; pos += 32) { + __m256i in = __lasx_xvld(reinterpret_cast(input + pos), 0); + __m256i utf8_count = + __lasx_xvpcnt_h(__lasx_xvmskltz_b(__lasx_xvslt_b(v_bf, in))); + count = count + __lasx_xvpickve2gr_wu(utf8_count, 0) + + __lasx_xvpickve2gr_wu(utf8_count, 4); + } + return count + scalar::utf8::count_code_points(input + pos, length - pos); } simdutf_warn_unused size_t implementation::latin1_length_from_utf8( @@ -40942,12 +56148,29 @@ simdutf_warn_unused size_t implementation::latin1_length_from_utf8( simdutf_warn_unused size_t implementation::latin1_length_from_utf16(size_t length) const noexcept { - return scalar::utf16::latin1_length_from_utf16(length); + return length; } simdutf_warn_unused size_t implementation::latin1_length_from_utf32(size_t length) const noexcept { - return scalar::utf32::latin1_length_from_utf32(length); + return length; +} + +simdutf_warn_unused size_t implementation::utf8_length_from_latin1( + const char *input, size_t length) const noexcept { + const uint8_t *data = reinterpret_cast(input); + const uint8_t *data_end = data + length; + uint64_t result = 0; + while (data + 16 < data_end) { + uint64_t two_bytes = 0; + __m128i input_vec = __lsx_vld(data, 0); + two_bytes = + __lsx_vpickve2gr_hu(__lsx_vpcnt_h(__lsx_vmskltz_b(input_vec)), 0); + result += 16 + two_bytes; + data += 16; + } + return result + scalar::latin1::utf8_length_from_latin1((const char *)data, + data_end - data); } simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( @@ -40962,72 +56185,12 @@ simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( simdutf_warn_unused size_t implementation::utf16_length_from_latin1(size_t length) const noexcept { - return scalar::latin1::utf16_length_from_latin1(length); + return length; } simdutf_warn_unused size_t implementation::utf32_length_from_latin1(size_t length) const noexcept { - return scalar::latin1::utf32_length_from_latin1(length); -} - -simdutf_warn_unused size_t implementation::utf8_length_from_latin1( - const char *input, size_t len) const noexcept { - const uint8_t *str = reinterpret_cast(input); - size_t answer = len / sizeof(__m128i) * sizeof(__m128i); - size_t i = 0; - if (answer >= 2048) { // long strings optimization - __m128i two_64bits = _mm_setzero_si128(); - while (i + sizeof(__m128i) <= len) { - __m128i runner = _mm_setzero_si128(); - size_t iterations = (len - i) / sizeof(__m128i); - if (iterations > 255) { - iterations = 255; - } - size_t max_i = i + iterations * sizeof(__m128i) - sizeof(__m128i); - for (; i + 4 * sizeof(__m128i) <= max_i; i += 4 * sizeof(__m128i)) { - __m128i input1 = _mm_loadu_si128((const __m128i *)(str + i)); - __m128i input2 = - _mm_loadu_si128((const __m128i *)(str + i + sizeof(__m128i))); - __m128i input3 = - _mm_loadu_si128((const __m128i *)(str + i + 2 * sizeof(__m128i))); - __m128i input4 = - _mm_loadu_si128((const __m128i *)(str + i + 3 * sizeof(__m128i))); - __m128i input12 = - _mm_add_epi8(_mm_cmpgt_epi8(_mm_setzero_si128(), input1), - _mm_cmpgt_epi8(_mm_setzero_si128(), input2)); - __m128i input34 = - _mm_add_epi8(_mm_cmpgt_epi8(_mm_setzero_si128(), input3), - _mm_cmpgt_epi8(_mm_setzero_si128(), input4)); - __m128i input1234 = _mm_add_epi8(input12, input34); - runner = _mm_sub_epi8(runner, input1234); - } - for (; i <= max_i; i += sizeof(__m128i)) { - __m128i more_input = _mm_loadu_si128((const __m128i *)(str + i)); - runner = _mm_sub_epi8(runner, - _mm_cmpgt_epi8(_mm_setzero_si128(), more_input)); - } - two_64bits = - _mm_add_epi64(two_64bits, _mm_sad_epu8(runner, _mm_setzero_si128())); - } - answer += - _mm_extract_epi64(two_64bits, 0) + _mm_extract_epi64(two_64bits, 1); - } else if (answer > 0) { // short string optimization - for (; i + 2 * sizeof(__m128i) <= len; i += 2 * sizeof(__m128i)) { - __m128i latin = _mm_loadu_si128((const __m128i *)(input + i)); - uint16_t non_ascii = (uint16_t)_mm_movemask_epi8(latin); - answer += count_ones(non_ascii); - latin = _mm_loadu_si128((const __m128i *)(input + i) + 1); - non_ascii = (uint16_t)_mm_movemask_epi8(latin); - answer += count_ones(non_ascii); - } - for (; i + sizeof(__m128i) <= len; i += sizeof(__m128i)) { - __m128i latin = _mm_loadu_si128((const __m128i *)(input + i)); - uint16_t non_ascii = (uint16_t)_mm_movemask_epi8(latin); - answer += count_ones(non_ascii); - } - } - return answer + scalar::latin1::utf8_length_from_latin1( - reinterpret_cast(str + i), len - i); + return length; } simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( @@ -41047,35 +56210,35 @@ simdutf_warn_unused size_t implementation::utf16_length_from_utf8( simdutf_warn_unused size_t implementation::utf8_length_from_utf32( const char32_t *input, size_t length) const noexcept { - const __m128i v_00000000 = _mm_setzero_si128(); - const __m128i v_ffffff80 = _mm_set1_epi32((uint32_t)0xffffff80); - const __m128i v_fffff800 = _mm_set1_epi32((uint32_t)0xfffff800); - const __m128i v_ffff0000 = _mm_set1_epi32((uint32_t)0xffff0000); + __m256i v_80 = __lasx_xvrepli_w(0x80); /*0x00000080*/ + __m256i v_800 = __lasx_xvldi(-3832); /*0x00000800*/ + __m256i v_10000 = __lasx_xvldi(-3583); /*0x00010000*/ size_t pos = 0; size_t count = 0; - for (; pos + 4 <= length; pos += 4) { - __m128i in = _mm_loadu_si128((__m128i *)(input + pos)); - const __m128i ascii_bytes_bytemask = - _mm_cmpeq_epi32(_mm_and_si128(in, v_ffffff80), v_00000000); - const __m128i one_two_bytes_bytemask = - _mm_cmpeq_epi32(_mm_and_si128(in, v_fffff800), v_00000000); - const __m128i two_bytes_bytemask = - _mm_xor_si128(one_two_bytes_bytemask, ascii_bytes_bytemask); - const __m128i one_two_three_bytes_bytemask = - _mm_cmpeq_epi32(_mm_and_si128(in, v_ffff0000), v_00000000); - const __m128i three_bytes_bytemask = - _mm_xor_si128(one_two_three_bytes_bytemask, one_two_bytes_bytemask); - const uint16_t ascii_bytes_bitmask = - static_cast(_mm_movemask_epi8(ascii_bytes_bytemask)); - const uint16_t two_bytes_bitmask = - static_cast(_mm_movemask_epi8(two_bytes_bytemask)); - const uint16_t three_bytes_bitmask = - static_cast(_mm_movemask_epi8(three_bytes_bytemask)); - - size_t ascii_count = count_ones(ascii_bytes_bitmask) / 4; - size_t two_bytes_count = count_ones(two_bytes_bitmask) / 4; - size_t three_bytes_count = count_ones(three_bytes_bitmask) / 4; - count += 16 - 3 * ascii_count - 2 * two_bytes_count - three_bytes_count; + for (; pos + 8 <= length; pos += 8) { + __m256i in = + __lasx_xvld(reinterpret_cast(input + pos), 0); + __m256i ascii_bytes_bytemask = __lasx_xvslt_w(in, v_80); + __m256i one_two_bytes_bytemask = __lasx_xvslt_w(in, v_800); + __m256i two_bytes_bytemask = + __lasx_xvxor_v(one_two_bytes_bytemask, ascii_bytes_bytemask); + __m256i three_bytes_bytemask = + __lasx_xvxor_v(__lasx_xvslt_w(in, v_10000), one_two_bytes_bytemask); + + __m256i ascii_bytes = + __lasx_xvpcnt_w(__lasx_xvmskltz_w(ascii_bytes_bytemask)); + const uint32_t ascii_bytes_count = __lasx_xvpickve2gr_wu(ascii_bytes, 0) + + __lasx_xvpickve2gr_wu(ascii_bytes, 4); + __m256i two_bytes = __lasx_xvpcnt_w(__lasx_xvmskltz_w(two_bytes_bytemask)); + const uint32_t two_bytes_count = __lasx_xvpickve2gr_wu(two_bytes, 0) + + __lasx_xvpickve2gr_wu(two_bytes, 4); + __m256i three_bytes = + __lasx_xvpcnt_w(__lasx_xvmskltz_w(three_bytes_bytemask)); + const uint32_t three_bytes_count = __lasx_xvpickve2gr_wu(three_bytes, 0) + + __lasx_xvpickve2gr_wu(three_bytes, 4); + + count += + 32 - 3 * ascii_bytes_count - 2 * two_bytes_count - three_bytes_count; } return count + scalar::utf32::utf8_length_from_utf32(input + pos, length - pos); @@ -41083,17 +56246,14 @@ simdutf_warn_unused size_t implementation::utf8_length_from_utf32( simdutf_warn_unused size_t implementation::utf16_length_from_utf32( const char32_t *input, size_t length) const noexcept { - const __m128i v_00000000 = _mm_setzero_si128(); - const __m128i v_ffff0000 = _mm_set1_epi32((uint32_t)0xffff0000); + __m128i v_ffff = __lsx_vldi(-2304); /*0x0000ffff*/ size_t pos = 0; size_t count = 0; for (; pos + 4 <= length; pos += 4) { - __m128i in = _mm_loadu_si128((__m128i *)(input + pos)); - const __m128i surrogate_bytemask = - _mm_cmpeq_epi32(_mm_and_si128(in, v_ffff0000), v_00000000); - const uint16_t surrogate_bitmask = - static_cast(_mm_movemask_epi8(surrogate_bytemask)); - size_t surrogate_count = (16 - count_ones(surrogate_bitmask)) / 4; + __m128i in = __lsx_vld(reinterpret_cast(input + pos), 0); + __m128i surrogate_bytemask = __lsx_vslt_wu(v_ffff, in); + size_t surrogate_count = __lsx_vpickve2gr_bu( + __lsx_vpcnt_b(__lsx_vmskltz_w(surrogate_bytemask)), 0); count += 4 + surrogate_count; } return count + @@ -41169,18 +56329,12 @@ size_t implementation::binary_to_base64(const char *input, size_t length, return encode_base64(output, input, length, options); } } -} // namespace westmere +} // namespace lasx } // namespace simdutf -/* begin file src/simdutf/westmere/end.h */ -#if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE -// nothing needed. -#else -SIMDUTF_UNTARGET_REGION -#endif - -/* end file src/simdutf/westmere/end.h */ -/* end file src/westmere/implementation.cpp */ +/* begin file src/simdutf/lasx/end.h */ +/* end file src/simdutf/lasx/end.h */ +/* end file src/lasx/implementation.cpp */ #endif SIMDUTF_POP_DISABLE_WARNINGS diff --git a/deps/simdutf/simdutf.h b/deps/simdutf/simdutf.h index a219e96ad9cd9f..2d984f40e7bc3f 100644 --- a/deps/simdutf/simdutf.h +++ b/deps/simdutf/simdutf.h @@ -1,4 +1,4 @@ -/* auto-generated on 2024-11-12 20:00:19 -0500. Do not edit! */ +/* auto-generated on 2024-12-10 14:54:53 -0500. Do not edit! */ /* begin file include/simdutf.h */ #ifndef SIMDUTF_H #define SIMDUTF_H @@ -155,11 +155,11 @@ // RISC-V 64-bit #define SIMDUTF_IS_RISCV64 1 - #if __clang_major__ >= 19 - // Does the compiler support target regions for RISC-V - #define SIMDUTF_HAS_RVV_TARGET_REGION 1 - #endif - + // #if __riscv_v_intrinsic >= 1000000 + // #define SIMDUTF_HAS_RVV_INTRINSICS 1 + // #define SIMDUTF_HAS_RVV_TARGET_REGION 1 + // #elif ... + // Check for special compiler versions that implement pre v1.0 intrinsics #if __riscv_v_intrinsic >= 11000 #define SIMDUTF_HAS_RVV_INTRINSICS 1 #endif @@ -178,7 +178,12 @@ #endif #elif defined(__loongarch_lp64) -// LoongArch 64-bit + #if defined(__loongarch_sx) && defined(__loongarch_asx) + #define SIMDUTF_IS_LSX 1 + #define SIMDUTF_IS_LASX 1 + #elif defined(__loongarch_sx) + #define SIMDUTF_IS_LSX 1 + #endif #else // The simdutf library is designed // for 64-bit processors and it seems that you are not @@ -670,7 +675,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS #define SIMDUTF_SIMDUTF_VERSION_H /** The version of simdutf being used (major.minor.revision) */ -#define SIMDUTF_VERSION "5.6.1" +#define SIMDUTF_VERSION "5.6.4" namespace simdutf { enum { @@ -685,7 +690,7 @@ enum { /** * The revision (major.minor.REVISION) of simdutf being used. */ - SIMDUTF_VERSION_REVISION = 1 + SIMDUTF_VERSION_REVISION = 4 }; } // namespace simdutf @@ -796,6 +801,8 @@ enum instruction_set { AVX512VPOPCNTDQ = 0x2000, RVV = 0x4000, ZVBB = 0x8000, + LSX = 0x40000, + LASX = 0x80000, }; #if defined(__PPC64__) @@ -987,6 +994,28 @@ static inline uint32_t detect_supported_architectures() { } return host_isa; } +#elif defined(__loongarch__) + #if defined(__linux__) + #include + // bits/hwcap.h + // #define HWCAP_LOONGARCH_LSX (1 << 4) + // #define HWCAP_LOONGARCH_LASX (1 << 5) + #endif + +static inline uint32_t detect_supported_architectures() { + uint32_t host_isa = instruction_set::DEFAULT; + #if defined(__linux__) + uint64_t hwcap = 0; + hwcap = getauxval(AT_HWCAP); + if (hwcap & HWCAP_LOONGARCH_LSX) { + host_isa |= instruction_set::LSX; + } + if (hwcap & HWCAP_LOONGARCH_LASX) { + host_isa |= instruction_set::LASX; + } + #endif + return host_isa; +} #else // fallback // includes 32-bit ARM. diff --git a/deps/sqlite/sqlite3.c b/deps/sqlite/sqlite3.c index 2886d04ae5263b..c748d033461fae 100644 --- a/deps/sqlite/sqlite3.c +++ b/deps/sqlite/sqlite3.c @@ -1,6 +1,6 @@ /****************************************************************************** ** This file is an amalgamation of many separate C source files from SQLite -** version 3.47.0. By combining all the individual C code files into this +** version 3.47.2. By combining all the individual C code files into this ** single large file, the entire code can be compiled as a single translation ** unit. This allows many compilers to do optimizations that would not be ** possible if the files were compiled separately. Performance improvements @@ -18,7 +18,7 @@ ** separate file. This file contains only code for the core SQLite library. ** ** The content in this amalgamation comes from Fossil check-in -** 03a9703e27c44437c39363d0baf82db4ebc9. +** 2aabe05e2e8cae4847a802ee2daddc1d7413. */ #define SQLITE_CORE 1 #define SQLITE_AMALGAMATION 1 @@ -462,9 +462,9 @@ extern "C" { ** [sqlite3_libversion_number()], [sqlite3_sourceid()], ** [sqlite_version()] and [sqlite_source_id()]. */ -#define SQLITE_VERSION "3.47.0" -#define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-10-21 16:30:22 03a9703e27c44437c39363d0baf82db4ebc94538a0f28411c85dda156f82636e" +#define SQLITE_VERSION "3.47.2" +#define SQLITE_VERSION_NUMBER 3047002 +#define SQLITE_SOURCE_ID "2024-12-07 20:39:59 2aabe05e2e8cae4847a802ee2daddc1d7413d8fc560254d93ee3e72c14685b6c" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -968,6 +968,13 @@ SQLITE_API int sqlite3_exec( ** filesystem supports doing multiple write operations atomically when those ** write operations are bracketed by [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE] and ** [SQLITE_FCNTL_COMMIT_ATOMIC_WRITE]. +** +** The SQLITE_IOCAP_SUBPAGE_READ property means that it is ok to read +** from the database file in amounts that are not a multiple of the +** page size and that do not begin at a page boundary. Without this +** property, SQLite is careful to only do full-page reads and write +** on aligned pages, with the one exception that it will do a sub-page +** read of the first page to access the database header. */ #define SQLITE_IOCAP_ATOMIC 0x00000001 #define SQLITE_IOCAP_ATOMIC512 0x00000002 @@ -984,6 +991,7 @@ SQLITE_API int sqlite3_exec( #define SQLITE_IOCAP_POWERSAFE_OVERWRITE 0x00001000 #define SQLITE_IOCAP_IMMUTABLE 0x00002000 #define SQLITE_IOCAP_BATCH_ATOMIC 0x00004000 +#define SQLITE_IOCAP_SUBPAGE_READ 0x00008000 /* ** CAPI3REF: File Locking Levels @@ -1130,6 +1138,7 @@ struct sqlite3_file { **
  • [SQLITE_IOCAP_POWERSAFE_OVERWRITE] **
  • [SQLITE_IOCAP_IMMUTABLE] **
  • [SQLITE_IOCAP_BATCH_ATOMIC] +**
  • [SQLITE_IOCAP_SUBPAGE_READ] ** ** ** The SQLITE_IOCAP_ATOMIC property means that all writes of @@ -32298,6 +32307,7 @@ SQLITE_PRIVATE void sqlite3RecordErrorOffsetOfExpr(sqlite3 *db, const Expr *pExp pExpr = pExpr->pLeft; } if( pExpr==0 ) return; + if( ExprHasProperty(pExpr, EP_FromDDL) ) return; db->errByteOffset = pExpr->w.iOfst; } @@ -35687,8 +35697,8 @@ SQLITE_PRIVATE int sqlite3AtoF(const char *z, double *pResult, int length, u8 en int eValid = 1; /* True exponent is either not used or is well-formed */ int nDigit = 0; /* Number of digits processed */ int eType = 1; /* 1: pure integer, 2+: fractional -1 or less: bad UTF16 */ + u64 s2; /* round-tripped significand */ double rr[2]; - u64 s2; assert( enc==SQLITE_UTF8 || enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE ); *pResult = 0.0; /* Default return value, in case of an error */ @@ -35791,7 +35801,7 @@ SQLITE_PRIVATE int sqlite3AtoF(const char *z, double *pResult, int length, u8 en e = (e*esign) + d; /* Try to adjust the exponent to make it smaller */ - while( e>0 && s<(LARGEST_UINT64/10) ){ + while( e>0 && s<((LARGEST_UINT64-0x7ff)/10) ){ s *= 10; e--; } @@ -35801,11 +35811,16 @@ SQLITE_PRIVATE int sqlite3AtoF(const char *z, double *pResult, int length, u8 en } rr[0] = (double)s; - s2 = (u64)rr[0]; -#if defined(_MSC_VER) && _MSC_VER<1700 - if( s2==0x8000000000000000LL ){ s2 = 2*(u64)(0.5*rr[0]); } -#endif - rr[1] = s>=s2 ? (double)(s - s2) : -(double)(s2 - s); + assert( sizeof(s2)==sizeof(rr[0]) ); + memcpy(&s2, &rr[0], sizeof(s2)); + if( s2<=0x43efffffffffffffLL ){ + s2 = (u64)rr[0]; + rr[1] = s>=s2 ? (double)(s - s2) : -(double)(s2 - s); + }else{ + rr[1] = 0.0; + } + assert( rr[1]<=1.0e-10*rr[0] ); /* Equal only when rr[0]==0.0 */ + if( e>0 ){ while( e>=100 ){ e -= 100; @@ -42591,6 +42606,7 @@ static void setDeviceCharacteristics(unixFile *pFd){ if( pFd->ctrlFlags & UNIXFILE_PSOW ){ pFd->deviceCharacteristics |= SQLITE_IOCAP_POWERSAFE_OVERWRITE; } + pFd->deviceCharacteristics |= SQLITE_IOCAP_SUBPAGE_READ; pFd->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; } @@ -50391,7 +50407,7 @@ static int winSectorSize(sqlite3_file *id){ */ static int winDeviceCharacteristics(sqlite3_file *id){ winFile *p = (winFile*)id; - return SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN | + return SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN | SQLITE_IOCAP_SUBPAGE_READ | ((p->ctrlFlags & WINFILE_PSOW)?SQLITE_IOCAP_POWERSAFE_OVERWRITE:0); } @@ -51779,7 +51795,7 @@ static int winOpen( int rc = SQLITE_OK; /* Function Return Code */ #if !defined(NDEBUG) || SQLITE_OS_WINCE - int eType = flags&0xFFFFFF00; /* Type of file to open */ + int eType = flags&0x0FFF00; /* Type of file to open */ #endif int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); @@ -57999,18 +58015,26 @@ static const unsigned char aJournalMagic[] = { ** Return true if page pgno can be read directly from the database file ** by the b-tree layer. This is the case if: ** -** * the database file is open, -** * there are no dirty pages in the cache, and -** * the desired page is not currently in the wal file. +** (1) the database file is open +** (2) the VFS for the database is able to do unaligned sub-page reads +** (3) there are no dirty pages in the cache, and +** (4) the desired page is not currently in the wal file. */ SQLITE_PRIVATE int sqlite3PagerDirectReadOk(Pager *pPager, Pgno pgno){ - if( pPager->fd->pMethods==0 ) return 0; - if( sqlite3PCacheIsDirty(pPager->pPCache) ) return 0; + assert( pPager!=0 ); + assert( pPager->fd!=0 ); + if( pPager->fd->pMethods==0 ) return 0; /* Case (1) */ + assert( pPager->fd->pMethods->xDeviceCharacteristics!=0 ); + if( (pPager->fd->pMethods->xDeviceCharacteristics(pPager->fd) + & SQLITE_IOCAP_SUBPAGE_READ)==0 ){ + return 0; /* Case (2) */ + } + if( sqlite3PCacheIsDirty(pPager->pPCache) ) return 0; /* Failed (3) */ #ifndef SQLITE_OMIT_WAL if( pPager->pWal ){ u32 iRead = 0; (void)sqlite3WalFindFrame(pPager->pWal, pgno, &iRead); - return iRead==0; + return iRead==0; /* Condition (4) */ } #endif return 1; @@ -147586,32 +147610,32 @@ static Expr *substExpr( if( pSubst->isOuterJoin ){ ExprSetProperty(pNew, EP_CanBeNull); } - if( ExprHasProperty(pExpr,EP_OuterON|EP_InnerON) ){ - sqlite3SetJoinExpr(pNew, pExpr->w.iJoin, - pExpr->flags & (EP_OuterON|EP_InnerON)); - } - sqlite3ExprDelete(db, pExpr); - pExpr = pNew; - if( pExpr->op==TK_TRUEFALSE ){ - pExpr->u.iValue = sqlite3ExprTruthValue(pExpr); - pExpr->op = TK_INTEGER; - ExprSetProperty(pExpr, EP_IntValue); + if( pNew->op==TK_TRUEFALSE ){ + pNew->u.iValue = sqlite3ExprTruthValue(pNew); + pNew->op = TK_INTEGER; + ExprSetProperty(pNew, EP_IntValue); } /* Ensure that the expression now has an implicit collation sequence, ** just as it did when it was a column of a view or sub-query. */ { - CollSeq *pNat = sqlite3ExprCollSeq(pSubst->pParse, pExpr); + CollSeq *pNat = sqlite3ExprCollSeq(pSubst->pParse, pNew); CollSeq *pColl = sqlite3ExprCollSeq(pSubst->pParse, pSubst->pCList->a[iColumn].pExpr ); - if( pNat!=pColl || (pExpr->op!=TK_COLUMN && pExpr->op!=TK_COLLATE) ){ - pExpr = sqlite3ExprAddCollateString(pSubst->pParse, pExpr, + if( pNat!=pColl || (pNew->op!=TK_COLUMN && pNew->op!=TK_COLLATE) ){ + pNew = sqlite3ExprAddCollateString(pSubst->pParse, pNew, (pColl ? pColl->zName : "BINARY") ); } } - ExprClearProperty(pExpr, EP_Collate); + ExprClearProperty(pNew, EP_Collate); + if( ExprHasProperty(pExpr,EP_OuterON|EP_InnerON) ){ + sqlite3SetJoinExpr(pNew, pExpr->w.iJoin, + pExpr->flags & (EP_OuterON|EP_InnerON)); + } + sqlite3ExprDelete(db, pExpr); + pExpr = pNew; } } }else{ @@ -158939,6 +158963,7 @@ static Expr *removeUnindexableInClauseTerms( pNew->pLeft->x.pList = pLhs; } pSelect->pEList = pRhs; + pSelect->selId = ++pParse->nSelect; /* Req'd for SubrtnSig validity */ if( pLhs && pLhs->nExpr==1 ){ /* Take care here not to generate a TK_VECTOR containing only a ** single value. Since the parser never creates such a vector, some @@ -189798,10 +189823,15 @@ static int fts3PoslistPhraseMerge( if( *p1==POS_COLUMN ){ p1++; p1 += fts3GetVarint32(p1, &iCol1); + /* iCol1==0 indicates corruption. Column 0 does not have a POS_COLUMN + ** entry, so this is actually end-of-doclist. */ + if( iCol1==0 ) return 0; } if( *p2==POS_COLUMN ){ p2++; p2 += fts3GetVarint32(p2, &iCol2); + /* As above, iCol2==0 indicates corruption. */ + if( iCol2==0 ) return 0; } while( 1 ){ @@ -192972,7 +193002,7 @@ static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){ nTmp += p->pRight->pPhrase->doclist.nList; } nTmp += p->pPhrase->doclist.nList; - aTmp = sqlite3_malloc64(nTmp*2); + aTmp = sqlite3_malloc64(nTmp*2 + FTS3_VARINT_MAX); if( !aTmp ){ *pRc = SQLITE_NOMEM; res = 0; @@ -194525,10 +194555,11 @@ static int getNextString( Fts3PhraseToken *pToken; p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken)); - if( !p ) goto no_mem; - zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte); - if( !zTemp ) goto no_mem; + if( !zTemp || !p ){ + rc = SQLITE_NOMEM; + goto getnextstring_out; + } assert( nToken==ii ); pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii]; @@ -194543,9 +194574,6 @@ static int getNextString( nToken = ii+1; } } - - pModule->xClose(pCursor); - pCursor = 0; } if( rc==SQLITE_DONE ){ @@ -194553,7 +194581,10 @@ static int getNextString( char *zBuf = 0; p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp); - if( !p ) goto no_mem; + if( !p ){ + rc = SQLITE_NOMEM; + goto getnextstring_out; + } memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p); p->eType = FTSQUERY_PHRASE; p->pPhrase = (Fts3Phrase *)&p[1]; @@ -194561,11 +194592,9 @@ static int getNextString( p->pPhrase->nToken = nToken; zBuf = (char *)&p->pPhrase->aToken[nToken]; + assert( nTemp==0 || zTemp ); if( zTemp ){ memcpy(zBuf, zTemp, nTemp); - sqlite3_free(zTemp); - }else{ - assert( nTemp==0 ); } for(jj=0; jjpPhrase->nToken; jj++){ @@ -194575,17 +194604,17 @@ static int getNextString( rc = SQLITE_OK; } - *ppExpr = p; - return rc; -no_mem: - + getnextstring_out: if( pCursor ){ pModule->xClose(pCursor); } sqlite3_free(zTemp); - sqlite3_free(p); - *ppExpr = 0; - return SQLITE_NOMEM; + if( rc!=SQLITE_OK ){ + sqlite3_free(p); + p = 0; + } + *ppExpr = p; + return rc; } /* @@ -232806,7 +232835,27 @@ SQLITE_API int sqlite3session_config(int op, void *pArg){ /************** End of sqlite3session.c **************************************/ /************** Begin file fts5.c ********************************************/ - +/* +** This, the "fts5.c" source file, is a composite file that is itself +** assembled from the following files: +** +** fts5.h +** fts5Int.h +** fts5parse.h <--- Generated from fts5parse.y by Lemon +** fts5parse.c <--- Generated from fts5parse.y by Lemon +** fts5_aux.c +** fts5_buffer.c +** fts5_config.c +** fts5_expr.c +** fts5_hash.c +** fts5_index.c +** fts5_main.c +** fts5_storage.c +** fts5_tokenize.c +** fts5_unicode2.c +** fts5_varint.c +** fts5_vocab.c +*/ #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) #if !defined(NDEBUG) && !defined(SQLITE_DEBUG) @@ -232816,6 +232865,12 @@ SQLITE_API int sqlite3session_config(int op, void *pArg){ # undef NDEBUG #endif +#ifdef HAVE_STDINT_H +/* #include */ +#endif +#ifdef HAVE_INTTYPES_H +/* #include */ +#endif /* ** 2014 May 31 ** @@ -254888,7 +254943,7 @@ static void fts5SourceIdFunc( ){ assert( nArg==0 ); UNUSED_PARAM2(nArg, apUnused); - sqlite3_result_text(pCtx, "fts5: 2024-10-21 16:30:22 03a9703e27c44437c39363d0baf82db4ebc94538a0f28411c85dda156f82636e", -1, SQLITE_TRANSIENT); + sqlite3_result_text(pCtx, "fts5: 2024-12-07 20:39:59 2aabe05e2e8cae4847a802ee2daddc1d7413d8fc560254d93ee3e72c14685b6c", -1, SQLITE_TRANSIENT); } /* @@ -260079,7 +260134,7 @@ static int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){ } - +/* Here ends the fts5.c composite file. */ #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) */ /************** End of fts5.c ************************************************/ diff --git a/deps/sqlite/sqlite3.h b/deps/sqlite/sqlite3.h index eaffd1ec167ad4..d8ce1482a352af 100644 --- a/deps/sqlite/sqlite3.h +++ b/deps/sqlite/sqlite3.h @@ -146,9 +146,9 @@ extern "C" { ** [sqlite3_libversion_number()], [sqlite3_sourceid()], ** [sqlite_version()] and [sqlite_source_id()]. */ -#define SQLITE_VERSION "3.47.0" -#define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-10-21 16:30:22 03a9703e27c44437c39363d0baf82db4ebc94538a0f28411c85dda156f82636e" +#define SQLITE_VERSION "3.47.2" +#define SQLITE_VERSION_NUMBER 3047002 +#define SQLITE_SOURCE_ID "2024-12-07 20:39:59 2aabe05e2e8cae4847a802ee2daddc1d7413d8fc560254d93ee3e72c14685b6c" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -652,6 +652,13 @@ SQLITE_API int sqlite3_exec( ** filesystem supports doing multiple write operations atomically when those ** write operations are bracketed by [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE] and ** [SQLITE_FCNTL_COMMIT_ATOMIC_WRITE]. +** +** The SQLITE_IOCAP_SUBPAGE_READ property means that it is ok to read +** from the database file in amounts that are not a multiple of the +** page size and that do not begin at a page boundary. Without this +** property, SQLite is careful to only do full-page reads and write +** on aligned pages, with the one exception that it will do a sub-page +** read of the first page to access the database header. */ #define SQLITE_IOCAP_ATOMIC 0x00000001 #define SQLITE_IOCAP_ATOMIC512 0x00000002 @@ -668,6 +675,7 @@ SQLITE_API int sqlite3_exec( #define SQLITE_IOCAP_POWERSAFE_OVERWRITE 0x00001000 #define SQLITE_IOCAP_IMMUTABLE 0x00002000 #define SQLITE_IOCAP_BATCH_ATOMIC 0x00004000 +#define SQLITE_IOCAP_SUBPAGE_READ 0x00008000 /* ** CAPI3REF: File Locking Levels @@ -814,6 +822,7 @@ struct sqlite3_file { **
  • [SQLITE_IOCAP_POWERSAFE_OVERWRITE] **
  • [SQLITE_IOCAP_IMMUTABLE] **
  • [SQLITE_IOCAP_BATCH_ATOMIC] +**
  • [SQLITE_IOCAP_SUBPAGE_READ] ** ** ** The SQLITE_IOCAP_ATOMIC property means that all writes of diff --git a/deps/sqlite/sqlite3ext.h b/deps/sqlite/sqlite3ext.h new file mode 100644 index 00000000000000..ae0949baf75ae8 --- /dev/null +++ b/deps/sqlite/sqlite3ext.h @@ -0,0 +1,719 @@ +/* +** 2006 June 7 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This header file defines the SQLite interface for use by +** shared libraries that want to be imported as extensions into +** an SQLite instance. Shared libraries that intend to be loaded +** as extensions by SQLite should #include this file instead of +** sqlite3.h. +*/ +#ifndef SQLITE3EXT_H +#define SQLITE3EXT_H +#include "sqlite3.h" + +/* +** The following structure holds pointers to all of the SQLite API +** routines. +** +** WARNING: In order to maintain backwards compatibility, add new +** interfaces to the end of this structure only. If you insert new +** interfaces in the middle of this structure, then older different +** versions of SQLite will not be able to load each other's shared +** libraries! +*/ +struct sqlite3_api_routines { + void * (*aggregate_context)(sqlite3_context*,int nBytes); + int (*aggregate_count)(sqlite3_context*); + int (*bind_blob)(sqlite3_stmt*,int,const void*,int n,void(*)(void*)); + int (*bind_double)(sqlite3_stmt*,int,double); + int (*bind_int)(sqlite3_stmt*,int,int); + int (*bind_int64)(sqlite3_stmt*,int,sqlite_int64); + int (*bind_null)(sqlite3_stmt*,int); + int (*bind_parameter_count)(sqlite3_stmt*); + int (*bind_parameter_index)(sqlite3_stmt*,const char*zName); + const char * (*bind_parameter_name)(sqlite3_stmt*,int); + int (*bind_text)(sqlite3_stmt*,int,const char*,int n,void(*)(void*)); + int (*bind_text16)(sqlite3_stmt*,int,const void*,int,void(*)(void*)); + int (*bind_value)(sqlite3_stmt*,int,const sqlite3_value*); + int (*busy_handler)(sqlite3*,int(*)(void*,int),void*); + int (*busy_timeout)(sqlite3*,int ms); + int (*changes)(sqlite3*); + int (*close)(sqlite3*); + int (*collation_needed)(sqlite3*,void*,void(*)(void*,sqlite3*, + int eTextRep,const char*)); + int (*collation_needed16)(sqlite3*,void*,void(*)(void*,sqlite3*, + int eTextRep,const void*)); + const void * (*column_blob)(sqlite3_stmt*,int iCol); + int (*column_bytes)(sqlite3_stmt*,int iCol); + int (*column_bytes16)(sqlite3_stmt*,int iCol); + int (*column_count)(sqlite3_stmt*pStmt); + const char * (*column_database_name)(sqlite3_stmt*,int); + const void * (*column_database_name16)(sqlite3_stmt*,int); + const char * (*column_decltype)(sqlite3_stmt*,int i); + const void * (*column_decltype16)(sqlite3_stmt*,int); + double (*column_double)(sqlite3_stmt*,int iCol); + int (*column_int)(sqlite3_stmt*,int iCol); + sqlite_int64 (*column_int64)(sqlite3_stmt*,int iCol); + const char * (*column_name)(sqlite3_stmt*,int); + const void * (*column_name16)(sqlite3_stmt*,int); + const char * (*column_origin_name)(sqlite3_stmt*,int); + const void * (*column_origin_name16)(sqlite3_stmt*,int); + const char * (*column_table_name)(sqlite3_stmt*,int); + const void * (*column_table_name16)(sqlite3_stmt*,int); + const unsigned char * (*column_text)(sqlite3_stmt*,int iCol); + const void * (*column_text16)(sqlite3_stmt*,int iCol); + int (*column_type)(sqlite3_stmt*,int iCol); + sqlite3_value* (*column_value)(sqlite3_stmt*,int iCol); + void * (*commit_hook)(sqlite3*,int(*)(void*),void*); + int (*complete)(const char*sql); + int (*complete16)(const void*sql); + int (*create_collation)(sqlite3*,const char*,int,void*, + int(*)(void*,int,const void*,int,const void*)); + int (*create_collation16)(sqlite3*,const void*,int,void*, + int(*)(void*,int,const void*,int,const void*)); + int (*create_function)(sqlite3*,const char*,int,int,void*, + void (*xFunc)(sqlite3_context*,int,sqlite3_value**), + void (*xStep)(sqlite3_context*,int,sqlite3_value**), + void (*xFinal)(sqlite3_context*)); + int (*create_function16)(sqlite3*,const void*,int,int,void*, + void (*xFunc)(sqlite3_context*,int,sqlite3_value**), + void (*xStep)(sqlite3_context*,int,sqlite3_value**), + void (*xFinal)(sqlite3_context*)); + int (*create_module)(sqlite3*,const char*,const sqlite3_module*,void*); + int (*data_count)(sqlite3_stmt*pStmt); + sqlite3 * (*db_handle)(sqlite3_stmt*); + int (*declare_vtab)(sqlite3*,const char*); + int (*enable_shared_cache)(int); + int (*errcode)(sqlite3*db); + const char * (*errmsg)(sqlite3*); + const void * (*errmsg16)(sqlite3*); + int (*exec)(sqlite3*,const char*,sqlite3_callback,void*,char**); + int (*expired)(sqlite3_stmt*); + int (*finalize)(sqlite3_stmt*pStmt); + void (*free)(void*); + void (*free_table)(char**result); + int (*get_autocommit)(sqlite3*); + void * (*get_auxdata)(sqlite3_context*,int); + int (*get_table)(sqlite3*,const char*,char***,int*,int*,char**); + int (*global_recover)(void); + void (*interruptx)(sqlite3*); + sqlite_int64 (*last_insert_rowid)(sqlite3*); + const char * (*libversion)(void); + int (*libversion_number)(void); + void *(*malloc)(int); + char * (*mprintf)(const char*,...); + int (*open)(const char*,sqlite3**); + int (*open16)(const void*,sqlite3**); + int (*prepare)(sqlite3*,const char*,int,sqlite3_stmt**,const char**); + int (*prepare16)(sqlite3*,const void*,int,sqlite3_stmt**,const void**); + void * (*profile)(sqlite3*,void(*)(void*,const char*,sqlite_uint64),void*); + void (*progress_handler)(sqlite3*,int,int(*)(void*),void*); + void *(*realloc)(void*,int); + int (*reset)(sqlite3_stmt*pStmt); + void (*result_blob)(sqlite3_context*,const void*,int,void(*)(void*)); + void (*result_double)(sqlite3_context*,double); + void (*result_error)(sqlite3_context*,const char*,int); + void (*result_error16)(sqlite3_context*,const void*,int); + void (*result_int)(sqlite3_context*,int); + void (*result_int64)(sqlite3_context*,sqlite_int64); + void (*result_null)(sqlite3_context*); + void (*result_text)(sqlite3_context*,const char*,int,void(*)(void*)); + void (*result_text16)(sqlite3_context*,const void*,int,void(*)(void*)); + void (*result_text16be)(sqlite3_context*,const void*,int,void(*)(void*)); + void (*result_text16le)(sqlite3_context*,const void*,int,void(*)(void*)); + void (*result_value)(sqlite3_context*,sqlite3_value*); + void * (*rollback_hook)(sqlite3*,void(*)(void*),void*); + int (*set_authorizer)(sqlite3*,int(*)(void*,int,const char*,const char*, + const char*,const char*),void*); + void (*set_auxdata)(sqlite3_context*,int,void*,void (*)(void*)); + char * (*xsnprintf)(int,char*,const char*,...); + int (*step)(sqlite3_stmt*); + int (*table_column_metadata)(sqlite3*,const char*,const char*,const char*, + char const**,char const**,int*,int*,int*); + void (*thread_cleanup)(void); + int (*total_changes)(sqlite3*); + void * (*trace)(sqlite3*,void(*xTrace)(void*,const char*),void*); + int (*transfer_bindings)(sqlite3_stmt*,sqlite3_stmt*); + void * (*update_hook)(sqlite3*,void(*)(void*,int ,char const*,char const*, + sqlite_int64),void*); + void * (*user_data)(sqlite3_context*); + const void * (*value_blob)(sqlite3_value*); + int (*value_bytes)(sqlite3_value*); + int (*value_bytes16)(sqlite3_value*); + double (*value_double)(sqlite3_value*); + int (*value_int)(sqlite3_value*); + sqlite_int64 (*value_int64)(sqlite3_value*); + int (*value_numeric_type)(sqlite3_value*); + const unsigned char * (*value_text)(sqlite3_value*); + const void * (*value_text16)(sqlite3_value*); + const void * (*value_text16be)(sqlite3_value*); + const void * (*value_text16le)(sqlite3_value*); + int (*value_type)(sqlite3_value*); + char *(*vmprintf)(const char*,va_list); + /* Added ??? */ + int (*overload_function)(sqlite3*, const char *zFuncName, int nArg); + /* Added by 3.3.13 */ + int (*prepare_v2)(sqlite3*,const char*,int,sqlite3_stmt**,const char**); + int (*prepare16_v2)(sqlite3*,const void*,int,sqlite3_stmt**,const void**); + int (*clear_bindings)(sqlite3_stmt*); + /* Added by 3.4.1 */ + int (*create_module_v2)(sqlite3*,const char*,const sqlite3_module*,void*, + void (*xDestroy)(void *)); + /* Added by 3.5.0 */ + int (*bind_zeroblob)(sqlite3_stmt*,int,int); + int (*blob_bytes)(sqlite3_blob*); + int (*blob_close)(sqlite3_blob*); + int (*blob_open)(sqlite3*,const char*,const char*,const char*,sqlite3_int64, + int,sqlite3_blob**); + int (*blob_read)(sqlite3_blob*,void*,int,int); + int (*blob_write)(sqlite3_blob*,const void*,int,int); + int (*create_collation_v2)(sqlite3*,const char*,int,void*, + int(*)(void*,int,const void*,int,const void*), + void(*)(void*)); + int (*file_control)(sqlite3*,const char*,int,void*); + sqlite3_int64 (*memory_highwater)(int); + sqlite3_int64 (*memory_used)(void); + sqlite3_mutex *(*mutex_alloc)(int); + void (*mutex_enter)(sqlite3_mutex*); + void (*mutex_free)(sqlite3_mutex*); + void (*mutex_leave)(sqlite3_mutex*); + int (*mutex_try)(sqlite3_mutex*); + int (*open_v2)(const char*,sqlite3**,int,const char*); + int (*release_memory)(int); + void (*result_error_nomem)(sqlite3_context*); + void (*result_error_toobig)(sqlite3_context*); + int (*sleep)(int); + void (*soft_heap_limit)(int); + sqlite3_vfs *(*vfs_find)(const char*); + int (*vfs_register)(sqlite3_vfs*,int); + int (*vfs_unregister)(sqlite3_vfs*); + int (*xthreadsafe)(void); + void (*result_zeroblob)(sqlite3_context*,int); + void (*result_error_code)(sqlite3_context*,int); + int (*test_control)(int, ...); + void (*randomness)(int,void*); + sqlite3 *(*context_db_handle)(sqlite3_context*); + int (*extended_result_codes)(sqlite3*,int); + int (*limit)(sqlite3*,int,int); + sqlite3_stmt *(*next_stmt)(sqlite3*,sqlite3_stmt*); + const char *(*sql)(sqlite3_stmt*); + int (*status)(int,int*,int*,int); + int (*backup_finish)(sqlite3_backup*); + sqlite3_backup *(*backup_init)(sqlite3*,const char*,sqlite3*,const char*); + int (*backup_pagecount)(sqlite3_backup*); + int (*backup_remaining)(sqlite3_backup*); + int (*backup_step)(sqlite3_backup*,int); + const char *(*compileoption_get)(int); + int (*compileoption_used)(const char*); + int (*create_function_v2)(sqlite3*,const char*,int,int,void*, + void (*xFunc)(sqlite3_context*,int,sqlite3_value**), + void (*xStep)(sqlite3_context*,int,sqlite3_value**), + void (*xFinal)(sqlite3_context*), + void(*xDestroy)(void*)); + int (*db_config)(sqlite3*,int,...); + sqlite3_mutex *(*db_mutex)(sqlite3*); + int (*db_status)(sqlite3*,int,int*,int*,int); + int (*extended_errcode)(sqlite3*); + void (*log)(int,const char*,...); + sqlite3_int64 (*soft_heap_limit64)(sqlite3_int64); + const char *(*sourceid)(void); + int (*stmt_status)(sqlite3_stmt*,int,int); + int (*strnicmp)(const char*,const char*,int); + int (*unlock_notify)(sqlite3*,void(*)(void**,int),void*); + int (*wal_autocheckpoint)(sqlite3*,int); + int (*wal_checkpoint)(sqlite3*,const char*); + void *(*wal_hook)(sqlite3*,int(*)(void*,sqlite3*,const char*,int),void*); + int (*blob_reopen)(sqlite3_blob*,sqlite3_int64); + int (*vtab_config)(sqlite3*,int op,...); + int (*vtab_on_conflict)(sqlite3*); + /* Version 3.7.16 and later */ + int (*close_v2)(sqlite3*); + const char *(*db_filename)(sqlite3*,const char*); + int (*db_readonly)(sqlite3*,const char*); + int (*db_release_memory)(sqlite3*); + const char *(*errstr)(int); + int (*stmt_busy)(sqlite3_stmt*); + int (*stmt_readonly)(sqlite3_stmt*); + int (*stricmp)(const char*,const char*); + int (*uri_boolean)(const char*,const char*,int); + sqlite3_int64 (*uri_int64)(const char*,const char*,sqlite3_int64); + const char *(*uri_parameter)(const char*,const char*); + char *(*xvsnprintf)(int,char*,const char*,va_list); + int (*wal_checkpoint_v2)(sqlite3*,const char*,int,int*,int*); + /* Version 3.8.7 and later */ + int (*auto_extension)(void(*)(void)); + int (*bind_blob64)(sqlite3_stmt*,int,const void*,sqlite3_uint64, + void(*)(void*)); + int (*bind_text64)(sqlite3_stmt*,int,const char*,sqlite3_uint64, + void(*)(void*),unsigned char); + int (*cancel_auto_extension)(void(*)(void)); + int (*load_extension)(sqlite3*,const char*,const char*,char**); + void *(*malloc64)(sqlite3_uint64); + sqlite3_uint64 (*msize)(void*); + void *(*realloc64)(void*,sqlite3_uint64); + void (*reset_auto_extension)(void); + void (*result_blob64)(sqlite3_context*,const void*,sqlite3_uint64, + void(*)(void*)); + void (*result_text64)(sqlite3_context*,const char*,sqlite3_uint64, + void(*)(void*), unsigned char); + int (*strglob)(const char*,const char*); + /* Version 3.8.11 and later */ + sqlite3_value *(*value_dup)(const sqlite3_value*); + void (*value_free)(sqlite3_value*); + int (*result_zeroblob64)(sqlite3_context*,sqlite3_uint64); + int (*bind_zeroblob64)(sqlite3_stmt*, int, sqlite3_uint64); + /* Version 3.9.0 and later */ + unsigned int (*value_subtype)(sqlite3_value*); + void (*result_subtype)(sqlite3_context*,unsigned int); + /* Version 3.10.0 and later */ + int (*status64)(int,sqlite3_int64*,sqlite3_int64*,int); + int (*strlike)(const char*,const char*,unsigned int); + int (*db_cacheflush)(sqlite3*); + /* Version 3.12.0 and later */ + int (*system_errno)(sqlite3*); + /* Version 3.14.0 and later */ + int (*trace_v2)(sqlite3*,unsigned,int(*)(unsigned,void*,void*,void*),void*); + char *(*expanded_sql)(sqlite3_stmt*); + /* Version 3.18.0 and later */ + void (*set_last_insert_rowid)(sqlite3*,sqlite3_int64); + /* Version 3.20.0 and later */ + int (*prepare_v3)(sqlite3*,const char*,int,unsigned int, + sqlite3_stmt**,const char**); + int (*prepare16_v3)(sqlite3*,const void*,int,unsigned int, + sqlite3_stmt**,const void**); + int (*bind_pointer)(sqlite3_stmt*,int,void*,const char*,void(*)(void*)); + void (*result_pointer)(sqlite3_context*,void*,const char*,void(*)(void*)); + void *(*value_pointer)(sqlite3_value*,const char*); + int (*vtab_nochange)(sqlite3_context*); + int (*value_nochange)(sqlite3_value*); + const char *(*vtab_collation)(sqlite3_index_info*,int); + /* Version 3.24.0 and later */ + int (*keyword_count)(void); + int (*keyword_name)(int,const char**,int*); + int (*keyword_check)(const char*,int); + sqlite3_str *(*str_new)(sqlite3*); + char *(*str_finish)(sqlite3_str*); + void (*str_appendf)(sqlite3_str*, const char *zFormat, ...); + void (*str_vappendf)(sqlite3_str*, const char *zFormat, va_list); + void (*str_append)(sqlite3_str*, const char *zIn, int N); + void (*str_appendall)(sqlite3_str*, const char *zIn); + void (*str_appendchar)(sqlite3_str*, int N, char C); + void (*str_reset)(sqlite3_str*); + int (*str_errcode)(sqlite3_str*); + int (*str_length)(sqlite3_str*); + char *(*str_value)(sqlite3_str*); + /* Version 3.25.0 and later */ + int (*create_window_function)(sqlite3*,const char*,int,int,void*, + void (*xStep)(sqlite3_context*,int,sqlite3_value**), + void (*xFinal)(sqlite3_context*), + void (*xValue)(sqlite3_context*), + void (*xInv)(sqlite3_context*,int,sqlite3_value**), + void(*xDestroy)(void*)); + /* Version 3.26.0 and later */ + const char *(*normalized_sql)(sqlite3_stmt*); + /* Version 3.28.0 and later */ + int (*stmt_isexplain)(sqlite3_stmt*); + int (*value_frombind)(sqlite3_value*); + /* Version 3.30.0 and later */ + int (*drop_modules)(sqlite3*,const char**); + /* Version 3.31.0 and later */ + sqlite3_int64 (*hard_heap_limit64)(sqlite3_int64); + const char *(*uri_key)(const char*,int); + const char *(*filename_database)(const char*); + const char *(*filename_journal)(const char*); + const char *(*filename_wal)(const char*); + /* Version 3.32.0 and later */ + const char *(*create_filename)(const char*,const char*,const char*, + int,const char**); + void (*free_filename)(const char*); + sqlite3_file *(*database_file_object)(const char*); + /* Version 3.34.0 and later */ + int (*txn_state)(sqlite3*,const char*); + /* Version 3.36.1 and later */ + sqlite3_int64 (*changes64)(sqlite3*); + sqlite3_int64 (*total_changes64)(sqlite3*); + /* Version 3.37.0 and later */ + int (*autovacuum_pages)(sqlite3*, + unsigned int(*)(void*,const char*,unsigned int,unsigned int,unsigned int), + void*, void(*)(void*)); + /* Version 3.38.0 and later */ + int (*error_offset)(sqlite3*); + int (*vtab_rhs_value)(sqlite3_index_info*,int,sqlite3_value**); + int (*vtab_distinct)(sqlite3_index_info*); + int (*vtab_in)(sqlite3_index_info*,int,int); + int (*vtab_in_first)(sqlite3_value*,sqlite3_value**); + int (*vtab_in_next)(sqlite3_value*,sqlite3_value**); + /* Version 3.39.0 and later */ + int (*deserialize)(sqlite3*,const char*,unsigned char*, + sqlite3_int64,sqlite3_int64,unsigned); + unsigned char *(*serialize)(sqlite3*,const char *,sqlite3_int64*, + unsigned int); + const char *(*db_name)(sqlite3*,int); + /* Version 3.40.0 and later */ + int (*value_encoding)(sqlite3_value*); + /* Version 3.41.0 and later */ + int (*is_interrupted)(sqlite3*); + /* Version 3.43.0 and later */ + int (*stmt_explain)(sqlite3_stmt*,int); + /* Version 3.44.0 and later */ + void *(*get_clientdata)(sqlite3*,const char*); + int (*set_clientdata)(sqlite3*, const char*, void*, void(*)(void*)); +}; + +/* +** This is the function signature used for all extension entry points. It +** is also defined in the file "loadext.c". +*/ +typedef int (*sqlite3_loadext_entry)( + sqlite3 *db, /* Handle to the database. */ + char **pzErrMsg, /* Used to set error string on failure. */ + const sqlite3_api_routines *pThunk /* Extension API function pointers. */ +); + +/* +** The following macros redefine the API routines so that they are +** redirected through the global sqlite3_api structure. +** +** This header file is also used by the loadext.c source file +** (part of the main SQLite library - not an extension) so that +** it can get access to the sqlite3_api_routines structure +** definition. But the main library does not want to redefine +** the API. So the redefinition macros are only valid if the +** SQLITE_CORE macros is undefined. +*/ +#if !defined(SQLITE_CORE) && !defined(SQLITE_OMIT_LOAD_EXTENSION) +#define sqlite3_aggregate_context sqlite3_api->aggregate_context +#ifndef SQLITE_OMIT_DEPRECATED +#define sqlite3_aggregate_count sqlite3_api->aggregate_count +#endif +#define sqlite3_bind_blob sqlite3_api->bind_blob +#define sqlite3_bind_double sqlite3_api->bind_double +#define sqlite3_bind_int sqlite3_api->bind_int +#define sqlite3_bind_int64 sqlite3_api->bind_int64 +#define sqlite3_bind_null sqlite3_api->bind_null +#define sqlite3_bind_parameter_count sqlite3_api->bind_parameter_count +#define sqlite3_bind_parameter_index sqlite3_api->bind_parameter_index +#define sqlite3_bind_parameter_name sqlite3_api->bind_parameter_name +#define sqlite3_bind_text sqlite3_api->bind_text +#define sqlite3_bind_text16 sqlite3_api->bind_text16 +#define sqlite3_bind_value sqlite3_api->bind_value +#define sqlite3_busy_handler sqlite3_api->busy_handler +#define sqlite3_busy_timeout sqlite3_api->busy_timeout +#define sqlite3_changes sqlite3_api->changes +#define sqlite3_close sqlite3_api->close +#define sqlite3_collation_needed sqlite3_api->collation_needed +#define sqlite3_collation_needed16 sqlite3_api->collation_needed16 +#define sqlite3_column_blob sqlite3_api->column_blob +#define sqlite3_column_bytes sqlite3_api->column_bytes +#define sqlite3_column_bytes16 sqlite3_api->column_bytes16 +#define sqlite3_column_count sqlite3_api->column_count +#define sqlite3_column_database_name sqlite3_api->column_database_name +#define sqlite3_column_database_name16 sqlite3_api->column_database_name16 +#define sqlite3_column_decltype sqlite3_api->column_decltype +#define sqlite3_column_decltype16 sqlite3_api->column_decltype16 +#define sqlite3_column_double sqlite3_api->column_double +#define sqlite3_column_int sqlite3_api->column_int +#define sqlite3_column_int64 sqlite3_api->column_int64 +#define sqlite3_column_name sqlite3_api->column_name +#define sqlite3_column_name16 sqlite3_api->column_name16 +#define sqlite3_column_origin_name sqlite3_api->column_origin_name +#define sqlite3_column_origin_name16 sqlite3_api->column_origin_name16 +#define sqlite3_column_table_name sqlite3_api->column_table_name +#define sqlite3_column_table_name16 sqlite3_api->column_table_name16 +#define sqlite3_column_text sqlite3_api->column_text +#define sqlite3_column_text16 sqlite3_api->column_text16 +#define sqlite3_column_type sqlite3_api->column_type +#define sqlite3_column_value sqlite3_api->column_value +#define sqlite3_commit_hook sqlite3_api->commit_hook +#define sqlite3_complete sqlite3_api->complete +#define sqlite3_complete16 sqlite3_api->complete16 +#define sqlite3_create_collation sqlite3_api->create_collation +#define sqlite3_create_collation16 sqlite3_api->create_collation16 +#define sqlite3_create_function sqlite3_api->create_function +#define sqlite3_create_function16 sqlite3_api->create_function16 +#define sqlite3_create_module sqlite3_api->create_module +#define sqlite3_create_module_v2 sqlite3_api->create_module_v2 +#define sqlite3_data_count sqlite3_api->data_count +#define sqlite3_db_handle sqlite3_api->db_handle +#define sqlite3_declare_vtab sqlite3_api->declare_vtab +#define sqlite3_enable_shared_cache sqlite3_api->enable_shared_cache +#define sqlite3_errcode sqlite3_api->errcode +#define sqlite3_errmsg sqlite3_api->errmsg +#define sqlite3_errmsg16 sqlite3_api->errmsg16 +#define sqlite3_exec sqlite3_api->exec +#ifndef SQLITE_OMIT_DEPRECATED +#define sqlite3_expired sqlite3_api->expired +#endif +#define sqlite3_finalize sqlite3_api->finalize +#define sqlite3_free sqlite3_api->free +#define sqlite3_free_table sqlite3_api->free_table +#define sqlite3_get_autocommit sqlite3_api->get_autocommit +#define sqlite3_get_auxdata sqlite3_api->get_auxdata +#define sqlite3_get_table sqlite3_api->get_table +#ifndef SQLITE_OMIT_DEPRECATED +#define sqlite3_global_recover sqlite3_api->global_recover +#endif +#define sqlite3_interrupt sqlite3_api->interruptx +#define sqlite3_last_insert_rowid sqlite3_api->last_insert_rowid +#define sqlite3_libversion sqlite3_api->libversion +#define sqlite3_libversion_number sqlite3_api->libversion_number +#define sqlite3_malloc sqlite3_api->malloc +#define sqlite3_mprintf sqlite3_api->mprintf +#define sqlite3_open sqlite3_api->open +#define sqlite3_open16 sqlite3_api->open16 +#define sqlite3_prepare sqlite3_api->prepare +#define sqlite3_prepare16 sqlite3_api->prepare16 +#define sqlite3_prepare_v2 sqlite3_api->prepare_v2 +#define sqlite3_prepare16_v2 sqlite3_api->prepare16_v2 +#define sqlite3_profile sqlite3_api->profile +#define sqlite3_progress_handler sqlite3_api->progress_handler +#define sqlite3_realloc sqlite3_api->realloc +#define sqlite3_reset sqlite3_api->reset +#define sqlite3_result_blob sqlite3_api->result_blob +#define sqlite3_result_double sqlite3_api->result_double +#define sqlite3_result_error sqlite3_api->result_error +#define sqlite3_result_error16 sqlite3_api->result_error16 +#define sqlite3_result_int sqlite3_api->result_int +#define sqlite3_result_int64 sqlite3_api->result_int64 +#define sqlite3_result_null sqlite3_api->result_null +#define sqlite3_result_text sqlite3_api->result_text +#define sqlite3_result_text16 sqlite3_api->result_text16 +#define sqlite3_result_text16be sqlite3_api->result_text16be +#define sqlite3_result_text16le sqlite3_api->result_text16le +#define sqlite3_result_value sqlite3_api->result_value +#define sqlite3_rollback_hook sqlite3_api->rollback_hook +#define sqlite3_set_authorizer sqlite3_api->set_authorizer +#define sqlite3_set_auxdata sqlite3_api->set_auxdata +#define sqlite3_snprintf sqlite3_api->xsnprintf +#define sqlite3_step sqlite3_api->step +#define sqlite3_table_column_metadata sqlite3_api->table_column_metadata +#define sqlite3_thread_cleanup sqlite3_api->thread_cleanup +#define sqlite3_total_changes sqlite3_api->total_changes +#define sqlite3_trace sqlite3_api->trace +#ifndef SQLITE_OMIT_DEPRECATED +#define sqlite3_transfer_bindings sqlite3_api->transfer_bindings +#endif +#define sqlite3_update_hook sqlite3_api->update_hook +#define sqlite3_user_data sqlite3_api->user_data +#define sqlite3_value_blob sqlite3_api->value_blob +#define sqlite3_value_bytes sqlite3_api->value_bytes +#define sqlite3_value_bytes16 sqlite3_api->value_bytes16 +#define sqlite3_value_double sqlite3_api->value_double +#define sqlite3_value_int sqlite3_api->value_int +#define sqlite3_value_int64 sqlite3_api->value_int64 +#define sqlite3_value_numeric_type sqlite3_api->value_numeric_type +#define sqlite3_value_text sqlite3_api->value_text +#define sqlite3_value_text16 sqlite3_api->value_text16 +#define sqlite3_value_text16be sqlite3_api->value_text16be +#define sqlite3_value_text16le sqlite3_api->value_text16le +#define sqlite3_value_type sqlite3_api->value_type +#define sqlite3_vmprintf sqlite3_api->vmprintf +#define sqlite3_vsnprintf sqlite3_api->xvsnprintf +#define sqlite3_overload_function sqlite3_api->overload_function +#define sqlite3_prepare_v2 sqlite3_api->prepare_v2 +#define sqlite3_prepare16_v2 sqlite3_api->prepare16_v2 +#define sqlite3_clear_bindings sqlite3_api->clear_bindings +#define sqlite3_bind_zeroblob sqlite3_api->bind_zeroblob +#define sqlite3_blob_bytes sqlite3_api->blob_bytes +#define sqlite3_blob_close sqlite3_api->blob_close +#define sqlite3_blob_open sqlite3_api->blob_open +#define sqlite3_blob_read sqlite3_api->blob_read +#define sqlite3_blob_write sqlite3_api->blob_write +#define sqlite3_create_collation_v2 sqlite3_api->create_collation_v2 +#define sqlite3_file_control sqlite3_api->file_control +#define sqlite3_memory_highwater sqlite3_api->memory_highwater +#define sqlite3_memory_used sqlite3_api->memory_used +#define sqlite3_mutex_alloc sqlite3_api->mutex_alloc +#define sqlite3_mutex_enter sqlite3_api->mutex_enter +#define sqlite3_mutex_free sqlite3_api->mutex_free +#define sqlite3_mutex_leave sqlite3_api->mutex_leave +#define sqlite3_mutex_try sqlite3_api->mutex_try +#define sqlite3_open_v2 sqlite3_api->open_v2 +#define sqlite3_release_memory sqlite3_api->release_memory +#define sqlite3_result_error_nomem sqlite3_api->result_error_nomem +#define sqlite3_result_error_toobig sqlite3_api->result_error_toobig +#define sqlite3_sleep sqlite3_api->sleep +#define sqlite3_soft_heap_limit sqlite3_api->soft_heap_limit +#define sqlite3_vfs_find sqlite3_api->vfs_find +#define sqlite3_vfs_register sqlite3_api->vfs_register +#define sqlite3_vfs_unregister sqlite3_api->vfs_unregister +#define sqlite3_threadsafe sqlite3_api->xthreadsafe +#define sqlite3_result_zeroblob sqlite3_api->result_zeroblob +#define sqlite3_result_error_code sqlite3_api->result_error_code +#define sqlite3_test_control sqlite3_api->test_control +#define sqlite3_randomness sqlite3_api->randomness +#define sqlite3_context_db_handle sqlite3_api->context_db_handle +#define sqlite3_extended_result_codes sqlite3_api->extended_result_codes +#define sqlite3_limit sqlite3_api->limit +#define sqlite3_next_stmt sqlite3_api->next_stmt +#define sqlite3_sql sqlite3_api->sql +#define sqlite3_status sqlite3_api->status +#define sqlite3_backup_finish sqlite3_api->backup_finish +#define sqlite3_backup_init sqlite3_api->backup_init +#define sqlite3_backup_pagecount sqlite3_api->backup_pagecount +#define sqlite3_backup_remaining sqlite3_api->backup_remaining +#define sqlite3_backup_step sqlite3_api->backup_step +#define sqlite3_compileoption_get sqlite3_api->compileoption_get +#define sqlite3_compileoption_used sqlite3_api->compileoption_used +#define sqlite3_create_function_v2 sqlite3_api->create_function_v2 +#define sqlite3_db_config sqlite3_api->db_config +#define sqlite3_db_mutex sqlite3_api->db_mutex +#define sqlite3_db_status sqlite3_api->db_status +#define sqlite3_extended_errcode sqlite3_api->extended_errcode +#define sqlite3_log sqlite3_api->log +#define sqlite3_soft_heap_limit64 sqlite3_api->soft_heap_limit64 +#define sqlite3_sourceid sqlite3_api->sourceid +#define sqlite3_stmt_status sqlite3_api->stmt_status +#define sqlite3_strnicmp sqlite3_api->strnicmp +#define sqlite3_unlock_notify sqlite3_api->unlock_notify +#define sqlite3_wal_autocheckpoint sqlite3_api->wal_autocheckpoint +#define sqlite3_wal_checkpoint sqlite3_api->wal_checkpoint +#define sqlite3_wal_hook sqlite3_api->wal_hook +#define sqlite3_blob_reopen sqlite3_api->blob_reopen +#define sqlite3_vtab_config sqlite3_api->vtab_config +#define sqlite3_vtab_on_conflict sqlite3_api->vtab_on_conflict +/* Version 3.7.16 and later */ +#define sqlite3_close_v2 sqlite3_api->close_v2 +#define sqlite3_db_filename sqlite3_api->db_filename +#define sqlite3_db_readonly sqlite3_api->db_readonly +#define sqlite3_db_release_memory sqlite3_api->db_release_memory +#define sqlite3_errstr sqlite3_api->errstr +#define sqlite3_stmt_busy sqlite3_api->stmt_busy +#define sqlite3_stmt_readonly sqlite3_api->stmt_readonly +#define sqlite3_stricmp sqlite3_api->stricmp +#define sqlite3_uri_boolean sqlite3_api->uri_boolean +#define sqlite3_uri_int64 sqlite3_api->uri_int64 +#define sqlite3_uri_parameter sqlite3_api->uri_parameter +#define sqlite3_uri_vsnprintf sqlite3_api->xvsnprintf +#define sqlite3_wal_checkpoint_v2 sqlite3_api->wal_checkpoint_v2 +/* Version 3.8.7 and later */ +#define sqlite3_auto_extension sqlite3_api->auto_extension +#define sqlite3_bind_blob64 sqlite3_api->bind_blob64 +#define sqlite3_bind_text64 sqlite3_api->bind_text64 +#define sqlite3_cancel_auto_extension sqlite3_api->cancel_auto_extension +#define sqlite3_load_extension sqlite3_api->load_extension +#define sqlite3_malloc64 sqlite3_api->malloc64 +#define sqlite3_msize sqlite3_api->msize +#define sqlite3_realloc64 sqlite3_api->realloc64 +#define sqlite3_reset_auto_extension sqlite3_api->reset_auto_extension +#define sqlite3_result_blob64 sqlite3_api->result_blob64 +#define sqlite3_result_text64 sqlite3_api->result_text64 +#define sqlite3_strglob sqlite3_api->strglob +/* Version 3.8.11 and later */ +#define sqlite3_value_dup sqlite3_api->value_dup +#define sqlite3_value_free sqlite3_api->value_free +#define sqlite3_result_zeroblob64 sqlite3_api->result_zeroblob64 +#define sqlite3_bind_zeroblob64 sqlite3_api->bind_zeroblob64 +/* Version 3.9.0 and later */ +#define sqlite3_value_subtype sqlite3_api->value_subtype +#define sqlite3_result_subtype sqlite3_api->result_subtype +/* Version 3.10.0 and later */ +#define sqlite3_status64 sqlite3_api->status64 +#define sqlite3_strlike sqlite3_api->strlike +#define sqlite3_db_cacheflush sqlite3_api->db_cacheflush +/* Version 3.12.0 and later */ +#define sqlite3_system_errno sqlite3_api->system_errno +/* Version 3.14.0 and later */ +#define sqlite3_trace_v2 sqlite3_api->trace_v2 +#define sqlite3_expanded_sql sqlite3_api->expanded_sql +/* Version 3.18.0 and later */ +#define sqlite3_set_last_insert_rowid sqlite3_api->set_last_insert_rowid +/* Version 3.20.0 and later */ +#define sqlite3_prepare_v3 sqlite3_api->prepare_v3 +#define sqlite3_prepare16_v3 sqlite3_api->prepare16_v3 +#define sqlite3_bind_pointer sqlite3_api->bind_pointer +#define sqlite3_result_pointer sqlite3_api->result_pointer +#define sqlite3_value_pointer sqlite3_api->value_pointer +/* Version 3.22.0 and later */ +#define sqlite3_vtab_nochange sqlite3_api->vtab_nochange +#define sqlite3_value_nochange sqlite3_api->value_nochange +#define sqlite3_vtab_collation sqlite3_api->vtab_collation +/* Version 3.24.0 and later */ +#define sqlite3_keyword_count sqlite3_api->keyword_count +#define sqlite3_keyword_name sqlite3_api->keyword_name +#define sqlite3_keyword_check sqlite3_api->keyword_check +#define sqlite3_str_new sqlite3_api->str_new +#define sqlite3_str_finish sqlite3_api->str_finish +#define sqlite3_str_appendf sqlite3_api->str_appendf +#define sqlite3_str_vappendf sqlite3_api->str_vappendf +#define sqlite3_str_append sqlite3_api->str_append +#define sqlite3_str_appendall sqlite3_api->str_appendall +#define sqlite3_str_appendchar sqlite3_api->str_appendchar +#define sqlite3_str_reset sqlite3_api->str_reset +#define sqlite3_str_errcode sqlite3_api->str_errcode +#define sqlite3_str_length sqlite3_api->str_length +#define sqlite3_str_value sqlite3_api->str_value +/* Version 3.25.0 and later */ +#define sqlite3_create_window_function sqlite3_api->create_window_function +/* Version 3.26.0 and later */ +#define sqlite3_normalized_sql sqlite3_api->normalized_sql +/* Version 3.28.0 and later */ +#define sqlite3_stmt_isexplain sqlite3_api->stmt_isexplain +#define sqlite3_value_frombind sqlite3_api->value_frombind +/* Version 3.30.0 and later */ +#define sqlite3_drop_modules sqlite3_api->drop_modules +/* Version 3.31.0 and later */ +#define sqlite3_hard_heap_limit64 sqlite3_api->hard_heap_limit64 +#define sqlite3_uri_key sqlite3_api->uri_key +#define sqlite3_filename_database sqlite3_api->filename_database +#define sqlite3_filename_journal sqlite3_api->filename_journal +#define sqlite3_filename_wal sqlite3_api->filename_wal +/* Version 3.32.0 and later */ +#define sqlite3_create_filename sqlite3_api->create_filename +#define sqlite3_free_filename sqlite3_api->free_filename +#define sqlite3_database_file_object sqlite3_api->database_file_object +/* Version 3.34.0 and later */ +#define sqlite3_txn_state sqlite3_api->txn_state +/* Version 3.36.1 and later */ +#define sqlite3_changes64 sqlite3_api->changes64 +#define sqlite3_total_changes64 sqlite3_api->total_changes64 +/* Version 3.37.0 and later */ +#define sqlite3_autovacuum_pages sqlite3_api->autovacuum_pages +/* Version 3.38.0 and later */ +#define sqlite3_error_offset sqlite3_api->error_offset +#define sqlite3_vtab_rhs_value sqlite3_api->vtab_rhs_value +#define sqlite3_vtab_distinct sqlite3_api->vtab_distinct +#define sqlite3_vtab_in sqlite3_api->vtab_in +#define sqlite3_vtab_in_first sqlite3_api->vtab_in_first +#define sqlite3_vtab_in_next sqlite3_api->vtab_in_next +/* Version 3.39.0 and later */ +#ifndef SQLITE_OMIT_DESERIALIZE +#define sqlite3_deserialize sqlite3_api->deserialize +#define sqlite3_serialize sqlite3_api->serialize +#endif +#define sqlite3_db_name sqlite3_api->db_name +/* Version 3.40.0 and later */ +#define sqlite3_value_encoding sqlite3_api->value_encoding +/* Version 3.41.0 and later */ +#define sqlite3_is_interrupted sqlite3_api->is_interrupted +/* Version 3.43.0 and later */ +#define sqlite3_stmt_explain sqlite3_api->stmt_explain +/* Version 3.44.0 and later */ +#define sqlite3_get_clientdata sqlite3_api->get_clientdata +#define sqlite3_set_clientdata sqlite3_api->set_clientdata +#endif /* !defined(SQLITE_CORE) && !defined(SQLITE_OMIT_LOAD_EXTENSION) */ + +#if !defined(SQLITE_CORE) && !defined(SQLITE_OMIT_LOAD_EXTENSION) + /* This case when the file really is being compiled as a loadable + ** extension */ +# define SQLITE_EXTENSION_INIT1 const sqlite3_api_routines *sqlite3_api=0; +# define SQLITE_EXTENSION_INIT2(v) sqlite3_api=v; +# define SQLITE_EXTENSION_INIT3 \ + extern const sqlite3_api_routines *sqlite3_api; +#else + /* This case when the file is being statically linked into the + ** application */ +# define SQLITE_EXTENSION_INIT1 /*no-op*/ +# define SQLITE_EXTENSION_INIT2(v) (void)v; /* unused parameter */ +# define SQLITE_EXTENSION_INIT3 /*no-op*/ +#endif + +#endif /* SQLITE3EXT_H */ diff --git a/deps/sqlite/unofficial.gni b/deps/sqlite/unofficial.gni index 6eda916ad23e07..b26e1335eac339 100644 --- a/deps/sqlite/unofficial.gni +++ b/deps/sqlite/unofficial.gni @@ -7,6 +7,10 @@ template("sqlite_gn_build") { config("sqlite_config") { include_dirs = [ "." ] + defines = [ + "SQLITE_ENABLE_SESSION", + "SQLITE_ENABLE_PREUPDATE_HOOK", + ] } gypi_values = exec_script("../../tools/gypi_to_gn.py", diff --git a/deps/uv/AUTHORS b/deps/uv/AUTHORS index 807440b30e8488..041b7aff610f57 100644 --- a/deps/uv/AUTHORS +++ b/deps/uv/AUTHORS @@ -588,3 +588,5 @@ Raihaan Shouhell Rialbat Adam Poul T Lomholt +dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> +Thad House diff --git a/deps/uv/ChangeLog b/deps/uv/ChangeLog index e1d1aa32989124..dc2dd2790c57d3 100644 --- a/deps/uv/ChangeLog +++ b/deps/uv/ChangeLog @@ -1,4 +1,22 @@ -2024.10.11, Version 1.49.1 (Stable) +2024.10.18, Version 1.49.2 (Stable) + +Changes since version 1.49.1: + +* win,fs: remove trailing slash in junctions (Hüseyin Açacak) + +* Revert "linux: eliminate a read on eventfd per wakeup" (Ben Noordhuis) + +* win: Fix linked list logic in getaddrinfo (Thad House) + +* win: fix compilation against Windows 24H2 SDK (Thad House) + +* win: remap ERROR_NOACCESS and ERROR_BUFFER_OVERFLOW (Jameson Nash) + +* win,fs: match trailing slash presence in junctions to user input (Jameson + Nash) + + +2024.10.11, Version 1.49.1 (Stable), 8be336f4ee296d20e1c071a44d6adf279e202236 Changes since version 1.49.0: diff --git a/deps/uv/configure.ac b/deps/uv/configure.ac index e3ee8a840c6872..98c59363026f86 100644 --- a/deps/uv/configure.ac +++ b/deps/uv/configure.ac @@ -13,7 +13,7 @@ # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. AC_PREREQ(2.57) -AC_INIT([libuv], [1.49.1], [https://github.com/libuv/libuv/issues]) +AC_INIT([libuv], [1.49.2], [https://github.com/libuv/libuv/issues]) AC_CONFIG_MACRO_DIR([m4]) m4_include([m4/libuv-extra-automake-flags.m4]) m4_include([m4/as_case.m4]) diff --git a/deps/uv/include/uv/version.h b/deps/uv/include/uv/version.h index 77a8b2541749f9..cfa7871322e690 100644 --- a/deps/uv/include/uv/version.h +++ b/deps/uv/include/uv/version.h @@ -32,7 +32,7 @@ #define UV_VERSION_MAJOR 1 #define UV_VERSION_MINOR 49 -#define UV_VERSION_PATCH 1 +#define UV_VERSION_PATCH 2 #define UV_VERSION_IS_RELEASE 1 #define UV_VERSION_SUFFIX "" diff --git a/deps/uv/src/unix/async.c b/deps/uv/src/unix/async.c index bc97ec54c4fcc6..0ff2669e30a628 100644 --- a/deps/uv/src/unix/async.c +++ b/deps/uv/src/unix/async.c @@ -38,34 +38,6 @@ #include #endif -#if UV__KQUEUE_EVFILT_USER -static uv_once_t kqueue_runtime_detection_guard = UV_ONCE_INIT; -static int kqueue_evfilt_user_support = 1; - - -static void uv__kqueue_runtime_detection(void) { - int kq; - struct kevent ev[2]; - struct timespec timeout = {0, 0}; - - /* Perform the runtime detection to ensure that kqueue with - * EVFILT_USER actually works. */ - kq = kqueue(); - EV_SET(ev, UV__KQUEUE_EVFILT_USER_IDENT, EVFILT_USER, - EV_ADD | EV_CLEAR, 0, 0, 0); - EV_SET(ev + 1, UV__KQUEUE_EVFILT_USER_IDENT, EVFILT_USER, - 0, NOTE_TRIGGER, 0, 0); - if (kevent(kq, ev, 2, ev, 1, &timeout) < 1 || - ev[0].filter != EVFILT_USER || - ev[0].ident != UV__KQUEUE_EVFILT_USER_IDENT || - ev[0].flags & EV_ERROR) - /* If we wind up here, we can assume that EVFILT_USER is defined but - * broken on the current system. */ - kqueue_evfilt_user_support = 0; - uv__close(kq); -} -#endif - static void uv__async_send(uv_loop_t* loop); static int uv__async_start(uv_loop_t* loop); static void uv__cpu_relax(void); @@ -158,10 +130,8 @@ void uv__async_close(uv_async_t* handle) { static void uv__async_io(uv_loop_t* loop, uv__io_t* w, unsigned int events) { -#ifndef __linux__ char buf[1024]; ssize_t r; -#endif struct uv__queue queue; struct uv__queue* q; uv_async_t* h; @@ -169,12 +139,7 @@ static void uv__async_io(uv_loop_t* loop, uv__io_t* w, unsigned int events) { assert(w == &loop->async_io_watcher); -#ifndef __linux__ -#if UV__KQUEUE_EVFILT_USER - for (;!kqueue_evfilt_user_support;) { -#else for (;;) { -#endif r = read(w->fd, buf, sizeof(buf)); if (r == sizeof(buf)) @@ -191,7 +156,6 @@ static void uv__async_io(uv_loop_t* loop, uv__io_t* w, unsigned int events) { abort(); } -#endif /* !__linux__ */ uv__queue_move(&loop->async_handles, &queue); while (!uv__queue_empty(&queue)) { @@ -215,58 +179,34 @@ static void uv__async_io(uv_loop_t* loop, uv__io_t* w, unsigned int events) { static void uv__async_send(uv_loop_t* loop) { + const void* buf; + ssize_t len; int fd; - ssize_t r; -#ifdef __linux__ - uint64_t val; - - fd = loop->async_io_watcher.fd; /* eventfd */ - for (val = 1; /* empty */; val = 1) { - r = write(fd, &val, sizeof(uint64_t)); - if (r < 0) { - /* When EAGAIN occurs, the eventfd counter hits the maximum value of the unsigned 64-bit. - * We need to first drain the eventfd and then write again. - * - * Check out https://man7.org/linux/man-pages/man2/eventfd.2.html for details. - */ - if (errno == EAGAIN) { - /* It's ready to retry. */ - if (read(fd, &val, sizeof(uint64_t)) > 0 || errno == EAGAIN) { - continue; - } - } - /* Unknown error occurs. */ - break; - } - return; - } -#else -#if UV__KQUEUE_EVFILT_USER - struct kevent ev; - - if (kqueue_evfilt_user_support) { - fd = loop->async_io_watcher.fd; /* magic number for EVFILT_USER */ - EV_SET(&ev, fd, EVFILT_USER, 0, NOTE_TRIGGER, 0, 0); - r = kevent(loop->backend_fd, &ev, 1, NULL, 0, NULL); - if (r == 0) - return; - else - abort(); + int r; + + buf = ""; + len = 1; + fd = loop->async_wfd; + +#if defined(__linux__) + if (fd == -1) { + static const uint64_t val = 1; + buf = &val; + len = sizeof(val); + fd = loop->async_io_watcher.fd; /* eventfd */ } #endif - fd = loop->async_wfd; /* write end of the pipe */ do - r = write(fd, "x", 1); + r = write(fd, buf, len); while (r == -1 && errno == EINTR); - if (r == 1) + if (r == len) return; if (r == -1) if (errno == EAGAIN || errno == EWOULDBLOCK) return; -#endif abort(); } @@ -275,9 +215,6 @@ static void uv__async_send(uv_loop_t* loop) { static int uv__async_start(uv_loop_t* loop) { int pipefd[2]; int err; -#if UV__KQUEUE_EVFILT_USER - struct kevent ev; -#endif if (loop->async_io_watcher.fd != -1) return 0; @@ -289,36 +226,6 @@ static int uv__async_start(uv_loop_t* loop) { pipefd[0] = err; pipefd[1] = -1; -#elif UV__KQUEUE_EVFILT_USER - uv_once(&kqueue_runtime_detection_guard, uv__kqueue_runtime_detection); - if (kqueue_evfilt_user_support) { - /* In order not to break the generic pattern of I/O polling, a valid - * file descriptor is required to take up a room in loop->watchers, - * thus we create one for that, but this fd will not be actually used, - * it's just a placeholder and magic number which is going to be closed - * during the cleanup, as other FDs. */ - err = uv__open_cloexec("/dev/null", O_RDONLY); - if (err < 0) - return err; - - pipefd[0] = err; - pipefd[1] = -1; - - /* When using EVFILT_USER event to wake up the kqueue, this event must be - * registered beforehand. Otherwise, calling kevent() to issue an - * unregistered EVFILT_USER event will get an ENOENT. - * Since uv__async_send() may happen before uv__io_poll() with multi-threads, - * we can't defer this registration of EVFILT_USER event as we did for other - * events, but must perform it right away. */ - EV_SET(&ev, err, EVFILT_USER, EV_ADD | EV_CLEAR, 0, 0, 0); - err = kevent(loop->backend_fd, &ev, 1, NULL, 0, NULL); - if (err < 0) - return UV__ERR(errno); - } else { - err = uv__make_pipe(pipefd, UV_NONBLOCK_PIPE); - if (err < 0) - return err; - } #else err = uv__make_pipe(pipefd, UV_NONBLOCK_PIPE); if (err < 0) @@ -329,13 +236,6 @@ static int uv__async_start(uv_loop_t* loop) { uv__io_start(loop, &loop->async_io_watcher, POLLIN); loop->async_wfd = pipefd[1]; -#if UV__KQUEUE_EVFILT_USER - /* Prevent the EVFILT_USER event from being added to kqueue redundantly - * and mistakenly later in uv__io_poll(). */ - if (kqueue_evfilt_user_support) - loop->async_io_watcher.events = loop->async_io_watcher.pevents; -#endif - return 0; } diff --git a/deps/uv/src/unix/internal.h b/deps/uv/src/unix/internal.h index 568a55b55acb35..8d586b0b64a96c 100644 --- a/deps/uv/src/unix/internal.h +++ b/deps/uv/src/unix/internal.h @@ -35,10 +35,6 @@ #include #include #include -#if defined(__APPLE__) || defined(__DragonFly__) || \ - defined(__FreeBSD__) || defined(__NetBSD__) -#include -#endif #define uv__msan_unpoison(p, n) \ do { \ @@ -508,22 +504,4 @@ int uv__get_constrained_cpu(uv__cpu_constraint* constraint); #endif #endif -#if defined(EVFILT_USER) && defined(NOTE_TRIGGER) -/* EVFILT_USER is available since OS X 10.6, DragonFlyBSD 4.0, - * FreeBSD 8.1, and NetBSD 10.0. - * - * Note that even though EVFILT_USER is defined on the current system, - * it may still fail to work at runtime somehow. In that case, we fall - * back to pipe-based signaling. - */ -#define UV__KQUEUE_EVFILT_USER 1 -/* Magic number of identifier used for EVFILT_USER during runtime detection. - * There are no Google hits for this number when I create it. That way, - * people will be directed here if this number gets printed due to some - * kqueue error and they google for help. */ -#define UV__KQUEUE_EVFILT_USER_IDENT 0x1e7e7711 -#else -#define UV__KQUEUE_EVFILT_USER 0 -#endif - #endif /* UV_UNIX_INTERNAL_H_ */ diff --git a/deps/uv/src/unix/kqueue.c b/deps/uv/src/unix/kqueue.c index 876b717086c609..66aa166f053f52 100644 --- a/deps/uv/src/unix/kqueue.c +++ b/deps/uv/src/unix/kqueue.c @@ -367,17 +367,6 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { continue; } -#if UV__KQUEUE_EVFILT_USER - if (ev->filter == EVFILT_USER) { - w = &loop->async_io_watcher; - assert(fd == w->fd); - uv__metrics_update_idle_time(loop); - w->cb(loop, w, w->events); - nevents++; - continue; - } -#endif - if (ev->filter == EVFILT_VNODE) { assert(w->events == POLLIN); assert(w->pevents == POLLIN); diff --git a/deps/uv/src/unix/linux.c b/deps/uv/src/unix/linux.c index 803a9a9d3f04c9..857a4ef8a6686f 100644 --- a/deps/uv/src/unix/linux.c +++ b/deps/uv/src/unix/linux.c @@ -1414,12 +1414,6 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { w->events = w->pevents; e.events = w->pevents; - if (w == &loop->async_io_watcher) - /* Enable edge-triggered mode on async_io_watcher(eventfd), - * so that we're able to eliminate the overhead of reading - * the eventfd via system call on each event loop wakeup. - */ - e.events |= EPOLLET; e.data.fd = w->fd; fd = w->fd; diff --git a/deps/uv/src/win/error.c b/deps/uv/src/win/error.c index 58587c5fb785ea..7abf906bb5c823 100644 --- a/deps/uv/src/win/error.c +++ b/deps/uv/src/win/error.c @@ -69,7 +69,6 @@ int uv_translate_sys_error(int sys_errno) { } switch (sys_errno) { - case ERROR_NOACCESS: return UV_EACCES; case WSAEACCES: return UV_EACCES; case ERROR_ELEVATION_REQUIRED: return UV_EACCES; case ERROR_CANT_ACCESS_FILE: return UV_EACCES; @@ -96,7 +95,7 @@ int uv_translate_sys_error(int sys_errno) { case WSAECONNRESET: return UV_ECONNRESET; case ERROR_ALREADY_EXISTS: return UV_EEXIST; case ERROR_FILE_EXISTS: return UV_EEXIST; - case ERROR_BUFFER_OVERFLOW: return UV_EFAULT; + case ERROR_NOACCESS: return UV_EFAULT; case WSAEFAULT: return UV_EFAULT; case ERROR_HOST_UNREACHABLE: return UV_EHOSTUNREACH; case WSAEHOSTUNREACH: return UV_EHOSTUNREACH; @@ -127,6 +126,7 @@ int uv_translate_sys_error(int sys_errno) { case ERROR_TOO_MANY_OPEN_FILES: return UV_EMFILE; case WSAEMFILE: return UV_EMFILE; case WSAEMSGSIZE: return UV_EMSGSIZE; + case ERROR_BUFFER_OVERFLOW: return UV_ENAMETOOLONG; case ERROR_FILENAME_EXCED_RANGE: return UV_ENAMETOOLONG; case ERROR_NETWORK_UNREACHABLE: return UV_ENETUNREACH; case WSAENETUNREACH: return UV_ENETUNREACH; diff --git a/deps/uv/src/win/fs.c b/deps/uv/src/win/fs.c index 08b42eb14c972a..f2215bb3082178 100644 --- a/deps/uv/src/win/fs.c +++ b/deps/uv/src/win/fs.c @@ -2566,16 +2566,17 @@ static void fs__create_junction(uv_fs_t* req, const WCHAR* path, path_buf[path_buf_len++] = path[i]; } - path_buf[path_buf_len++] = L'\\'; + if (add_slash) + path_buf[path_buf_len++] = L'\\'; len = path_buf_len - start; + /* Insert null terminator */ + path_buf[path_buf_len++] = L'\0'; + /* Set the info about the substitute name */ buffer->MountPointReparseBuffer.SubstituteNameOffset = start * sizeof(WCHAR); buffer->MountPointReparseBuffer.SubstituteNameLength = len * sizeof(WCHAR); - /* Insert null terminator */ - path_buf[path_buf_len++] = L'\0'; - /* Copy the print name of the target path */ start = path_buf_len; add_slash = 0; @@ -2593,18 +2594,18 @@ static void fs__create_junction(uv_fs_t* req, const WCHAR* path, path_buf[path_buf_len++] = path[i]; } len = path_buf_len - start; - if (len == 2) { + if (len == 2 || add_slash) { path_buf[path_buf_len++] = L'\\'; len++; } + /* Insert another null terminator */ + path_buf[path_buf_len++] = L'\0'; + /* Set the info about the print name */ buffer->MountPointReparseBuffer.PrintNameOffset = start * sizeof(WCHAR); buffer->MountPointReparseBuffer.PrintNameLength = len * sizeof(WCHAR); - /* Insert another null terminator */ - path_buf[path_buf_len++] = L'\0'; - /* Calculate how much buffer space was actually used */ used_buf_size = FIELD_OFFSET(REPARSE_DATA_BUFFER, MountPointReparseBuffer.PathBuffer) + path_buf_len * sizeof(WCHAR); diff --git a/deps/uv/src/win/getaddrinfo.c b/deps/uv/src/win/getaddrinfo.c index f20e10d49d974a..4b8ee75a0622f6 100644 --- a/deps/uv/src/win/getaddrinfo.c +++ b/deps/uv/src/win/getaddrinfo.c @@ -191,8 +191,9 @@ static void uv__getaddrinfo_done(struct uv__work* w, int status) { if (addrinfow_ptr == NULL) break; cur_off = align_offset(cur_off, sizeof(void *)); - addrinfo_ptr = (struct addrinfo *)(alloc_ptr + cur_off); - addrinfo_ptr->ai_next = addrinfo_ptr; + struct addrinfo *next_addrinfo_ptr = (struct addrinfo *)(alloc_ptr + cur_off); + addrinfo_ptr->ai_next = next_addrinfo_ptr; + addrinfo_ptr = next_addrinfo_ptr; } req->addrinfo = (struct addrinfo*)alloc_ptr; } else { diff --git a/deps/uv/src/win/winapi.h b/deps/uv/src/win/winapi.h index 548081f23a9276..5800e70dfd7d11 100644 --- a/deps/uv/src/win/winapi.h +++ b/deps/uv/src/win/winapi.h @@ -4125,6 +4125,12 @@ typedef const UNICODE_STRING *PCUNICODE_STRING; # define DEVICE_TYPE DWORD #endif +#ifndef NTDDI_WIN11_ZN +# define NTDDI_WIN11_ZN 0x0A00000E +#endif + +/* API is defined in newer SDKS */ +#if (NTDDI_VERSION < NTDDI_WIN11_ZN) typedef struct _FILE_STAT_BASIC_INFORMATION { LARGE_INTEGER FileId; LARGE_INTEGER CreationTime; @@ -4142,6 +4148,7 @@ typedef struct _FILE_STAT_BASIC_INFORMATION { FILE_ID_128 FileId128; LARGE_INTEGER VolumeSerialNumber; } FILE_STAT_BASIC_INFORMATION; +#endif /* MinGW already has a definition for REPARSE_DATA_BUFFER, but mingw-w64 does * not. @@ -4783,6 +4790,8 @@ typedef struct _TCP_INITIAL_RTO_PARAMETERS { #endif /* from winnt.h */ +/* API is defined in newer SDKS */ +#if (NTDDI_VERSION < NTDDI_WIN11_ZN) typedef enum _FILE_INFO_BY_NAME_CLASS { FileStatByNameInfo, FileStatLxByNameInfo, @@ -4790,6 +4799,7 @@ typedef enum _FILE_INFO_BY_NAME_CLASS { FileStatBasicByNameInfo, MaximumFileInfoByNameClass } FILE_INFO_BY_NAME_CLASS; +#endif typedef BOOL(WINAPI* sGetFileInformationByName)( PCWSTR FileName, diff --git a/deps/uv/test/test-error.c b/deps/uv/test/test-error.c index 2c6d0ca49790e0..b6e18b0f052eae 100644 --- a/deps/uv/test/test-error.c +++ b/deps/uv/test/test-error.c @@ -64,7 +64,7 @@ TEST_IMPL(error_message) { TEST_IMPL(sys_error) { #if defined(_WIN32) - ASSERT_EQ(uv_translate_sys_error(ERROR_NOACCESS), UV_EACCES); + ASSERT_EQ(uv_translate_sys_error(ERROR_NOACCESS), UV_EFAULT); ASSERT_EQ(uv_translate_sys_error(ERROR_ELEVATION_REQUIRED), UV_EACCES); ASSERT_EQ(uv_translate_sys_error(WSAEADDRINUSE), UV_EADDRINUSE); ASSERT_EQ(uv_translate_sys_error(ERROR_BAD_PIPE), UV_EPIPE); diff --git a/deps/uv/test/test-fs.c b/deps/uv/test/test-fs.c index ff0f9fc89a2d1b..33cbd428707c36 100644 --- a/deps/uv/test/test-fs.c +++ b/deps/uv/test/test-fs.c @@ -2379,8 +2379,8 @@ int test_symlink_dir_impl(int type) { strcpy(test_dir_abs_buf, "\\\\?\\"); uv_cwd(test_dir_abs_buf + 4, &test_dir_abs_size); test_dir_abs_size += 4; - strcat(test_dir_abs_buf, "\\test_dir\\"); - test_dir_abs_size += strlen("\\test_dir\\"); + strcat(test_dir_abs_buf, "\\test_dir"); + test_dir_abs_size += strlen("\\test_dir"); test_dir = test_dir_abs_buf; #else uv_cwd(test_dir_abs_buf, &test_dir_abs_size); @@ -2435,8 +2435,8 @@ int test_symlink_dir_impl(int type) { r = uv_fs_realpath(NULL, &req, "test_dir_symlink", NULL); ASSERT_OK(r); #ifdef _WIN32 - ASSERT_EQ(strlen(req.ptr), test_dir_abs_size - 5); - ASSERT_OK(_strnicmp(req.ptr, test_dir + 4, test_dir_abs_size - 5)); + ASSERT_EQ(strlen(req.ptr), test_dir_abs_size - 4); + ASSERT_OK(_strnicmp(req.ptr, test_dir + 4, test_dir_abs_size - 4)); #else ASSERT_OK(strcmp(req.ptr, test_dir_abs_buf)); #endif diff --git a/deps/v8/include/v8config.h b/deps/v8/include/v8config.h index e649d8cce7d03d..31d9421ed79876 100644 --- a/deps/v8/include/v8config.h +++ b/deps/v8/include/v8config.h @@ -549,11 +549,15 @@ path. Add it with -I to the command line // functions. // Use like: // V8_NOINLINE V8_PRESERVE_MOST void UnlikelyMethod(); +#if V8_OS_WIN +# define V8_PRESERVE_MOST +#else #if V8_HAS_ATTRIBUTE_PRESERVE_MOST # define V8_PRESERVE_MOST __attribute__((preserve_most)) #else # define V8_PRESERVE_MOST /* NOT SUPPORTED */ #endif +#endif // A macro (V8_DEPRECATED) to mark classes or functions as deprecated. diff --git a/deps/zlib/README.chromium b/deps/zlib/README.chromium index 31b9d55860f9e5..92c5bfd1af200e 100644 --- a/deps/zlib/README.chromium +++ b/deps/zlib/README.chromium @@ -2,6 +2,7 @@ Name: zlib Short Name: zlib URL: http://zlib.net/ Version: 1.3.0.1 +Revision: ac8f12c97d1afd9bafa9c710f827d40a407d3266 CPEPrefix: cpe:/a:zlib:zlib:1.3.0.1 Security Critical: yes Shipped: yes diff --git a/deps/zlib/google/zip_internal.cc b/deps/zlib/google/zip_internal.cc index aa49f4546caa0e..e6b5a4fc3bcb00 100644 --- a/deps/zlib/google/zip_internal.cc +++ b/deps/zlib/google/zip_internal.cc @@ -165,8 +165,7 @@ struct ZipBuffer { // writing compressed data and it returns NULL for this case.) void* OpenZipBuffer(void* opaque, const void* /*filename*/, int mode) { if ((mode & ZLIB_FILEFUNC_MODE_READWRITEFILTER) != ZLIB_FILEFUNC_MODE_READ) { - NOTREACHED_IN_MIGRATION(); - return NULL; + NOTREACHED(); } ZipBuffer* buffer = static_cast(opaque); if (!buffer || !buffer->data || !buffer->length) @@ -196,8 +195,7 @@ uLong WriteZipBuffer(void* /*opaque*/, void* /*stream*/, const void* /*buf*/, uLong /*size*/) { - NOTREACHED_IN_MIGRATION(); - return 0; + NOTREACHED(); } // Returns the offset from the beginning of the data. @@ -228,8 +226,7 @@ long SeekZipBuffer(void* opaque, buffer->offset = std::min(buffer->length, offset); return 0; } - NOTREACHED_IN_MIGRATION(); - return -1; + NOTREACHED(); } // Closes the input offset and deletes all resources used for compressing or diff --git a/deps/zlib/google/zip_reader_unittest.cc b/deps/zlib/google/zip_reader_unittest.cc index 9d1406feff9887..46c0beb1453237 100644 --- a/deps/zlib/google/zip_reader_unittest.cc +++ b/deps/zlib/google/zip_reader_unittest.cc @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -555,10 +556,10 @@ TEST_F(ZipReaderTest, ExtractToFileAsync_RegularFile) { const std::string md5 = base::MD5String(output); EXPECT_EQ(kQuuxExpectedMD5, md5); - int64_t file_size = 0; - ASSERT_TRUE(base::GetFileSize(target_file, &file_size)); + std::optional file_size = base::GetFileSize(target_file); + ASSERT_TRUE(file_size.has_value()); - EXPECT_EQ(file_size, listener.current_progress()); + EXPECT_EQ(file_size.value(), listener.current_progress()); } TEST_F(ZipReaderTest, ExtractToFileAsync_Encrypted_NoPassword) { diff --git a/deps/zlib/google/zip_unittest.cc b/deps/zlib/google/zip_unittest.cc index 58bafb809d6bf9..2bcfa309281fce 100644 --- a/deps/zlib/google/zip_unittest.cc +++ b/deps/zlib/google/zip_unittest.cc @@ -63,8 +63,9 @@ bool CreateFile(const std::string& content, if (!base::CreateTemporaryFile(file_path)) return false; - if (base::WriteFile(*file_path, content.data(), content.size()) == -1) + if (!base::WriteFile(*file_path, content)) { return false; + } *file = base::File( *file_path, base::File::Flags::FLAG_OPEN | base::File::Flags::FLAG_READ); @@ -350,7 +351,7 @@ class ZipTest : public PlatformTest { base::Time now_time; EXPECT_TRUE(base::Time::FromUTCExploded(now_parts, &now_time)); - EXPECT_EQ(1, base::WriteFile(src_file, "1", 1)); + EXPECT_TRUE(base::WriteFile(src_file, "1")); EXPECT_TRUE(base::TouchFile(src_file, base::Time::Now(), test_mtime)); EXPECT_TRUE(zip::Zip(src_dir, zip_file, true)); @@ -748,6 +749,8 @@ TEST_F(ZipTest, UnzipMixedPaths) { "Space→", // #else " ", // + "...", // Disappears on Windows + "....", // Disappears on Windows "AUX", // Disappears on Windows "COM1", // Disappears on Windows "COM2", // Disappears on Windows @@ -1113,9 +1116,9 @@ TEST_F(ZipTest, UnzipFilesWithIncorrectSize) { SCOPED_TRACE(base::StringPrintf("Processing %d.txt", i)); base::FilePath file_path = temp_dir.AppendASCII(base::StringPrintf("%d.txt", i)); - int64_t file_size = -1; - EXPECT_TRUE(base::GetFileSize(file_path, &file_size)); - EXPECT_EQ(static_cast(i), file_size); + std::optional file_size = base::GetFileSize(file_path); + EXPECT_TRUE(file_size.has_value()); + EXPECT_EQ(static_cast(i), file_size.value()); } } @@ -1306,10 +1309,10 @@ TEST_F(ZipTest, Compressed) { // Since the source files compress well, the destination ZIP file should be // smaller than the source files. - int64_t dest_file_size; - ASSERT_TRUE(base::GetFileSize(dest_file, &dest_file_size)); - EXPECT_GT(dest_file_size, 300); - EXPECT_LT(dest_file_size, 1000); + std::optional dest_file_size = base::GetFileSize(dest_file); + ASSERT_TRUE(dest_file_size.has_value()); + EXPECT_GT(dest_file_size.value(), 300); + EXPECT_LT(dest_file_size.value(), 1000); } // Tests that a ZIP put inside a ZIP is simply stored instead of being @@ -1338,10 +1341,10 @@ TEST_F(ZipTest, NestedZip) { // Since the dummy source (inner) ZIP file should simply be stored in the // destination (outer) ZIP file, the destination file should be bigger than // the source file, but not much bigger. - int64_t dest_file_size; - ASSERT_TRUE(base::GetFileSize(dest_file, &dest_file_size)); - EXPECT_GT(dest_file_size, src_size + 100); - EXPECT_LT(dest_file_size, src_size + 300); + std::optional dest_file_size = base::GetFileSize(dest_file); + ASSERT_TRUE(dest_file_size.has_value()); + EXPECT_GT(dest_file_size.value(), src_size + 100); + EXPECT_LT(dest_file_size.value(), src_size + 300); } // Tests that there is no 2GB or 4GB limits. Tests that big files can be zipped @@ -1402,10 +1405,10 @@ TEST_F(ZipTest, BigFile) { // Since the dummy source (inner) ZIP file should simply be stored in the // destination (outer) ZIP file, the destination file should be bigger than // the source file, but not much bigger. - int64_t dest_file_size; - ASSERT_TRUE(base::GetFileSize(dest_file, &dest_file_size)); - EXPECT_GT(dest_file_size, src_size + 100); - EXPECT_LT(dest_file_size, src_size + 300); + std::optional dest_file_size = base::GetFileSize(dest_file); + ASSERT_TRUE(dest_file_size.has_value()); + EXPECT_GT(dest_file_size.value(), src_size + 100); + EXPECT_LT(dest_file_size.value(), src_size + 300); LOG(INFO) << "Reading big ZIP " << dest_file; zip::ZipReader reader; diff --git a/deps/zlib/google/zip_writer.cc b/deps/zlib/google/zip_writer.cc index 31161ae86c3b7a..34ab0ad9ef2887 100644 --- a/deps/zlib/google/zip_writer.cc +++ b/deps/zlib/google/zip_writer.cc @@ -5,6 +5,7 @@ #include "third_party/zlib/google/zip_writer.h" #include +#include #include "base/files/file.h" #include "base/logging.h" @@ -193,8 +194,8 @@ bool ZipWriter::AddMixedEntries(Paths paths) { while (!paths.empty()) { // Work with chunks of 50 paths at most. const size_t n = std::min(paths.size(), 50); - const Paths relative_paths = paths.subspan(0, n); - paths = paths.subspan(n, paths.size() - n); + Paths relative_paths; + std::tie(relative_paths, paths) = paths.split_at(n); files.clear(); if (!file_accessor_->Open(relative_paths, &files) || files.size() != n) @@ -233,8 +234,8 @@ bool ZipWriter::AddFileEntries(Paths paths) { while (!paths.empty()) { // Work with chunks of 50 paths at most. const size_t n = std::min(paths.size(), 50); - const Paths relative_paths = paths.subspan(0, n); - paths = paths.subspan(n, paths.size() - n); + Paths relative_paths; + std::tie(relative_paths, paths) = paths.split_at(n); DCHECK_EQ(relative_paths.size(), n); diff --git a/doc/api/addons.md b/doc/api/addons.md index e0e00dca0b9e8b..8e2864952e0841 100644 --- a/doc/api/addons.md +++ b/doc/api/addons.md @@ -72,6 +72,7 @@ namespace demo { using v8::FunctionCallbackInfo; using v8::Isolate; using v8::Local; +using v8::NewStringType; using v8::Object; using v8::String; using v8::Value; @@ -79,7 +80,7 @@ using v8::Value; void Method(const FunctionCallbackInfo& args) { Isolate* isolate = args.GetIsolate(); args.GetReturnValue().Set(String::NewFromUtf8( - isolate, "world").ToLocalChecked()); + isolate, "world", NewStringType::kNormal).ToLocalChecked()); } void Initialize(Local exports) { diff --git a/doc/api/assert.md b/doc/api/assert.md index b087b21b019735..336fcd8cc5ffb9 100644 --- a/doc/api/assert.md +++ b/doc/api/assert.md @@ -2543,6 +2543,96 @@ assert.throws(throwingFirst, /Second$/); Due to the confusing error-prone notation, avoid a string as the second argument. +## `assert.partialDeepStrictEqual(actual, expected[, message])` + + + +> Stability: 1.0 - Early development + +* `actual` {any} +* `expected` {any} +* `message` {string|Error} + +[`assert.partialDeepStrictEqual()`][] Asserts the equivalence between the `actual` and `expected` parameters through a +deep comparison, ensuring that all properties in the `expected` parameter are +present in the `actual` parameter with equivalent values, not allowing type coercion. +The main difference with [`assert.deepStrictEqual()`][] is that [`assert.partialDeepStrictEqual()`][] does not require +all properties in the `actual` parameter to be present in the `expected` parameter. +This method should always pass the same test cases as [`assert.deepStrictEqual()`][], behaving as a super set of it. + +```mjs +import assert from 'node:assert'; + +assert.partialDeepStrictEqual({ a: 1, b: 2 }, { a: 1, b: 2 }); +// OK + +assert.partialDeepStrictEqual({ a: { b: { c: 1 } } }, { a: { b: { c: 1 } } }); +// OK + +assert.partialDeepStrictEqual({ a: 1, b: 2, c: 3 }, { a: 1, b: 2 }); +// OK + +assert.partialDeepStrictEqual(new Set(['value1', 'value2']), new Set(['value1', 'value2'])); +// OK + +assert.partialDeepStrictEqual(new Map([['key1', 'value1']]), new Map([['key1', 'value1']])); +// OK + +assert.partialDeepStrictEqual(new Uint8Array([1, 2, 3]), new Uint8Array([1, 2, 3])); +// OK + +assert.partialDeepStrictEqual(/abc/, /abc/); +// OK + +assert.partialDeepStrictEqual([{ a: 5 }, { b: 5 }], [{ a: 5 }]); +// OK + +assert.partialDeepStrictEqual(new Set([{ a: 1 }, { b: 1 }]), new Set([{ a: 1 }])); +// OK + +assert.partialDeepStrictEqual(new Date(0), new Date(0)); +// OK + +assert.partialDeepStrictEqual({ a: 1 }, { a: 1, b: 2 }); +// AssertionError + +assert.partialDeepStrictEqual({ a: 1, b: '2' }, { a: 1, b: 2 }); +// AssertionError + +assert.partialDeepStrictEqual({ a: { b: 2 } }, { a: { b: '2' } }); +// AssertionError +``` + +```cjs +const assert = require('node:assert'); + +assert.partialDeepStrictEqual({ a: 1, b: 2 }, { a: 1, b: 2 }); +// OK + +assert.partialDeepStrictEqual({ a: { b: { c: 1 } } }, { a: { b: { c: 1 } } }); +// OK + +assert.partialDeepStrictEqual({ a: 1, b: 2, c: 3 }, { a: 1, b: 2 }); +// OK + +assert.partialDeepStrictEqual([{ a: 5 }, { b: 5 }], [{ a: 5 }]); +// OK + +assert.partialDeepStrictEqual(new Set([{ a: 1 }, { b: 1 }]), new Set([{ a: 1 }])); +// OK + +assert.partialDeepStrictEqual({ a: 1 }, { a: 1, b: 2 }); +// AssertionError + +assert.partialDeepStrictEqual({ a: 1, b: '2' }, { a: 1, b: 2 }); +// AssertionError + +assert.partialDeepStrictEqual({ a: { b: 2 } }, { a: { b: '2' } }); +// AssertionError +``` + [Object wrappers]: https://developer.mozilla.org/en-US/docs/Glossary/Primitive#Primitive_wrapper_objects_in_JavaScript [Object.prototype.toString()]: https://tc39.github.io/ecma262/#sec-object.prototype.tostring [`!=` operator]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Inequality @@ -2571,6 +2661,7 @@ argument. [`assert.notEqual()`]: #assertnotequalactual-expected-message [`assert.notStrictEqual()`]: #assertnotstrictequalactual-expected-message [`assert.ok()`]: #assertokvalue-message +[`assert.partialDeepStrictEqual()`]: #assertpartialdeepstrictequalactual-expected-message [`assert.strictEqual()`]: #assertstrictequalactual-expected-message [`assert.throws()`]: #assertthrowsfn-error-message [`getColorDepth()`]: tty.md#writestreamgetcolordepthenv diff --git a/doc/api/async_context.md b/doc/api/async_context.md index d1be2fb3807e17..6b018bc9d4e292 100644 --- a/doc/api/async_context.md +++ b/doc/api/async_context.md @@ -626,7 +626,6 @@ a Worker pool around it could use the following structure: ```mjs import { AsyncResource } from 'node:async_hooks'; import { EventEmitter } from 'node:events'; -import path from 'node:path'; import { Worker } from 'node:worker_threads'; const kTaskInfo = Symbol('kTaskInfo'); diff --git a/doc/api/buffer.md b/doc/api/buffer.md index aac57fc86666e6..6a0faa481dd07b 100644 --- a/doc/api/buffer.md +++ b/doc/api/buffer.md @@ -1041,7 +1041,8 @@ in `list` by adding their lengths. If `totalLength` is provided, it is coerced to an unsigned integer. If the combined length of the `Buffer`s in `list` exceeds `totalLength`, the result is -truncated to `totalLength`. +truncated to `totalLength`. If the combined length of the `Buffer`s in `list` is +less than `totalLength`, the remaining space is filled with zeros. ```mjs import { Buffer } from 'node:buffer'; diff --git a/doc/api/child_process.md b/doc/api/child_process.md index 0c0c80b35dc40c..7a9345416e0545 100644 --- a/doc/api/child_process.md +++ b/doc/api/child_process.md @@ -1700,8 +1700,8 @@ may not actually terminate the process. See kill(2) for reference. On Windows, where POSIX signals do not exist, the `signal` argument will be -ignored, and the process will be killed forcefully and abruptly (similar to -`'SIGKILL'`). +ignored except for `'SIGKILL'`, `'SIGTERM'`, `'SIGINT'` and `'SIGQUIT'`, and the +process will always be killed forcefully and abruptly (similar to `'SIGKILL'`). See [Signal Events][] for more details. On Linux, child processes of child processes will not be terminated diff --git a/doc/api/cli.md b/doc/api/cli.md index 1a51aa21afb18d..d3b23639bafce0 100644 --- a/doc/api/cli.md +++ b/doc/api/cli.md @@ -126,7 +126,7 @@ require('nodejs-addon-example'); ``` ```console -$ node --experimental-permission --allow-fs-read=* index.js +$ node --permission --allow-fs-read=* index.js node:internal/modules/cjs/loader:1319 return process.dlopen(module, path.toNamespacedPath(filename)); ^ @@ -168,7 +168,7 @@ childProcess.spawn('node', ['-e', 'require("fs").writeFileSync("/new-file", "exa ``` ```console -$ node --experimental-permission --allow-fs-read=* index.js +$ node --permission --allow-fs-read=* index.js node:internal/child_process:388 const err = this._handle.spawn(options); ^ @@ -192,12 +192,15 @@ Error: Access to this API has been restricted -> Stability: 1.1 - Active development +> Stability: 2 - Stable. This flag configures file system read permissions using the [Permission Model][]. @@ -208,37 +211,25 @@ The valid arguments for the `--allow-fs-read` flag are: * Multiple paths can be allowed using multiple `--allow-fs-read` flags. Example `--allow-fs-read=/folder1/ --allow-fs-read=/folder1/` -Paths delimited by comma (`,`) are no longer allowed. -When passing a single flag with a comma a warning will be displayed. - Examples can be found in the [File System Permissions][] documentation. -Relative paths are NOT yet supported by the CLI flag. - The initializer module also needs to be allowed. Consider the following example: ```console -$ node --experimental-permission t.js -node:internal/modules/cjs/loader:162 - const result = internalModuleStat(receiver, filename); - ^ +$ node --permission index.js Error: Access to this API has been restricted - at stat (node:internal/modules/cjs/loader:162:18) - at Module._findPath (node:internal/modules/cjs/loader:640:16) - at resolveMainPath (node:internal/modules/run_main:15:25) - at Function.executeUserEntryPoint [as runMain] (node:internal/modules/run_main:53:24) at node:internal/main/run_main_module:23:47 { code: 'ERR_ACCESS_DENIED', permission: 'FileSystemRead', - resource: '/Users/rafaelgss/repos/os/node/t.js' + resource: '/Users/rafaelgss/repos/os/node/index.js' } ``` The process needs to have access to the `index.js` module: ```bash -node --experimental-permission --allow-fs-read=/path/to/index.js index.js +node --permission --allow-fs-read=/path/to/index.js index.js ``` ### `--allow-fs-write` @@ -246,12 +237,15 @@ node --experimental-permission --allow-fs-read=/path/to/index.js index.js -> Stability: 1.1 - Active development +> Stability: 2 - Stable. This flag configures file system write permissions using the [Permission Model][]. @@ -267,8 +261,6 @@ When passing a single flag with a comma a warning will be displayed. Examples can be found in the [File System Permissions][] documentation. -Relative paths are NOT supported through the CLI flag. - ### `--allow-wasi` + +Disable the `Object.prototype.__proto__` property. If `mode` is `delete`, the +property is removed entirely. If `mode` is `throw`, accesses to the +property throw an exception with the code `ERR_PROTO_ACCESS`. + ### `--disable-warning=code-or-type` > Stability: 1.1 - Active development @@ -675,18 +659,6 @@ users can at least run WebAssembly (with less optimal performance) when the virtual memory address space available to their Node.js process is lower than what the V8 WebAssembly memory cage needs. -### `--disable-proto=mode` - - - -Disable the `Object.prototype.__proto__` property. If `mode` is `delete`, the -property is removed entirely. If `mode` is `throw`, accesses to the -property throw an exception with the code `ERR_PROTO_ACCESS`. - ### `--disallow-code-generation-from-strings` - -> Stability: 1 - Experimental. This flag is inherited from V8 and is subject to -> change upstream. - -This flag will expose the gc extension from V8. - -```js -if (globalThis.gc) { - globalThis.gc(); -} -``` - ### `--dns-result-order=order` + +Behavior is the same as [`--env-file`][], but an error is not thrown if the file +does not exist. + ### `--env-file=config` > Stability: 1.1 - Active development @@ -884,15 +848,6 @@ export USERNAME="nodejs" # will result in `nodejs` as the value. If you want to load environment variables from a file that may not exist, you can use the [`--env-file-if-exists`][] flag instead. -### `--env-file-if-exists=config` - - - -Behavior is the same as [`--env-file`][], but an error is not thrown if the file -does not exist. - ### `-e`, `--eval "script"` - -> Stability: 1.0 - Early development - -Enables the transformation of TypeScript-only syntax into JavaScript code. -Implies `--experimental-strip-types` and `--enable-source-maps`. - ### `--experimental-eventsource` -> Stability: 1.1 - Active development - -Enable the Permission Model for current process. When enabled, the -following permissions are restricted: - -* File System - manageable through - [`--allow-fs-read`][], [`--allow-fs-write`][] flags -* Child Process - manageable through [`--allow-child-process`][] flag -* Worker Threads - manageable through [`--allow-worker`][] flag -* WASI - manageable through [`--allow-wasi`][] flag -* Addons - manageable through [`--allow-addons`][] flag +If the ES module being `require()`'d contains top-level `await`, this flag +allows Node.js to evaluate the module, try to locate the +top-level awaits, and print their location to help users find them. ### `--experimental-require-module` @@ -1088,21 +1024,13 @@ added: Use this flag to enable [ShadowRealm][] support. -### `--experimental-sqlite` - - - -Enable the experimental [`node:sqlite`][] module. - ### `--experimental-strip-types` -> Stability: 1.0 - Early development +> Stability: 1.1 - Active development Enable experimental type-stripping for TypeScript files. For more information, see the [TypeScript type-stripping][] documentation. @@ -1150,15 +1078,16 @@ added: v22.3.0 Enable module mocking in the test runner. -### `--experimental-test-snapshots` +### `--experimental-transform-types` -> Stability: 1.0 - Early development +> Stability: 1.1 - Active development -Enable [snapshot testing][] in the test runner. +Enables the transformation of TypeScript-only syntax into JavaScript code. +Implies `--experimental-strip-types` and `--enable-source-maps`. ### `--experimental-vm-modules` @@ -1205,6 +1134,23 @@ added: v22.4.0 Enable experimental [`Web Storage`][] support. +### `--expose-gc` + + + +> Stability: 1 - Experimental. This flag is inherited from V8 and is subject to +> change upstream. + +This flag will expose the gc extension from V8. + +```js +if (globalThis.gc) { + globalThis.gc(); +} +``` + ### `--force-context-aware` - -Activate inspector on `host:port`. Default is `127.0.0.1:9229`. If port `0` is -specified, a random available port will be used. - -V8 inspector integration allows tools such as Chrome DevTools and IDEs to debug -and profile Node.js instances. The tools attach to Node.js instances via a -tcp port and communicate using the [Chrome DevTools Protocol][]. -See [V8 Inspector integration for Node.js][] for further explanation on Node.js debugger. - - - - - -#### Warning: binding inspector to a public IP:port combination is insecure - -Binding the inspector to a public IP (including `0.0.0.0`) with an open port is -insecure, as it allows external hosts to connect to the inspector and perform -a [remote code execution][] attack. - -If specifying a host, make sure that either: - -* The host is not accessible from public networks. -* A firewall disallows unwanted connections on the port. - -**More specifically, `--inspect=0.0.0.0` is insecure if the port (`9229` by -default) is not firewall-protected.** - -See the [debugging security implications][] section for more information. - ### `--inspect-brk[=[host:]port]` + +Activate inspector on `host:port`. Default is `127.0.0.1:9229`. If port `0` is +specified, a random available port will be used. + +V8 inspector integration allows tools such as Chrome DevTools and IDEs to debug +and profile Node.js instances. The tools attach to Node.js instances via a +tcp port and communicate using the [Chrome DevTools Protocol][]. +See [V8 Inspector integration for Node.js][] for further explanation on Node.js debugger. + + + + + +#### Warning: binding inspector to a public IP:port combination is insecure + +Binding the inspector to a public IP (including `0.0.0.0`) with an open port is +insecure, as it allows external hosts to connect to the inspector and perform +a [remote code execution][] attack. + +If specifying a host, make sure that either: + +* The host is not accessible from public networks. +* A firewall disallows unwanted connections on the port. + +**More specifically, `--inspect=0.0.0.0` is insecure if the port (`9229` by +default) is not firewall-protected.** + +See the [debugging security implications][] section for more information. + ### `-i`, `--interactive` - -Disable exposition of [Fetch API][] on the global scope. - ### `--no-experimental-detect-module` + +Disable exposition of [Fetch API][] on the global scope. + ### `--no-experimental-global-customevent` + +Disable the experimental [`node:sqlite`][] module. + ### `--no-experimental-websocket` + +> Stability: 2 - Stable. + +Enable the Permission Model for current process. When enabled, the +following permissions are restricted: + +* File System - manageable through + [`--allow-fs-read`][], [`--allow-fs-write`][] flags +* Child Process - manageable through [`--allow-child-process`][] flag +* Worker Threads - manageable through [`--allow-worker`][] flag +* WASI - manageable through [`--allow-wasi`][] flag +* Addons - manageable through [`--allow-addons`][] flag + ### `--preserve-symlinks` - -If the ES module being `require()`'d contains top-level `await`, this flag -allows Node.js to evaluate the module, try to locate the -top-level awaits, and print their location to help users find them. - ### `--prof` + +When `--report-exclude-env` is passed the diagnostic report generated will not +contain the `environmentVariables` data. + +### `--report-exclude-network` + + + +Exclude `header.networkInterfaces` from the diagnostic report. By default +this is not set and the network interfaces are included. + ### `--report-filename=filename` - -Exclude `header.networkInterfaces` from the diagnostic report. By default -this is not set and the network interfaces are included. - ### `-r`, `--require module` + +When using `--secure-heap`, the `--secure-heap-min` flag specifies the +minimum allocation from the secure heap. The minimum value is `2`. +The maximum value is the lesser of `--secure-heap` or `2147483647`. +The value given must be a power of two. + ### `--secure-heap=n` - -When using `--secure-heap`, the `--secure-heap-min` flag specifies the -minimum allocation from the secure heap. The minimum value is `2`. -The maximum value is the lesser of `--secure-heap` or `2147483647`. -The value given must be a power of two. - ### `--snapshot-blob=path` -> Stability: 1.0 - Early development - Regenerates the snapshot files used by the test runner for [snapshot testing][]. -Node.js must be started with the `--experimental-test-snapshots` flag in order -to use this functionality. ### `--throw-deprecation` @@ -2624,6 +2603,45 @@ added: v0.8.0 Print stack traces for deprecations. +### `--trace-env` + + + +Print information about any access to environment variables done in the current Node.js +instance to stderr, including: + +* The environment variable reads that Node.js does internally. +* Writes in the form of `process.env.KEY = "SOME VALUE"`. +* Reads in the form of `process.env.KEY`. +* Definitions in the form of `Object.defineProperty(process.env, 'KEY', {...})`. +* Queries in the form of `Object.hasOwn(process.env, 'KEY')`, + `process.env.hasOwnProperty('KEY')` or `'KEY' in process.env`. +* Deletions in the form of `delete process.env.KEY`. +* Enumerations inf the form of `...process.env` or `Object.keys(process.env)`. + +Only the names of the environment variables being accessed are printed. The values are not printed. + +To print the stack trace of the access, use `--trace-env-js-stack` and/or +`--trace-env-native-stack`. + +### `--trace-env-js-stack` + + + +In addition to what `--trace-env` does, this prints the JavaScript stack trace of the access. + +### `--trace-env-native-stack` + + + +In addition to what `--trace-env` does, this prints the native stack trace of the access. + ### `--trace-event-categories` + +Prints information about usage of [Loading ECMAScript modules using `require()`][]. + +When `mode` is `all`, all usage is printed. When `mode` is `no-node-modules`, usage +from the `node_modules` folder is excluded. + ### `--trace-sigint` @@ -3583,6 +3609,16 @@ for MiB in 16 32 64 128; do done ``` +### `--perf-basic-prof` + +### `--perf-basic-prof-only-functions` + +### `--perf-prof` + +### `--perf-prof-unwinding-info` + +### `--prof` + ### `--security-revert` ### `--stack-trace-limit=limit` diff --git a/doc/api/console.md b/doc/api/console.md index 147a45eadb1203..1c966d094d1472 100644 --- a/doc/api/console.md +++ b/doc/api/console.md @@ -19,7 +19,11 @@ The module exports two specific components: _**Warning**_: The global console object's methods are neither consistently synchronous like the browser APIs they resemble, nor are they consistently -asynchronous like all other Node.js streams. See the [note on process I/O][] for +asynchronous like all other Node.js streams. Programs that desire to depend +on the synchronous / asynchronous behavior of the console functions should +first figure out the nature of console's backing stream. This is because the +stream is dependent on the underlying platform and standard stream +configuration of the current process. See the [note on process I/O][] for more information. Example using the global `console`: diff --git a/doc/api/crypto.md b/doc/api/crypto.md index 03c4f3dc346b91..886427d5880305 100644 --- a/doc/api/crypto.md +++ b/doc/api/crypto.md @@ -2057,6 +2057,22 @@ types are: This property is `undefined` for unrecognized `KeyObject` types and symmetric keys. +### `keyObject.equals(otherKeyObject)` + + + +* `otherKeyObject`: {KeyObject} A `KeyObject` with which to + compare `keyObject`. +* Returns: {boolean} + +Returns `true` or `false` depending on whether the keys have exactly the same +type, value, and parameters. This method is not +[constant time](https://en.wikipedia.org/wiki/Timing_attack). + ### `keyObject.export([options])` - -* `otherKeyObject`: {KeyObject} A `KeyObject` with which to - compare `keyObject`. -* Returns: {boolean} - -Returns `true` or `false` depending on whether the keys have exactly the same -type, value, and parameters. This method is not -[constant time](https://en.wikipedia.org/wiki/Timing_attack). - ### `keyObject.symmetricKeySize` + +* Type: {string\[]} + +An array detailing the key extended usages for this certificate. + ### `x509.fingerprint` - -* Type: {string\[]} - -An array detailing the key extended usages for this certificate. - ### `x509.publicKey` - -* {Object} - -An object containing commonly used constants for crypto and security related -operations. The specific constants currently defined are described in -[Crypto constants][]. - -### `crypto.fips` - - - -> Stability: 0 - Deprecated - -Property for checking and controlling whether a FIPS compliant crypto provider -is currently in use. Setting to true requires a FIPS build of Node.js. - -This property is deprecated. Please use `crypto.setFips()` and -`crypto.getFips()` instead. - ### `crypto.checkPrime(candidate[, options], callback)` + +* {Object} + +An object containing commonly used constants for crypto and security related +operations. The specific constants currently defined are described in +[Crypto constants][]. + ### `crypto.createCipheriv(algorithm, key, iv[, options])` -> Stability: 1.2 - Release candidate - -* `algorithm` {string|undefined} -* `data` {string|Buffer|TypedArray|DataView} When `data` is a - string, it will be encoded as UTF-8 before being hashed. If a different - input encoding is desired for a string input, user could encode the string - into a `TypedArray` using either `TextEncoder` or `Buffer.from()` and passing - the encoded `TypedArray` into this API instead. -* `outputEncoding` {string|undefined} [Encoding][encoding] used to encode the - returned digest. **Default:** `'hex'`. -* Returns: {string|Buffer} - -A utility for creating one-shot hash digests of data. It can be faster than -the object-based `crypto.createHash()` when hashing a smaller amount of data -(<= 5MB) that's readily available. If the data can be big or if it is streamed, -it's still recommended to use `crypto.createHash()` instead. - -The `algorithm` is dependent on the available algorithms supported by the -version of OpenSSL on the platform. Examples are `'sha256'`, `'sha512'`, etc. -On recent releases of OpenSSL, `openssl list -digest-algorithms` will -display the available digest algorithms. - -Example: - -```cjs -const crypto = require('node:crypto'); -const { Buffer } = require('node:buffer'); - -// Hashing a string and return the result as a hex-encoded string. -const string = 'Node.js'; -// 10b3493287f831e81a438811a1ffba01f8cec4b7 -console.log(crypto.hash('sha1', string)); - -// Encode a base64-encoded string into a Buffer, hash it and return -// the result as a buffer. -const base64 = 'Tm9kZS5qcw=='; -// -console.log(crypto.hash('sha1', Buffer.from(base64, 'base64'), 'buffer')); -``` - -```mjs -import crypto from 'node:crypto'; -import { Buffer } from 'node:buffer'; +> Stability: 0 - Deprecated -// Hashing a string and return the result as a hex-encoded string. -const string = 'Node.js'; -// 10b3493287f831e81a438811a1ffba01f8cec4b7 -console.log(crypto.hash('sha1', string)); +Property for checking and controlling whether a FIPS compliant crypto provider +is currently in use. Setting to true requires a FIPS build of Node.js. -// Encode a base64-encoded string into a Buffer, hash it and return -// the result as a buffer. -const base64 = 'Tm9kZS5qcw=='; -// -console.log(crypto.hash('sha1', Buffer.from(base64, 'base64'), 'buffer')); -``` +This property is deprecated. Please use `crypto.setFips()` and +`crypto.getFips()` instead. ### `crypto.generateKey(type, options, callback)` @@ -4221,6 +4157,70 @@ A convenient alias for [`crypto.webcrypto.getRandomValues()`][]. This implementation is not compliant with the Web Crypto spec, to write web-compatible code use [`crypto.webcrypto.getRandomValues()`][] instead. +### `crypto.hash(algorithm, data[, outputEncoding])` + + + +> Stability: 1.2 - Release candidate + +* `algorithm` {string|undefined} +* `data` {string|Buffer|TypedArray|DataView} When `data` is a + string, it will be encoded as UTF-8 before being hashed. If a different + input encoding is desired for a string input, user could encode the string + into a `TypedArray` using either `TextEncoder` or `Buffer.from()` and passing + the encoded `TypedArray` into this API instead. +* `outputEncoding` {string|undefined} [Encoding][encoding] used to encode the + returned digest. **Default:** `'hex'`. +* Returns: {string|Buffer} + +A utility for creating one-shot hash digests of data. It can be faster than +the object-based `crypto.createHash()` when hashing a smaller amount of data +(<= 5MB) that's readily available. If the data can be big or if it is streamed, +it's still recommended to use `crypto.createHash()` instead. + +The `algorithm` is dependent on the available algorithms supported by the +version of OpenSSL on the platform. Examples are `'sha256'`, `'sha512'`, etc. +On recent releases of OpenSSL, `openssl list -digest-algorithms` will +display the available digest algorithms. + +Example: + +```cjs +const crypto = require('node:crypto'); +const { Buffer } = require('node:buffer'); + +// Hashing a string and return the result as a hex-encoded string. +const string = 'Node.js'; +// 10b3493287f831e81a438811a1ffba01f8cec4b7 +console.log(crypto.hash('sha1', string)); + +// Encode a base64-encoded string into a Buffer, hash it and return +// the result as a buffer. +const base64 = 'Tm9kZS5qcw=='; +// +console.log(crypto.hash('sha1', Buffer.from(base64, 'base64'), 'buffer')); +``` + +```mjs +import crypto from 'node:crypto'; +import { Buffer } from 'node:buffer'; + +// Hashing a string and return the result as a hex-encoded string. +const string = 'Node.js'; +// 10b3493287f831e81a438811a1ffba01f8cec4b7 +console.log(crypto.hash('sha1', string)); + +// Encode a base64-encoded string into a Buffer, hash it and return +// the result as a buffer. +const base64 = 'Tm9kZS5qcw=='; +// +console.log(crypto.hash('sha1', Buffer.from(base64, 'base64'), 'buffer')); +``` + ### `crypto.hkdf(digest, ikm, salt, info, keylen, callback)` - -* `buffer` {ArrayBuffer|Buffer|TypedArray|DataView} Must be supplied. The - size of the provided `buffer` must not be larger than `2**31 - 1`. -* `offset` {number} **Default:** `0` -* `size` {number} **Default:** `buffer.length - offset`. The `size` must - not be larger than `2**31 - 1`. -* Returns: {ArrayBuffer|Buffer|TypedArray|DataView} The object passed as - `buffer` argument. - -Synchronous version of [`crypto.randomFill()`][]. - -```mjs -import { Buffer } from 'node:buffer'; -const { randomFillSync } = await import('node:crypto'); - -const buf = Buffer.alloc(10); -console.log(randomFillSync(buf).toString('hex')); - -randomFillSync(buf, 5); -console.log(buf.toString('hex')); - -// The above is equivalent to the following: -randomFillSync(buf, 5, 5); -console.log(buf.toString('hex')); -``` - -```cjs -const { randomFillSync } = require('node:crypto'); -const { Buffer } = require('node:buffer'); - -const buf = Buffer.alloc(10); -console.log(randomFillSync(buf).toString('hex')); - -randomFillSync(buf, 5); -console.log(buf.toString('hex')); - -// The above is equivalent to the following: -randomFillSync(buf, 5, 5); -console.log(buf.toString('hex')); -``` - -Any `ArrayBuffer`, `TypedArray` or `DataView` instance may be passed as -`buffer`. - -```mjs -import { Buffer } from 'node:buffer'; -const { randomFillSync } = await import('node:crypto'); - -const a = new Uint32Array(10); -console.log(Buffer.from(randomFillSync(a).buffer, - a.byteOffset, a.byteLength).toString('hex')); - -const b = new DataView(new ArrayBuffer(10)); -console.log(Buffer.from(randomFillSync(b).buffer, - b.byteOffset, b.byteLength).toString('hex')); - -const c = new ArrayBuffer(10); -console.log(Buffer.from(randomFillSync(c)).toString('hex')); -``` - -```cjs -const { randomFillSync } = require('node:crypto'); -const { Buffer } = require('node:buffer'); - -const a = new Uint32Array(10); -console.log(Buffer.from(randomFillSync(a).buffer, - a.byteOffset, a.byteLength).toString('hex')); - -const b = new DataView(new ArrayBuffer(10)); -console.log(Buffer.from(randomFillSync(b).buffer, - b.byteOffset, b.byteLength).toString('hex')); - -const c = new ArrayBuffer(10); -console.log(Buffer.from(randomFillSync(c)).toString('hex')); -``` - ### `crypto.randomFill(buffer[, offset][, size], callback)` + +* `buffer` {ArrayBuffer|Buffer|TypedArray|DataView} Must be supplied. The + size of the provided `buffer` must not be larger than `2**31 - 1`. +* `offset` {number} **Default:** `0` +* `size` {number} **Default:** `buffer.length - offset`. The `size` must + not be larger than `2**31 - 1`. +* Returns: {ArrayBuffer|Buffer|TypedArray|DataView} The object passed as + `buffer` argument. + +Synchronous version of [`crypto.randomFill()`][]. + +```mjs +import { Buffer } from 'node:buffer'; +const { randomFillSync } = await import('node:crypto'); + +const buf = Buffer.alloc(10); +console.log(randomFillSync(buf).toString('hex')); + +randomFillSync(buf, 5); +console.log(buf.toString('hex')); + +// The above is equivalent to the following: +randomFillSync(buf, 5, 5); +console.log(buf.toString('hex')); +``` + +```cjs +const { randomFillSync } = require('node:crypto'); +const { Buffer } = require('node:buffer'); + +const buf = Buffer.alloc(10); +console.log(randomFillSync(buf).toString('hex')); + +randomFillSync(buf, 5); +console.log(buf.toString('hex')); + +// The above is equivalent to the following: +randomFillSync(buf, 5, 5); +console.log(buf.toString('hex')); +``` + +Any `ArrayBuffer`, `TypedArray` or `DataView` instance may be passed as +`buffer`. + +```mjs +import { Buffer } from 'node:buffer'; +const { randomFillSync } = await import('node:crypto'); + +const a = new Uint32Array(10); +console.log(Buffer.from(randomFillSync(a).buffer, + a.byteOffset, a.byteLength).toString('hex')); + +const b = new DataView(new ArrayBuffer(10)); +console.log(Buffer.from(randomFillSync(b).buffer, + b.byteOffset, b.byteLength).toString('hex')); + +const c = new ArrayBuffer(10); +console.log(Buffer.from(randomFillSync(c)).toString('hex')); +``` + +```cjs +const { randomFillSync } = require('node:crypto'); +const { Buffer } = require('node:buffer'); + +const a = new Uint32Array(10); +console.log(Buffer.from(randomFillSync(a).buffer, + a.byteOffset, a.byteLength).toString('hex')); + +const b = new DataView(new ArrayBuffer(10)); +console.log(Buffer.from(randomFillSync(b).buffer, + b.byteOffset, b.byteLength).toString('hex')); + +const c = new ArrayBuffer(10); +console.log(Buffer.from(randomFillSync(c)).toString('hex')); +``` + ### `crypto.randomInt([min, ]max[, callback])` +### DEP0187: Passing invalid argument types to `fs.existsSync` + + + +Type: Documentation-only + +Passing non-supported argument types is deprecated and, instead of returning `false`, +will throw an error in a future version. + +### DEP0188: `process.features.ipv6` and `process.features.uv` + + + +Type: Documentation-only + +These properties are unconditionally `true`. Any checks based on these properties are redundant. + +### DEP0189: `process.features.tls_*` + + + +Type: Documentation-only + +`process.features.tls_alpn`, `process.features.tls_ocsp`, and `process.features.tls_sni` are +deprecated, as their values are guaranteed to be identical to that of `process.features.tls`. + [NIST SP 800-38D]: https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38d.pdf [RFC 6066]: https://tools.ietf.org/html/rfc6066#section-3 [RFC 8247 Section 2.4]: https://www.rfc-editor.org/rfc/rfc8247#section-2.4 diff --git a/doc/api/dgram.md b/doc/api/dgram.md index 5f2f01850ace3e..ac8625bf157381 100644 --- a/doc/api/dgram.md +++ b/doc/api/dgram.md @@ -957,6 +957,13 @@ changes: * `sendBufferSize` {number} Sets the `SO_SNDBUF` socket value. * `lookup` {Function} Custom lookup function. **Default:** [`dns.lookup()`][]. * `signal` {AbortSignal} An AbortSignal that may be used to close a socket. + * `receiveBlockList` {net.BlockList} `receiveBlockList` can be used for discarding + inbound datagram to specific IP addresses, IP ranges, or IP subnets. This does not + work if the server is behind a reverse proxy, NAT, etc. because the address + checked against the blocklist is the address of the proxy, or the one + specified by the NAT. + * `sendBlockList` {net.BlockList} `sendBlockList` can be used for disabling outbound + access to specific IP addresses, IP ranges, or IP subnets. * `callback` {Function} Attached as a listener for `'message'` events. Optional. * Returns: {dgram.Socket} diff --git a/doc/api/errors.md b/doc/api/errors.md index 5db362cf4d86f5..affb78e84ccbca 100644 --- a/doc/api/errors.md +++ b/doc/api/errors.md @@ -61,7 +61,7 @@ Errors that occur within _Asynchronous APIs_ may be reported in multiple ways: ```js - const fs = require('fs/promises'); + const fs = require('node:fs/promises'); (async () => { let data; @@ -2166,6 +2166,12 @@ An attempt was made to open an IPC communication channel with a synchronously forked Node.js process. See the documentation for the [`child_process`][] module for more information. + + +### `ERR_IP_BLOCKED` + +IP is blocked by `net.BlockList`. + ### `ERR_LOADER_CHAIN_INCOMPLETE` @@ -2179,6 +2185,16 @@ added: An ESM loader hook returned without calling `next()` and without explicitly signaling a short circuit. + + +### `ERR_LOAD_SQLITE_EXTENSION` + + + +An error occurred while loading a SQLite extension. + ### `ERR_MEMORY_ALLOCATION_FAILED` @@ -2447,6 +2463,18 @@ Accessing `Object.prototype.__proto__` has been forbidden using [`Object.setPrototypeOf`][] should be used to get and set the prototype of an object. + + +### `ERR_QUIC_APPLICATION_ERROR` + + + +> Stability: 1 - Experimental + +A QUIC application error occurred. + ### `ERR_QUIC_CONNECTION_FAILED` @@ -2483,6 +2511,30 @@ added: v22.10.0 Opening a QUIC stream failed. + + +### `ERR_QUIC_TRANSPORT_ERROR` + + + +> Stability: 1 - Experimental + +A QUIC transport error occurred. + + + +### `ERR_QUIC_VERSION_NEGOTIATION_ERROR` + + + +> Stability: 1 - Experimental + +A QUIC session failed because version negotiation is required. + ### `ERR_REQUIRE_ASYNC_MODULE` diff --git a/doc/api/esm.md b/doc/api/esm.md index bf004c062525da..bc77f3d3324c70 100644 --- a/doc/api/esm.md +++ b/doc/api/esm.md @@ -10,6 +10,9 @@ changes: - version: v22.12.0 pr-url: https://github.com/nodejs/node/pull/55333 description: Import attributes are no longer experimental. + - version: v22.0.0 + pr-url: https://github.com/nodejs/node/pull/52104 + description: Drop support for import assertions. - version: - v21.0.0 - v20.10.0 @@ -338,7 +341,7 @@ modules it can be used to load ES modules. * {Object} The `import.meta` meta property is an `Object` that contains the following -properties. +properties. It is only supported in ES modules. ### `import.meta.dirname` diff --git a/doc/api/events.md b/doc/api/events.md index fe5aa827602bd8..e4dbe5785bf5e1 100644 --- a/doc/api/events.md +++ b/doc/api/events.md @@ -1171,6 +1171,10 @@ that a "possible EventEmitter memory leak" has been detected. For any single `EventEmitter`, the `emitter.getMaxListeners()` and `emitter.setMaxListeners()` methods can be used to temporarily avoid this warning: +`defaultMaxListeners` has no effect on `AbortSignal` instances. While it is +still possible to use [`emitter.setMaxListeners(n)`][] to set a warning limit +for individual `AbortSignal` instances, per default `AbortSignal` instances will not warn. + ```mjs import { EventEmitter } from 'node:events'; const emitter = new EventEmitter(); diff --git a/doc/api/http.md b/doc/api/http.md index 9b9175a003f56c..886bbe26ecc95c 100644 --- a/doc/api/http.md +++ b/doc/api/http.md @@ -3848,8 +3848,13 @@ changes: * `port` {number} Port of remote server. **Default:** `defaultPort` if set, else `80`. * `protocol` {string} Protocol to use. **Default:** `'http:'`. + * `setDefaultHeaders` {boolean}: Specifies whether or not to automatically add + default headers such as `Connection`, `Content-Length`, `Transfer-Encoding`, + and `Host`. If set to `false` then all necessary headers must be added + manually. Defaults to `true`. * `setHost` {boolean}: Specifies whether or not to automatically add the - `Host` header. Defaults to `true`. + `Host` header. If provided, this overrides `setDefaultHeaders`. Defaults to + `true`. * `signal` {AbortSignal}: An AbortSignal that may be used to abort an ongoing request. * `socketPath` {string} Unix domain socket. Cannot be used if one of `host` diff --git a/doc/api/inspector.md b/doc/api/inspector.md index 2d8bf185de0971..4a3e61d5be6c00 100644 --- a/doc/api/inspector.md +++ b/doc/api/inspector.md @@ -505,7 +505,7 @@ inspector.Network.requestWillBeSent({ request: { url: 'https://nodejs.org/en', method: 'GET', - } + }, }); ``` diff --git a/doc/api/module.md b/doc/api/module.md index 44aeda21177609..7225a3f71b6d82 100644 --- a/doc/api/module.md +++ b/doc/api/module.md @@ -64,6 +64,325 @@ const require = createRequire(import.meta.url); const siblingModule = require('./sibling-module'); ``` +### `module.findPackageJSON(specifier[, base])` + + + +> Stability: 1.1 - Active Development + +* `specifier` {string|URL} The specifier for the module whose `package.json` to + retrieve. When passing a _bare specifier_, the `package.json` at the root of + the package is returned. When passing a _relative specifier_ or an _absolute specifier_, + the closest parent `package.json` is returned. +* `base` {string|URL} The absolute location (`file:` URL string or FS path) of the + containing module. For CJS, use `__filename` (not `__dirname`!); for ESM, use + `import.meta.url`. You do not need to pass it if `specifier` is an `absolute specifier`. +* Returns: {string|undefined} A path if the `package.json` is found. When `startLocation` + is a package, the package's root `package.json`; when a relative or unresolved, the closest + `package.json` to the `startLocation`. + +> **Caveat**: Do not use this to try to determine module format. There are many things effecting +> that determination; the `type` field of package.json is the _least_ definitive (ex file extension +> superceeds it, and a loader hook superceeds that). + +```text +/path/to/project + ├ packages/ + ├ bar/ + ├ bar.js + └ package.json // name = '@foo/bar' + └ qux/ + ├ node_modules/ + └ some-package/ + └ package.json // name = 'some-package' + ├ qux.js + └ package.json // name = '@foo/qux' + ├ main.js + └ package.json // name = '@foo' +``` + +```mjs +// /path/to/project/packages/bar/bar.js +import { findPackageJSON } from 'node:module'; + +findPackageJSON('..', import.meta.url); +// '/path/to/project/package.json' +// Same result when passing an absolute specifier instead: +findPackageJSON(new URL('../', import.meta.url)); +findPackageJSON(import.meta.resolve('../')); + +findPackageJSON('some-package', import.meta.url); +// '/path/to/project/packages/bar/node_modules/some-package/package.json' +// When passing an absolute specifier, you might get a different result if the +// resolved module is inside a subfolder that has nested `package.json`. +findPackageJSON(import.meta.resolve('some-package')); +// '/path/to/project/packages/bar/node_modules/some-package/some-subfolder/package.json' + +findPackageJSON('@foo/qux', import.meta.url); +// '/path/to/project/packages/qux/package.json' +``` + +```cjs +// /path/to/project/packages/bar/bar.js +const { findPackageJSON } = require('node:module'); +const { pathToFileURL } = require('node:url'); +const path = require('node:path'); + +findPackageJSON('..', __filename); +// '/path/to/project/package.json' +// Same result when passing an absolute specifier instead: +findPackageJSON(pathToFileURL(path.join(__dirname, '..'))); + +findPackageJSON('some-package', __filename); +// '/path/to/project/packages/bar/node_modules/some-package/package.json' +// When passing an absolute specifier, you might get a different result if the +// resolved module is inside a subfolder that has nested `package.json`. +findPackageJSON(pathToFileURL(require.resolve('some-package'))); +// '/path/to/project/packages/bar/node_modules/some-package/some-subfolder/package.json' + +findPackageJSON('@foo/qux', __filename); +// '/path/to/project/packages/qux/package.json' +``` + +### `module.isBuiltin(moduleName)` + + + +* `moduleName` {string} name of the module +* Returns: {boolean} returns true if the module is builtin else returns false + +```mjs +import { isBuiltin } from 'node:module'; +isBuiltin('node:fs'); // true +isBuiltin('fs'); // true +isBuiltin('wss'); // false +``` + +### `module.register(specifier[, parentURL][, options])` + + + +> Stability: 1.2 - Release candidate + +* `specifier` {string|URL} Customization hooks to be registered; this should be + the same string that would be passed to `import()`, except that if it is + relative, it is resolved relative to `parentURL`. +* `parentURL` {string|URL} If you want to resolve `specifier` relative to a base + URL, such as `import.meta.url`, you can pass that URL here. **Default:** + `'data:'` +* `options` {Object} + * `parentURL` {string|URL} If you want to resolve `specifier` relative to a + base URL, such as `import.meta.url`, you can pass that URL here. This + property is ignored if the `parentURL` is supplied as the second argument. + **Default:** `'data:'` + * `data` {any} Any arbitrary, cloneable JavaScript value to pass into the + [`initialize`][] hook. + * `transferList` {Object\[]} [transferrable objects][] to be passed into the + `initialize` hook. + +Register a module that exports [hooks][] that customize Node.js module +resolution and loading behavior. See [Customization hooks][]. + +### `module.stripTypeScriptTypes(code[, options])` + + + +> Stability: 1.1 - Active development + +* `code` {string} The code to strip type annotations from. +* `options` {Object} + * `mode` {string} **Default:** `'strip'`. Possible values are: + * `'strip'` Only strip type annotations without performing the transformation of TypeScript features. + * `'transform'` Strip type annotations and transform TypeScript features to JavaScript. + * `sourceMap` {boolean} **Default:** `false`. Only when `mode` is `'transform'`, if `true`, a source map + will be generated for the transformed code. + * `sourceUrl` {string} Specifies the source url used in the source map. +* Returns: {string} The code with type annotations stripped. + `module.stripTypeScriptTypes()` removes type annotations from TypeScript code. It + can be used to strip type annotations from TypeScript code before running it + with `vm.runInContext()` or `vm.compileFunction()`. + By default, it will throw an error if the code contains TypeScript features + that require transformation such as `Enums`, + see [type-stripping][] for more information. + When mode is `'transform'`, it also transforms TypeScript features to JavaScript, + see [transform TypeScript features][] for more information. + When mode is `'strip'`, source maps are not generated, because locations are preserved. + If `sourceMap` is provided, when mode is `'strip'`, an error will be thrown. + +_WARNING_: The output of this function should not be considered stable across Node.js versions, +due to changes in the TypeScript parser. + +```mjs +import { stripTypeScriptTypes } from 'node:module'; +const code = 'const a: number = 1;'; +const strippedCode = stripTypeScriptTypes(code); +console.log(strippedCode); +// Prints: const a = 1; +``` + +```cjs +const { stripTypeScriptTypes } = require('node:module'); +const code = 'const a: number = 1;'; +const strippedCode = stripTypeScriptTypes(code); +console.log(strippedCode); +// Prints: const a = 1; +``` + +If `sourceUrl` is provided, it will be used appended as a comment at the end of the output: + +```mjs +import { stripTypeScriptTypes } from 'node:module'; +const code = 'const a: number = 1;'; +const strippedCode = stripTypeScriptTypes(code, { mode: 'strip', sourceUrl: 'source.ts' }); +console.log(strippedCode); +// Prints: const a = 1\n\n//# sourceURL=source.ts; +``` + +```cjs +const { stripTypeScriptTypes } = require('node:module'); +const code = 'const a: number = 1;'; +const strippedCode = stripTypeScriptTypes(code, { mode: 'strip', sourceUrl: 'source.ts' }); +console.log(strippedCode); +// Prints: const a = 1\n\n//# sourceURL=source.ts; +``` + +When `mode` is `'transform'`, the code is transformed to JavaScript: + +```mjs +import { stripTypeScriptTypes } from 'node:module'; +const code = ` + namespace MathUtil { + export const add = (a: number, b: number) => a + b; + }`; +const strippedCode = stripTypeScriptTypes(code, { mode: 'transform', sourceMap: true }); +console.log(strippedCode); +// Prints: +// var MathUtil; +// (function(MathUtil) { +// MathUtil.add = (a, b)=>a + b; +// })(MathUtil || (MathUtil = {})); +// # sourceMappingURL=data:application/json;base64, ... +``` + +```cjs +const { stripTypeScriptTypes } = require('node:module'); +const code = ` + namespace MathUtil { + export const add = (a: number, b: number) => a + b; + }`; +const strippedCode = stripTypeScriptTypes(code, { mode: 'transform', sourceMap: true }); +console.log(strippedCode); +// Prints: +// var MathUtil; +// (function(MathUtil) { +// MathUtil.add = (a, b)=>a + b; +// })(MathUtil || (MathUtil = {})); +// # sourceMappingURL=data:application/json;base64, ... +``` + +### `module.syncBuiltinESMExports()` + + + +The `module.syncBuiltinESMExports()` method updates all the live bindings for +builtin [ES Modules][] to match the properties of the [CommonJS][] exports. It +does not add or remove exported names from the [ES Modules][]. + +```js +const fs = require('node:fs'); +const assert = require('node:assert'); +const { syncBuiltinESMExports } = require('node:module'); + +fs.readFile = newAPI; + +delete fs.readFileSync; + +function newAPI() { + // ... +} + +fs.newAPI = newAPI; + +syncBuiltinESMExports(); + +import('node:fs').then((esmFS) => { + // It syncs the existing readFile property with the new value + assert.strictEqual(esmFS.readFile, newAPI); + // readFileSync has been deleted from the required fs + assert.strictEqual('readFileSync' in fs, false); + // syncBuiltinESMExports() does not remove readFileSync from esmFS + assert.strictEqual('readFileSync' in esmFS, true); + // syncBuiltinESMExports() does not add names + assert.strictEqual(esmFS.newAPI, undefined); +}); +``` + +## Module compile cache + + + +The module compile cache can be enabled either using the [`module.enableCompileCache()`][] +method or the [`NODE_COMPILE_CACHE=dir`][] environment variable. After it is enabled, +whenever Node.js compiles a CommonJS or a ECMAScript Module, it will use on-disk +[V8 code cache][] persisted in the specified directory to speed up the compilation. +This may slow down the first load of a module graph, but subsequent loads of the same module +graph may get a significant speedup if the contents of the modules do not change. + +To clean up the generated compile cache on disk, simply remove the cache directory. The cache +directory will be recreated the next time the same directory is used for for compile cache +storage. To avoid filling up the disk with stale cache, it is recommended to use a directory +under the [`os.tmpdir()`][]. If the compile cache is enabled by a call to +[`module.enableCompileCache()`][] without specifying the directory, Node.js will use +the [`NODE_COMPILE_CACHE=dir`][] environment variable if it's set, or defaults +to `path.join(os.tmpdir(), 'node-compile-cache')` otherwise. To locate the compile cache +directory used by a running Node.js instance, use [`module.getCompileCacheDir()`][]. + +Currently when using the compile cache with [V8 JavaScript code coverage][], the +coverage being collected by V8 may be less precise in functions that are +deserialized from the code cache. It's recommended to turn this off when +running tests to generate precise coverage. + +The enabled module compile cache can be disabled by the [`NODE_DISABLE_COMPILE_CACHE=1`][] +environment variable. This can be useful when the compile cache leads to unexpected or +undesired behaviors (e.g. less precise test coverage). + +Compilation cache generated by one version of Node.js can not be reused by a different +version of Node.js. Cache generated by different versions of Node.js will be stored +separately if the same base directory is used to persist the cache, so they can co-exist. + +At the moment, when the compile cache is enabled and a module is loaded afresh, the +code cache is generated from the compiled code immediately, but will only be written +to disk when the Node.js instance is about to exit. This is subject to change. The +[`module.flushCompileCache()`][] method can be used to ensure the accumulated code cache +is flushed to disk in case the application wants to spawn other Node.js instances +and let them share the cache long before the parent exits. + ### `module.constants.compileCacheStatus` -The module compile cache can be enabled either using the [`module.enableCompileCache()`][] -method or the [`NODE_COMPILE_CACHE=dir`][] environment variable. After it is enabled, -whenever Node.js compiles a CommonJS or a ECMAScript Module, it will use on-disk -[V8 code cache][] persisted in the specified directory to speed up the compilation. -This may slow down the first load of a module graph, but subsequent loads of the same module -graph may get a significant speedup if the contents of the modules do not change. - -To clean up the generated compile cache on disk, simply remove the cache directory. The cache -directory will be recreated the next time the same directory is used for for compile cache -storage. To avoid filling up the disk with stale cache, it is recommended to use a directory -under the [`os.tmpdir()`][]. If the compile cache is enabled by a call to -[`module.enableCompileCache()`][] without specifying the directory, Node.js will use -the [`NODE_COMPILE_CACHE=dir`][] environment variable if it's set, or defaults -to `path.join(os.tmpdir(), 'node-compile-cache')` otherwise. To locate the compile cache -directory used by a running Node.js instance, use [`module.getCompileCacheDir()`][]. - -Currently when using the compile cache with [V8 JavaScript code coverage][], the -coverage being collected by V8 may be less precise in functions that are -deserialized from the code cache. It's recommended to turn this off when -running tests to generate precise coverage. - -The enabled module compile cache can be disabled by the [`NODE_DISABLE_COMPILE_CACHE=1`][] -environment variable. This can be useful when the compile cache leads to unexpected or -undesired behaviors (e.g. less precise test coverage). - -Compilation cache generated by one version of Node.js can not be reused by a different -version of Node.js. Cache generated by different versions of Node.js will be stored -separately if the same base directory is used to persist the cache, so they can co-exist. +> Stability: 1.1 - Active Development -At the moment, when the compile cache is enabled and a module is loaded afresh, the -code cache is generated from the compiled code immediately, but will only be written -to disk when the Node.js instance is about to exit. This is subject to change. The -[`module.flushCompileCache()`][] method can be used to ensure the accumulated code cache -is flushed to disk in case the application wants to spawn other Node.js instances -and let them share the cache long before the parent exits. +Flush the [module compile cache][] accumulated from modules already loaded +in the current Node.js instance to disk. This returns after all the flushing +file system operations come to an end, no matter they succeed or not. If there +are any errors, this will fail silently, since compile cache misses should not +interfere with the actual operation of the application. ### `module.getCompileCacheDir()` @@ -217,98 +506,6 @@ added: v22.8.0 * Returns: {string|undefined} Path to the [module compile cache][] directory if it is enabled, or `undefined` otherwise. -### `module.isBuiltin(moduleName)` - - - -* `moduleName` {string} name of the module -* Returns: {boolean} returns true if the module is builtin else returns false - -```mjs -import { isBuiltin } from 'node:module'; -isBuiltin('node:fs'); // true -isBuiltin('fs'); // true -isBuiltin('wss'); // false -``` - -### `module.register(specifier[, parentURL][, options])` - - - -> Stability: 1.2 - Release candidate - -* `specifier` {string|URL} Customization hooks to be registered; this should be - the same string that would be passed to `import()`, except that if it is - relative, it is resolved relative to `parentURL`. -* `parentURL` {string|URL} If you want to resolve `specifier` relative to a base - URL, such as `import.meta.url`, you can pass that URL here. **Default:** - `'data:'` -* `options` {Object} - * `parentURL` {string|URL} If you want to resolve `specifier` relative to a - base URL, such as `import.meta.url`, you can pass that URL here. This - property is ignored if the `parentURL` is supplied as the second argument. - **Default:** `'data:'` - * `data` {any} Any arbitrary, cloneable JavaScript value to pass into the - [`initialize`][] hook. - * `transferList` {Object\[]} [transferrable objects][] to be passed into the - `initialize` hook. - -Register a module that exports [hooks][] that customize Node.js module -resolution and loading behavior. See [Customization hooks][]. - -### `module.syncBuiltinESMExports()` - - - -The `module.syncBuiltinESMExports()` method updates all the live bindings for -builtin [ES Modules][] to match the properties of the [CommonJS][] exports. It -does not add or remove exported names from the [ES Modules][]. - -```js -const fs = require('node:fs'); -const assert = require('node:assert'); -const { syncBuiltinESMExports } = require('node:module'); - -fs.readFile = newAPI; - -delete fs.readFileSync; - -function newAPI() { - // ... -} - -fs.newAPI = newAPI; - -syncBuiltinESMExports(); - -import('node:fs').then((esmFS) => { - // It syncs the existing readFile property with the new value - assert.strictEqual(esmFS.readFile, newAPI); - // readFileSync has been deleted from the required fs - assert.strictEqual('readFileSync' in fs, false); - // syncBuiltinESMExports() does not remove readFileSync from esmFS - assert.strictEqual('readFileSync' in esmFS, true); - // syncBuiltinESMExports() does not add names - assert.strictEqual(esmFS.newAPI, undefined); -}); -``` - ## Customization Hooks @@ -1105,21 +1302,6 @@ added: `path` is the resolved path for the file for which a corresponding source map should be fetched. -### `module.flushCompileCache()` - - - -> Stability: 1.1 - Active Development - -Flush the [module compile cache][] accumulated from modules already loaded -in the current Node.js instance to disk. This returns after all the flushing -file system operations come to an end, no matter they succeed or not. If there -are any errors, this will fail silently, since compile cache misses should not -interfer with the actual operation of the application. - ### Class: `module.SourceMap` + ```js const assert = require('node:assert'); @@ -1266,6 +1271,7 @@ This section was moved to [GLOBAL_FOLDERS]: #loading-from-the-global-folders [`"main"`]: packages.md#main [`"type"`]: packages.md#type +[`--trace-require-module`]: cli.md#--trace-require-modulemode [`ERR_REQUIRE_ASYNC_MODULE`]: errors.md#err_require_async_module [`ERR_UNSUPPORTED_DIR_IMPORT`]: errors.md#err_unsupported_dir_import [`MODULE_NOT_FOUND`]: errors.md#module_not_found diff --git a/doc/api/net.md b/doc/api/net.md index 32be5b6bd69ea5..5384ad7a34c42c 100644 --- a/doc/api/net.md +++ b/doc/api/net.md @@ -170,6 +170,15 @@ added: The list of rules added to the blocklist. +### `BlockList.isBlockList(value)` + + + +* `value` {any} Any JS value +* Returns `true` if the `value` is a `net.BlockList`. + ## Class: `net.SocketAddress` + +* `input` {string} An input string containing an IP address and optional port, + e.g. `123.1.2.3:1234` or `[1::1]:1234`. +* Returns: {net.SocketAddress} Returns a `SocketAddress` if parsing was successful. + Otherwise returns `undefined`. + ## Class: `net.Server` + +* Returns: {PerformanceEntry\[]} Current list of entries stored in the performance observer, emptying it out. + ## Class: `PerformanceObserverEntryList` -```js +```mjs +import { performance, PerformanceObserver } from 'node:perf_hooks'; + +// Activate the observer +const obs = new PerformanceObserver((list) => { + const entries = list.getEntries(); + entries.forEach((entry) => { + console.log(`import('${entry[0]}')`, entry.duration); + }); + performance.clearMarks(); + performance.clearMeasures(); + obs.disconnect(); +}); +obs.observe({ entryTypes: ['function'], buffered: true }); + +const timedImport = performance.timerify(async (module) => { + return await import(module); +}); + +await timedImport('some-module'); +``` + +```cjs 'use strict'; const { performance, @@ -1896,7 +2165,28 @@ it means the time interval between starting the request and receiving the response, and for HTTP request, it means the time interval between receiving the request and sending the response: -```js +```mjs +import { PerformanceObserver } from 'node:perf_hooks'; +import { createServer, get } from 'node:http'; + +const obs = new PerformanceObserver((items) => { + items.getEntries().forEach((item) => { + console.log(item); + }); +}); + +obs.observe({ entryTypes: ['http'] }); + +const PORT = 8080; + +createServer((req, res) => { + res.end('ok'); +}).listen(PORT, () => { + get(`http://127.0.0.1:${PORT}`); +}); +``` + +```cjs 'use strict'; const { PerformanceObserver } = require('node:perf_hooks'); const http = require('node:http'); @@ -1920,7 +2210,25 @@ http.createServer((req, res) => { ### Measuring how long the `net.connect` (only for TCP) takes when the connection is successful -```js +```mjs +import { PerformanceObserver } from 'node:perf_hooks'; +import { connect, createServer } from 'node:net'; + +const obs = new PerformanceObserver((items) => { + items.getEntries().forEach((item) => { + console.log(item); + }); +}); +obs.observe({ entryTypes: ['net'] }); +const PORT = 8080; +createServer((socket) => { + socket.destroy(); +}).listen(PORT, () => { + connect(PORT); +}); +``` + +```cjs 'use strict'; const { PerformanceObserver } = require('node:perf_hooks'); const net = require('node:net'); @@ -1940,7 +2248,21 @@ net.createServer((socket) => { ### Measuring how long the DNS takes when the request is successful -```js +```mjs +import { PerformanceObserver } from 'node:perf_hooks'; +import { lookup, promises } from 'node:dns'; + +const obs = new PerformanceObserver((items) => { + items.getEntries().forEach((item) => { + console.log(item); + }); +}); +obs.observe({ entryTypes: ['dns'] }); +lookup('localhost', () => {}); +promises.resolve('localhost'); +``` + +```cjs 'use strict'; const { PerformanceObserver } = require('node:perf_hooks'); const dns = require('node:dns'); diff --git a/doc/api/permissions.md b/doc/api/permissions.md index e37c2982bc146a..ea3ccc17b306b7 100644 --- a/doc/api/permissions.md +++ b/doc/api/permissions.md @@ -28,34 +28,27 @@ If you find a potential security vulnerability, please refer to our -> Stability: 1.1 - Active development +> Stability: 2 - Stable. The Node.js Permission Model is a mechanism for restricting access to specific resources during execution. -The API exists behind a flag [`--experimental-permission`][] which when enabled, +The API exists behind a flag [`--permission`][] which when enabled, will restrict access to all available permissions. -The available permissions are documented by the [`--experimental-permission`][] +The available permissions are documented by the [`--permission`][] flag. -When starting Node.js with `--experimental-permission`, +When starting Node.js with `--permission`, the ability to access the file system through the `fs` module, spawn processes, use `node:worker_threads`, use native addons, use WASI, and enable the runtime inspector will be restricted. ```console -$ node --experimental-permission index.js -node:internal/modules/cjs/loader:171 - const result = internalModuleStat(receiver, filename); - ^ +$ node --permission index.js Error: Access to this API has been restricted - at stat (node:internal/modules/cjs/loader:171:18) - at Module._findPath (node:internal/modules/cjs/loader:627:16) - at resolveMainPath (node:internal/modules/run_main:19:25) - at Function.executeUserEntryPoint [as runMain] (node:internal/modules/run_main:76:24) at node:internal/main/run_main_module:23:47 { code: 'ERR_ACCESS_DENIED', permission: 'FileSystemRead', @@ -71,7 +64,7 @@ flag. For WASI, use the [`--allow-wasi`][] flag. #### Runtime API -When enabling the Permission Model through the [`--experimental-permission`][] +When enabling the Permission Model through the [`--permission`][] flag a new property `permission` is added to the `process` object. This property contains one function: @@ -97,10 +90,8 @@ To allow access to the file system, use the [`--allow-fs-read`][] and [`--allow-fs-write`][] flags: ```console -$ node --experimental-permission --allow-fs-read=* --allow-fs-write=* index.js +$ node --permission --allow-fs-read=* --allow-fs-write=* index.js Hello world! -(node:19836) ExperimentalWarning: Permission is an experimental feature -(Use `node --trace-warnings ...` to show where the warning was created) ``` The valid arguments for both flags are: @@ -154,6 +145,8 @@ There are constraints you need to know before using this system: flags that can be set via runtime through `v8.setFlagsFromString`. * OpenSSL engines cannot be requested at runtime when the Permission Model is enabled, affecting the built-in crypto, https, and tls modules. +* Run-Time Loadable Extensions cannot be loaded when the Permission Model is + enabled, affecting the sqlite module. * Using existing file descriptors via the `node:fs` module bypasses the Permission Model. @@ -172,5 +165,5 @@ There are constraints you need to know before using this system: [`--allow-fs-write`]: cli.md#--allow-fs-write [`--allow-wasi`]: cli.md#--allow-wasi [`--allow-worker`]: cli.md#--allow-worker -[`--experimental-permission`]: cli.md#--experimental-permission +[`--permission`]: cli.md#--permission [`permission.has()`]: process.md#processpermissionhasscope-reference diff --git a/doc/api/process.md b/doc/api/process.md index 1fa77523f1b667..8e7fe148daa9a6 100644 --- a/doc/api/process.md +++ b/doc/api/process.md @@ -1922,12 +1922,18 @@ A boolean value that is `true` if the current Node.js build includes the inspect +> Stability: 0 - Deprecated. This property is always true, and any checks based on it are +> redundant. + * {boolean} A boolean value that is `true` if the current Node.js build includes support for IPv6. +Since all Node.js builds have IPv6 support, this value is always `true`. + ## `process.features.require_module` +> Stability: 0 - Deprecated. Use `process.features.tls` instead. + * {boolean} A boolean value that is `true` if the current Node.js build includes support for ALPN in TLS. +In Node.js 11.0.0 and later versions, the OpenSSL dependencies feature unconditional ALPN support. +This value is therefore identical to that of `process.features.tls`. + ## `process.features.tls_ocsp` +> Stability: 0 - Deprecated. Use `process.features.tls` instead. + * {boolean} A boolean value that is `true` if the current Node.js build includes support for OCSP in TLS. +In Node.js 11.0.0 and later versions, the OpenSSL dependencies feature unconditional OCSP support. +This value is therefore identical to that of `process.features.tls`. + ## `process.features.tls_sni` +> Stability: 0 - Deprecated. Use `process.features.tls` instead. + * {boolean} A boolean value that is `true` if the current Node.js build includes support for SNI in TLS. +In Node.js 11.0.0 and later versions, the OpenSSL dependencies feature unconditional SNI support. +This value is therefore identical to that of `process.features.tls`. + ## `process.features.typescript` -> Stability: 1.0 - Early development +> Stability: 1.1 - Active development * {boolean|string} @@ -1996,12 +2020,17 @@ A value that is `"strip"` if Node.js is run with `--experimental-strip-types`, +> Stability: 0 - Deprecated. This property is always true, and any checks based on it are +> redundant. + * {boolean} A boolean value that is `true` if the current Node.js build includes support for libuv. -Since it's currently not possible to build Node.js without libuv, this value is always `true`. + +Since it's not possible to build Node.js without libuv, this value is always `true`. ## `process.finalization.register(ref, callback)` @@ -2099,10 +2128,10 @@ class Test { constructor() { finalization.register(this, (ref) => ref.dispose()); - // even something like this is highly discouraged + // Even something like this is highly discouraged // finalization.register(this, () => this.dispose()); - } - dispose() {} + } + dispose() {} } ``` @@ -3062,7 +3091,7 @@ added: v20.0.0 * {Object} -This API is available through the [`--experimental-permission`][] flag. +This API is available through the [`--permission`][] flag. `process.permission` is an object whose methods are used to manage permissions for the current process. Additional documentation is available in the @@ -3478,6 +3507,16 @@ const { report } = require('node:process'); console.log(`Report on exception: ${report.reportOnUncaughtException}`); ``` +### `process.report.excludeEnv` + + + +* {boolean} + +If `true`, a diagnostic report is generated without the environment variables. + ### `process.report.signal` + +Replace the keys `data_seg_size_kbytes`, `max_memory_size_kbytes`, and `virtual_memory_kbytes` +with `data_seg_size_bytes`, `max_memory_size_bytes`, and `virtual_memory_bytes` +respectively in the `userLimits` section, as these values are given in bytes. + +```json +{ + "userLimits": { + // Skip some keys ... + "data_seg_size_bytes": { // replacing data_seg_size_kbytes + "soft": "unlimited", + "hard": "unlimited" + }, + // ... + "max_memory_size_bytes": { // replacing max_memory_size_kbytes + "soft": "unlimited", + "hard": "unlimited" + }, + // ... + "virtual_memory_bytes": { // replacing virtual_memory_kbytes + "soft": "unlimited", + "hard": "unlimited" + } + } +} +``` + +#### Version 4 + + + +New fields `ipv4` and `ipv6` are added to `tcp` and `udp` libuv handles endpoints. Examples: + +```json +{ + "libuv": [ + { + "type": "tcp", + "is_active": true, + "is_referenced": true, + "address": "0x000055e70fcb85d8", + "localEndpoint": { + "host": "localhost", + "ip4": "127.0.0.1", // new key + "port": 48986 + }, + "remoteEndpoint": { + "host": "localhost", + "ip4": "127.0.0.1", // new key + "port": 38573 + }, + "sendBufferSize": 2626560, + "recvBufferSize": 131072, + "fd": 24, + "writeQueueSize": 0, + "readable": true, + "writable": true + }, + { + "type": "tcp", + "is_active": true, + "is_referenced": true, + "address": "0x000055e70fcd68c8", + "localEndpoint": { + "host": "ip6-localhost", + "ip6": "::1", // new key + "port": 52266 + }, + "remoteEndpoint": { + "host": "ip6-localhost", + "ip6": "::1", // new key + "port": 38573 + }, + "sendBufferSize": 2626560, + "recvBufferSize": 131072, + "fd": 25, + "writeQueueSize": 0, + "readable": false, + "writable": false + } + ] +} +``` + +#### Version 3 + + + +The following memory usage keys are added to the `resourceUsage` section. + +```json +{ + "resourceUsage": { + "rss": "35766272", + "free_memory": "1598337024", + "total_memory": "17179869184", + "available_memory": "1598337024", + "constrained_memory": "36624662528" + } +} +``` + +#### Version 2 + + + +Added [`Worker`][] support. Refer to [Interaction with workers](#interaction-with-workers) section for more details. + +#### Version 1 + +This is the first version of the diagnostic report. + ## Configuration Additional runtime configuration of report generation is available via diff --git a/doc/api/single-executable-applications.md b/doc/api/single-executable-applications.md index be86536291ba7d..30ebd0fc38aaa6 100644 --- a/doc/api/single-executable-applications.md +++ b/doc/api/single-executable-applications.md @@ -352,7 +352,7 @@ writes to the returned array buffer is likely to result in a crash. * `key` {string} the key for the asset in the dictionary specified by the `assets` field in the single-executable application configuration. -* Returns: {string|ArrayBuffer} +* Returns: {ArrayBuffer} ### `require(id)` in the injected main script is not file based diff --git a/doc/api/sqlite.md b/doc/api/sqlite.md index 9fd7a022b3fc9f..270b8e8d8b7507 100644 --- a/doc/api/sqlite.md +++ b/doc/api/sqlite.md @@ -6,8 +6,7 @@ added: v22.5.0 --> -> Stability: 1.1 - Active development. Enable this API with the -> [`--experimental-sqlite`][] CLI flag. +> Stability: 1.1 - Active development. @@ -109,6 +108,10 @@ added: v22.5.0 [double-quoted string literals][]. This is not recommended but can be enabled for compatibility with legacy database schemas. **Default:** `false`. + * `allowExtension` {boolean} If `true`, the `loadExtension` SQL function + and the `loadExtension()` method are enabled. + You can call `enableLoadExtension(false)` later to disable this feature. + **Default:** `false`. Constructs a new `DatabaseSync` instance. @@ -121,6 +124,30 @@ added: v22.5.0 Closes the database connection. An exception is thrown if the database is not open. This method is a wrapper around [`sqlite3_close_v2()`][]. +### `database.loadExtension(path)` + + + +* `path` {string} The path to the shared library to load. + +Loads a shared library into the database connection. This method is a wrapper +around [`sqlite3_load_extension()`][]. It is required to enable the +`allowExtension` option when constructing the `DatabaseSync` instance. + +### `database.enableLoadExtension(allow)` + + + +* `allow` {boolean} Whether to allow loading extensions. + +Enables or disables the `loadExtension` SQL function, and the `loadExtension()` +method. When `allowExtension` is `false` when constructing, you cannot enable +loading extensions for security reasons. + ### `database.exec(sql)` + +* `name` {string} The name of the SQLite function to create. +* `options` {Object} Optional configuration settings for the function. The + following properties are supported: + * `deterministic` {boolean} If `true`, the [`SQLITE_DETERMINISTIC`][] flag is + set on the created function. **Default:** `false`. + * `directOnly` {boolean} If `true`, the [`SQLITE_DIRECTONLY`][] flag is set on + the created function. **Default:** `false`. + * `useBigIntArguments` {boolean} If `true`, integer arguments to `function` + are converted to `BigInt`s. If `false`, integer arguments are passed as + JavaScript numbers. **Default:** `false`. + * `varargs` {boolean} If `true`, `function` can accept a variable number of + arguments. If `false`, `function` must be invoked with exactly + `function.length` arguments. **Default:** `false`. +* `function` {Function} The JavaScript function to call when the SQLite + function is invoked. + +This method is used to create SQLite user-defined functions. This method is a +wrapper around [`sqlite3_create_function_v2()`][]. + ### `database.open()` + +* `namedParameters` {Object} An optional object used to bind named parameters. + The keys of this object are used to configure the mapping. +* `...anonymousParameters` {null|number|bigint|string|Buffer|Uint8Array} Zero or + more values to bind to anonymous parameters. +* Returns: {Iterator} An iterable iterator of objects. Each object corresponds to a row + returned by executing the prepared statement. The keys and values of each + object correspond to the column names and values of the row. + +This method executes a prepared statement and returns an iterator of +objects. If the prepared statement does not return any results, this method +returns an empty iterator. The prepared statement [parameters are bound][] using +the values in `namedParameters` and `anonymousParameters`. + ### `statement.run([namedParameters][, ...anonymousParameters])` + +* {Object} + +An object containing commonly used constants for SQLite operations. -The following constants are exported by the `node:sqlite` module. +### SQLite constants -### SQLite Session constants +The following constants are exported by the `sqlite.constants` object. #### Conflict-resolution constants @@ -426,20 +505,23 @@ The following constants are meant for use with [`database.applyChangeset()`](#da SQLITE_CHANGESET_ABORT - Abort when a change encounters a conflict and roll back databsase. + Abort when a change encounters a conflict and roll back database. [Changesets and Patchsets]: https://www.sqlite.org/sessionintro.html#changesets_and_patchsets [SQL injection]: https://en.wikipedia.org/wiki/SQL_injection -[`--experimental-sqlite`]: cli.md#--experimental-sqlite [`ATTACH DATABASE`]: https://www.sqlite.org/lang_attach.html [`PRAGMA foreign_keys`]: https://www.sqlite.org/pragma.html#pragma_foreign_keys +[`SQLITE_DETERMINISTIC`]: https://www.sqlite.org/c3ref/c_deterministic.html +[`SQLITE_DIRECTONLY`]: https://www.sqlite.org/c3ref/c_deterministic.html [`sqlite3_changes64()`]: https://www.sqlite.org/c3ref/changes.html [`sqlite3_close_v2()`]: https://www.sqlite.org/c3ref/close.html +[`sqlite3_create_function_v2()`]: https://www.sqlite.org/c3ref/create_function.html [`sqlite3_exec()`]: https://www.sqlite.org/c3ref/exec.html [`sqlite3_expanded_sql()`]: https://www.sqlite.org/c3ref/expanded_sql.html [`sqlite3_last_insert_rowid()`]: https://www.sqlite.org/c3ref/last_insert_rowid.html +[`sqlite3_load_extension()`]: https://www.sqlite.org/c3ref/load_extension.html [`sqlite3_prepare_v2()`]: https://www.sqlite.org/c3ref/prepare.html [`sqlite3_sql()`]: https://www.sqlite.org/c3ref/expanded_sql.html [`sqlite3changeset_apply()`]: https://www.sqlite.org/session/sqlite3changeset_apply.html diff --git a/doc/api/stream.md b/doc/api/stream.md index fbd9d3c93cb7ab..c8f1e2b528c7dc 100644 --- a/doc/api/stream.md +++ b/doc/api/stream.md @@ -315,7 +315,7 @@ events (due to incorrect stream implementations) do not cause unexpected crashes. If this is unwanted behavior then `options.cleanup` should be set to `true`: -```js +```mjs await finished(rs, { cleanup: true }); ``` @@ -3836,8 +3836,6 @@ added: v8.0.0 The `_destroy()` method is called by [`writable.destroy()`][writable-destroy]. It can be overridden by child classes but it **must not** be called directly. -Furthermore, the `callback` should not be mixed with async/await -once it is executed when a promise is resolved. #### `writable._final(callback)` @@ -3916,7 +3914,7 @@ const { StringDecoder } = require('node:string_decoder'); class StringWritable extends Writable { constructor(options) { super(options); - this._decoder = new StringDecoder(options && options.defaultEncoding); + this._decoder = new StringDecoder(options?.defaultEncoding); this.data = ''; } _write(chunk, encoding, callback) { diff --git a/doc/api/test.md b/doc/api/test.md index 2ccc87a3f2ae00..b8d5dfb7f69ead 100644 --- a/doc/api/test.md +++ b/doc/api/test.md @@ -937,8 +937,7 @@ compared against a set of known good values. The known good values are known as snapshots, and are stored in a snapshot file. Snapshot files are managed by the test runner, but are designed to be human readable to aid in debugging. Best practice is for snapshot files to be checked into source control along with your -test files. In order to enable snapshot testing, Node.js must be started with -the [`--experimental-test-snapshots`][] command-line flag. +test files. Snapshot files are generated by starting Node.js with the [`--test-update-snapshots`][] command-line flag. A separate snapshot file is @@ -3257,7 +3256,7 @@ test('snapshot test with default serialization', (t) => { test('snapshot test with custom serialization', (t) => { t.assert.snapshot({ value3: 3, value4: 4 }, { - serializers: [(value) => JSON.stringify(value)] + serializers: [(value) => JSON.stringify(value)], }); }); ``` @@ -3315,10 +3314,12 @@ The name of the test. -> Stability: 1 - Experimental - * `count` {number} The number of assertions and subtests that are expected to run. This function is used to set the number of assertions and subtests that are expected to run @@ -3562,7 +3563,6 @@ Can be used to abort test subtasks when the test has been aborted. [`--experimental-strip-types`]: cli.md#--experimental-strip-types [`--experimental-test-coverage`]: cli.md#--experimental-test-coverage [`--experimental-test-module-mocks`]: cli.md#--experimental-test-module-mocks -[`--experimental-test-snapshots`]: cli.md#--experimental-test-snapshots [`--import`]: cli.md#--importmodule [`--test-concurrency`]: cli.md#--test-concurrency [`--test-coverage-include`]: cli.md#--test-coverage-include diff --git a/doc/api/timers.md b/doc/api/timers.md index da3eec691627be..7ceec0bb53a1dd 100644 --- a/doc/api/timers.md +++ b/doc/api/timers.md @@ -292,7 +292,24 @@ returned Promises will be rejected with an `'AbortError'`. For `setImmediate()`: -```js +```mjs +import { setImmediate as setImmediatePromise } from 'node:timers/promises'; + +const ac = new AbortController(); +const signal = ac.signal; + +// We do not `await` the promise so `ac.abort()` is called concurrently. +setImmediatePromise('foobar', { signal }) + .then(console.log) + .catch((err) => { + if (err.name === 'AbortError') + console.error('The immediate was aborted'); + }); + +ac.abort(); +``` + +```cjs const { setImmediate: setImmediatePromise } = require('node:timers/promises'); const ac = new AbortController(); @@ -310,7 +327,24 @@ ac.abort(); For `setTimeout()`: -```js +```mjs +import { setTimeout as setTimeoutPromise } from 'node:timers/promises'; + +const ac = new AbortController(); +const signal = ac.signal; + +// We do not `await` the promise so `ac.abort()` is called concurrently. +setTimeoutPromise(1000, 'foobar', { signal }) + .then(console.log) + .catch((err) => { + if (err.name === 'AbortError') + console.error('The timeout was aborted'); + }); + +ac.abort(); +``` + +```cjs const { setTimeout: setTimeoutPromise } = require('node:timers/promises'); const ac = new AbortController(); diff --git a/doc/api/tls.md b/doc/api/tls.md index 0ac0b7e740e5c7..a00c32ab8b2988 100644 --- a/doc/api/tls.md +++ b/doc/api/tls.md @@ -10,7 +10,11 @@ The `node:tls` module provides an implementation of the Transport Layer Security (TLS) and Secure Socket Layer (SSL) protocols that is built on top of OpenSSL. The module can be accessed using: -```js +```mjs +import tls from 'node:tls'; +``` + +```cjs const tls = require('node:tls'); ``` @@ -461,17 +465,31 @@ To adjust the security level in your Node.js application, you can include `@SECL within a cipher string, where `X` is the desired security level. For example, to set the security level to 0 while using the default OpenSSL cipher list, you could use: -```js -const tls = require('node:tls'); +```mjs +import { createServer, connect } from 'node:tls'; const port = 443; -tls.createServer({ciphers: 'DEFAULT@SECLEVEL=0', minVersion: 'TLSv1'}, function (socket) { +createServer({ ciphers: 'DEFAULT@SECLEVEL=0', minVersion: 'TLSv1' }, function(socket) { console.log('Client connected with protocol:', socket.getProtocol()); socket.end(); this.close(); -}). -listen(port, () => { - tls.connect(port, {ciphers: 'DEFAULT@SECLEVEL=0', maxVersion: 'TLSv1'}); +}) +.listen(port, () => { + connect(port, { ciphers: 'DEFAULT@SECLEVEL=0', maxVersion: 'TLSv1' }); +}); +``` + +```cjs +const { createServer, connect } = require('node:tls'); +const port = 443; + +createServer({ ciphers: 'DEFAULT@SECLEVEL=0', minVersion: 'TLSv1' }, function(socket) { + console.log('Client connected with protocol:', socket.getProtocol()); + socket.end(); + this.close(); +}) +.listen(port, () => { + connect(port, { ciphers: 'DEFAULT@SECLEVEL=0', maxVersion: 'TLSv1' }); }); ``` @@ -1783,24 +1801,57 @@ to `host`. The following illustrates a client for the echo server example from [`tls.createServer()`][]: -```js +```mjs // Assumes an echo server that is listening on port 8000. -const tls = require('node:tls'); -const fs = require('node:fs'); +import { connect } from 'node:tls'; +import { readFileSync } from 'node:fs'; +import { stdin } from 'node:process'; + +const options = { + // Necessary only if the server requires client certificate authentication. + key: readFileSync('client-key.pem'), + cert: readFileSync('client-cert.pem'), + + // Necessary only if the server uses a self-signed certificate. + ca: [ readFileSync('server-cert.pem') ], + + // Necessary only if the server's cert isn't for "localhost". + checkServerIdentity: () => { return null; }, +}; + +const socket = connect(8000, options, () => { + console.log('client connected', + socket.authorized ? 'authorized' : 'unauthorized'); + stdin.pipe(socket); + stdin.resume(); +}); +socket.setEncoding('utf8'); +socket.on('data', (data) => { + console.log(data); +}); +socket.on('end', () => { + console.log('server ends connection'); +}); +``` + +```cjs +// Assumes an echo server that is listening on port 8000. +const { connect } = require('node:tls'); +const { readFileSync } = require('node:fs'); const options = { // Necessary only if the server requires client certificate authentication. - key: fs.readFileSync('client-key.pem'), - cert: fs.readFileSync('client-cert.pem'), + key: readFileSync('client-key.pem'), + cert: readFileSync('client-cert.pem'), // Necessary only if the server uses a self-signed certificate. - ca: [ fs.readFileSync('server-cert.pem') ], + ca: [ readFileSync('server-cert.pem') ], // Necessary only if the server's cert isn't for "localhost". checkServerIdentity: () => { return null; }, }; -const socket = tls.connect(8000, options, () => { +const socket = connect(8000, options, () => { console.log('client connected', socket.authorized ? 'authorized' : 'unauthorized'); process.stdin.pipe(socket); @@ -1815,6 +1866,20 @@ socket.on('end', () => { }); ``` +To generate the certificate and key for this example, run: + +```bash +openssl req -x509 -newkey rsa:2048 -nodes -sha256 -subj '/CN=localhost' \ + -keyout client-key.pem -out client-cert.pem +``` + +Then, to generate the `server-cert.pem` certificate for this example, run: + +```bash +openssl pkcs12 -certpbe AES-256-CBC -export -out server-cert.pem \ + -inkey client-key.pem -in client-cert.pem +``` + ## `tls.connect(path[, options][, callback])` -> Stability: 1.0 - Early development +> Stability: 1.1 - Active development ## Enabling @@ -50,7 +50,7 @@ To use TypeScript with full support for all TypeScript features, including added: v22.6.0 --> -> Stability: 1.0 - Early development +> Stability: 1.1 - Active development The flag [`--experimental-strip-types`][] enables Node.js to run TypeScript files. By default Node.js will execute only files that contain no diff --git a/doc/api/util.md b/doc/api/util.md index d46092a815774d..c6c6e0fe37b59c 100644 --- a/doc/api/util.md +++ b/doc/api/util.md @@ -364,7 +364,7 @@ util.formatWithOptions({ colors: true }, 'See object %O', { foo: 42 }); // when printed to a terminal. ``` -## `util.getCallSites(frameCount)` +## `util.getCallSites(frameCountOrOptions, [options])` > Stability: 1.1 - Active development @@ -376,8 +376,11 @@ changes: description: The API is renamed from `util.getCallSite` to `util.getCallSites()`. --> -* `frameCount` {number} Number of frames to capture as call site objects. +* `frameCount` {number} Optional number of frames to capture as call site objects. **Default:** `10`. Allowable range is between 1 and 200. +* `options` {Object} Optional + * `sourceMap` {boolean} Reconstruct the original location in the stacktrace from the source-map. + Enabled by default with the flag `--enable-source-maps`. * Returns: {Object\[]} An array of call site objects * `functionName` {string} Returns the name of the function associated with this call site. * `scriptName` {string} Returns the name of the resource that contains the script for the @@ -425,6 +428,33 @@ function anotherFunction() { anotherFunction(); ``` +It is possible to reconstruct the original locations by setting the option `sourceMap` to `true`. +If the source map is not available, the original location will be the same as the current location. +When the `--enable-source-maps` flag is enabled, for example when using `--experimental-transform-types`, +`sourceMap` will be true by default. + +```ts +import util from 'node:util'; + +interface Foo { + foo: string; +} + +const callSites = util.getCallSites({ sourceMap: true }); + +// With sourceMap: +// Function Name: '' +// Script Name: example.js +// Line Number: 7 +// Column Number: 26 + +// Without sourceMap: +// Function Name: '' +// Script Name: example.js +// Line Number: 2 +// Column Number: 26 +``` + ## `util.getSystemErrorName(err)` An implementation of the [WHATWG Encoding Standard][] `TextDecoder` API. @@ -2042,14 +2081,6 @@ is not supported. ### `new TextDecoder([encoding[, options]])` - - * `encoding` {string} Identifies the `encoding` that this `TextDecoder` instance supports. **Default:** `'utf-8'`. * `options` {Object} @@ -2132,6 +2163,10 @@ encoded bytes. ### `textEncoder.encodeInto(src, dest)` + + * `src` {string} The text to encode. * `dest` {Uint8Array} The array to hold the encode result. * Returns: {Object} @@ -2210,39 +2245,55 @@ added: > Stability: 1 - Experimental * `signal` {AbortSignal} -* `resource` {Object} Any non-null entity, reference to which is held weakly. +* `resource` {Object} Any non-null object tied to the abortable operation and held weakly. + If `resource` is garbage collected before the `signal` aborts, the promise remains pending, + allowing Node.js to stop tracking it. + This helps prevent memory leaks in long-running or non-cancelable operations. * Returns: {Promise} -Listens to abort event on the provided `signal` and -returns a promise that is fulfilled when the `signal` is -aborted. If the passed `resource` is garbage collected before the `signal` is -aborted, the returned promise shall remain pending indefinitely. +Listens to abort event on the provided `signal` and returns a promise that resolves when the `signal` is aborted. +If `resource` is provided, it weakly references the operation's associated object, +so if `resource` is garbage collected before the `signal` aborts, +then returned promise shall remain pending. +This prevents memory leaks in long-running or non-cancelable operations. ```cjs const { aborted } = require('node:util'); +// Obtain an object with an abortable signal, like a custom resource or operation. const dependent = obtainSomethingAbortable(); +// Pass `dependent` as the resource, indicating the promise should only resolve +// if `dependent` is still in memory when the signal is aborted. aborted(dependent.signal, dependent).then(() => { - // Do something when dependent is aborted. + + // This code runs when `dependent` is aborted. + console.log('Dependent resource was aborted.'); }); +// Simulate an event that triggers the abort. dependent.on('event', () => { - dependent.abort(); + dependent.abort(); // This will cause the `aborted` promise to resolve. }); ``` ```mjs import { aborted } from 'node:util'; +// Obtain an object with an abortable signal, like a custom resource or operation. const dependent = obtainSomethingAbortable(); +// Pass `dependent` as the resource, indicating the promise should only resolve +// if `dependent` is still in memory when the signal is aborted. aborted(dependent.signal, dependent).then(() => { - // Do something when dependent is aborted. + + // This code runs when `dependent` is aborted. + console.log('Dependent resource was aborted.'); }); +// Simulate an event that triggers the abort. dependent.on('event', () => { - dependent.abort(); + dependent.abort(); // This will cause the `aborted` promise to resolve. }); ``` diff --git a/doc/api/v8.md b/doc/api/v8.md index e7164c43364246..ed2a030b5fd015 100644 --- a/doc/api/v8.md +++ b/doc/api/v8.md @@ -1294,7 +1294,7 @@ is as follows. Here's an example. ```js -const { GCProfiler } = require('v8'); +const { GCProfiler } = require('node:v8'); const profiler = new GCProfiler(); profiler.start(); setTimeout(() => { diff --git a/doc/api/vm.md b/doc/api/vm.md index f8575e3815403a..06200e475fcd2e 100644 --- a/doc/api/vm.md +++ b/doc/api/vm.md @@ -1618,12 +1618,12 @@ in the outer context. const vm = require('node:vm'); // An undefined `contextObject` option makes the global object contextified. -let context = vm.createContext(); +const context = vm.createContext(); console.log(vm.runInContext('globalThis', context) === context); // false // A contextified global object cannot be frozen. try { vm.runInContext('Object.freeze(globalThis);', context); -} catch(e) { +} catch (e) { console.log(e); // TypeError: Cannot freeze } console.log(vm.runInContext('globalThis.foo = 1; foo;', context)); // 1 @@ -1648,7 +1648,7 @@ const context = vm.createContext(vm.constants.DONT_CONTEXTIFY); vm.runInContext('Object.freeze(globalThis);', context); try { vm.runInContext('bar = 1; bar;', context); -} catch(e) { +} catch (e) { console.log(e); // Uncaught ReferenceError: bar is not defined } ``` @@ -1677,7 +1677,7 @@ console.log(vm.runInContext('bar;', context)); // 1 Object.freeze(context); try { vm.runInContext('baz = 1; baz;', context); -} catch(e) { +} catch (e) { console.log(e); // Uncaught ReferenceError: baz is not defined } ``` diff --git a/doc/api/webcrypto.md b/doc/api/webcrypto.md index 18d17aab78a084..6f50c2a99bfdd3 100644 --- a/doc/api/webcrypto.md +++ b/doc/api/webcrypto.md @@ -2,6 +2,9 @@ + ```js const b = Symbol('b'); diff --git a/doc/api/zlib.md b/doc/api/zlib.md index f82fd626ab89e1..2d3ea8050f230b 100644 --- a/doc/api/zlib.md +++ b/doc/api/zlib.md @@ -11,7 +11,11 @@ Gzip, Deflate/Inflate, and Brotli. To access it: -```js +```mjs +import os from 'node:zlib'; +``` + +```cjs const zlib = require('node:zlib'); ``` @@ -21,13 +25,35 @@ Compressing or decompressing a stream (such as a file) can be accomplished by piping the source stream through a `zlib` `Transform` stream into a destination stream: -```js -const { createGzip } = require('node:zlib'); -const { pipeline } = require('node:stream'); +```mjs +import { + createReadStream, + createWriteStream, +} from 'node:fs'; +import process from 'node:process'; +import { createGzip } from 'node:zlib'; +import { pipeline } from 'node:stream'; + +const gzip = createGzip(); +const source = createReadStream('input.txt'); +const destination = createWriteStream('input.txt.gz'); + +pipeline(source, gzip, destination, (err) => { + if (err) { + console.error('An error occurred:', err); + process.exitCode = 1; + } +}); +``` + +```cjs const { createReadStream, createWriteStream, } = require('node:fs'); +const process = require('node:process'); +const { createGzip } = require('node:zlib'); +const { pipeline } = require('node:stream'); const gzip = createGzip(); const source = createReadStream('input.txt'); @@ -39,17 +65,43 @@ pipeline(source, gzip, destination, (err) => { process.exitCode = 1; } }); +``` -// Or, Promisified +Or, using the promise `pipeline` API: -const { promisify } = require('node:util'); -const pipe = promisify(pipeline); +```mjs +import { + createReadStream, + createWriteStream, +} from 'node:fs'; +import process from 'node:process'; +import { createGzip } from 'node:zlib'; +import { pipeline } from 'node:stream/promises'; + +async function do_gzip(input, output) { + const gzip = createGzip(); + const source = createReadStream(input); + const destination = createWriteStream(output); + await pipeline(source, gzip, destination); +} + +await do_gzip('input.txt', 'input.txt.gz'); +``` + +```cjs +const { + createReadStream, + createWriteStream, +} = require('node:fs'); +const process = require('node:process'); +const { createGzip } = require('node:zlib'); +const { pipeline } = require('node:stream/promises'); async function do_gzip(input, output) { const gzip = createGzip(); const source = createReadStream(input); const destination = createWriteStream(output); - await pipe(source, gzip, destination); + await pipeline(source, gzip, destination); } do_gzip('input.txt', 'input.txt.gz') @@ -61,7 +113,39 @@ do_gzip('input.txt', 'input.txt.gz') It is also possible to compress or decompress data in a single step: -```js +```mjs +import process from 'node:process'; +import { Buffer } from 'node:buffer'; +import { deflate, unzip } from 'node:zlib'; + +const input = '.................................'; +deflate(input, (err, buffer) => { + if (err) { + console.error('An error occurred:', err); + process.exitCode = 1; + } + console.log(buffer.toString('base64')); +}); + +const buffer = Buffer.from('eJzT0yMAAGTvBe8=', 'base64'); +unzip(buffer, (err, buffer) => { + if (err) { + console.error('An error occurred:', err); + process.exitCode = 1; + } + console.log(buffer.toString()); +}); + +// Or, Promisified + +import { promisify } from 'node:util'; +const do_unzip = promisify(unzip); + +const unzippedBuffer = await do_unzip(buffer); +console.log(unzippedBuffer.toString()); +``` + +```cjs const { deflate, unzip } = require('node:zlib'); const input = '.................................'; @@ -104,7 +188,19 @@ limitations in some applications. Creating and using a large number of zlib objects simultaneously can cause significant memory fragmentation. -```js +```mjs +import zlib from 'node:zlib'; +import { Buffer } from 'node:buffer'; + +const payload = Buffer.from('This is some data'); + +// WARNING: DO NOT DO THIS! +for (let i = 0; i < 30000; ++i) { + zlib.deflate(payload, (err, buffer) => {}); +} +``` + +```cjs const zlib = require('node:zlib'); const payload = Buffer.from('This is some data'); @@ -138,7 +234,47 @@ Using `zlib` encoding can be expensive, and the results ought to be cached. See [Memory usage tuning][] for more information on the speed/memory/compression tradeoffs involved in `zlib` usage. -```js +```mjs +// Client request example +import fs from 'node:fs'; +import zlib from 'node:zlib'; +import http from 'node:http'; +import process from 'node:process'; +import { pipeline } from 'node:stream'; + +const request = http.get({ host: 'example.com', + path: '/', + port: 80, + headers: { 'Accept-Encoding': 'br,gzip,deflate' } }); +request.on('response', (response) => { + const output = fs.createWriteStream('example.com_index.html'); + + const onError = (err) => { + if (err) { + console.error('An error occurred:', err); + process.exitCode = 1; + } + }; + + switch (response.headers['content-encoding']) { + case 'br': + pipeline(response, zlib.createBrotliDecompress(), output, onError); + break; + // Or, just use zlib.createUnzip() to handle both of the following cases: + case 'gzip': + pipeline(response, zlib.createGunzip(), output, onError); + break; + case 'deflate': + pipeline(response, zlib.createInflate(), output, onError); + break; + default: + pipeline(response, output, onError); + break; + } +}); +``` + +```cjs // Client request example const zlib = require('node:zlib'); const http = require('node:http'); @@ -177,7 +313,52 @@ request.on('response', (response) => { }); ``` -```js +```mjs +// server example +// Running a gzip operation on every request is quite expensive. +// It would be much more efficient to cache the compressed buffer. +import zlib from 'node:zlib'; +import http from 'node:http'; +import fs from 'node:fs'; +import { pipeline } from 'node:stream'; + +http.createServer((request, response) => { + const raw = fs.createReadStream('index.html'); + // Store both a compressed and an uncompressed version of the resource. + response.setHeader('Vary', 'Accept-Encoding'); + const acceptEncoding = request.headers['accept-encoding'] || ''; + + const onError = (err) => { + if (err) { + // If an error occurs, there's not much we can do because + // the server has already sent the 200 response code and + // some amount of data has already been sent to the client. + // The best we can do is terminate the response immediately + // and log the error. + response.end(); + console.error('An error occurred:', err); + } + }; + + // Note: This is not a conformant accept-encoding parser. + // See https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.3 + if (/\bdeflate\b/.test(acceptEncoding)) { + response.writeHead(200, { 'Content-Encoding': 'deflate' }); + pipeline(raw, zlib.createDeflate(), response, onError); + } else if (/\bgzip\b/.test(acceptEncoding)) { + response.writeHead(200, { 'Content-Encoding': 'gzip' }); + pipeline(raw, zlib.createGzip(), response, onError); + } else if (/\bbr\b/.test(acceptEncoding)) { + response.writeHead(200, { 'Content-Encoding': 'br' }); + pipeline(raw, zlib.createBrotliCompress(), response, onError); + } else { + response.writeHead(200, {}); + pipeline(raw, response, onError); + } +}).listen(1337); +``` + +```cjs // server example // Running a gzip operation on every request is quite expensive. // It would be much more efficient to cache the compressed buffer. @@ -190,10 +371,7 @@ http.createServer((request, response) => { const raw = fs.createReadStream('index.html'); // Store both a compressed and an uncompressed version of the resource. response.setHeader('Vary', 'Accept-Encoding'); - let acceptEncoding = request.headers['accept-encoding']; - if (!acceptEncoding) { - acceptEncoding = ''; - } + const acceptEncoding = request.headers['accept-encoding'] || ''; const onError = (err) => { if (err) { @@ -318,7 +496,43 @@ quality, but can be useful when data needs to be available as soon as possible. In the following example, `flush()` is used to write a compressed partial HTTP response to the client: -```js +```mjs +import zlib from 'node:zlib'; +import http from 'node:http'; +import { pipeline } from 'node:stream'; + +http.createServer((request, response) => { + // For the sake of simplicity, the Accept-Encoding checks are omitted. + response.writeHead(200, { 'content-encoding': 'gzip' }); + const output = zlib.createGzip(); + let i; + + pipeline(output, response, (err) => { + if (err) { + // If an error occurs, there's not much we can do because + // the server has already sent the 200 response code and + // some amount of data has already been sent to the client. + // The best we can do is terminate the response immediately + // and log the error. + clearInterval(i); + response.end(); + console.error('An error occurred:', err); + } + }); + + i = setInterval(() => { + output.write(`The current time is ${Date()}\n`, () => { + // The data has been passed to zlib, but the compression algorithm may + // have decided to buffer the data for more efficient compression. + // Calling .flush() will make the data available as soon as the client + // is ready to receive it. + output.flush(); + }); + }, 1000); +}).listen(1337); +``` + +```cjs const zlib = require('node:zlib'); const http = require('node:http'); const { pipeline } = require('node:stream'); diff --git a/doc/api_assets/hljs.css b/doc/api_assets/hljs.css index 86bd405709276c..91cece59e7cced 100644 --- a/doc/api_assets/hljs.css +++ b/doc/api_assets/hljs.css @@ -49,7 +49,7 @@ color: #808080; } .hljs-attr { - color: #f00; + color: #d00; } .hljs-symbol, .hljs-bullet, diff --git a/doc/changelogs/CHANGELOG_V22.md b/doc/changelogs/CHANGELOG_V22.md index d077dca501b914..187069e9749918 100644 --- a/doc/changelogs/CHANGELOG_V22.md +++ b/doc/changelogs/CHANGELOG_V22.md @@ -9,6 +9,7 @@ +22.13.0
    22.12.0
    22.11.0
    @@ -54,6 +55,249 @@ * [io.js](CHANGELOG_IOJS.md) * [Archive](CHANGELOG_ARCHIVE.md) + + +## 2025-01-07, Version 22.13.0 'Jod' (LTS), @ruyadorno + +### Notable Changes + +#### Graduate WebCryptoAPI Ed25519 and X25519 algorithms as stable + +Following the merge of Curve25519 into the [Web Cryptography API Editor's Draft](https://w3c.github.io/webcrypto/) the `Ed25519` and `X25519` algorithm identifiers are now stable and will no longer emit an ExperimentalWarning upon use. + +Contributed by (Filip Skokan) [#56142](https://github.com/nodejs/node/pull/56142) + +#### Other Notable Changes + +* \[[`a35ce92dc6`](https://github.com/nodejs/node/commit/a35ce92dc6)] - **(SEMVER-MINOR)** **assert**: add partialDeepStrictEqual (Giovanni Bucci) [#54630](https://github.com/nodejs/node/pull/54630) +* \[[`91b6cbc541`](https://github.com/nodejs/node/commit/91b6cbc541)] - **(SEMVER-MINOR)** **cli**: implement --trace-env and --trace-env-\[js|native]-stack (Joyee Cheung) [#55604](https://github.com/nodejs/node/pull/55604) +* \[[`8b83253335`](https://github.com/nodejs/node/commit/8b83253335)] - **(SEMVER-MINOR)** **dgram**: support blocklist in udp (theanarkh) [#56087](https://github.com/nodejs/node/pull/56087) +* \[[`4a8f042dfc`](https://github.com/nodejs/node/commit/4a8f042dfc)] - **doc**: stabilize util.styleText (Rafael Gonzaga) [#56265](https://github.com/nodejs/node/pull/56265) +* \[[`016d609147`](https://github.com/nodejs/node/commit/016d609147)] - **doc**: move typescript support to active development (Marco Ippolito) [#55536](https://github.com/nodejs/node/pull/55536) +* \[[`e8f5a5d59c`](https://github.com/nodejs/node/commit/e8f5a5d59c)] - **doc**: add LJHarb to collaborators (Jordan Harband) [#56132](https://github.com/nodejs/node/pull/56132) +* \[[`ba63702dc3`](https://github.com/nodejs/node/commit/ba63702dc3)] - **(SEMVER-MINOR)** **doc**: add report version and history section (Chengzhong Wu) [#56130](https://github.com/nodejs/node/pull/56130) +* \[[`803cc3b0d2`](https://github.com/nodejs/node/commit/803cc3b0d2)] - **(SEMVER-MINOR)** **doc**: sort --report-exclude alphabetically (Rafael Gonzaga) [#55697](https://github.com/nodejs/node/pull/55697) +* \[[`695f582f3c`](https://github.com/nodejs/node/commit/695f582f3c)] - **(SEMVER-MINOR)** **doc,lib,src,test**: unflag sqlite module (Colin Ihrig) [#55788](https://github.com/nodejs/node/pull/55788) +* \[[`c02c17ed35`](https://github.com/nodejs/node/commit/c02c17ed35)] - **(SEMVER-MINOR)** **module**: only emit require(esm) warning under --trace-require-module (Joyee Cheung) [#56194](https://github.com/nodejs/node/pull/56194) +* \[[`7ed5aad275`](https://github.com/nodejs/node/commit/7ed5aad275)] - **(SEMVER-MINOR)** **module**: add module.stripTypeScriptTypes (Marco Ippolito) [#55282](https://github.com/nodejs/node/pull/55282) +* \[[`4f85047aae`](https://github.com/nodejs/node/commit/4f85047aae)] - **(SEMVER-MINOR)** **module**: simplify `findPackageJSON` implementation (Antoine du Hamel) [#55543](https://github.com/nodejs/node/pull/55543) +* \[[`807f34f310`](https://github.com/nodejs/node/commit/807f34f310)] - **(SEMVER-MINOR)** **module**: add `findPackageJSON` util (Jacob Smith) [#55412](https://github.com/nodejs/node/pull/55412) +* \[[`26e76129f2`](https://github.com/nodejs/node/commit/26e76129f2)] - **(SEMVER-MINOR)** **net**: support blocklist in net.connect (theanarkh) [#56075](https://github.com/nodejs/node/pull/56075) +* \[[`a0a5fe8865`](https://github.com/nodejs/node/commit/a0a5fe8865)] - **(SEMVER-MINOR)** **net**: add SocketAddress.parse (James M Snell) [#56076](https://github.com/nodejs/node/pull/56076) +* \[[`92a6c56ea1`](https://github.com/nodejs/node/commit/92a6c56ea1)] - **(SEMVER-MINOR)** **net**: add net.BlockList.isBlockList(value) (James M Snell) [#56078](https://github.com/nodejs/node/pull/56078) +* \[[`5f80f993a4`](https://github.com/nodejs/node/commit/5f80f993a4)] - **(SEMVER-MINOR)** **net**: support blocklist for net.Server (theanarkh) [#56079](https://github.com/nodejs/node/pull/56079) +* \[[`ca417430a1`](https://github.com/nodejs/node/commit/ca417430a1)] - **(SEMVER-MINOR)** **process**: deprecate `features.{ipv6,uv}` and `features.tls_*` (René) [#55545](https://github.com/nodejs/node/pull/55545) +* \[[`4aa768a441`](https://github.com/nodejs/node/commit/4aa768a441)] - **(SEMVER-MINOR)** **report**: fix typos in report keys and bump the version (Yuan-Ming Hsu) [#56068](https://github.com/nodejs/node/pull/56068) +* \[[`b7ce626e9e`](https://github.com/nodejs/node/commit/b7ce626e9e)] - **(SEMVER-MINOR)** **sqlite**: aggregate constants in a single property (Edigleysson Silva (Edy)) [#56213](https://github.com/nodejs/node/pull/56213) +* \[[`30628eb92b`](https://github.com/nodejs/node/commit/30628eb92b)] - **(SEMVER-MINOR)** **sqlite**: add `StatementSync.prototype.iterate` method (tpoisseau) [#54213](https://github.com/nodejs/node/pull/54213) +* \[[`0aee9af183`](https://github.com/nodejs/node/commit/0aee9af183)] - **(SEMVER-MINOR)** **src**: add cli option to preserve env vars on dr (Rafael Gonzaga) [#55697](https://github.com/nodejs/node/pull/55697) +* \[[`c0906aeddc`](https://github.com/nodejs/node/commit/c0906aeddc)] - **(SEMVER-MINOR)** **src,lib**: stabilize permission model (Rafael Gonzaga) [#56201](https://github.com/nodejs/node/pull/56201) +* \[[`5d92a5f156`](https://github.com/nodejs/node/commit/5d92a5f156)] - **(SEMVER-MINOR)** **util**: add sourcemap support to getCallSites (Marco Ippolito) [#55589](https://github.com/nodejs/node/pull/55589) + +### Commits + +* \[[`088d985e3d`](https://github.com/nodejs/node/commit/088d985e3d)] - **assert**: make Maps be partially compared in partialDeepStrictEqual (Giovanni Bucci) [#56195](https://github.com/nodejs/node/pull/56195) +* \[[`e7b9e194c9`](https://github.com/nodejs/node/commit/e7b9e194c9)] - **assert**: make partialDeepStrictEqual work with ArrayBuffers (Giovanni Bucci) [#56098](https://github.com/nodejs/node/pull/56098) +* \[[`2117a70c8f`](https://github.com/nodejs/node/commit/2117a70c8f)] - **assert**: optimize partial comparison of two `Set`s (Antoine du Hamel) [#55970](https://github.com/nodejs/node/pull/55970) +* \[[`a35ce92dc6`](https://github.com/nodejs/node/commit/a35ce92dc6)] - **(SEMVER-MINOR)** **assert**: add partialDeepStrictEqual (Giovanni Bucci) [#54630](https://github.com/nodejs/node/pull/54630) +* \[[`f5c2f679ef`](https://github.com/nodejs/node/commit/f5c2f679ef)] - **buffer**: document concat zero-fill (Duncan) [#55562](https://github.com/nodejs/node/pull/55562) +* \[[`fe468a6719`](https://github.com/nodejs/node/commit/fe468a6719)] - **build**: set DESTCPU correctly for 'make binary' on loongarch64 (吴小白) [#56271](https://github.com/nodejs/node/pull/56271) +* \[[`7621805090`](https://github.com/nodejs/node/commit/7621805090)] - **build**: fix missing fp16 dependency in d8 builds (Joyee Cheung) [#56266](https://github.com/nodejs/node/pull/56266) +* \[[`db9a3b2753`](https://github.com/nodejs/node/commit/db9a3b2753)] - **build**: add major release action (Rafael Gonzaga) [#56199](https://github.com/nodejs/node/pull/56199) +* \[[`626959a949`](https://github.com/nodejs/node/commit/626959a949)] - **build**: fix C string encoding for `PRODUCT_DIR_ABS` (Anna Henningsen) [#56111](https://github.com/nodejs/node/pull/56111) +* \[[`e470079ab5`](https://github.com/nodejs/node/commit/e470079ab5)] - **build**: use variable for simdutf path (Shelley Vohr) [#56196](https://github.com/nodejs/node/pull/56196) +* \[[`cf8a2d9158`](https://github.com/nodejs/node/commit/cf8a2d9158)] - **build**: allow overriding clang usage (Shelley Vohr) [#56016](https://github.com/nodejs/node/pull/56016) +* \[[`6ff40436b2`](https://github.com/nodejs/node/commit/6ff40436b2)] - **build**: remove defaults for create-release-proposal (Rafael Gonzaga) [#56042](https://github.com/nodejs/node/pull/56042) +* \[[`216a785a20`](https://github.com/nodejs/node/commit/216a785a20)] - **build**: set node\_arch to target\_cpu in GN (Shelley Vohr) [#55967](https://github.com/nodejs/node/pull/55967) +* \[[`0a819a68d2`](https://github.com/nodejs/node/commit/0a819a68d2)] - **build**: use variable for crypto dep path (Shelley Vohr) [#55928](https://github.com/nodejs/node/pull/55928) +* \[[`d3b6510d05`](https://github.com/nodejs/node/commit/d3b6510d05)] - **build**: fix GN build for sqlite (Cheng) [#55912](https://github.com/nodejs/node/pull/55912) +* \[[`f557c0e258`](https://github.com/nodejs/node/commit/f557c0e258)] - **build**: compile bundled simdutf conditionally (Jakub Jirutka) [#55886](https://github.com/nodejs/node/pull/55886) +* \[[`e309310b17`](https://github.com/nodejs/node/commit/e309310b17)] - **build**: compile bundled simdjson conditionally (Jakub Jirutka) [#55886](https://github.com/nodejs/node/pull/55886) +* \[[`50faa641c6`](https://github.com/nodejs/node/commit/50faa641c6)] - **build**: compile bundled ada conditionally (Jakub Jirutka) [#55886](https://github.com/nodejs/node/pull/55886) +* \[[`5e4cdd1ffe`](https://github.com/nodejs/node/commit/5e4cdd1ffe)] - **build**: use glob for dependencies of out/Makefile (Richard Lau) [#55789](https://github.com/nodejs/node/pull/55789) +* \[[`91b6cbc541`](https://github.com/nodejs/node/commit/91b6cbc541)] - **(SEMVER-MINOR)** **cli**: implement --trace-env and --trace-env-\[js|native]-stack (Joyee Cheung) [#55604](https://github.com/nodejs/node/pull/55604) +* \[[`6630cccc2f`](https://github.com/nodejs/node/commit/6630cccc2f)] - **crypto**: graduate WebCryptoAPI Ed25519 and X25519 algorithms as stable (Filip Skokan) [#56142](https://github.com/nodejs/node/pull/56142) +* \[[`4f0b4565c4`](https://github.com/nodejs/node/commit/4f0b4565c4)] - **crypto**: ensure CryptoKey usages and algorithm are cached objects (Filip Skokan) [#56108](https://github.com/nodejs/node/pull/56108) +* \[[`bee73f9e6a`](https://github.com/nodejs/node/commit/bee73f9e6a)] - **crypto**: allow non-multiple of 8 in SubtleCrypto.deriveBits (Filip Skokan) [#55296](https://github.com/nodejs/node/pull/55296) +* \[[`ff8852d93a`](https://github.com/nodejs/node/commit/ff8852d93a)] - **deps**: update nghttp3 to 1.6.0 (Node.js GitHub Bot) [#56258](https://github.com/nodejs/node/pull/56258) +* \[[`8fb515e5a6`](https://github.com/nodejs/node/commit/8fb515e5a6)] - **deps**: update simdutf to 5.6.4 (Node.js GitHub Bot) [#56255](https://github.com/nodejs/node/pull/56255) +* \[[`f327d47b35`](https://github.com/nodejs/node/commit/f327d47b35)] - **deps**: update libuv to 1.49.2 (Luigi Pinca) [#56224](https://github.com/nodejs/node/pull/56224) +* \[[`591bf961ca`](https://github.com/nodejs/node/commit/591bf961ca)] - **deps**: update c-ares to v1.34.4 (Node.js GitHub Bot) [#56256](https://github.com/nodejs/node/pull/56256) +* \[[`929df7de5e`](https://github.com/nodejs/node/commit/929df7de5e)] - **deps**: define V8\_PRESERVE\_MOST as no-op on Windows (Stefan Stojanovic) [#56238](https://github.com/nodejs/node/pull/56238) +* \[[`2f594ca6c5`](https://github.com/nodejs/node/commit/2f594ca6c5)] - **deps**: update sqlite to 3.47.2 (Node.js GitHub Bot) [#56178](https://github.com/nodejs/node/pull/56178) +* \[[`fc0e238e7c`](https://github.com/nodejs/node/commit/fc0e238e7c)] - **deps**: update ngtcp2 to 1.9.1 (Node.js GitHub Bot) [#56095](https://github.com/nodejs/node/pull/56095) +* \[[`7073e9772f`](https://github.com/nodejs/node/commit/7073e9772f)] - **deps**: upgrade npm to 10.9.2 (npm team) [#56135](https://github.com/nodejs/node/pull/56135) +* \[[`8f98c5f86c`](https://github.com/nodejs/node/commit/8f98c5f86c)] - **deps**: update sqlite to 3.47.1 (Node.js GitHub Bot) [#56094](https://github.com/nodejs/node/pull/56094) +* \[[`b6ca859e01`](https://github.com/nodejs/node/commit/b6ca859e01)] - **deps**: update zlib to 1.3.0.1-motley-82a5fec (Node.js GitHub Bot) [#55980](https://github.com/nodejs/node/pull/55980) +* \[[`ab693c3e07`](https://github.com/nodejs/node/commit/ab693c3e07)] - **deps**: update corepack to 0.30.0 (Node.js GitHub Bot) [#55977](https://github.com/nodejs/node/pull/55977) +* \[[`935b105953`](https://github.com/nodejs/node/commit/935b105953)] - **deps**: update ngtcp2 to 1.9.0 (Node.js GitHub Bot) [#55975](https://github.com/nodejs/node/pull/55975) +* \[[`7a6b6aba5b`](https://github.com/nodejs/node/commit/7a6b6aba5b)] - **deps**: update simdutf to 5.6.3 (Node.js GitHub Bot) [#55973](https://github.com/nodejs/node/pull/55973) +* \[[`bf78498863`](https://github.com/nodejs/node/commit/bf78498863)] - **deps**: upgrade npm to 10.9.1 (npm team) [#55951](https://github.com/nodejs/node/pull/55951) +* \[[`fac2b7ff54`](https://github.com/nodejs/node/commit/fac2b7ff54)] - **deps**: update zlib to 1.3.0.1-motley-7e2e4d7 (Node.js GitHub Bot) [#54432](https://github.com/nodejs/node/pull/54432) +* \[[`4b96875319`](https://github.com/nodejs/node/commit/4b96875319)] - **deps**: update simdjson to 3.10.1 (Node.js GitHub Bot) [#54678](https://github.com/nodejs/node/pull/54678) +* \[[`0bd1d3f3da`](https://github.com/nodejs/node/commit/0bd1d3f3da)] - **deps**: update simdutf to 5.6.2 (Node.js GitHub Bot) [#55889](https://github.com/nodejs/node/pull/55889) +* \[[`2c406a5b5b`](https://github.com/nodejs/node/commit/2c406a5b5b)] - **(SEMVER-MINOR)** **dgram**: support blocklist in udp (theanarkh) [#56087](https://github.com/nodejs/node/pull/56087) +* \[[`76ba37fe5e`](https://github.com/nodejs/node/commit/76ba37fe5e)] - **dgram**: check udp buffer size to avoid fd leak (theanarkh) [#56084](https://github.com/nodejs/node/pull/56084) +* \[[`c1b78da42c`](https://github.com/nodejs/node/commit/c1b78da42c)] - **doc**: fix color contrast issue in light mode (Rich Trott) [#56272](https://github.com/nodejs/node/pull/56272) +* \[[`753829f14a`](https://github.com/nodejs/node/commit/753829f14a)] - **doc**: stabilize util.styleText (Rafael Gonzaga) [#56265](https://github.com/nodejs/node/pull/56265) +* \[[`d48c57fd1b`](https://github.com/nodejs/node/commit/d48c57fd1b)] - **doc**: clarify util.aborted resource usage (Kunal Kumar) [#55780](https://github.com/nodejs/node/pull/55780) +* \[[`28d3ce386b`](https://github.com/nodejs/node/commit/28d3ce386b)] - **doc**: add esm examples to node:repl (Alfredo González) [#55432](https://github.com/nodejs/node/pull/55432) +* \[[`4676607a6d`](https://github.com/nodejs/node/commit/4676607a6d)] - **doc**: add esm examples to node:readline (Alfredo González) [#55335](https://github.com/nodejs/node/pull/55335) +* \[[`e910c39859`](https://github.com/nodejs/node/commit/e910c39859)] - **doc**: fix 'which' to 'that' and add commas (Selveter Senitro) [#56216](https://github.com/nodejs/node/pull/56216) +* \[[`94a7282655`](https://github.com/nodejs/node/commit/94a7282655)] - **doc**: fix winget config path (Alex Yang) [#56233](https://github.com/nodejs/node/pull/56233) +* \[[`eafbb9f4dc`](https://github.com/nodejs/node/commit/eafbb9f4dc)] - **doc**: add esm examples to node:tls (Alfredo González) [#56229](https://github.com/nodejs/node/pull/56229) +* \[[`dacf91f06d`](https://github.com/nodejs/node/commit/dacf91f06d)] - **doc**: add esm examples to node:perf\_hooks (Alfredo González) [#55257](https://github.com/nodejs/node/pull/55257) +* \[[`e90fbafa75`](https://github.com/nodejs/node/commit/e90fbafa75)] - **doc**: `sea.getRawAsset(key)` always returns an ArrayBuffer (沈鸿飞) [#56206](https://github.com/nodejs/node/pull/56206) +* \[[`632171ab8d`](https://github.com/nodejs/node/commit/632171ab8d)] - **doc**: update announce documentation for releases (Rafael Gonzaga) [#56200](https://github.com/nodejs/node/pull/56200) +* \[[`80fa854ad0`](https://github.com/nodejs/node/commit/80fa854ad0)] - **doc**: update blog link to /vulnerability (Rafael Gonzaga) [#56198](https://github.com/nodejs/node/pull/56198) +* \[[`16b1ad8334`](https://github.com/nodejs/node/commit/16b1ad8334)] - **doc**: call out import.meta is only supported in ES modules (Anton Kastritskii) [#56186](https://github.com/nodejs/node/pull/56186) +* \[[`aac85ebb21`](https://github.com/nodejs/node/commit/aac85ebb21)] - **doc**: add ambassador message - benefits of Node.js (Michael Dawson) [#56085](https://github.com/nodejs/node/pull/56085) +* \[[`ed29f0ae13`](https://github.com/nodejs/node/commit/ed29f0ae13)] - **doc**: fix incorrect link to style guide (Yuan-Ming Hsu) [#56181](https://github.com/nodejs/node/pull/56181) +* \[[`f699375ff9`](https://github.com/nodejs/node/commit/f699375ff9)] - **doc**: fix c++ addon hello world sample (Edigleysson Silva (Edy)) [#56172](https://github.com/nodejs/node/pull/56172) +* \[[`bda545df7e`](https://github.com/nodejs/node/commit/bda545df7e)] - **doc**: update blog release-post link (Ruy Adorno) [#56123](https://github.com/nodejs/node/pull/56123) +* \[[`6101383fc7`](https://github.com/nodejs/node/commit/6101383fc7)] - **doc**: fix module.md headings (Chengzhong Wu) [#56131](https://github.com/nodejs/node/pull/56131) +* \[[`016d609147`](https://github.com/nodejs/node/commit/016d609147)] - **doc**: move typescript support to active development (Marco Ippolito) [#55536](https://github.com/nodejs/node/pull/55536) +* \[[`624568ba76`](https://github.com/nodejs/node/commit/624568ba76)] - **doc**: mention `-a` flag for the release script (Ruy Adorno) [#56124](https://github.com/nodejs/node/pull/56124) +* \[[`e8f5a5d59c`](https://github.com/nodejs/node/commit/e8f5a5d59c)] - **doc**: add LJHarb to collaborators (Jordan Harband) [#56132](https://github.com/nodejs/node/pull/56132) +* \[[`5100350988`](https://github.com/nodejs/node/commit/5100350988)] - **doc**: add create-release-action to process (Rafael Gonzaga) [#55993](https://github.com/nodejs/node/pull/55993) +* \[[`b8ce636bc9`](https://github.com/nodejs/node/commit/b8ce636bc9)] - **doc**: rename file to advocacy-ambassador-program.md (Tobias Nießen) [#56046](https://github.com/nodejs/node/pull/56046) +* \[[`495a624314`](https://github.com/nodejs/node/commit/495a624314)] - **doc**: remove unused import from sample code (Blended Bram) [#55570](https://github.com/nodejs/node/pull/55570) +* \[[`2762c52237`](https://github.com/nodejs/node/commit/2762c52237)] - **doc**: add FAQ to releases section (Rafael Gonzaga) [#55992](https://github.com/nodejs/node/pull/55992) +* \[[`2e6306ab1a`](https://github.com/nodejs/node/commit/2e6306ab1a)] - **doc**: move history entry to class description (Luigi Pinca) [#55991](https://github.com/nodejs/node/pull/55991) +* \[[`3558a19b37`](https://github.com/nodejs/node/commit/3558a19b37)] - **doc**: add history entry for textEncoder.encodeInto() (Luigi Pinca) [#55990](https://github.com/nodejs/node/pull/55990) +* \[[`20e62e3aa3`](https://github.com/nodejs/node/commit/20e62e3aa3)] - **doc**: improve GN build documentation a bit (Shelley Vohr) [#55968](https://github.com/nodejs/node/pull/55968) +* \[[`109cb12c59`](https://github.com/nodejs/node/commit/109cb12c59)] - **doc**: fix deprecation codes (Filip Skokan) [#56018](https://github.com/nodejs/node/pull/56018) +* \[[`717531da77`](https://github.com/nodejs/node/commit/717531da77)] - **doc**: remove confusing and outdated sentence (Luigi Pinca) [#55988](https://github.com/nodejs/node/pull/55988) +* \[[`42ce29ceb9`](https://github.com/nodejs/node/commit/42ce29ceb9)] - **doc**: deprecate passing invalid types in `fs.existsSync` (Carlos Espa) [#55892](https://github.com/nodejs/node/pull/55892) +* \[[`b5181ed094`](https://github.com/nodejs/node/commit/b5181ed094)] - **doc**: add doc for PerformanceObserver.takeRecords() (skyclouds2001) [#55786](https://github.com/nodejs/node/pull/55786) +* \[[`5808a0773f`](https://github.com/nodejs/node/commit/5808a0773f)] - **doc**: add vetted courses to the ambassador benefits (Matteo Collina) [#55934](https://github.com/nodejs/node/pull/55934) +* \[[`15b0ffbac3`](https://github.com/nodejs/node/commit/15b0ffbac3)] - **doc**: order `node:crypto` APIs alphabetically (Julian Gassner) [#55831](https://github.com/nodejs/node/pull/55831) +* \[[`7859172ff6`](https://github.com/nodejs/node/commit/7859172ff6)] - **doc**: doc how to add message for promotion (Michael Dawson) [#55843](https://github.com/nodejs/node/pull/55843) +* \[[`a83378ba3d`](https://github.com/nodejs/node/commit/a83378ba3d)] - **doc**: add esm example for zlib (Leonardo Peixoto) [#55946](https://github.com/nodejs/node/pull/55946) +* \[[`64fdfa953d`](https://github.com/nodejs/node/commit/64fdfa953d)] - **doc**: fix typo (Alex Yang) [#56125](https://github.com/nodejs/node/pull/56125) +* \[[`123aae037c`](https://github.com/nodejs/node/commit/123aae037c)] - **doc**: document approach for building wasm in deps (Michael Dawson) [#55940](https://github.com/nodejs/node/pull/55940) +* \[[`6db9d0d96b`](https://github.com/nodejs/node/commit/6db9d0d96b)] - **doc**: remove RedYetiDev from triagers team (Aviv Keller) [#55947](https://github.com/nodejs/node/pull/55947) +* \[[`41e5cabc99`](https://github.com/nodejs/node/commit/41e5cabc99)] - **doc**: add esm examples to node:timers (Alfredo González) [#55857](https://github.com/nodejs/node/pull/55857) +* \[[`02d4504aa4`](https://github.com/nodejs/node/commit/02d4504aa4)] - **doc**: fix relative path mention in --allow-fs (Rafael Gonzaga) [#55791](https://github.com/nodejs/node/pull/55791) +* \[[`478fe360cf`](https://github.com/nodejs/node/commit/478fe360cf)] - **doc**: include git node release --promote to steps (Rafael Gonzaga) [#55835](https://github.com/nodejs/node/pull/55835) +* \[[`8f91622e64`](https://github.com/nodejs/node/commit/8f91622e64)] - **doc**: add history entry for import assertion removal (Antoine du Hamel) [#55883](https://github.com/nodejs/node/pull/55883) +* \[[`b6bef392d3`](https://github.com/nodejs/node/commit/b6bef392d3)] - **doc**: add a note on console stream behavior (Gireesh Punathil) [#55616](https://github.com/nodejs/node/pull/55616) +* \[[`ba63702dc3`](https://github.com/nodejs/node/commit/ba63702dc3)] - **(SEMVER-MINOR)** **doc**: add report version and history section (Chengzhong Wu) [#55697](https://github.com/nodejs/node/pull/55697) +* \[[`803cc3b0d2`](https://github.com/nodejs/node/commit/803cc3b0d2)] - **(SEMVER-MINOR)** **doc**: sort --report-exclude alphabetically (Rafael Gonzaga) [#55697](https://github.com/nodejs/node/pull/55697) +* \[[`695f582f3c`](https://github.com/nodejs/node/commit/695f582f3c)] - **(SEMVER-MINOR)** **doc,lib,src,test**: unflag sqlite module (Colin Ihrig) [#55890](https://github.com/nodejs/node/pull/55890) +* \[[`9364d7bcbc`](https://github.com/nodejs/node/commit/9364d7bcbc)] - **fs**: make mutating `options` in Callback `readdir()` not affect results (LiviaMedeiros) [#56057](https://github.com/nodejs/node/pull/56057) +* \[[`645056033c`](https://github.com/nodejs/node/commit/645056033c)] - **fs**: make mutating `options` in Promises `readdir()` not affect results (LiviaMedeiros) [#56057](https://github.com/nodejs/node/pull/56057) +* \[[`900edd8e01`](https://github.com/nodejs/node/commit/900edd8e01)] - **fs**: lazily load ReadFileContext (Gürgün Dayıoğlu) [#55998](https://github.com/nodejs/node/pull/55998) +* \[[`437021d680`](https://github.com/nodejs/node/commit/437021d680)] - **fs,win**: fix readdir for named pipe (Hüseyin Açacak) [#56110](https://github.com/nodejs/node/pull/56110) +* \[[`53e0c59cc5`](https://github.com/nodejs/node/commit/53e0c59cc5)] - **http**: add setDefaultHeaders option to http.request (Tim Perry) [#56112](https://github.com/nodejs/node/pull/56112) +* \[[`025f057798`](https://github.com/nodejs/node/commit/025f057798)] - **http**: don't emit error after destroy (Robert Nagy) [#55457](https://github.com/nodejs/node/pull/55457) +* \[[`1b35f72915`](https://github.com/nodejs/node/commit/1b35f72915)] - **http2**: remove duplicate codeblock (Vitaly Aminev) [#55915](https://github.com/nodejs/node/pull/55915) +* \[[`18fb548ad4`](https://github.com/nodejs/node/commit/18fb548ad4)] - **http2**: support ALPNCallback option (ZYSzys) [#56187](https://github.com/nodejs/node/pull/56187) +* \[[`930e26b52f`](https://github.com/nodejs/node/commit/930e26b52f)] - **http2**: fix memory leak caused by premature listener removing (ywave620) [#55966](https://github.com/nodejs/node/pull/55966) +* \[[`72db1842e7`](https://github.com/nodejs/node/commit/72db1842e7)] - **lib**: remove redundant global regexps (Gürgün Dayıoğlu) [#56182](https://github.com/nodejs/node/pull/56182) +* \[[`9d515f306c`](https://github.com/nodejs/node/commit/9d515f306c)] - **lib**: clean up persisted signals when they are settled (Edigleysson Silva (Edy)) [#56001](https://github.com/nodejs/node/pull/56001) +* \[[`9ef8d38bc2`](https://github.com/nodejs/node/commit/9ef8d38bc2)] - **lib**: handle Float16Array in node:v8 serdes (Bartek Iwańczuk) [#55996](https://github.com/nodejs/node/pull/55996) +* \[[`2146841f65`](https://github.com/nodejs/node/commit/2146841f65)] - **lib**: disable default memory leak warning for AbortSignal (Lenz Weber-Tronic) [#55816](https://github.com/nodejs/node/pull/55816) +* \[[`31e5037f6f`](https://github.com/nodejs/node/commit/31e5037f6f)] - **lib**: add validation for options in compileFunction (Taejin Kim) [#56023](https://github.com/nodejs/node/pull/56023) +* \[[`1d6cbc1ba5`](https://github.com/nodejs/node/commit/1d6cbc1ba5)] - **lib**: fix `fs.readdir` recursive async (Rafael Gonzaga) [#56041](https://github.com/nodejs/node/pull/56041) +* \[[`0e36fd9fb5`](https://github.com/nodejs/node/commit/0e36fd9fb5)] - **lib**: avoid excluding symlinks in recursive fs.readdir with filetypes (Juan José) [#55714](https://github.com/nodejs/node/pull/55714) +* \[[`b99b779dec`](https://github.com/nodejs/node/commit/b99b779dec)] - **meta**: bump github/codeql-action from 3.27.0 to 3.27.5 (dependabot\[bot]) [#56103](https://github.com/nodejs/node/pull/56103) +* \[[`809d1a1863`](https://github.com/nodejs/node/commit/809d1a1863)] - **meta**: bump actions/checkout from 4.1.7 to 4.2.2 (dependabot\[bot]) [#56102](https://github.com/nodejs/node/pull/56102) +* \[[`d411a1e029`](https://github.com/nodejs/node/commit/d411a1e029)] - **meta**: bump step-security/harden-runner from 2.10.1 to 2.10.2 (dependabot\[bot]) [#56101](https://github.com/nodejs/node/pull/56101) +* \[[`f38f18d48f`](https://github.com/nodejs/node/commit/f38f18d48f)] - **meta**: bump actions/setup-node from 4.0.3 to 4.1.0 (dependabot\[bot]) [#56100](https://github.com/nodejs/node/pull/56100) +* \[[`ca41980be8`](https://github.com/nodejs/node/commit/ca41980be8)] - **meta**: add releasers as CODEOWNERS to proposal action (Rafael Gonzaga) [#56043](https://github.com/nodejs/node/pull/56043) +* \[[`0a21f300e8`](https://github.com/nodejs/node/commit/0a21f300e8)] - **(SEMVER-MINOR)** **module**: only emit require(esm) warning under --trace-require-module (Joyee Cheung) [#56194](https://github.com/nodejs/node/pull/56194) +* \[[`f99075ad87`](https://github.com/nodejs/node/commit/f99075ad87)] - **module**: prevent main thread exiting before esm worker ends (Shima Ryuhei) [#56183](https://github.com/nodejs/node/pull/56183) +* \[[`7ed5aad275`](https://github.com/nodejs/node/commit/7ed5aad275)] - **(SEMVER-MINOR)** **module**: add module.stripTypeScriptTypes (Marco Ippolito) [#55282](https://github.com/nodejs/node/pull/55282) +* \[[`420d599bf0`](https://github.com/nodejs/node/commit/420d599bf0)] - **module**: simplify ts under node\_modules check (Marco Ippolito) [#55440](https://github.com/nodejs/node/pull/55440) +* \[[`e8ca1717b4`](https://github.com/nodejs/node/commit/e8ca1717b4)] - **module**: mark evaluation rejection in require(esm) as handled (Joyee Cheung) [#56122](https://github.com/nodejs/node/pull/56122) +* \[[`4f85047aae`](https://github.com/nodejs/node/commit/4f85047aae)] - **(SEMVER-MINOR)** **module**: simplify `findPackageJSON` implementation (Antoine du Hamel) [#55412](https://github.com/nodejs/node/pull/55412) +* \[[`807f34f310`](https://github.com/nodejs/node/commit/807f34f310)] - **(SEMVER-MINOR)** **module**: add `findPackageJSON` util (Jacob Smith) [#55412](https://github.com/nodejs/node/pull/55412) +* \[[`26e76129f2`](https://github.com/nodejs/node/commit/26e76129f2)] - **(SEMVER-MINOR)** **net**: support blocklist in net.connect (theanarkh) [#56075](https://github.com/nodejs/node/pull/56075) +* \[[`a0a5fe8865`](https://github.com/nodejs/node/commit/a0a5fe8865)] - **(SEMVER-MINOR)** **net**: add SocketAddress.parse (James M Snell) [#56076](https://github.com/nodejs/node/pull/56076) +* \[[`92a6c56ea1`](https://github.com/nodejs/node/commit/92a6c56ea1)] - **(SEMVER-MINOR)** **net**: add net.BlockList.isBlockList(value) (James M Snell) [#56078](https://github.com/nodejs/node/pull/56078) +* \[[`5f80f993a4`](https://github.com/nodejs/node/commit/5f80f993a4)] - **(SEMVER-MINOR)** **net**: support blocklist for net.Server (theanarkh) [#56079](https://github.com/nodejs/node/pull/56079) +* \[[`3742b75675`](https://github.com/nodejs/node/commit/3742b75675)] - **node-api**: allow napi\_delete\_reference in finalizers (Chengzhong Wu) [#55620](https://github.com/nodejs/node/pull/55620) +* \[[`aa6b0e8bf4`](https://github.com/nodejs/node/commit/aa6b0e8bf4)] - **permission**: ignore internalModuleStat on module loading (Rafael Gonzaga) [#55797](https://github.com/nodejs/node/pull/55797) +* \[[`ca417430a1`](https://github.com/nodejs/node/commit/ca417430a1)] - **(SEMVER-MINOR)** **process**: deprecate `features.{ipv6,uv}` and `features.tls_*` (René) [#55545](https://github.com/nodejs/node/pull/55545) +* \[[`cfcce15573`](https://github.com/nodejs/node/commit/cfcce15573)] - **quic**: update more QUIC implementation (James M Snell) [#55986](https://github.com/nodejs/node/pull/55986) +* \[[`9a6a51c039`](https://github.com/nodejs/node/commit/9a6a51c039)] - **quic**: multiple updates to quic impl (James M Snell) [#55971](https://github.com/nodejs/node/pull/55971) +* \[[`2d06c8b660`](https://github.com/nodejs/node/commit/2d06c8b660)] - **(SEMVER-MINOR)** **report**: fix typos in report keys and bump the version (Yuan-Ming Hsu) [#56068](https://github.com/nodejs/node/pull/56068) +* \[[`d1809c054f`](https://github.com/nodejs/node/commit/d1809c054f)] - **sea**: only assert snapshot main function for main threads (Joyee Cheung) [#56120](https://github.com/nodejs/node/pull/56120) +* \[[`d4aabcf7c8`](https://github.com/nodejs/node/commit/d4aabcf7c8)] - **(SEMVER-MINOR)** **sqlite**: aggregate constants in a single property (Edigleysson Silva (Edy)) [#56213](https://github.com/nodejs/node/pull/56213) +* \[[`86e4b1ce97`](https://github.com/nodejs/node/commit/86e4b1ce97)] - **sqlite**: add support for custom functions (Colin Ihrig) [#55985](https://github.com/nodejs/node/pull/55985) +* \[[`2f2316a55d`](https://github.com/nodejs/node/commit/2f2316a55d)] - **sqlite**: support `db.loadExtension` (Alex Yang) [#53900](https://github.com/nodejs/node/pull/53900) +* \[[`535317e27d`](https://github.com/nodejs/node/commit/535317e27d)] - **sqlite**: deps include `sqlite3ext.h` (Alex Yang) [#56010](https://github.com/nodejs/node/pull/56010) +* \[[`30628eb92b`](https://github.com/nodejs/node/commit/30628eb92b)] - **(SEMVER-MINOR)** **sqlite**: add `StatementSync.prototype.iterate` method (tpoisseau) [#54213](https://github.com/nodejs/node/pull/54213) +* \[[`06ad4ac008`](https://github.com/nodejs/node/commit/06ad4ac008)] - **src**: fix outdated js2c.cc references (Chengzhong Wu) [#56133](https://github.com/nodejs/node/pull/56133) +* \[[`a37d75901f`](https://github.com/nodejs/node/commit/a37d75901f)] - **src**: use spaceship operator in SocketAddress (James M Snell) [#56059](https://github.com/nodejs/node/pull/56059) +* \[[`21704d0d5c`](https://github.com/nodejs/node/commit/21704d0d5c)] - **src**: add missing qualifiers to env.cc (Yagiz Nizipli) [#56062](https://github.com/nodejs/node/pull/56062) +* \[[`ab350595ea`](https://github.com/nodejs/node/commit/ab350595ea)] - **src**: use std::string\_view for process emit fns (Yagiz Nizipli) [#56086](https://github.com/nodejs/node/pull/56086) +* \[[`4f4756e7b4`](https://github.com/nodejs/node/commit/4f4756e7b4)] - **src**: remove dead code in async\_wrap (Gerhard Stöbich) [#56065](https://github.com/nodejs/node/pull/56065) +* \[[`f759aa47cf`](https://github.com/nodejs/node/commit/f759aa47cf)] - **src**: avoid copy on getV8FastApiCallCount (Yagiz Nizipli) [#56081](https://github.com/nodejs/node/pull/56081) +* \[[`b5ddedbcad`](https://github.com/nodejs/node/commit/b5ddedbcad)] - **src**: fix check fd (theanarkh) [#56000](https://github.com/nodejs/node/pull/56000) +* \[[`009bf6f5f1`](https://github.com/nodejs/node/commit/009bf6f5f1)] - **src**: safely remove the last line from dotenv (Shima Ryuhei) [#55982](https://github.com/nodejs/node/pull/55982) +* \[[`bdae1b258a`](https://github.com/nodejs/node/commit/bdae1b258a)] - **src**: fix kill signal on Windows (Hüseyin Açacak) [#55514](https://github.com/nodejs/node/pull/55514) +* \[[`0aee9af183`](https://github.com/nodejs/node/commit/0aee9af183)] - **(SEMVER-MINOR)** **src**: add cli option to preserve env vars on dr (Rafael Gonzaga) [#55697](https://github.com/nodejs/node/pull/55697) +* \[[`0b1b6fdd68`](https://github.com/nodejs/node/commit/0b1b6fdd68)] - **src,build**: add no user defined deduction guides of CTAD check (Chengzhong Wu) [#56071](https://github.com/nodejs/node/pull/56071) +* \[[`428a9d8576`](https://github.com/nodejs/node/commit/428a9d8576)] - **(SEMVER-MINOR)** **src,lib**: stabilize permission model (Rafael Gonzaga) [#56201](https://github.com/nodejs/node/pull/56201) +* \[[`1691e62301`](https://github.com/nodejs/node/commit/1691e62301)] - **stream**: commit pull-into descriptors after filling from queue (Mattias Buelens) [#56072](https://github.com/nodejs/node/pull/56072) +* \[[`3fec5e31e8`](https://github.com/nodejs/node/commit/3fec5e31e8)] - **test**: remove test-sqlite-statement-sync flaky designation (Luigi Pinca) [#56051](https://github.com/nodejs/node/pull/56051) +* \[[`6a12323659`](https://github.com/nodejs/node/commit/6a12323659)] - **test**: use --permission over --experimental-permission (Rafael Gonzaga) [#56239](https://github.com/nodejs/node/pull/56239) +* \[[`43fa7eff99`](https://github.com/nodejs/node/commit/43fa7eff99)] - **test**: remove exludes for sea tests on PPC (Michael Dawson) [#56217](https://github.com/nodejs/node/pull/56217) +* \[[`3fc6e2eeb0`](https://github.com/nodejs/node/commit/3fc6e2eeb0)] - **test**: fix test-abortsignal-drop-settled-signals flakiness (Edigleysson Silva (Edy)) [#56197](https://github.com/nodejs/node/pull/56197) +* \[[`ad4275b69a`](https://github.com/nodejs/node/commit/ad4275b69a)] - **test**: move localizationd data from `test-icu-env` to external file (Livia Medeiros) [#55618](https://github.com/nodejs/node/pull/55618) +* \[[`668f6a901b`](https://github.com/nodejs/node/commit/668f6a901b)] - **test**: update WPT for url to 6fa3fe8a92 (Node.js GitHub Bot) [#56136](https://github.com/nodejs/node/pull/56136) +* \[[`e241de375e`](https://github.com/nodejs/node/commit/e241de375e)] - **test**: remove `hasOpenSSL3x` utils (Antoine du Hamel) [#56164](https://github.com/nodejs/node/pull/56164) +* \[[`b6d8925096`](https://github.com/nodejs/node/commit/b6d8925096)] - **test**: update streams wpt (Mattias Buelens) [#56072](https://github.com/nodejs/node/pull/56072) +* \[[`985248047e`](https://github.com/nodejs/node/commit/985248047e)] - **test**: remove test-fs-utimes flaky designation (Luigi Pinca) [#56052](https://github.com/nodejs/node/pull/56052) +* \[[`c794234bcf`](https://github.com/nodejs/node/commit/c794234bcf)] - **test**: ensure `cli.md` is in alphabetical order (Antoine du Hamel) [#56025](https://github.com/nodejs/node/pull/56025) +* \[[`f97ecefc0d`](https://github.com/nodejs/node/commit/f97ecefc0d)] - **test**: update WPT for WebCryptoAPI to 3e3374efde (Node.js GitHub Bot) [#56093](https://github.com/nodejs/node/pull/56093) +* \[[`6effe48ef0`](https://github.com/nodejs/node/commit/6effe48ef0)] - **test**: update WPT for WebCryptoAPI to 76dfa54e5d (Node.js GitHub Bot) [#56093](https://github.com/nodejs/node/pull/56093) +* \[[`f4615da6f0`](https://github.com/nodejs/node/commit/f4615da6f0)] - **test**: move test-worker-arraybuffer-zerofill to parallel (Luigi Pinca) [#56053](https://github.com/nodejs/node/pull/56053) +* \[[`afc7fb300b`](https://github.com/nodejs/node/commit/afc7fb300b)] - **test**: update WPT for url to 67880a4eb83ca9aa732eec4b35a1971ff5bf37ff (Node.js GitHub Bot) [#55999](https://github.com/nodejs/node/pull/55999) +* \[[`3c1b0dcf40`](https://github.com/nodejs/node/commit/3c1b0dcf40)] - **test**: make HTTP/1.0 connection test more robust (Arne Keller) [#55959](https://github.com/nodejs/node/pull/55959) +* \[[`d1b7404e88`](https://github.com/nodejs/node/commit/d1b7404e88)] - **test**: convert readdir test to use test runner (Thomas Chetwin) [#55750](https://github.com/nodejs/node/pull/55750) +* \[[`2f3298a032`](https://github.com/nodejs/node/commit/2f3298a032)] - **test**: make x509 crypto tests work with BoringSSL (Shelley Vohr) [#55927](https://github.com/nodejs/node/pull/55927) +* \[[`12ebb56b57`](https://github.com/nodejs/node/commit/12ebb56b57)] - **test**: fix determining lower priority (Livia Medeiros) [#55908](https://github.com/nodejs/node/pull/55908) +* \[[`8807e55327`](https://github.com/nodejs/node/commit/8807e55327)] - **test,crypto**: update WebCryptoAPI WPT (Filip Skokan) [#55997](https://github.com/nodejs/node/pull/55997) +* \[[`14c35bb351`](https://github.com/nodejs/node/commit/14c35bb351)] - **test\_runner**: refactor Promise chain in run() (Colin Ihrig) [#55958](https://github.com/nodejs/node/pull/55958) +* \[[`09234cc6f2`](https://github.com/nodejs/node/commit/09234cc6f2)] - **test\_runner**: refactor build Promise in Suite() (Colin Ihrig) [#55958](https://github.com/nodejs/node/pull/55958) +* \[[`7608306222`](https://github.com/nodejs/node/commit/7608306222)] - **test\_runner**: simplify hook running logic (Colin Ihrig) [#55963](https://github.com/nodejs/node/pull/55963) +* \[[`802601ca33`](https://github.com/nodejs/node/commit/802601ca33)] - **test\_runner**: mark context.plan() as stable (Colin Ihrig) [#55895](https://github.com/nodejs/node/pull/55895) +* \[[`618d451b70`](https://github.com/nodejs/node/commit/618d451b70)] - **test\_runner**: mark snapshot testing as stable (Colin Ihrig) [#55897](https://github.com/nodejs/node/pull/55897) +* \[[`a5702a0c86`](https://github.com/nodejs/node/commit/a5702a0c86)] - **tools**: fix `node:` enforcement for docs (Antoine du Hamel) [#56284](https://github.com/nodejs/node/pull/56284) +* \[[`318bab427b`](https://github.com/nodejs/node/commit/318bab427b)] - **tools**: update github\_reporter to 1.7.2 (Node.js GitHub Bot) [#56205](https://github.com/nodejs/node/pull/56205) +* \[[`0b317c97ae`](https://github.com/nodejs/node/commit/0b317c97ae)] - **tools**: add REPLACEME check to workflow (Mert Can Altin) [#56251](https://github.com/nodejs/node/pull/56251) +* \[[`cdb15d9b7d`](https://github.com/nodejs/node/commit/cdb15d9b7d)] - **tools**: use `github.actor` instead of bot username for release proposals (Antoine du Hamel) [#56232](https://github.com/nodejs/node/pull/56232) +* \[[`b40439989f`](https://github.com/nodejs/node/commit/b40439989f)] - _**Revert**_ "**tools**: disable automated libuv updates" (Luigi Pinca) [#56223](https://github.com/nodejs/node/pull/56223) +* \[[`a5fa657d80`](https://github.com/nodejs/node/commit/a5fa657d80)] - **tools**: update gyp-next to 0.19.1 (Anna Henningsen) [#56111](https://github.com/nodejs/node/pull/56111) +* \[[`94a6564605`](https://github.com/nodejs/node/commit/94a6564605)] - **tools**: fix release proposal linter to support more than 1 folk preparing (Antoine du Hamel) [#56203](https://github.com/nodejs/node/pull/56203) +* \[[`4e5e3562e6`](https://github.com/nodejs/node/commit/4e5e3562e6)] - **tools**: enable linter for `tools/icu/**` (Livia Medeiros) [#56176](https://github.com/nodejs/node/pull/56176) +* \[[`379fc403a0`](https://github.com/nodejs/node/commit/379fc403a0)] - **tools**: use commit title as PR title when creating release proposal (Antoine du Hamel) [#56165](https://github.com/nodejs/node/pull/56165) +* \[[`f7e9bbb2a7`](https://github.com/nodejs/node/commit/f7e9bbb2a7)] - **tools**: update gyp-next to 0.19.0 (Node.js GitHub Bot) [#56158](https://github.com/nodejs/node/pull/56158) +* \[[`35feb6cec6`](https://github.com/nodejs/node/commit/35feb6cec6)] - **tools**: bump the eslint group in /tools/eslint with 4 updates (dependabot\[bot]) [#56099](https://github.com/nodejs/node/pull/56099) +* \[[`cc861ad476`](https://github.com/nodejs/node/commit/cc861ad476)] - **tools**: improve release proposal PR opening (Antoine du Hamel) [#56161](https://github.com/nodejs/node/pull/56161) +* \[[`eab17d4b03`](https://github.com/nodejs/node/commit/eab17d4b03)] - **tools**: update `create-release-proposal` workflow (Antoine du Hamel) [#56054](https://github.com/nodejs/node/pull/56054) +* \[[`3195af27f1`](https://github.com/nodejs/node/commit/3195af27f1)] - **tools**: fix update-undici script (Michaël Zasso) [#56069](https://github.com/nodejs/node/pull/56069) +* \[[`beb0ba4b43`](https://github.com/nodejs/node/commit/beb0ba4b43)] - **tools**: allow dispatch of `tools.yml` from forks (Antoine du Hamel) [#56008](https://github.com/nodejs/node/pull/56008) +* \[[`9812e90090`](https://github.com/nodejs/node/commit/9812e90090)] - **tools**: fix nghttp3 updater script (Antoine du Hamel) [#56007](https://github.com/nodejs/node/pull/56007) +* \[[`51b29ccc34`](https://github.com/nodejs/node/commit/51b29ccc34)] - **tools**: filter release keys to reduce interactivity (Antoine du Hamel) [#55950](https://github.com/nodejs/node/pull/55950) +* \[[`03118a7685`](https://github.com/nodejs/node/commit/03118a7685)] - **tools**: update WPT updater (Antoine du Hamel) [#56003](https://github.com/nodejs/node/pull/56003) +* \[[`9f59af9813`](https://github.com/nodejs/node/commit/9f59af9813)] - **tools**: add WPT updater for specific subsystems (Mert Can Altin) [#54460](https://github.com/nodejs/node/pull/54460) +* \[[`ad2060fb2a`](https://github.com/nodejs/node/commit/ad2060fb2a)] - **tools**: use tokenless Codecov uploads (Michaël Zasso) [#55943](https://github.com/nodejs/node/pull/55943) +* \[[`94105c092f`](https://github.com/nodejs/node/commit/94105c092f)] - **tools**: lint js in `doc/**/*.md` (Livia Medeiros) [#55904](https://github.com/nodejs/node/pull/55904) +* \[[`267c8f8aca`](https://github.com/nodejs/node/commit/267c8f8aca)] - **tools**: add linter for release commit proposals (Antoine du Hamel) [#55923](https://github.com/nodejs/node/pull/55923) +* \[[`d9284ac214`](https://github.com/nodejs/node/commit/d9284ac214)] - **tools**: fix riscv64 build failed (Lu Yahan) [#52888](https://github.com/nodejs/node/pull/52888) +* \[[`5826993564`](https://github.com/nodejs/node/commit/5826993564)] - **tools**: bump cross-spawn from 7.0.3 to 7.0.5 in /tools/eslint (dependabot\[bot]) [#55894](https://github.com/nodejs/node/pull/55894) +* \[[`fbc2830b97`](https://github.com/nodejs/node/commit/fbc2830b97)] - **util**: harden more built-in classes against prototype pollution (Antoine du Hamel) [#56225](https://github.com/nodejs/node/pull/56225) +* \[[`597a78ecff`](https://github.com/nodejs/node/commit/597a78ecff)] - **util**: fix Latin1 decoding to return string output (Mert Can Altin) [#56222](https://github.com/nodejs/node/pull/56222) +* \[[`c365263019`](https://github.com/nodejs/node/commit/c365263019)] - **util**: do not rely on mutable `Object` and `Function`' `constructor` prop (Antoine du Hamel) [#56188](https://github.com/nodejs/node/pull/56188) +* \[[`99eddecd64`](https://github.com/nodejs/node/commit/99eddecd64)] - **util**: add fast path for Latin1 decoding (Mert Can Altin) [#55275](https://github.com/nodejs/node/pull/55275) +* \[[`5d92a5f156`](https://github.com/nodejs/node/commit/5d92a5f156)] - **(SEMVER-MINOR)** **util**: add sourcemap support to getCallSites (Marco Ippolito) [#55589](https://github.com/nodejs/node/pull/55589) +* \[[`3920a5cff6`](https://github.com/nodejs/node/commit/3920a5cff6)] - **v8,tools**: expose experimental wasm revectorize feature (Yolanda-Chen) [#54896](https://github.com/nodejs/node/pull/54896) +* \[[`1d865b2196`](https://github.com/nodejs/node/commit/1d865b2196)] - **worker**: fix crash when a worker joins after exit (Stephen Belanger) [#56191](https://github.com/nodejs/node/pull/56191) + ## 2024-12-03, Version 22.12.0 'Jod' (LTS), @ruyadorno @@ -79,11 +323,11 @@ When a `Buffer` is created using a resizable `ArrayBuffer`, the `Buffer` length ```js const ab = new ArrayBuffer(10, { maxByteLength: 20 }); const buffer = Buffer.from(ab); -console.log(buffer.byteLength); 10 +console.log(buffer.byteLength); // 10 ab.resize(15); -console.log(buffer.byteLength); 15 +console.log(buffer.byteLength); // 15 ab.resize(5); -console.log(buffer.byteLength); 5 +console.log(buffer.byteLength); // 5 ``` Contributed by James Snell in [#55377](https://github.com/nodejs/node/pull/55377) diff --git a/doc/contributing/advocacy-ambasador-program.md b/doc/contributing/advocacy-ambassador-program.md similarity index 58% rename from doc/contributing/advocacy-ambasador-program.md rename to doc/contributing/advocacy-ambassador-program.md index 3b518d092e1feb..31d8fd58a1a4bf 100644 --- a/doc/contributing/advocacy-ambasador-program.md +++ b/doc/contributing/advocacy-ambassador-program.md @@ -18,6 +18,10 @@ The ambassador program does that by: messages and topics defined. * Advocating for ambassadors to be part of the OpenJS speakers bureau, even if the ambassador is not otherwise an active member of the project itself. +* Each ambassador could add a maximum of three links to resources to learn Node.js + on a dedicated page on the main Node.js website. At least one of those must be a + free resource. The Node.js TSC members could ask for coupon codes to verify the + material if they so decide. ## Ambassadors nominations @@ -96,3 +100,68 @@ an ambassador. These requests can be made through the existing social channel in the OpenJS Slack. For that reason and for communication purposes and collaboration opportunities, ambassadors should be members of the [OpenJS Slack](https://slack-invite.openjsf.org/). + +## Messages and topics to promote + +### How to add messages or topics to promote + +Messages or topics that ambassadors are asked to promote are added to this +document in the [Current messages for promotion](#current-messages-for-promotion) +section through the standard PR process except that they should be open +for 7 days before landing and should include and at mention to the +nodejs/TSC for awareness. They should be removed through the same process +when no longer relevant. + +### Current messages for promotion + +#### Sample message (Leave this one at the top) + +##### Goal + +The goal is to raise awareness of XYZ in the JavaScript ecosystem. + +#### Related Links + +List of links with more information about the topic to provide brackground +or the information to be shared. + +#### Project contacts + +Add a list of GitHub handles for those within the project that +have volunteered to be contacated when necessary by ambassadors +to get more info about the message to be promoted. + +#### Node.js is a great choice for a JavaScript runtime + +##### Goal + +Highlight the benefits of chosing Node.js as your backend JavaScript runtime. Focus on what is great +about Node.js without drawing comparisons to alternatives. We don't want to say negative things about +other options, only highlight what is great about Node.js as a choice. + +Some of the things to highlight include: + +* How widely it is used (you never get fired for chosing Node.js). +* The openess of the project. It is part of the OpenJS Foundation and it's governance is set up to avoid + any one company from dominating the project. Decisions are made by the collaborators (of which there are quite + a few) versus a small number of people. +* It has predictable and stable releases and has delivered on the release schedule since 2015. +* It was a well defined security release process and manages security releases well. +* As the defacto standard, it has the highest likelihood of being supported for a given package on npm. +* It is not dependent on any one company for its continued existence reducing risk of using it. +* The large number of platforms supported. +* Asynchronous non-blocking i/o architecture drives high transactional throughput, making it ideal for web workloads. +* Single threaded programming model enables very low resource consumption, making it ideal for containerised workloads. +* Highly vibrant ecosystem with enterprise support from many vendors. + +#### Related Links + +* +* +* +* + for slide usage and topping recent surveys. + +#### Project contacts + +* @mhdawson diff --git a/doc/contributing/gn-build.md b/doc/contributing/gn-build.md index 0981472805a4f8..9054ed5e7d41c4 100644 --- a/doc/contributing/gn-build.md +++ b/doc/contributing/gn-build.md @@ -28,27 +28,33 @@ Node.js contains following GN build files: Unlike GYP, the GN tool does not include any built-in rules for compiling a project, which means projects building with GN must provide their own build -configurations for things like how to invoke a C++ compiler. Chromium related -projects like V8 and skia choose to reuse Chromium's build configurations, and -V8's Node.js integration testing repository -([node-ci](https://chromium.googlesource.com/v8/node-ci/)) can be reused for -building Node.js. +configurations for things like how to invoke a C++ compiler. + +Chromium related projects like V8 and skia choose to reuse Chromium's build +configurations, and V8's Node.js integration testing repository +[`node-ci`][node-ci] can be reused for building Node.js. ### 1. Install `depot_tools` -The `depot_tools` is a set of tools used by Chromium related projects for -checking out code and managing dependencies, and since this guide is reusing the -infra of V8, it needs to be installed and added to `PATH`: +You'll need to install [`depot_tools`][depot-tools] the toolset +used for fetching Chromium and its dependencies. ```bash git clone https://chromium.googlesource.com/chromium/tools/depot_tools.git export PATH=/path/to/depot_tools:$PATH ``` -You can also follow the [official tutorial of -`depot_tools`](https://commondatastorage.googleapis.com/chrome-infra-docs/flat/depot_tools/docs/html/depot_tools_tutorial.html). +You can ensure `depot_tools` is correctly added to your PATH by running +`which gn` and confirming that it returns `/path/to/depot_tools/gn`. + +**NOTE:** On Windows you'll also need to set the environment variable +`DEPOT_TOOLS_WIN_TOOLCHAIN=0`. To do so, open `Control Panel` → `System and +Security` → `System` → `Advanced system settings` and add a system variable +`DEPOT_TOOLS_WIN_TOOLCHAIN` with value `0`. This tells `depot_tools` to use +your locally installed version of Visual Studio (by default, `depot_tools` will +try to download a Google-internal version that only Googlers have access to). -### 2. Check out code of Node.js +### 2. Checkout Node.js Source Code To check out the latest main branch of Node.js for building, use the `fetch` tool from `depot_tools`: @@ -91,9 +97,9 @@ out at `node_gn/node/node`. ### 3. Build -GN only supports [`ninja`](https://ninja-build.org) for building, so to build -Node.js with GN, `ninja` build files should be generated first, and then -`ninja` can be invoked to do the building. +GN only supports [`ninja`](https://ninja-build.org) for building. To build +Node.js with GN you'll first need to generate `ninja` build files and then invoke +`ninja` to perform the build. The `node-ci` repository provides a script for calling GN: @@ -103,9 +109,10 @@ cd node # Enter `node_gn/node` which contains a node-ci checkout ``` which writes `ninja` build files into the `out/Release` directory under -`node_gn/node`. +`node_gn/node`. To see all possible configurable options, run +`tools/gn-gen.py --help`. -And then you can execute `ninja`: +When `gn-gen.py` has executed successfully, you can then execute `ninja`: ```bash ninja -C out/Release node @@ -116,10 +123,12 @@ After the build is completed, the compiled Node.js executable can be found in ## Status of the GN build -Currently the GN build of Node.js is not fully functioning. It builds for macOS -and Linux, while the Windows build is still a work in progress. And some tests -are still failing with the GN build. +Currently the GN build of Node.js is not fully functioning. Some tests +are still failing with the GN build, and there may be other small pitfall +for certain configuration options. + +An effort is currently underway to make GN build work without using `depot_tools`, +which is tracked in [#51689](https://github.com/nodejs/node/issues/51689). -There are also efforts on making GN build work without using `depot_tools`, -which is tracked in the issue -[#51689](https://github.com/nodejs/node/issues/51689). +[depot-tools]: https://commondatastorage.googleapis.com/chrome-infra-docs/flat/depot_tools/docs/html/depot_tools_tutorial.html#_setting_up +[node-ci]: https://chromium.googlesource.com/v8/node-ci diff --git a/doc/contributing/maintaining/maintaining-dependencies.md b/doc/contributing/maintaining/maintaining-dependencies.md index 8ebbd3f7bb8924..f5d9c69c41a664 100644 --- a/doc/contributing/maintaining/maintaining-dependencies.md +++ b/doc/contributing/maintaining/maintaining-dependencies.md @@ -145,6 +145,48 @@ can be added as a non-externalizable dependency. In this case simply add the path to the JavaScript file in the `deps_files` list in the `node.gyp` file. +## Common approach for dependencies with WASM components + +WASM components within dependencies are most often built +outside of the regular Node.js `make build` step. They also +require different tools. + +It is important that the tools and their versions used to build +WASM components shipped within Node.js are well documented and +be available if needed to rebuild/update older Node.js versions. + +In order to minimize the different number of tools and versions +used to build WASM components and to document and ensure future +availability, the project builds and maintains a common +[wasm-builder](https://github.com/nodejs/wasm-builder) container +that should be use to build WASM components in Node.js +dependencies. + +The container provides a durable copy of the versions of the tools +used for a specific build which are under the control of the Node.js +project. In addition, the tools and verions are documented through metadata +within the container in the `/home/node/metadata directory`. + +The available tools can be found by looking at the current version of the +[Dockerfile](https://github.com/nodejs/wasm-builder/blob/main/container-build-info/Dockerfile) +used to create the container. + +If additional WASM tool are needed beyond those available in the +container, additions should be PR'd into the wasm-builder container. + +Examples of using the container include: + +* [build/wasm.js](https://github.com/nodejs/undici/blob/main/build/wasm.js) from undici +* [tools/build-wasm.js](https://github.com/nodejs/amaro/blob/main/tools/build-wasm.js) from amaro + +In addition to using the container to build WASM components, the goal is also +for the WASM components and final files that are shipped with Node.js to be +built by the [dep-updaters](https://github.com/nodejs/node/tree/main/tools/dep_updaters) +that are run on a regular basis and that they use only the files available in the Node.js +repo for the dependency. For example, being able to rebuild the WASM and files that +we ship in Node.js using only the files in +[../deps/undici](https://github.com/nodejs/node/tree/main/deps/undici). + ## Updating dependencies Most dependencies are automatically updated by diff --git a/doc/contributing/pull-requests.md b/doc/contributing/pull-requests.md index 295e9d3695c47e..2ad538b3fd8e29 100644 --- a/doc/contributing/pull-requests.md +++ b/doc/contributing/pull-requests.md @@ -122,7 +122,7 @@ If you are modifying code, please be sure to run `make lint` (or code style guide. Any documentation you write (including code comments and API documentation) -should follow the [Style Guide](../../README.md). Code samples +should follow the [Style Guide](../../doc/README.md). Code samples included in the API docs will also be checked when running `make lint` (or `vcbuild.bat lint` on Windows). If you are adding to or deprecating an API, add or change the appropriate YAML documentation. Use `REPLACEME` for the diff --git a/doc/contributing/releases.md b/doc/contributing/releases.md index 82eec2833998ad..b3b20b8ae5589e 100644 --- a/doc/contributing/releases.md +++ b/doc/contributing/releases.md @@ -282,7 +282,15 @@ You can integrate the PRs into the proposal without running full CI. ### 2. Create a new branch for the release -⚠️ At this point, you can either run `git node release --prepare`: +> \[!TIP] Once the staging branch is up-to-date you can use the +> [`create-release-proposal`][] action to generate the proposal. + +```bash +gh workflow run "Create Release Proposal" -f release-line=N -f release-date=YYYY-MM-DD +``` + +If you prefer to run it locally you can either run +`git node release --prepare`: ```bash git node release -S --prepare x.y.z @@ -707,12 +715,23 @@ the build before moving forward. Use the following list as a baseline: ### 11. Tag and sign the release commit -Once you have produced builds that you're happy with, create a new tag. By -waiting until this stage to create tags, you can discard a proposed release if -something goes wrong or additional commits are required. Once you have created a -tag and pushed it to GitHub, you _**must not**_ delete and re-tag. If you make -a mistake after tagging then you'll have to version-bump and start again and -count that tag/version as lost. +Once you have produced builds that you're happy with you can either run +`git node release --promote` + +```bash +git node release -S --promote https://github.com/nodejs/node/pull/XXXX +``` + +to automate the remaining steps until step 16 or you can perform it manually +following the below steps. + +*** + +Create a new tag: By waiting until this stage to create tags, you can discard +a proposed release if something goes wrong or additional commits are required. +Once you have created a tag and pushed it to GitHub, you _**must not**_ delete +and re-tag. If you make a mistake after tagging then you'll have to version-bump +and start again and count that tag/version as lost. Tag summaries have a predictable format. Look at a recent tag to see: @@ -947,6 +966,13 @@ a `NODEJS_RELEASE_HOST` environment variable: NODEJS_RELEASE_HOST=proxy.xyz ./tools/release.sh ``` +In case `gpg` is unable to autoselect a key, you can retry using the +`-a` option to enable an interactive interface: + +```bash +./tools/release.sh -a +``` + > \[!TIP] > Sometimes, due to machines being overloaded or other external factors, > the files at , @@ -1064,20 +1090,18 @@ This script will use the promoted builds and changelog to generate the post. Run ### 19. Announce The nodejs.org website will automatically rebuild and include the new version. -To announce the build on Twitter through the official @nodejs account, email - with a message such as: +To announce the build on social media, please ping the @nodejs-social-team +on offical slack channel. + +Node.js is also available on Bluesky and a release announcement can be +reposted using [nodejs/bluesky](https://github.com/nodejs/bluesky) repository. + +The post content can be as simple as: > v5.8.0 of @nodejs is out: > … > something here about notable changes -To ensure communication goes out with the timing of the blog post, please allow -24 hour prior notice. If known, please include the date and time the release -will be shared with the community in the email to coordinate these -announcements. - -Ping the IRC ops and the other [Partner Communities][] liaisons. -
    Security release @@ -1372,12 +1396,48 @@ Infrastructure team is able to perform the switch of the default. An issue should be opened on the [Node.js Snap management repository][] requesting this take place once a new LTS line has been released. +## FAQ + +Due to how `tools/release.sh` work, it isn't uncommon to face some errors +during the promotion process as it depends on network communication and machine +availability. This section aims to guide the releaser through potential +failures. + +### Error on dist-indexer while promoting + +```bash +node:events:491 + throw er; // Unhandled 'error' event + ^ + +Error: read ECONNRESET + at TLSWrap.onStreamRead (node:internal/stream_base_commons:217:20) +Emitted 'error' event on DestroyableTransform instance at: + at ClientRequest. (/usr/lib/node_modules/nodejs-dist-indexer/node_modules/hyperquest/index.js:14:19) + at ClientRequest.emit (node:events:513:28) + at TLSSocket.socketErrorListener (node:_http_client:494:9) + at TLSSocket.emit (node:events:513:28) + at emitErrorNT (node:internal/streams/destroy:157:8) + at emitErrorCloseNT (node:internal/streams/destroy:122:3) + at processTicksAndRejections (node:internal/process/task_queues:83:21) { + errno: -104, + code: 'ECONNRESET', + syscall: 'read' +} +``` + +Typical resolution: sign the release again. + +```bash +./tools/release.sh -s vX.Y.Z +``` + [Build issue tracker]: https://github.com/nodejs/build/issues/new [CI lockdown procedure]: https://github.com/nodejs/build/blob/HEAD/doc/jenkins-guide.md#restricting-access-for-security-releases [Node.js Snap management repository]: https://github.com/nodejs/snap -[Partner Communities]: https://github.com/nodejs/community-committee/blob/HEAD/governance/PARTNER_COMMUNITIES.md [Snap]: https://snapcraft.io/node +[`create-release-proposal`]: https://github.com/nodejs/node/actions/workflows/create-release-proposal.yml [build-infra team]: https://github.com/orgs/nodejs/teams/build-infra [expected assets]: https://github.com/nodejs/build/tree/HEAD/ansible/www-standalone/tools/promote/expected_assets -[nodejs.org release-post.js script]: https://github.com/nodejs/nodejs.org/blob/HEAD/scripts/release-post/index.mjs +[nodejs.org release-post.js script]: https://github.com/nodejs/nodejs.org/blob/HEAD/apps/site/scripts/release-post/index.mjs [nodejs.org repository]: https://github.com/nodejs/nodejs.org diff --git a/doc/contributing/technical-priorities.md b/doc/contributing/technical-priorities.md index 9e566f12ae6750..68ac6f8dd0d00a 100644 --- a/doc/contributing/technical-priorities.md +++ b/doc/contributing/technical-priorities.md @@ -21,11 +21,11 @@ on October 1st 2022. _Present in: 2021_ -Base HTTP support is a key component of modern cloud-native applications +Base HTTP support is a key component of modern cloud-native applications, and built-in support was part of what made Node.js a success in the first 10 years. The current implementation is hard to support and a common source of vulnerabilities. We must work towards an -implementation which is easier to support and makes it easier to integrate +implementation that is easier to support and makes it easier to integrate the new HTTP versions (HTTP3, QUIC) and to support efficient implementations of different versions concurrently. @@ -96,7 +96,7 @@ supported tools to implement those processes (logging, metrics and tracing). This includes support within the Node.js runtime itself (for example generating heap dumps, performance metrics, etc.) as well as support for applications on top of the runtime. In addition, it is also important to -clearly document the use cases, problem determination methods and best +clearly document the use cases, problem determination methods, and best practices for those tools. ## Better multithreaded support diff --git a/doc/contributing/writing-and-running-benchmarks.md b/doc/contributing/writing-and-running-benchmarks.md index 3d16c7d14fc033..63fbf75c798833 100644 --- a/doc/contributing/writing-and-running-benchmarks.md +++ b/doc/contributing/writing-and-running-benchmarks.md @@ -538,7 +538,7 @@ The arguments of `createBenchmark` are: source: ['buffer', 'string'], len: [2048], n: [50, 2048], - } + }, }, { byGroups: true }); ``` diff --git a/doc/eslint.config_partial.mjs b/doc/eslint.config_partial.mjs index edcc72930875b0..609c38c26e9336 100644 --- a/doc/eslint.config_partial.mjs +++ b/doc/eslint.config_partial.mjs @@ -3,7 +3,9 @@ import { noRestrictedSyntaxCommonLib, requireEslintTool, } from '../tools/eslint/eslint.config_utils.mjs'; -import { builtinModules as builtin } from 'node:module'; +import { builtinModules } from 'node:module'; + +const builtin = builtinModules.filter((name) => !name.startsWith('node:')); const globals = requireEslintTool('globals'); diff --git a/doc/node.1 b/doc/node.1 index 46829cc3b2b0de..9f534746ef9d9c 100644 --- a/doc/node.1 +++ b/doc/node.1 @@ -176,15 +176,12 @@ Specify the .Ar module to use as a custom module loader. . -.It Fl -experimental-permission -Enable the experimental permission model. +.It Fl -permission +Enable the permission model. . .It Fl -experimental-shadow-realm Use this flag to enable ShadowRealm support. . -.It Fl -experimental-sqlite -Enable the experimental node:sqlite module. -. .It Fl -experimental-test-coverage Enable code coverage in the test runner. . @@ -194,9 +191,6 @@ Configures the type of test isolation used in the test runner. .It Fl -experimental-test-module-mocks Enable module mocking in the test runner. . -.It Fl -experimental-test-snapshots -Enable snapshot testing in the test runner. -. .It Fl -experimental-strip-types Enable experimental type-stripping for TypeScript files. . @@ -224,6 +218,9 @@ Disable exposition of the Web Crypto API on the global scope. .It Fl -no-experimental-repl-await Disable top-level await keyword support in REPL. . +.It Fl -no-experimental-sqlite +Disable the experimental node:sqlite module. +. .It Fl -experimental-vm-modules Enable experimental ES module support in VM module. . diff --git a/eslint.config.mjs b/eslint.config.mjs index f37774bace871d..2e8dd65a1b3733 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -43,16 +43,13 @@ export default [ '**/node_modules/**', 'benchmark/fixtures/**', 'benchmark/tmp/**', - 'doc/**/*.js', 'doc/changelogs/CHANGELOG_V1*.md', - '!doc/api_assets/*.js', '!doc/changelogs/CHANGELOG_V18.md', 'lib/punycode.js', 'test/.tmp.*/**', 'test/addons/??_*', 'test/fixtures/**', 'tools/github_reporter/**', - 'tools/icu/**', ], }, // #endregion diff --git a/lib/_http_client.js b/lib/_http_client.js index 91ba264339fa4f..00b59f357fa45d 100644 --- a/lib/_http_client.js +++ b/lib/_http_client.js @@ -199,7 +199,13 @@ function ClientRequest(input, options, cb) { const host = optsWithoutSignal.host = validateHost(options.hostname, 'hostname') || validateHost(options.host, 'host') || 'localhost'; - const setHost = (options.setHost === undefined || Boolean(options.setHost)); + const setHost = options.setHost !== undefined ? + Boolean(options.setHost) : + options.setDefaultHeaders !== false; + + this._removedConnection = options.setDefaultHeaders === false; + this._removedContLen = options.setDefaultHeaders === false; + this._removedTE = options.setDefaultHeaders === false; this.socketPath = options.socketPath; diff --git a/lib/_http_outgoing.js b/lib/_http_outgoing.js index 7a402a0f900f30..23b850d1522c97 100644 --- a/lib/_http_outgoing.js +++ b/lib/_http_outgoing.js @@ -908,6 +908,10 @@ OutgoingMessage.prototype.write = function write(chunk, encoding, callback) { }; function onError(msg, err, callback) { + if (msg.destroyed) { + return; + } + const triggerAsyncId = msg.socket ? msg.socket[async_id_symbol] : undefined; defaultTriggerAsyncIdScope(triggerAsyncId, process.nextTick, @@ -919,7 +923,7 @@ function onError(msg, err, callback) { function emitErrorNt(msg, err, callback) { callback(err); - if (typeof msg.emit === 'function' && !msg._closed) { + if (typeof msg.emit === 'function' && !msg.destroyed) { msg.emit('error', err); } } diff --git a/lib/assert.js b/lib/assert.js index 2a65457b39387e..a2991a096ac081 100644 --- a/lib/assert.js +++ b/lib/assert.js @@ -21,22 +21,44 @@ 'use strict'; const { + ArrayBufferIsView, + ArrayBufferPrototypeGetByteLength, + ArrayFrom, + ArrayIsArray, ArrayPrototypeIndexOf, ArrayPrototypeJoin, ArrayPrototypePush, ArrayPrototypeSlice, + DataViewPrototypeGetBuffer, + DataViewPrototypeGetByteLength, + DataViewPrototypeGetByteOffset, Error, + FunctionPrototypeCall, + MapPrototypeGet, + MapPrototypeGetSize, + MapPrototypeHas, NumberIsNaN, ObjectAssign, ObjectIs, ObjectKeys, ObjectPrototypeIsPrototypeOf, + ObjectPrototypeToString, ReflectApply, + ReflectHas, + ReflectOwnKeys, RegExpPrototypeExec, + SafeArrayIterator, + SafeMap, + SafeSet, + SafeWeakSet, + SetPrototypeGetSize, String, StringPrototypeIndexOf, StringPrototypeSlice, StringPrototypeSplit, + SymbolIterator, + TypedArrayPrototypeGetLength, + Uint8Array, } = primordials; const { @@ -50,8 +72,22 @@ const { } = require('internal/errors'); const AssertionError = require('internal/assert/assertion_error'); const { inspect } = require('internal/util/inspect'); -const { isPromise, isRegExp } = require('internal/util/types'); -const { isError, deprecate } = require('internal/util'); +const { Buffer } = require('buffer'); +const { + isArrayBuffer, + isDataView, + isKeyObject, + isPromise, + isRegExp, + isMap, + isSet, + isDate, + isWeakSet, + isWeakMap, + isSharedArrayBuffer, + isAnyArrayBuffer, +} = require('internal/util/types'); +const { isError, deprecate, emitExperimentalWarning } = require('internal/util'); const { innerOk } = require('internal/assert/utils'); const CallTracker = require('internal/assert/calltracker'); @@ -341,6 +377,277 @@ assert.notStrictEqual = function notStrictEqual(actual, expected, message) { } }; +function isSpecial(obj) { + return obj == null || typeof obj !== 'object' || isError(obj) || isRegExp(obj) || isDate(obj); +} + +const typesToCallDeepStrictEqualWith = [ + isKeyObject, isWeakSet, isWeakMap, Buffer.isBuffer, isSharedArrayBuffer, +]; + +function partiallyCompareMaps(actual, expected, comparedObjects) { + if (MapPrototypeGetSize(expected) > MapPrototypeGetSize(actual)) { + return false; + } + + comparedObjects ??= new SafeWeakSet(); + const expectedIterator = FunctionPrototypeCall(SafeMap.prototype[SymbolIterator], expected); + + for (const { 0: key, 1: expectedValue } of expectedIterator) { + if (!MapPrototypeHas(actual, key)) { + return false; + } + + const actualValue = MapPrototypeGet(actual, key); + + if (!compareBranch(actualValue, expectedValue, comparedObjects)) { + return false; + } + } + + return true; +} + +function partiallyCompareArrayBuffersOrViews(actual, expected) { + let actualView, expectedView, expectedViewLength; + + if (!ArrayBufferIsView(actual)) { + let actualViewLength; + + if (isArrayBuffer(actual) && isArrayBuffer(expected)) { + actualViewLength = ArrayBufferPrototypeGetByteLength(actual); + expectedViewLength = ArrayBufferPrototypeGetByteLength(expected); + } else if (isSharedArrayBuffer(actual) && isSharedArrayBuffer(expected)) { + actualViewLength = actual.byteLength; + expectedViewLength = expected.byteLength; + } else { + // Cannot compare ArrayBuffers with SharedArrayBuffers + return false; + } + + if (expectedViewLength > actualViewLength) { + return false; + } + actualView = new Uint8Array(actual); + expectedView = new Uint8Array(expected); + + } else if (isDataView(actual)) { + if (!isDataView(expected)) { + return false; + } + const actualByteLength = DataViewPrototypeGetByteLength(actual); + expectedViewLength = DataViewPrototypeGetByteLength(expected); + if (expectedViewLength > actualByteLength) { + return false; + } + + actualView = new Uint8Array( + DataViewPrototypeGetBuffer(actual), + DataViewPrototypeGetByteOffset(actual), + actualByteLength, + ); + expectedView = new Uint8Array( + DataViewPrototypeGetBuffer(expected), + DataViewPrototypeGetByteOffset(expected), + expectedViewLength, + ); + } else { + if (ObjectPrototypeToString(actual) !== ObjectPrototypeToString(expected)) { + return false; + } + actualView = actual; + expectedView = expected; + expectedViewLength = TypedArrayPrototypeGetLength(expected); + + if (expectedViewLength > TypedArrayPrototypeGetLength(actual)) { + return false; + } + } + + for (let i = 0; i < expectedViewLength; i++) { + if (actualView[i] !== expectedView[i]) { + return false; + } + } + + return true; +} + +function partiallyCompareSets(actual, expected, comparedObjects) { + if (SetPrototypeGetSize(expected) > SetPrototypeGetSize(actual)) { + return false; // `expected` can't be a subset if it has more elements + } + + if (isDeepEqual === undefined) lazyLoadComparison(); + + const actualArray = ArrayFrom(FunctionPrototypeCall(SafeSet.prototype[SymbolIterator], actual)); + const expectedIterator = FunctionPrototypeCall(SafeSet.prototype[SymbolIterator], expected); + const usedIndices = new SafeSet(); + + expectedIteration: for (const expectedItem of expectedIterator) { + for (let actualIdx = 0; actualIdx < actualArray.length; actualIdx++) { + if (!usedIndices.has(actualIdx) && isDeepStrictEqual(actualArray[actualIdx], expectedItem)) { + usedIndices.add(actualIdx); + continue expectedIteration; + } + } + return false; + } + + return true; +} + +function partiallyCompareArrays(actual, expected, comparedObjects) { + if (expected.length > actual.length) { + return false; + } + + if (isDeepEqual === undefined) lazyLoadComparison(); + + // Create a map to count occurrences of each element in the expected array + const expectedCounts = new SafeMap(); + for (const expectedItem of expected) { + let found = false; + for (const { 0: key, 1: count } of expectedCounts) { + if (isDeepStrictEqual(key, expectedItem)) { + expectedCounts.set(key, count + 1); + found = true; + break; + } + } + if (!found) { + expectedCounts.set(expectedItem, 1); + } + } + + const safeActual = new SafeArrayIterator(actual); + + // Create a map to count occurrences of relevant elements in the actual array + for (const actualItem of safeActual) { + for (const { 0: key, 1: count } of expectedCounts) { + if (isDeepStrictEqual(key, actualItem)) { + if (count === 1) { + expectedCounts.delete(key); + } else { + expectedCounts.set(key, count - 1); + } + break; + } + } + } + + const { size } = expectedCounts; + expectedCounts.clear(); + return size === 0; +} + +/** + * Compares two objects or values recursively to check if they are equal. + * @param {any} actual - The actual value to compare. + * @param {any} expected - The expected value to compare. + * @param {Set} [comparedObjects=new Set()] - Set to track compared objects for handling circular references. + * @returns {boolean} - Returns `true` if the actual value matches the expected value, otherwise `false`. + * @example + * compareBranch({a: 1, b: 2, c: 3}, {a: 1, b: 2}); // true + */ +function compareBranch( + actual, + expected, + comparedObjects, +) { + // Check for Map object equality + if (isMap(actual) && isMap(expected)) { + return partiallyCompareMaps(actual, expected, comparedObjects); + } + + if ( + ArrayBufferIsView(actual) || + isAnyArrayBuffer(actual) || + ArrayBufferIsView(expected) || + isAnyArrayBuffer(expected) + ) { + return partiallyCompareArrayBuffersOrViews(actual, expected); + } + + for (const type of typesToCallDeepStrictEqualWith) { + if (type(actual) || type(expected)) { + if (isDeepStrictEqual === undefined) lazyLoadComparison(); + return isDeepStrictEqual(actual, expected); + } + } + + // Check for Set object equality + if (isSet(actual) && isSet(expected)) { + return partiallyCompareSets(actual, expected, comparedObjects); + } + + // Check if expected array is a subset of actual array + if (ArrayIsArray(actual) && ArrayIsArray(expected)) { + return partiallyCompareArrays(actual, expected, comparedObjects); + } + + // Comparison done when at least one of the values is not an object + if (isSpecial(actual) || isSpecial(expected)) { + if (isDeepEqual === undefined) { + lazyLoadComparison(); + } + return isDeepStrictEqual(actual, expected); + } + + // Use Reflect.ownKeys() instead of Object.keys() to include symbol properties + const keysExpected = ReflectOwnKeys(expected); + + comparedObjects ??= new SafeWeakSet(); + + // Handle circular references + if (comparedObjects.has(actual)) { + return true; + } + comparedObjects.add(actual); + + // Check if all expected keys and values match + for (let i = 0; i < keysExpected.length; i++) { + const key = keysExpected[i]; + assert( + ReflectHas(actual, key), + new AssertionError({ message: `Expected key ${String(key)} not found in actual object` }), + ); + if (!compareBranch(actual[key], expected[key], comparedObjects)) { + return false; + } + } + + return true; +} + +/** + * The strict equivalence assertion test between two objects + * @param {any} actual + * @param {any} expected + * @param {string | Error} [message] + * @returns {void} + */ +assert.partialDeepStrictEqual = function partialDeepStrictEqual( + actual, + expected, + message, +) { + emitExperimentalWarning('assert.partialDeepStrictEqual'); + if (arguments.length < 2) { + throw new ERR_MISSING_ARGS('actual', 'expected'); + } + + if (!compareBranch(actual, expected)) { + innerFail({ + actual, + expected, + message, + operator: 'partialDeepStrictEqual', + stackStartFn: partialDeepStrictEqual, + }); + } +}; + class Comparison { constructor(obj, keys, actual) { for (const key of keys) { diff --git a/lib/buffer.js b/lib/buffer.js index 756657e910893e..8f235e5f0dae6c 100644 --- a/lib/buffer.js +++ b/lib/buffer.js @@ -35,6 +35,7 @@ const { NumberMIN_SAFE_INTEGER, ObjectDefineProperties, ObjectDefineProperty, + ObjectPrototypeHasOwnProperty, ObjectSetPrototypeOf, RegExpPrototypeSymbolReplace, StringPrototypeCharCodeAt, @@ -910,7 +911,14 @@ Buffer.prototype[customInspectSymbol] = function inspect(recurseTimes, ctx) { }), 27, -2); } } - return `<${this.constructor.name} ${str}>`; + let constructorName = 'Buffer'; + try { + const { constructor } = this; + if (typeof constructor === 'function' && ObjectPrototypeHasOwnProperty(constructor, 'name')) { + constructorName = constructor.name; + } + } catch { /* Ignore error and use default name */ } + return `<${constructorName} ${str}>`; }; Buffer.prototype.inspect = Buffer.prototype[customInspectSymbol]; diff --git a/lib/dgram.js b/lib/dgram.js index 2f184887181a22..44f4e4298ad6fd 100644 --- a/lib/dgram.js +++ b/lib/dgram.js @@ -39,6 +39,7 @@ const { ERR_BUFFER_OUT_OF_BOUNDS, ERR_INVALID_ARG_TYPE, ERR_INVALID_FD_TYPE, + ERR_IP_BLOCKED, ERR_MISSING_ARGS, ERR_SOCKET_ALREADY_BOUND, ERR_SOCKET_BAD_BUFFER_SIZE, @@ -53,12 +54,14 @@ const { _createSocketHandle, newHandle, } = require('internal/dgram'); +const { isIP } = require('internal/net'); const { isInt32, validateAbortSignal, validateString, validateNumber, validatePort, + validateUint32, } = require('internal/validators'); const { Buffer } = require('buffer'); const { deprecate, guessHandleType, promisify, SymbolAsyncDispose, SymbolDispose } = require('internal/util'); @@ -96,20 +99,44 @@ let _cluster = null; function lazyLoadCluster() { return _cluster ??= require('cluster'); } +let _blockList = null; +function lazyLoadBlockList() { + return _blockList ??= require('internal/blocklist').BlockList; +} function Socket(type, listener) { FunctionPrototypeCall(EventEmitter, this); let lookup; let recvBufferSize; let sendBufferSize; + let receiveBlockList; + let sendBlockList; let options; if (type !== null && typeof type === 'object') { options = type; type = options.type; lookup = options.lookup; + if (options.recvBufferSize) { + validateUint32(options.recvBufferSize, 'options.recvBufferSize'); + } + if (options.sendBufferSize) { + validateUint32(options.sendBufferSize, 'options.sendBufferSize'); + } recvBufferSize = options.recvBufferSize; sendBufferSize = options.sendBufferSize; + if (options.receiveBlockList) { + if (!lazyLoadBlockList().isBlockList(options.receiveBlockList)) { + throw new ERR_INVALID_ARG_TYPE('options.receiveBlockList', 'net.BlockList', options.receiveBlockList); + } + receiveBlockList = options.receiveBlockList; + } + if (options.sendBlockList) { + if (!lazyLoadBlockList().isBlockList(options.sendBlockList)) { + throw new ERR_INVALID_ARG_TYPE('options.sendBlockList', 'net.BlockList', options.sendBlockList); + } + sendBlockList = options.sendBlockList; + } } const handle = newHandle(type, lookup); @@ -132,6 +159,8 @@ function Socket(type, listener) { ipv6Only: options?.ipv6Only, recvBufferSize, sendBufferSize, + receiveBlockList, + sendBlockList, }; if (options?.signal !== undefined) { @@ -430,7 +459,9 @@ function doConnect(ex, self, ip, address, port, callback) { const state = self[kStateSymbol]; if (!state.handle) return; - + if (!ex && state.sendBlockList?.check(ip, `ipv${isIP(ip)}`)) { + ex = new ERR_IP_BLOCKED(ip); + } if (!ex) { const err = state.handle.connect(ip, port); if (err) { @@ -694,6 +725,13 @@ function doSend(ex, self, ip, list, address, port, callback) { return; } + if (ip && state.sendBlockList?.check(ip, `ipv${isIP(ip)}`)) { + if (callback) { + process.nextTick(callback, new ERR_IP_BLOCKED(ip)); + } + return; + } + const req = new SendWrap(); req.list = list; // Keep reference alive. req.address = address; @@ -942,6 +980,10 @@ function onMessage(nread, handle, buf, rinfo) { if (nread < 0) { return self.emit('error', new ErrnoException(nread, 'recvmsg')); } + if (self[kStateSymbol]?.receiveBlockList?.check(rinfo.address, + rinfo.family?.toLocaleLowerCase())) { + return; + } rinfo.size = buf.length; // compatibility self.emit('message', buf, rinfo); } diff --git a/lib/eslint.config_partial.mjs b/lib/eslint.config_partial.mjs index 4535ee36f5c88a..0a295f71eaff9a 100644 --- a/lib/eslint.config_partial.mjs +++ b/lib/eslint.config_partial.mjs @@ -343,6 +343,11 @@ export default [ name: 'SubtleCrypto', message: "Use `const { SubtleCrypto } = require('internal/crypto/webcrypto');` instead of the global.", }, + // Float16Array is not available in primordials because it's only available with --js-float16array CLI flag. + { + name: 'Float16Array', + message: 'Use `const { Float16Array } = globalThis;` instead of the global.', + }, ], 'no-restricted-modules': [ 'error', diff --git a/lib/fs.js b/lib/fs.js index b759c8e124bb45..c1df53f6aa9785 100644 --- a/lib/fs.js +++ b/lib/fs.js @@ -162,6 +162,7 @@ let WriteStream; let rimraf; let rimrafSync; let kResistStopPropagation; +let ReadFileContext; // These have to be separate because of how graceful-fs happens to do it's // monkeypatching. @@ -363,7 +364,7 @@ function readFile(path, options, callback) { callback ||= options; validateFunction(callback, 'cb'); options = getOptions(options, { flag: 'r' }); - const ReadFileContext = require('internal/fs/read/context'); + ReadFileContext ??= require('internal/fs/read/context'); const context = new ReadFileContext(callback, options.encoding); context.isUserFd = isFd(path); // File descriptor ownership @@ -1371,6 +1372,102 @@ function mkdirSync(path, options) { } } +/* + * An recursive algorithm for reading the entire contents of the `basePath` directory. + * This function does not validate `basePath` as a directory. It is passed directly to + * `binding.readdir`. + * @param {string} basePath + * @param {{ encoding: string, withFileTypes: boolean }} options + * @param {( + * err?: Error, + * files?: string[] | Buffer[] | Dirent[] + * ) => any} callback + * @returns {void} +*/ +function readdirRecursive(basePath, options, callback) { + const context = { + withFileTypes: Boolean(options.withFileTypes), + encoding: options.encoding, + basePath, + readdirResults: [], + pathsQueue: [basePath], + }; + + let i = 0; + + function read(path) { + const req = new FSReqCallback(); + req.oncomplete = (err, result) => { + if (err) { + callback(err); + return; + } + + if (result === undefined) { + callback(null, context.readdirResults); + return; + } + + processReaddirResult({ + result, + currentPath: path, + context, + }); + + if (i < context.pathsQueue.length) { + read(context.pathsQueue[i++]); + } else { + callback(null, context.readdirResults); + } + }; + + binding.readdir( + path, + context.encoding, + context.withFileTypes, + req, + ); + } + + read(context.pathsQueue[i++]); +} + +// Calling `readdir` with `withFileTypes=true`, the result is an array of arrays. +// The first array is the names, and the second array is the types. +// They are guaranteed to be the same length; hence, setting `length` to the length +// of the first array within the result. +const processReaddirResult = (args) => (args.context.withFileTypes ? handleDirents(args) : handleFilePaths(args)); + +function handleDirents({ result, currentPath, context }) { + const { 0: names, 1: types } = result; + const { length } = names; + + for (let i = 0; i < length; i++) { + // Avoid excluding symlinks, as they are not directories. + // Refs: https://github.com/nodejs/node/issues/52663 + const fullPath = pathModule.join(currentPath, names[i]); + const dirent = getDirent(currentPath, names[i], types[i]); + ArrayPrototypePush(context.readdirResults, dirent); + + if (dirent.isDirectory() || binding.internalModuleStat(binding, fullPath) === 1) { + ArrayPrototypePush(context.pathsQueue, fullPath); + } + } +} + +function handleFilePaths({ result, currentPath, context }) { + for (let i = 0; i < result.length; i++) { + const resultPath = pathModule.join(currentPath, result[i]); + const relativeResultPath = pathModule.relative(context.basePath, resultPath); + const stat = binding.internalModuleStat(binding, resultPath); + ArrayPrototypePush(context.readdirResults, relativeResultPath); + + if (stat === 1) { + ArrayPrototypePush(context.pathsQueue, resultPath); + } + } +} + /** * An iterative algorithm for reading the entire contents of the `basePath` directory. * This function does not validate `basePath` as a directory. It is passed directly to @@ -1380,55 +1477,37 @@ function mkdirSync(path, options) { * @returns {string[] | Dirent[]} */ function readdirSyncRecursive(basePath, options) { - const withFileTypes = Boolean(options.withFileTypes); - const encoding = options.encoding; - - const readdirResults = []; - const pathsQueue = [basePath]; + const context = { + withFileTypes: Boolean(options.withFileTypes), + encoding: options.encoding, + basePath, + readdirResults: [], + pathsQueue: [basePath], + }; function read(path) { const readdirResult = binding.readdir( path, - encoding, - withFileTypes, + context.encoding, + context.withFileTypes, ); if (readdirResult === undefined) { return; } - if (withFileTypes) { - // Calling `readdir` with `withFileTypes=true`, the result is an array of arrays. - // The first array is the names, and the second array is the types. - // They are guaranteed to be the same length; hence, setting `length` to the length - // of the first array within the result. - const length = readdirResult[0].length; - for (let i = 0; i < length; i++) { - const dirent = getDirent(path, readdirResult[0][i], readdirResult[1][i]); - ArrayPrototypePush(readdirResults, dirent); - if (dirent.isDirectory()) { - ArrayPrototypePush(pathsQueue, pathModule.join(dirent.parentPath, dirent.name)); - } - } - } else { - for (let i = 0; i < readdirResult.length; i++) { - const resultPath = pathModule.join(path, readdirResult[i]); - const relativeResultPath = pathModule.relative(basePath, resultPath); - const stat = binding.internalModuleStat(binding, resultPath); - ArrayPrototypePush(readdirResults, relativeResultPath); - // 1 indicates directory - if (stat === 1) { - ArrayPrototypePush(pathsQueue, resultPath); - } - } - } + processReaddirResult({ + result: readdirResult, + currentPath: path, + context, + }); } - for (let i = 0; i < pathsQueue.length; i++) { - read(pathsQueue[i]); + for (let i = 0; i < context.pathsQueue.length; i++) { + read(context.pathsQueue[i]); } - return readdirResults; + return context.readdirResults; } /** @@ -1454,7 +1533,10 @@ function readdir(path, options, callback) { } if (options.recursive) { - callback(null, readdirSyncRecursive(path, options)); + // Make shallow copy to prevent mutating options from affecting results + options = copyObject(options); + + readdirRecursive(path, options, callback); return; } diff --git a/lib/internal/abort_controller.js b/lib/internal/abort_controller.js index cfcef5201c07b1..8ef3e6a79a6f5c 100644 --- a/lib/internal/abort_controller.js +++ b/lib/internal/abort_controller.js @@ -28,6 +28,7 @@ const { kResistStopPropagation, kWeakHandler, } = require('internal/event_target'); +const { kMaxEventTargetListeners } = require('events'); const { customInspectSymbol, kEmptyObject, @@ -96,8 +97,21 @@ const dependantSignalsCleanupRegistry = new SafeFinalizationRegistry((signalWeak } }); }); + const gcPersistentSignals = new SafeSet(); +const sourceSignalsCleanupRegistry = new SafeFinalizationRegistry(({ sourceSignalRef, composedSignalRef }) => { + const composedSignal = composedSignalRef.deref(); + if (composedSignal !== undefined) { + composedSignal[kSourceSignals].delete(sourceSignalRef); + + if (composedSignal[kSourceSignals].size === 0) { + // This signal will no longer abort. There's no need to keep it in the gcPersistentSignals set. + gcPersistentSignals.delete(composedSignal); + } + } +}); + const kAborted = Symbol('kAborted'); const kReason = Symbol('kReason'); const kCloneData = Symbol('kCloneData'); @@ -165,6 +179,7 @@ class AbortSignal extends EventTarget { } super(); + this[kMaxEventTargetListeners] = 0; const { aborted = false, reason = undefined, @@ -259,6 +274,10 @@ class AbortSignal extends EventTarget { resultSignal[kSourceSignals].add(signalWeakRef); signal[kDependantSignals].add(resultSignalWeakRef); dependantSignalsCleanupRegistry.register(resultSignal, signalWeakRef); + sourceSignalsCleanupRegistry.register(signal, { + sourceSignalRef: signalWeakRef, + composedSignalRef: resultSignalWeakRef, + }); } else if (!signal[kSourceSignals]) { continue; } else { @@ -276,6 +295,10 @@ class AbortSignal extends EventTarget { resultSignal[kSourceSignals].add(sourceSignalWeakRef); sourceSignal[kDependantSignals].add(resultSignalWeakRef); dependantSignalsCleanupRegistry.register(resultSignal, sourceSignalWeakRef); + sourceSignalsCleanupRegistry.register(signal, { + sourceSignalRef: sourceSignalWeakRef, + composedSignalRef: resultSignalWeakRef, + }); } } } @@ -432,6 +455,7 @@ class AbortController { */ get signal() { this.#signal ??= new AbortSignal(kDontThrowSymbol); + return this.#signal; } diff --git a/lib/internal/blocklist.js b/lib/internal/blocklist.js index d4eb35c9a70cb8..a992a6b8f0703c 100644 --- a/lib/internal/blocklist.js +++ b/lib/internal/blocklist.js @@ -43,6 +43,15 @@ class BlockList { this[kHandle][owner_symbol] = this; } + /** + * Returns true if the value is a BlockList + * @param {any} value + * @returns {boolean} + */ + static isBlockList(value) { + return value?.[kHandle] !== undefined; + } + [kInspect](depth, options) { if (depth < 0) return this; diff --git a/lib/internal/crypto/diffiehellman.js b/lib/internal/crypto/diffiehellman.js index 410759414f635d..9ebd9766f6bc8c 100644 --- a/lib/internal/crypto/diffiehellman.js +++ b/lib/internal/crypto/diffiehellman.js @@ -5,6 +5,7 @@ const { MathCeil, ObjectDefineProperty, SafeSet, + Uint8Array, } = primordials; const { Buffer } = require('buffer'); @@ -295,6 +296,8 @@ function diffieHellman(options) { return statelessDH(privateKey[kHandle], publicKey[kHandle]); } +let masks; + // The ecdhDeriveBits function is part of the Web Crypto API and serves both // deriveKeys and deriveBits functions. async function ecdhDeriveBits(algorithm, baseKey, length) { @@ -341,18 +344,25 @@ async function ecdhDeriveBits(algorithm, baseKey, length) { // If the length is not a multiple of 8 the nearest ceiled // multiple of 8 is sliced. - length = MathCeil(length / 8); - const { byteLength } = bits; + const sliceLength = MathCeil(length / 8); + const { byteLength } = bits; // If the length is larger than the derived secret, throw. - // Otherwise, we either return the secret or a truncated - // slice. - if (byteLength < length) + if (byteLength < sliceLength) throw lazyDOMException('derived bit length is too small', 'OperationError'); - return length === byteLength ? - bits : - ArrayBufferPrototypeSlice(bits, 0, length); + const slice = ArrayBufferPrototypeSlice(bits, 0, sliceLength); + + const mod = length % 8; + if (mod === 0) + return slice; + + // eslint-disable-next-line no-sparse-arrays + masks ||= [, 0b10000000, 0b11000000, 0b11100000, 0b11110000, 0b11111000, 0b11111100, 0b11111110]; + + const masked = new Uint8Array(slice); + masked[sliceLength - 1] = masked[sliceLength - 1] & masks[mod]; + return masked.buffer; } module.exports = { diff --git a/lib/internal/crypto/keys.js b/lib/internal/crypto/keys.js index 64f8bedc48281b..96771f09c8c941 100644 --- a/lib/internal/crypto/keys.js +++ b/lib/internal/crypto/keys.js @@ -1,7 +1,6 @@ 'use strict'; const { - ArrayFrom, ArrayPrototypeSlice, ObjectDefineProperties, ObjectDefineProperty, @@ -781,7 +780,7 @@ class CryptoKey { get usages() { if (!(this instanceof CryptoKey)) throw new ERR_INVALID_THIS('CryptoKey'); - return ArrayFrom(this[kKeyUsages]); + return this[kKeyUsages]; } } diff --git a/lib/internal/crypto/util.js b/lib/internal/crypto/util.js index a1d7e84dae36bc..3cf6599a186c3f 100644 --- a/lib/internal/crypto/util.js +++ b/lib/internal/crypto/util.js @@ -189,18 +189,22 @@ const kSupportedAlgorithms = { 'AES-GCM': 'AesKeyGenParams', 'AES-KW': 'AesKeyGenParams', 'HMAC': 'HmacKeyGenParams', + 'Ed25519': null, + 'X25519': null, }, 'sign': { 'RSASSA-PKCS1-v1_5': null, 'RSA-PSS': 'RsaPssParams', 'ECDSA': 'EcdsaParams', 'HMAC': null, + 'Ed25519': null, }, 'verify': { 'RSASSA-PKCS1-v1_5': null, 'RSA-PSS': 'RsaPssParams', 'ECDSA': 'EcdsaParams', 'HMAC': null, + 'Ed25519': null, }, 'importKey': { 'RSASSA-PKCS1-v1_5': 'RsaHashedImportParams', @@ -215,11 +219,14 @@ const kSupportedAlgorithms = { 'AES-CBC': null, 'AES-GCM': null, 'AES-KW': null, + 'Ed25519': null, + 'X25519': null, }, 'deriveBits': { 'HKDF': 'HkdfParams', 'PBKDF2': 'Pbkdf2Params', 'ECDH': 'EcdhKeyDeriveParams', + 'X25519': 'EcdhKeyDeriveParams', }, 'encrypt': { 'RSA-OAEP': 'RsaOaepParams', @@ -251,17 +258,6 @@ const kSupportedAlgorithms = { }; const experimentalAlgorithms = ObjectEntries({ - 'X25519': { - generateKey: null, - importKey: null, - deriveBits: 'EcdhKeyDeriveParams', - }, - 'Ed25519': { - generateKey: null, - sign: null, - verify: null, - importKey: null, - }, 'X448': { generateKey: null, importKey: null, diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js index 252eaa75fac22b..b2ca3c612bf6ef 100644 --- a/lib/internal/encoding.js +++ b/lib/internal/encoding.js @@ -29,6 +29,7 @@ const kDecoder = Symbol('decoder'); const kEncoder = Symbol('encoder'); const kFatal = Symbol('kFatal'); const kUTF8FastPath = Symbol('kUTF8FastPath'); +const kLatin1FastPath = Symbol('kLatin1FastPath'); const kIgnoreBOM = Symbol('kIgnoreBOM'); const { @@ -55,6 +56,7 @@ const { encodeIntoResults, encodeUtf8String, decodeUTF8, + decodeLatin1, } = binding; const { Buffer } = require('buffer'); @@ -419,9 +421,10 @@ function makeTextDecoderICU() { this[kFatal] = Boolean(options?.fatal); // Only support fast path for UTF-8. this[kUTF8FastPath] = enc === 'utf-8'; + this[kLatin1FastPath] = enc === 'windows-1252'; this[kHandle] = undefined; - if (!this[kUTF8FastPath]) { + if (!this[kUTF8FastPath] && !this[kLatin1FastPath]) { this.#prepareConverter(); } } @@ -438,11 +441,16 @@ function makeTextDecoderICU() { validateDecoder(this); this[kUTF8FastPath] &&= !(options?.stream); + this[kLatin1FastPath] &&= !(options?.stream); if (this[kUTF8FastPath]) { return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]); } + if (this[kLatin1FastPath]) { + return decodeLatin1(input, this[kIgnoreBOM], this[kFatal]); + } + this.#prepareConverter(); validateObject(options, 'options', kValidateObjectAllowObjectsAndNull); diff --git a/lib/internal/errors.js b/lib/internal/errors.js index be5ce9fc24bc5d..4389d32e47619e 100644 --- a/lib/internal/errors.js +++ b/lib/internal/errors.js @@ -1555,6 +1555,9 @@ E('ERR_IPC_CHANNEL_CLOSED', 'Channel closed', Error); E('ERR_IPC_DISCONNECTED', 'IPC channel is already disconnected', Error); E('ERR_IPC_ONE_PIPE', 'Child process can have only one IPC pipe', Error); E('ERR_IPC_SYNC_FORK', 'IPC cannot be used with synchronous forks', Error); +E('ERR_IP_BLOCKED', function(ip) { + return `IP(${ip}) is blocked by net.BlockList`; +}, Error); E( 'ERR_LOADER_CHAIN_INCOMPLETE', '"%s" did not call the next hook in its chain and did not' + @@ -1651,9 +1654,12 @@ E('ERR_PARSE_ARGS_UNKNOWN_OPTION', (option, allowPositionals) => { E('ERR_PERFORMANCE_INVALID_TIMESTAMP', '%d is not a valid timestamp', TypeError); E('ERR_PERFORMANCE_MEASURE_INVALID_OPTIONS', '%s', TypeError); +E('ERR_QUIC_APPLICATION_ERROR', 'A QUIC application error occurred. %d [%s]', Error); E('ERR_QUIC_CONNECTION_FAILED', 'QUIC connection failed', Error); E('ERR_QUIC_ENDPOINT_CLOSED', 'QUIC endpoint closed: %s (%d)', Error); E('ERR_QUIC_OPEN_STREAM_FAILED', 'Failed to open QUIC stream', Error); +E('ERR_QUIC_TRANSPORT_ERROR', 'A QUIC transport error occurred. %d [%s]', Error); +E('ERR_QUIC_VERSION_NEGOTIATION_ERROR', 'The QUIC session requires version negotiation', Error); E('ERR_REQUIRE_ASYNC_MODULE', 'require() cannot be used on an ESM ' + 'graph with top-level await. Use import() instead. To see where the' + ' top-level await comes from, use --experimental-print-required-tla.', Error); diff --git a/lib/internal/fs/promises.js b/lib/internal/fs/promises.js index bdb4abfdf39aae..4567f7ff36a01d 100644 --- a/lib/internal/fs/promises.js +++ b/lib/internal/fs/promises.js @@ -943,6 +943,10 @@ async function readdirRecursive(originalPath, options) { async function readdir(path, options) { options = getOptions(options); + + // Make shallow copy to prevent mutating options from affecting results + options = copyObject(options); + path = getValidatedPath(path); if (options.recursive) { return readdirRecursive(path, options); diff --git a/lib/internal/http2/core.js b/lib/internal/http2/core.js index 3e21a19bf02149..cb29553c05536d 100644 --- a/lib/internal/http2/core.js +++ b/lib/internal/http2/core.js @@ -3136,9 +3136,13 @@ function initializeOptions(options) { function initializeTLSOptions(options, servername) { options = initializeOptions(options); - options.ALPNProtocols = ['h2']; - if (options.allowHTTP1 === true) - options.ALPNProtocols.push('http/1.1'); + + if (!options.ALPNCallback) { + options.ALPNProtocols = ['h2']; + if (options.allowHTTP1 === true) + options.ALPNProtocols.push('http/1.1'); + } + if (servername !== undefined && !options.servername) options.servername = servername; return options; diff --git a/lib/internal/main/eval_string.js b/lib/internal/main/eval_string.js index 2ca016c4960e85..a3be08551425b1 100644 --- a/lib/internal/main/eval_string.js +++ b/lib/internal/main/eval_string.js @@ -14,8 +14,8 @@ const { markBootstrapComplete, } = require('internal/process/pre_execution'); const { evalModuleEntryPoint, evalScript } = require('internal/process/execution'); -const { addBuiltinLibsToObject, stripTypeScriptTypes } = require('internal/modules/helpers'); - +const { addBuiltinLibsToObject } = require('internal/modules/helpers'); +const { stripTypeScriptModuleTypes } = require('internal/modules/typescript'); const { getOptionValue } = require('internal/options'); prepareMainThreadExecution(); @@ -24,7 +24,7 @@ markBootstrapComplete(); const code = getOptionValue('--eval'); const source = getOptionValue('--experimental-strip-types') ? - stripTypeScriptTypes(code) : + stripTypeScriptModuleTypes(code) : code; const print = getOptionValue('--print'); diff --git a/lib/internal/modules/cjs/loader.js b/lib/internal/modules/cjs/loader.js index d6897f84d26a5e..ade9abadb3e5f4 100644 --- a/lib/internal/modules/cjs/loader.js +++ b/lib/internal/modules/cjs/loader.js @@ -125,10 +125,10 @@ const { pathToFileURL, fileURLToPath, isURL } = require('internal/url'); const { pendingDeprecate, emitExperimentalWarning, - isUnderNodeModules, kEmptyObject, setOwnProperty, getLazy, + isUnderNodeModules, isWindows, } = require('internal/util'); const { @@ -153,13 +153,12 @@ const { setHasStartedUserCJSExecution, stripBOM, toRealPath, - stripTypeScriptTypes, } = require('internal/modules/helpers'); +const { stripTypeScriptModuleTypes } = require('internal/modules/typescript'); const packageJsonReader = require('internal/modules/package_json_reader'); const { getOptionValue, getEmbedderOptions } = require('internal/options'); const shouldReportRequiredModules = getLazy(() => process.env.WATCH_REPORT_DEPENDENCIES); -const permission = require('internal/process/permission'); const { vm_dynamic_import_default_internal, } = internalBinding('symbols'); @@ -171,7 +170,6 @@ const { ERR_REQUIRE_CYCLE_MODULE, ERR_REQUIRE_ESM, ERR_UNKNOWN_BUILTIN_MODULE, - ERR_UNSUPPORTED_NODE_MODULES_TYPE_STRIPPING, }, setArrowMessage, } = require('internal/errors'); @@ -605,11 +603,11 @@ function trySelf(parentPath, request) { try { const { packageExportsResolve } = require('internal/modules/esm/resolve'); return finalizeEsmResolution(packageExportsResolve( - pathToFileURL(pkg.path + '/package.json'), expansion, pkg.data, + pathToFileURL(pkg.path), expansion, pkg.data, pathToFileURL(parentPath), getCjsConditions()), parentPath, pkg.path); } catch (e) { if (e.code === 'ERR_MODULE_NOT_FOUND') { - throw createEsmNotFoundErr(request, pkg.path + '/package.json'); + throw createEsmNotFoundErr(request, pkg.path); } throw e; } @@ -736,11 +734,8 @@ Module._findPath = function(request, paths, isMain) { // For each path for (let i = 0; i < paths.length; i++) { // Don't search further if path doesn't exist - // or doesn't have permission to it const curPath = paths[i]; - if (insidePath && curPath && - ((permission.isEnabled() && !permission.has('fs.read', curPath)) || _stat(curPath) < 1) - ) { + if (insidePath && curPath && _stat(curPath) < 1) { continue; } @@ -1204,14 +1199,15 @@ Module._resolveFilename = function(request, parent, isMain, options) { if (request[0] === '#' && (parent?.filename || parent?.id === '')) { const parentPath = parent?.filename ?? process.cwd() + path.sep; - const pkg = packageJsonReader.getNearestParentPackageJSON(parentPath) || { __proto__: null }; - if (pkg.data?.imports != null) { + const pkg = packageJsonReader.getNearestParentPackageJSON(parentPath); + if (pkg?.data.imports != null) { try { const { packageImportsResolve } = require('internal/modules/esm/resolve'); return finalizeEsmResolution( - packageImportsResolve(request, pathToFileURL(parentPath), - getCjsConditions()), parentPath, - pkg.path); + packageImportsResolve(request, pathToFileURL(parentPath), getCjsConditions()), + parentPath, + pkg.path, + ); } catch (e) { if (e.code === 'ERR_MODULE_NOT_FOUND') { throw createEsmNotFoundErr(request); @@ -1271,8 +1267,7 @@ function finalizeEsmResolution(resolved, parentPath, pkgPath) { if (actual) { return actual; } - const err = createEsmNotFoundErr(filename, - path.resolve(pkgPath, 'package.json')); + const err = createEsmNotFoundErr(filename, pkgPath); throw err; } @@ -1343,7 +1338,7 @@ Module.prototype.require = function(id) { } }; -let emittedRequireModuleWarning = false; +let requireModuleWarningMode; /** * Resolve and evaluate it synchronously as ESM if it's ESM. * @param {Module} mod CJS module instance @@ -1352,10 +1347,7 @@ let emittedRequireModuleWarning = false; function loadESMFromCJS(mod, filename) { let source = getMaybeCachedSource(mod, filename); if (getOptionValue('--experimental-strip-types') && path.extname(filename) === '.mts') { - if (isUnderNodeModules(filename)) { - throw new ERR_UNSUPPORTED_NODE_MODULES_TYPE_STRIPPING(filename); - } - source = stripTypeScriptTypes(source, filename); + source = stripTypeScriptModuleTypes(source, filename); } const cascadedLoader = require('internal/modules/esm/loader').getOrInitializeCascadedLoader(); const isMain = mod[kIsMainSymbol]; @@ -1369,17 +1361,22 @@ function loadESMFromCJS(mod, filename) { } else { const parent = mod[kModuleParent]; - if (!emittedRequireModuleWarning) { + requireModuleWarningMode ??= getOptionValue('--trace-require-module'); + if (requireModuleWarningMode) { let shouldEmitWarning = false; - // Check if the require() comes from node_modules. - if (parent) { - shouldEmitWarning = !isUnderNodeModules(parent.filename); - } else if (mod[kIsCachedByESMLoader]) { - // It comes from the require() built for `import cjs` and doesn't have a parent recorded - // in the CJS module instance. Inspect the stack trace to see if the require() - // comes from node_modules and reduce the noise. If there are more than 100 frames, - // just give up and assume it is under node_modules. - shouldEmitWarning = !isInsideNodeModules(100, true); + if (requireModuleWarningMode === 'no-node-modules') { + // Check if the require() comes from node_modules. + if (parent) { + shouldEmitWarning = !isUnderNodeModules(parent.filename); + } else if (mod[kIsCachedByESMLoader]) { + // It comes from the require() built for `import cjs` and doesn't have a parent recorded + // in the CJS module instance. Inspect the stack trace to see if the require() + // comes from node_modules and reduce the noise. If there are more than 100 frames, + // just give up and assume it is under node_modules. + shouldEmitWarning = !isInsideNodeModules(100, true); + } + } else { + shouldEmitWarning = true; } if (shouldEmitWarning) { let messagePrefix; @@ -1405,7 +1402,7 @@ function loadESMFromCJS(mod, filename) { messagePrefix, undefined, parent?.require); - emittedRequireModuleWarning = true; + requireModuleWarningMode = true; } } const { @@ -1591,11 +1588,8 @@ function getMaybeCachedSource(mod, filename) { } function loadCTS(module, filename) { - if (isUnderNodeModules(filename)) { - throw new ERR_UNSUPPORTED_NODE_MODULES_TYPE_STRIPPING(filename); - } const source = getMaybeCachedSource(module, filename); - const code = stripTypeScriptTypes(source, filename); + const code = stripTypeScriptModuleTypes(source, filename); module._compile(code, filename, 'commonjs'); } @@ -1605,12 +1599,9 @@ function loadCTS(module, filename) { * @param {string} filename The file path of the module */ function loadTS(module, filename) { - if (isUnderNodeModules(filename)) { - throw new ERR_UNSUPPORTED_NODE_MODULES_TYPE_STRIPPING(filename); - } // If already analyzed the source, then it will be cached. const source = getMaybeCachedSource(module, filename); - const content = stripTypeScriptTypes(source, filename); + const content = stripTypeScriptModuleTypes(source, filename); let format; const pkg = packageJsonReader.getNearestParentPackageJSON(filename); // Function require shouldn't be used in ES modules. @@ -1622,7 +1613,7 @@ function loadTS(module, filename) { const parent = module[kModuleParent]; const parentPath = parent?.filename; - const packageJsonPath = path.resolve(pkg.path, 'package.json'); + const packageJsonPath = pkg.path; const usesEsm = containsModuleSyntax(content, filename); const err = new ERR_REQUIRE_ESM(filename, usesEsm, parentPath, packageJsonPath); @@ -1630,7 +1621,7 @@ function loadTS(module, filename) { if (Module._cache[parentPath]) { let parentSource; try { - parentSource = stripTypeScriptTypes(fs.readFileSync(parentPath, 'utf8'), parentPath); + parentSource = stripTypeScriptModuleTypes(fs.readFileSync(parentPath, 'utf8'), parentPath); } catch { // Continue regardless of error. } @@ -1681,7 +1672,7 @@ Module._extensions['.js'] = function(module, filename) { // This is an error path because `require` of a `.js` file in a `"type": "module"` scope is not allowed. const parent = module[kModuleParent]; const parentPath = parent?.filename; - const packageJsonPath = path.resolve(pkg.path, 'package.json'); + const packageJsonPath = pkg.path; const usesEsm = containsModuleSyntax(content, filename); const err = new ERR_REQUIRE_ESM(filename, usesEsm, parentPath, packageJsonPath); diff --git a/lib/internal/modules/esm/get_format.js b/lib/internal/modules/esm/get_format.js index 26d0bace6cdd39..9519f947b8dfdc 100644 --- a/lib/internal/modules/esm/get_format.js +++ b/lib/internal/modules/esm/get_format.js @@ -164,9 +164,10 @@ function getFileProtocolModuleFormat(url, context = { __proto__: null }, ignoreE // Since experimental-strip-types depends on detect-module, we always return null // if source is undefined. if (!source) { return null; } - const { stripTypeScriptTypes, stringify } = require('internal/modules/helpers'); + const { stringify } = require('internal/modules/helpers'); + const { stripTypeScriptModuleTypes } = require('internal/modules/typescript'); const stringifiedSource = stringify(source); - const parsedSource = stripTypeScriptTypes(stringifiedSource, fileURLToPath(url)); + const parsedSource = stripTypeScriptModuleTypes(stringifiedSource, fileURLToPath(url)); const detectedFormat = detectModuleFormat(parsedSource, url); const format = `${detectedFormat}-typescript`; if (format === 'module-typescript' && foundPackageJson) { diff --git a/lib/internal/modules/esm/load.js b/lib/internal/modules/esm/load.js index d56dae3f001b1c..5ba13096b98047 100644 --- a/lib/internal/modules/esm/load.js +++ b/lib/internal/modules/esm/load.js @@ -4,7 +4,6 @@ const { RegExpPrototypeExec, } = primordials; const { - isUnderNodeModules, kEmptyObject, } = require('internal/util'); @@ -23,7 +22,6 @@ const { ERR_INVALID_URL, ERR_UNKNOWN_MODULE_FORMAT, ERR_UNSUPPORTED_ESM_URL_SCHEME, - ERR_UNSUPPORTED_NODE_MODULES_TYPE_STRIPPING, } = require('internal/errors').codes; const { @@ -131,12 +129,6 @@ async function defaultLoad(url, context = kEmptyObject) { validateAttributes(url, format, importAttributes); - if (getOptionValue('--experimental-strip-types') && - (format === 'module-typescript' || format === 'commonjs-typescript') && - isUnderNodeModules(url)) { - throw new ERR_UNSUPPORTED_NODE_MODULES_TYPE_STRIPPING(url); - } - return { __proto__: null, format, diff --git a/lib/internal/modules/esm/resolve.js b/lib/internal/modules/esm/resolve.js index 61c043e35c6ce9..bfd9bd3d127404 100644 --- a/lib/internal/modules/esm/resolve.js +++ b/lib/internal/modules/esm/resolve.js @@ -356,7 +356,6 @@ function invalidPackageTarget( const invalidSegmentRegEx = /(^|\\|\/)((\.|%2e)(\.|%2e)?|(n|%6e|%4e)(o|%6f|%4f)(d|%64|%44)(e|%65|%45)(_|%5f)(m|%6d|%4d)(o|%6f|%4f)(d|%64|%44)(u|%75|%55)(l|%6c|%4c)(e|%65|%45)(s|%73|%53))?(\\|\/|$)/i; const deprecatedInvalidSegmentRegEx = /(^|\\|\/)((\.|%2e)(\.|%2e)?|(n|%6e|%4e)(o|%6f|%4f)(d|%64|%44)(e|%65|%45)(_|%5f)(m|%6d|%4d)(o|%6f|%4f)(d|%64|%44)(u|%75|%55)(l|%6c|%4c)(e|%65|%45)(s|%73|%53))(\\|\/|$)/i; -const invalidPackageNameRegEx = /^\.|%|\\/; const patternRegEx = /\*/g; /** @@ -752,44 +751,6 @@ function packageImportsResolve(name, base, conditions) { throw importNotDefined(name, packageJSONUrl, base); } -/** - * Parse a package name from a specifier. - * @param {string} specifier - The import specifier. - * @param {string | URL | undefined} base - The parent URL. - */ -function parsePackageName(specifier, base) { - let separatorIndex = StringPrototypeIndexOf(specifier, '/'); - let validPackageName = true; - let isScoped = false; - if (specifier[0] === '@') { - isScoped = true; - if (separatorIndex === -1 || specifier.length === 0) { - validPackageName = false; - } else { - separatorIndex = StringPrototypeIndexOf( - specifier, '/', separatorIndex + 1); - } - } - - const packageName = separatorIndex === -1 ? - specifier : StringPrototypeSlice(specifier, 0, separatorIndex); - - // Package name cannot have leading . and cannot have percent-encoding or - // \\ separators. - if (RegExpPrototypeExec(invalidPackageNameRegEx, packageName) !== null) { - validPackageName = false; - } - - if (!validPackageName) { - throw new ERR_INVALID_MODULE_SPECIFIER( - specifier, 'is not a valid package name', fileURLToPath(base)); - } - - const packageSubpath = '.' + (separatorIndex === -1 ? '' : - StringPrototypeSlice(specifier, separatorIndex)); - - return { packageName, packageSubpath, isScoped }; -} /** * Resolves a package specifier to a URL. @@ -804,57 +765,24 @@ function packageResolve(specifier, base, conditions) { return new URL('node:' + specifier); } - const { packageName, packageSubpath, isScoped } = - parsePackageName(specifier, base); + const { packageJSONUrl, packageJSONPath, packageSubpath } = packageJsonReader.getPackageJSONURL(specifier, base); - // ResolveSelf - const packageConfig = packageJsonReader.getPackageScopeConfig(base); - if (packageConfig.exists) { - if (packageConfig.exports != null && packageConfig.name === packageName) { - const packageJSONUrl = pathToFileURL(packageConfig.pjsonPath); - return packageExportsResolve( - packageJSONUrl, packageSubpath, packageConfig, base, conditions); - } - } + const packageConfig = packageJsonReader.read(packageJSONPath, { __proto__: null, specifier, base, isESM: true }); - let packageJSONUrl = - new URL('./node_modules/' + packageName + '/package.json', base); - let packageJSONPath = fileURLToPath(packageJSONUrl); - let lastPath; - do { - const stat = internalFsBinding.internalModuleStat( - internalFsBinding, - StringPrototypeSlice(packageJSONPath, 0, packageJSONPath.length - 13), + // Package match. + if (packageConfig.exports != null) { + return packageExportsResolve( + packageJSONUrl, packageSubpath, packageConfig, base, conditions); + } + if (packageSubpath === '.') { + return legacyMainResolve( + packageJSONUrl, + packageConfig, + base, ); - // Check for !stat.isDirectory() - if (stat !== 1) { - lastPath = packageJSONPath; - packageJSONUrl = new URL((isScoped ? - '../../../../node_modules/' : '../../../node_modules/') + - packageName + '/package.json', packageJSONUrl); - packageJSONPath = fileURLToPath(packageJSONUrl); - continue; - } - - // Package match. - const packageConfig = packageJsonReader.read(packageJSONPath, { __proto__: null, specifier, base, isESM: true }); - if (packageConfig.exports != null) { - return packageExportsResolve( - packageJSONUrl, packageSubpath, packageConfig, base, conditions); - } - if (packageSubpath === '.') { - return legacyMainResolve( - packageJSONUrl, - packageConfig, - base, - ); - } - - return new URL(packageSubpath, packageJSONUrl); - // Cross-platform root check. - } while (packageJSONPath.length !== lastPath.length); + } - throw new ERR_MODULE_NOT_FOUND(packageName, fileURLToPath(base), null); + return new URL(packageSubpath, packageJSONUrl); } /** @@ -1105,10 +1033,11 @@ module.exports = { decorateErrorWithCommonJSHints, defaultResolve, encodedSepRegEx, + legacyMainResolve, packageExportsResolve, packageImportsResolve, + packageResolve, throwIfInvalidParentURL, - legacyMainResolve, }; // cycle diff --git a/lib/internal/modules/esm/translators.js b/lib/internal/modules/esm/translators.js index bbe2380b0ed6d7..491dc3f450733a 100644 --- a/lib/internal/modules/esm/translators.js +++ b/lib/internal/modules/esm/translators.js @@ -30,10 +30,10 @@ const { assertBufferSource, loadBuiltinModule, stringify, - stripTypeScriptTypes, stripBOM, urlToFilename, } = require('internal/modules/helpers'); +const { stripTypeScriptModuleTypes } = require('internal/modules/typescript'); const { kIsCachedByESMLoader, Module: CJSModule, @@ -244,7 +244,7 @@ translators.set('require-commonjs', (url, source, isMain) => { translators.set('require-commonjs-typescript', (url, source, isMain) => { emitExperimentalWarning('Type Stripping'); assert(cjsParse); - const code = stripTypeScriptTypes(stringify(source), url); + const code = stripTypeScriptModuleTypes(stringify(source), url); return createCJSModuleWrap(url, code); }); @@ -459,7 +459,7 @@ translators.set('wasm', async function(url, source) { translators.set('commonjs-typescript', function(url, source) { emitExperimentalWarning('Type Stripping'); assertBufferSource(source, true, 'load'); - const code = stripTypeScriptTypes(stringify(source), url); + const code = stripTypeScriptModuleTypes(stringify(source), url); debug(`Translating TypeScript ${url}`); return FunctionPrototypeCall(translators.get('commonjs'), this, url, code, false); }); @@ -468,7 +468,7 @@ translators.set('commonjs-typescript', function(url, source) { translators.set('module-typescript', function(url, source) { emitExperimentalWarning('Type Stripping'); assertBufferSource(source, true, 'load'); - const code = stripTypeScriptTypes(stringify(source), url); + const code = stripTypeScriptModuleTypes(stringify(source), url); debug(`Translating TypeScript ${url}`); return FunctionPrototypeCall(translators.get('module'), this, url, code, false); }); diff --git a/lib/internal/modules/esm/worker.js b/lib/internal/modules/esm/worker.js index 311d77fb099384..0213df7a92a0eb 100644 --- a/lib/internal/modules/esm/worker.js +++ b/lib/internal/modules/esm/worker.js @@ -215,8 +215,6 @@ async function customizedModuleWorker(lock, syncCommPort, errorHandler) { (port ?? syncCommPort).postMessage(wrapMessage('error', exception)); } - AtomicsAdd(lock, WORKER_TO_MAIN_THREAD_NOTIFICATION, 1); - AtomicsNotify(lock, WORKER_TO_MAIN_THREAD_NOTIFICATION); if (shouldRemoveGlobalErrorHandler) { process.off('uncaughtException', errorHandler); } @@ -225,6 +223,10 @@ async function customizedModuleWorker(lock, syncCommPort, errorHandler) { // We keep checking for new messages to not miss any. clearImmediate(immediate); immediate = setImmediate(checkForMessages).unref(); + // To prevent the main thread from terminating before this function completes after unlocking, + // the following process is executed at the end of the function. + AtomicsAdd(lock, WORKER_TO_MAIN_THREAD_NOTIFICATION, 1); + AtomicsNotify(lock, WORKER_TO_MAIN_THREAD_NOTIFICATION); } } diff --git a/lib/internal/modules/helpers.js b/lib/internal/modules/helpers.js index 3153ecec198392..4445c0dc3fae73 100644 --- a/lib/internal/modules/helpers.js +++ b/lib/internal/modules/helpers.js @@ -15,7 +15,6 @@ const { const { ERR_INVALID_ARG_TYPE, ERR_INVALID_RETURN_PROPERTY_VALUE, - ERR_INVALID_TYPESCRIPT_SYNTAX, } = require('internal/errors').codes; const { BuiltinModule } = require('internal/bootstrap/realm'); @@ -26,9 +25,8 @@ const path = require('path'); const { pathToFileURL, fileURLToPath } = require('internal/url'); const assert = require('internal/assert'); -const { Buffer } = require('buffer'); const { getOptionValue } = require('internal/options'); -const { assertTypeScript, setOwnProperty, getLazy } = require('internal/util'); +const { setOwnProperty, getLazy } = require('internal/util'); const { inspect } = require('internal/util/inspect'); const lazyTmpdir = getLazy(() => require('os').tmpdir()); @@ -313,72 +311,6 @@ function getBuiltinModule(id) { return normalizedId ? require(normalizedId) : undefined; } -/** - * The TypeScript parsing mode, either 'strip-only' or 'transform'. - * @type {string} - */ -const getTypeScriptParsingMode = getLazy(() => - (getOptionValue('--experimental-transform-types') ? 'transform' : 'strip-only'), -); - -/** - * Load the TypeScript parser. - * and returns an object with a `code` property. - * @returns {Function} The TypeScript parser function. - */ -const loadTypeScriptParser = getLazy(() => { - assertTypeScript(); - const amaro = require('internal/deps/amaro/dist/index'); - return amaro.transformSync; -}); - -/** - * - * @param {string} source the source code - * @param {object} options the options to pass to the parser - * @returns {TransformOutput} an object with a `code` property. - */ -function parseTypeScript(source, options) { - const parse = loadTypeScriptParser(); - try { - return parse(source, options); - } catch (error) { - throw new ERR_INVALID_TYPESCRIPT_SYNTAX(error); - } -} - -/** - * @typedef {object} TransformOutput - * @property {string} code The compiled code. - * @property {string} [map] The source maps (optional). - * - * Performs type-stripping to TypeScript source code. - * @param {string} source TypeScript code to parse. - * @param {string} filename The filename of the source code. - * @returns {TransformOutput} The stripped TypeScript code. - */ -function stripTypeScriptTypes(source, filename) { - assert(typeof source === 'string'); - const options = { - __proto__: null, - mode: getTypeScriptParsingMode(), - sourceMap: getOptionValue('--enable-source-maps'), - filename, - }; - const { code, map } = parseTypeScript(source, options); - if (map) { - // TODO(@marco-ippolito) When Buffer.transcode supports utf8 to - // base64 transformation, we should change this line. - const base64SourceMap = Buffer.from(map).toString('base64'); - return `${code}\n\n//# sourceMappingURL=data:application/json;base64,${base64SourceMap}`; - } - // Source map is not necessary in strip-only mode. However, to map the source - // file in debuggers to the original TypeScript source, add a sourceURL magic - // comment to hint that it is a generated source. - return `${code}\n\n//# sourceURL=${filename}`; -} - - /** * Enable on-disk compiled cache for all user modules being complied in the current Node.js instance * after this method is called. @@ -481,7 +413,6 @@ module.exports = { loadBuiltinModule, makeRequireFunction, normalizeReferrerURL, - stripTypeScriptTypes, stringify, stripBOM, toRealPath, diff --git a/lib/internal/modules/package_json_reader.js b/lib/internal/modules/package_json_reader.js index 9a9dcebb799c00..92f46ed3971341 100644 --- a/lib/internal/modules/package_json_reader.js +++ b/lib/internal/modules/package_json_reader.js @@ -4,29 +4,53 @@ const { ArrayIsArray, JSONParse, ObjectDefineProperty, - StringPrototypeLastIndexOf, + RegExpPrototypeExec, + StringPrototypeIndexOf, StringPrototypeSlice, } = primordials; -const modulesBinding = internalBinding('modules'); -const { resolve, sep } = require('path'); +const { + fileURLToPath, + isURL, + pathToFileURL, + URL, +} = require('internal/url'); +const { canParse: URLCanParse } = internalBinding('url'); +const { + codes: { + ERR_INVALID_MODULE_SPECIFIER, + ERR_MISSING_ARGS, + ERR_MODULE_NOT_FOUND, + }, +} = require('internal/errors'); const { kEmptyObject } = require('internal/util'); +const modulesBinding = internalBinding('modules'); +const path = require('path'); +const { validateString } = require('internal/validators'); +const internalFsBinding = internalBinding('fs'); /** - * @param {string} path - * @param {import('typings/internalBinding/modules').SerializedPackageConfig} contents - * @returns {import('typings/internalBinding/modules').PackageConfig} + * @typedef {import('typings/internalBinding/modules').DeserializedPackageConfig} DeserializedPackageConfig + * @typedef {import('typings/internalBinding/modules').PackageConfig} PackageConfig + * @typedef {import('typings/internalBinding/modules').SerializedPackageConfig} SerializedPackageConfig + */ + +/** + * @param {URL['pathname']} path + * @param {SerializedPackageConfig} contents + * @returns {DeserializedPackageConfig} */ function deserializePackageJSON(path, contents) { if (contents === undefined) { return { - __proto__: null, + data: { + __proto__: null, + type: 'none', // Ignore unknown types for forwards compatibility + }, exists: false, - pjsonPath: path, - type: 'none', // Ignore unknown types for forwards compatibility + path, }; } - let pjsonPath = path; const { 0: name, 1: main, @@ -36,37 +60,40 @@ function deserializePackageJSON(path, contents) { 5: optionalFilePath, } = contents; - // This is required to be used in getPackageScopeConfig. - if (optionalFilePath) { - pjsonPath = optionalFilePath; - } - - // The imports and exports fields can be either undefined or a string. - // - If it's a string, it's either plain string or a stringified JSON string. - // - If it's a stringified JSON string, it starts with either '[' or '{'. - const requiresJSONParse = (value) => (value !== undefined && (value[0] === '[' || value[0] === '{')); + const pjsonPath = optionalFilePath ?? path; return { - __proto__: null, - exists: true, - pjsonPath, - name, - main, - type, - // This getters are used to lazily parse the imports and exports fields. - get imports() { - const value = requiresJSONParse(plainImports) ? JSONParse(plainImports) : plainImports; - ObjectDefineProperty(this, 'imports', { __proto__: null, value }); - return this.imports; - }, - get exports() { - const value = requiresJSONParse(plainExports) ? JSONParse(plainExports) : plainExports; - ObjectDefineProperty(this, 'exports', { __proto__: null, value }); - return this.exports; + data: { + __proto__: null, + ...(name != null && { name }), + ...(main != null && { main }), + ...(type != null && { type }), + ...(plainImports != null && { + // This getters are used to lazily parse the imports and exports fields. + get imports() { + const value = requiresJSONParse(plainImports) ? JSONParse(plainImports) : plainImports; + ObjectDefineProperty(this, 'imports', { __proto__: null, value }); + return this.imports; + }, + }), + ...(plainExports != null && { + get exports() { + const value = requiresJSONParse(plainExports) ? JSONParse(plainExports) : plainExports; + ObjectDefineProperty(this, 'exports', { __proto__: null, value }); + return this.exports; + }, + }), }, + exists: true, + path: pjsonPath, }; } +// The imports and exports fields can be either undefined or a string. +// - If it's a string, it's either plain string or a stringified JSON string. +// - If it's a stringified JSON string, it starts with either '[' or '{'. +const requiresJSONParse = (value) => (value !== undefined && (value[0] === '[' || value[0] === '{')); + /** * Reads a package.json file and returns the parsed contents. * @param {string} jsonPath @@ -75,7 +102,7 @@ function deserializePackageJSON(path, contents) { * specifier?: URL | string, * isESM?: boolean, * }} options - * @returns {import('typings/internalBinding/modules').PackageConfig} + * @returns {PackageConfig} */ function read(jsonPath, { base, specifier, isESM } = kEmptyObject) { // This function will be called by both CJS and ESM, so we need to make sure @@ -87,7 +114,14 @@ function read(jsonPath, { base, specifier, isESM } = kEmptyObject) { specifier == null ? undefined : `${specifier}`, ); - return deserializePackageJSON(jsonPath, parsed); + const result = deserializePackageJSON(jsonPath, parsed); + + return { + __proto__: null, + ...result.data, + exists: result.exists, + pjsonPath: result.path, + }; } /** @@ -98,14 +132,14 @@ function read(jsonPath, { base, specifier, isESM } = kEmptyObject) { */ function readPackage(requestPath) { // TODO(@anonrig): Remove this function. - return read(resolve(requestPath, 'package.json')); + return read(path.resolve(requestPath, 'package.json')); } /** * Get the nearest parent package.json file from a given path. * Return the package.json data and the path to the package.json file, or undefined. * @param {string} checkPath The path to start searching from. - * @returns {undefined | {data: import('typings/internalBinding/modules').PackageConfig, path: string}} + * @returns {undefined | DeserializedPackageConfig} */ function getNearestParentPackageJSON(checkPath) { const result = modulesBinding.getNearestParentPackageJSON(checkPath); @@ -114,13 +148,7 @@ function getNearestParentPackageJSON(checkPath) { return undefined; } - const data = deserializePackageJSON(checkPath, result); - - // Path should be the root folder of the matched package.json - // For example for ~/path/package.json, it should be ~/path - const path = StringPrototypeSlice(data.pjsonPath, 0, StringPrototypeLastIndexOf(data.pjsonPath, sep)); - - return { data, path }; + return deserializePackageJSON(checkPath, result); } /** @@ -132,7 +160,14 @@ function getPackageScopeConfig(resolved) { const result = modulesBinding.getPackageScopeConfig(`${resolved}`); if (ArrayIsArray(result)) { - return deserializePackageJSON(`${resolved}`, result); + const { data, exists, path } = deserializePackageJSON(`${resolved}`, result); + + return { + __proto__: null, + ...data, + exists, + pjsonPath: path, + }; } // This means that the response is a string @@ -154,10 +189,137 @@ function getPackageType(url) { return getPackageScopeConfig(url).type; } +const invalidPackageNameRegEx = /^\.|%|\\/; +/** + * Parse a package name from a specifier. + * @param {string} specifier - The import specifier. + * @param {string | URL | undefined} base - The parent URL. + */ +function parsePackageName(specifier, base) { + let separatorIndex = StringPrototypeIndexOf(specifier, '/'); + let validPackageName = true; + let isScoped = false; + if (specifier[0] === '@') { + isScoped = true; + if (separatorIndex === -1 || specifier.length === 0) { + validPackageName = false; + } else { + separatorIndex = StringPrototypeIndexOf( + specifier, '/', separatorIndex + 1); + } + } + + const packageName = separatorIndex === -1 ? + specifier : StringPrototypeSlice(specifier, 0, separatorIndex); + + // Package name cannot have leading . and cannot have percent-encoding or + // \\ separators. + if (RegExpPrototypeExec(invalidPackageNameRegEx, packageName) !== null) { + validPackageName = false; + } + + if (!validPackageName) { + throw new ERR_INVALID_MODULE_SPECIFIER( + specifier, 'is not a valid package name', fileURLToPath(base)); + } + + const packageSubpath = '.' + (separatorIndex === -1 ? '' : + StringPrototypeSlice(specifier, separatorIndex)); + + return { packageName, packageSubpath, isScoped }; +} + +function getPackageJSONURL(specifier, base) { + const { packageName, packageSubpath, isScoped } = + parsePackageName(specifier, base); + + // ResolveSelf + const packageConfig = getPackageScopeConfig(base); + if (packageConfig.exists) { + if (packageConfig.exports != null && packageConfig.name === packageName) { + const packageJSONPath = packageConfig.pjsonPath; + return { packageJSONUrl: pathToFileURL(packageJSONPath), packageJSONPath, packageSubpath }; + } + } + + let packageJSONUrl = + new URL('./node_modules/' + packageName + '/package.json', base); + let packageJSONPath = fileURLToPath(packageJSONUrl); + let lastPath; + do { + const stat = internalFsBinding.internalModuleStat( + internalFsBinding, + StringPrototypeSlice(packageJSONPath, 0, packageJSONPath.length - 13), + ); + // Check for !stat.isDirectory() + if (stat !== 1) { + lastPath = packageJSONPath; + packageJSONUrl = new URL((isScoped ? + '../../../../node_modules/' : '../../../node_modules/') + + packageName + '/package.json', packageJSONUrl); + packageJSONPath = fileURLToPath(packageJSONUrl); + continue; + } + + // Package match. + return { packageJSONUrl, packageJSONPath, packageSubpath }; + } while (packageJSONPath.length !== lastPath.length); + + throw new ERR_MODULE_NOT_FOUND(packageName, fileURLToPath(base), null); +} + +const pjsonImportAttributes = { __proto__: null, type: 'json' }; +let cascadedLoader; +/** + * @param {URL['href'] | string | URL} specifier The location for which to get the "root" package.json + * @param {URL['href'] | string | URL} [base] The location of the current module (ex file://tmp/foo.js). + */ +function findPackageJSON(specifier, base = 'data:') { + if (arguments.length === 0) { + throw new ERR_MISSING_ARGS('specifier'); + } + try { + specifier = `${specifier}`; + } catch { + validateString(specifier, 'specifier'); + } + + let parentURL = base; + if (!isURL(base)) { + validateString(base, 'base'); + parentURL = path.isAbsolute(base) ? pathToFileURL(base) : new URL(base); + } + + if (specifier && specifier[0] !== '.' && specifier[0] !== '/' && !URLCanParse(specifier)) { + // If `specifier` is a bare specifier. + const { packageJSONPath } = getPackageJSONURL(specifier, parentURL); + return packageJSONPath; + } + + let resolvedTarget; + cascadedLoader ??= require('internal/modules/esm/loader').getOrInitializeCascadedLoader(); + + try { + resolvedTarget = cascadedLoader.resolve(specifier, `${parentURL}`, pjsonImportAttributes).url; + } catch (err) { + if (err.code === 'ERR_UNSUPPORTED_DIR_IMPORT') { + resolvedTarget = err.url; + } else { + throw err; + } + } + + const pkg = getNearestParentPackageJSON(fileURLToPath(resolvedTarget)); + + return pkg?.path; +} + module.exports = { read, readPackage, getNearestParentPackageJSON, getPackageScopeConfig, getPackageType, + getPackageJSONURL, + findPackageJSON, }; diff --git a/lib/internal/modules/typescript.js b/lib/internal/modules/typescript.js new file mode 100644 index 00000000000000..d1b58e86c72ee7 --- /dev/null +++ b/lib/internal/modules/typescript.js @@ -0,0 +1,146 @@ +'use strict'; + +const { + validateBoolean, + validateOneOf, + validateObject, + validateString, +} = require('internal/validators'); +const { assertTypeScript, + emitExperimentalWarning, + getLazy, + isUnderNodeModules, + kEmptyObject } = require('internal/util'); +const { + ERR_INVALID_TYPESCRIPT_SYNTAX, + ERR_UNSUPPORTED_NODE_MODULES_TYPE_STRIPPING, +} = require('internal/errors').codes; +const { getOptionValue } = require('internal/options'); +const assert = require('internal/assert'); + +/** + * The TypeScript parsing mode, either 'strip-only' or 'transform'. + * @type {string} + */ +const getTypeScriptParsingMode = getLazy(() => + (getOptionValue('--experimental-transform-types') ? 'transform' : 'strip-only'), +); + +/** + * Load the TypeScript parser. + * and returns an object with a `code` property. + * @returns {Function} The TypeScript parser function. + */ +const loadTypeScriptParser = getLazy(() => { + assertTypeScript(); + const amaro = require('internal/deps/amaro/dist/index'); + return amaro.transformSync; +}); + +/** + * + * @param {string} source the source code + * @param {object} options the options to pass to the parser + * @returns {TransformOutput} an object with a `code` property. + */ +function parseTypeScript(source, options) { + const parse = loadTypeScriptParser(); + try { + return parse(source, options); + } catch (error) { + throw new ERR_INVALID_TYPESCRIPT_SYNTAX(error); + } +} + +/** + * Performs type-stripping to TypeScript source code. + * @param {string} code TypeScript code to parse. + * @param {TransformOptions} options The configuration for type stripping. + * @returns {string} The stripped TypeScript code. + */ +function stripTypeScriptTypes(code, options = kEmptyObject) { + emitExperimentalWarning('stripTypeScriptTypes'); + validateString(code, 'code'); + validateObject(options, 'options'); + + const { + sourceMap = false, + sourceUrl = '', + } = options; + let { mode = 'strip' } = options; + validateOneOf(mode, 'options.mode', ['strip', 'transform']); + validateBoolean(sourceMap, 'options.sourceMap'); + validateString(sourceUrl, 'options.sourceUrl'); + if (mode === 'strip') { + validateOneOf(sourceMap, 'options.sourceMap', [false, undefined]); + // Rename mode from 'strip' to 'strip-only'. + // The reason is to match `process.features.typescript` which returns `strip`, + // but the parser expects `strip-only`. + mode = 'strip-only'; + } + + return processTypeScriptCode(code, { + mode, + sourceMap, + filename: sourceUrl, + }); +} + +/** + * Processes TypeScript code by stripping types or transforming. + * Handles source maps if needed. + * @param {string} code TypeScript code to process. + * @param {object} options The configuration object. + * @returns {string} The processed code. + */ +function processTypeScriptCode(code, options) { + const { code: transformedCode, map } = parseTypeScript(code, options); + + if (map) { + return addSourceMap(transformedCode, map); + } + + if (options.filename) { + return `${transformedCode}\n\n//# sourceURL=${options.filename}`; + } + + return transformedCode; +} + +/** + * Performs type-stripping to TypeScript source code internally. + * It is used by internal loaders. + * @param {string} source TypeScript code to parse. + * @param {string} filename The filename of the source code. + * @returns {TransformOutput} The stripped TypeScript code. + */ +function stripTypeScriptModuleTypes(source, filename) { + assert(typeof source === 'string'); + if (isUnderNodeModules(filename)) { + throw new ERR_UNSUPPORTED_NODE_MODULES_TYPE_STRIPPING(filename); + } + const options = { + mode: getTypeScriptParsingMode(), + sourceMap: getOptionValue('--enable-source-maps'), + filename, + }; + return processTypeScriptCode(source, options); +} + +/** + * + * @param {string} code The compiled code. + * @param {string} sourceMap The source map. + * @returns {string} The code with the source map attached. + */ +function addSourceMap(code, sourceMap) { + // TODO(@marco-ippolito) When Buffer.transcode supports utf8 to + // base64 transformation, we should change this line. + const base64SourceMap = internalBinding('buffer').btoa(sourceMap); + return `${code}\n\n//# sourceMappingURL=data:application/json;base64,${base64SourceMap}`; +} + +module.exports = { + stripTypeScriptModuleTypes, + stripTypeScriptTypes, +}; diff --git a/lib/internal/process/permission.js b/lib/internal/process/permission.js index 7a6dd80d1d01f3..bfdfe29fe4739f 100644 --- a/lib/internal/process/permission.js +++ b/lib/internal/process/permission.js @@ -9,16 +9,16 @@ const { validateString, validateBuffer } = require('internal/validators'); const { Buffer } = require('buffer'); const { isBuffer } = Buffer; -let experimentalPermission; +let _permission; module.exports = ObjectFreeze({ __proto__: null, isEnabled() { - if (experimentalPermission === undefined) { + if (_permission === undefined) { const { getOptionValue } = require('internal/options'); - experimentalPermission = getOptionValue('--experimental-permission'); + _permission = getOptionValue('--permission'); } - return experimentalPermission; + return _permission; }, has(scope, reference) { validateString(scope, 'scope'); diff --git a/lib/internal/process/pre_execution.js b/lib/internal/process/pre_execution.js index f46a621e524207..5d67892bb8e1fd 100644 --- a/lib/internal/process/pre_execution.js +++ b/lib/internal/process/pre_execution.js @@ -382,7 +382,7 @@ function setupWebCrypto() { } function setupSQLite() { - if (!getOptionValue('--experimental-sqlite')) { + if (getOptionValue('--no-experimental-sqlite')) { return; } @@ -610,14 +610,13 @@ function initializeClusterIPC() { } function initializePermission() { - const experimentalPermission = getOptionValue('--experimental-permission'); - if (experimentalPermission) { + const permission = getOptionValue('--permission'); + if (permission) { process.binding = function binding(_module) { throw new ERR_ACCESS_DENIED('process.binding'); }; // Guarantee path module isn't monkey-patched to bypass permission model ObjectFreeze(require('path')); - emitExperimentalWarning('Permission'); const { has } = require('internal/process/permission'); const warnFlags = [ '--allow-addons', @@ -669,7 +668,7 @@ function initializePermission() { ArrayPrototypeForEach(availablePermissionFlags, (flag) => { const value = getOptionValue(flag); if (value.length) { - throw new ERR_MISSING_OPTION('--experimental-permission'); + throw new ERR_MISSING_OPTION('--permission'); } }); } diff --git a/lib/internal/process/report.js b/lib/internal/process/report.js index 3ca5273eb37ae9..58104bd1095aa1 100644 --- a/lib/internal/process/report.js +++ b/lib/internal/process/report.js @@ -105,6 +105,13 @@ const report = { nr.setReportOnUncaughtException(trigger); }, + get excludeEnv() { + return nr.getExcludeEnv(); + }, + set excludeEnv(b) { + validateBoolean(b, 'excludeEnv'); + nr.setExcludeEnv(b); + }, }; function addSignalHandler(sig) { diff --git a/lib/internal/quic/quic.js b/lib/internal/quic/quic.js index 0d5d0bbb56deb2..7fb64a8bea0c7b 100644 --- a/lib/internal/quic/quic.js +++ b/lib/internal/quic/quic.js @@ -5,28 +5,24 @@ /* c8 ignore start */ const { - ArrayBuffer, + ArrayBufferPrototypeTransfer, ArrayIsArray, ArrayPrototypePush, - BigUint64Array, - DataView, - DataViewPrototypeGetBigInt64, - DataViewPrototypeGetBigUint64, - DataViewPrototypeGetUint8, - DataViewPrototypeSetUint8, ObjectDefineProperties, - Symbol, + SafeSet, + SymbolIterator, Uint8Array, } = primordials; +// QUIC requires that Node.js be compiled with crypto support. const { assertCrypto, - customInspectSymbol: kInspect, SymbolAsyncDispose, } = require('internal/util'); -const { inspect } = require('internal/util/inspect'); assertCrypto(); +const { inspect } = require('internal/util/inspect'); + const { Endpoint: Endpoint_, setCallbacks, @@ -54,99 +50,6 @@ const { CLOSECONTEXT_RECEIVE_FAILURE: kCloseContextReceiveFailure, CLOSECONTEXT_SEND_FAILURE: kCloseContextSendFailure, CLOSECONTEXT_START_FAILURE: kCloseContextStartFailure, - - // All of the IDX_STATS_* constants are the index positions of the stats - // fields in the relevant BigUint64Array's that underlie the *Stats objects. - // These are not exposed to end users. - IDX_STATS_ENDPOINT_CREATED_AT, - IDX_STATS_ENDPOINT_DESTROYED_AT, - IDX_STATS_ENDPOINT_BYTES_RECEIVED, - IDX_STATS_ENDPOINT_BYTES_SENT, - IDX_STATS_ENDPOINT_PACKETS_RECEIVED, - IDX_STATS_ENDPOINT_PACKETS_SENT, - IDX_STATS_ENDPOINT_SERVER_SESSIONS, - IDX_STATS_ENDPOINT_CLIENT_SESSIONS, - IDX_STATS_ENDPOINT_SERVER_BUSY_COUNT, - IDX_STATS_ENDPOINT_RETRY_COUNT, - IDX_STATS_ENDPOINT_VERSION_NEGOTIATION_COUNT, - IDX_STATS_ENDPOINT_STATELESS_RESET_COUNT, - IDX_STATS_ENDPOINT_IMMEDIATE_CLOSE_COUNT, - - IDX_STATS_SESSION_CREATED_AT, - IDX_STATS_SESSION_CLOSING_AT, - IDX_STATS_SESSION_DESTROYED_AT, - IDX_STATS_SESSION_HANDSHAKE_COMPLETED_AT, - IDX_STATS_SESSION_HANDSHAKE_CONFIRMED_AT, - IDX_STATS_SESSION_GRACEFUL_CLOSING_AT, - IDX_STATS_SESSION_BYTES_RECEIVED, - IDX_STATS_SESSION_BYTES_SENT, - IDX_STATS_SESSION_BIDI_IN_STREAM_COUNT, - IDX_STATS_SESSION_BIDI_OUT_STREAM_COUNT, - IDX_STATS_SESSION_UNI_IN_STREAM_COUNT, - IDX_STATS_SESSION_UNI_OUT_STREAM_COUNT, - IDX_STATS_SESSION_LOSS_RETRANSMIT_COUNT, - IDX_STATS_SESSION_MAX_BYTES_IN_FLIGHT, - IDX_STATS_SESSION_BYTES_IN_FLIGHT, - IDX_STATS_SESSION_BLOCK_COUNT, - IDX_STATS_SESSION_CWND, - IDX_STATS_SESSION_LATEST_RTT, - IDX_STATS_SESSION_MIN_RTT, - IDX_STATS_SESSION_RTTVAR, - IDX_STATS_SESSION_SMOOTHED_RTT, - IDX_STATS_SESSION_SSTHRESH, - IDX_STATS_SESSION_DATAGRAMS_RECEIVED, - IDX_STATS_SESSION_DATAGRAMS_SENT, - IDX_STATS_SESSION_DATAGRAMS_ACKNOWLEDGED, - IDX_STATS_SESSION_DATAGRAMS_LOST, - - IDX_STATS_STREAM_CREATED_AT, - IDX_STATS_STREAM_RECEIVED_AT, - IDX_STATS_STREAM_ACKED_AT, - IDX_STATS_STREAM_CLOSING_AT, - IDX_STATS_STREAM_DESTROYED_AT, - IDX_STATS_STREAM_BYTES_RECEIVED, - IDX_STATS_STREAM_BYTES_SENT, - IDX_STATS_STREAM_MAX_OFFSET, - IDX_STATS_STREAM_MAX_OFFSET_ACK, - IDX_STATS_STREAM_MAX_OFFSET_RECV, - IDX_STATS_STREAM_FINAL_SIZE, - - IDX_STATE_SESSION_PATH_VALIDATION, - IDX_STATE_SESSION_VERSION_NEGOTIATION, - IDX_STATE_SESSION_DATAGRAM, - IDX_STATE_SESSION_SESSION_TICKET, - IDX_STATE_SESSION_CLOSING, - IDX_STATE_SESSION_GRACEFUL_CLOSE, - IDX_STATE_SESSION_SILENT_CLOSE, - IDX_STATE_SESSION_STATELESS_RESET, - IDX_STATE_SESSION_DESTROYED, - IDX_STATE_SESSION_HANDSHAKE_COMPLETED, - IDX_STATE_SESSION_HANDSHAKE_CONFIRMED, - IDX_STATE_SESSION_STREAM_OPEN_ALLOWED, - IDX_STATE_SESSION_PRIORITY_SUPPORTED, - IDX_STATE_SESSION_WRAPPED, - IDX_STATE_SESSION_LAST_DATAGRAM_ID, - - IDX_STATE_ENDPOINT_BOUND, - IDX_STATE_ENDPOINT_RECEIVING, - IDX_STATE_ENDPOINT_LISTENING, - IDX_STATE_ENDPOINT_CLOSING, - IDX_STATE_ENDPOINT_BUSY, - IDX_STATE_ENDPOINT_PENDING_CALLBACKS, - - IDX_STATE_STREAM_ID, - IDX_STATE_STREAM_FIN_SENT, - IDX_STATE_STREAM_FIN_RECEIVED, - IDX_STATE_STREAM_READ_ENDED, - IDX_STATE_STREAM_WRITE_ENDED, - IDX_STATE_STREAM_DESTROYED, - IDX_STATE_STREAM_PAUSED, - IDX_STATE_STREAM_RESET, - IDX_STATE_STREAM_HAS_READER, - IDX_STATE_STREAM_WANTS_BLOCK, - IDX_STATE_STREAM_WANTS_HEADERS, - IDX_STATE_STREAM_WANTS_RESET, - IDX_STATE_STREAM_WANTS_TRAILERS, } = internalBinding('quic'); const { @@ -160,12 +63,16 @@ const { const { codes: { + ERR_ILLEGAL_CONSTRUCTOR, ERR_INVALID_ARG_TYPE, ERR_INVALID_ARG_VALUE, ERR_INVALID_STATE, + ERR_QUIC_APPLICATION_ERROR, ERR_QUIC_CONNECTION_FAILED, ERR_QUIC_ENDPOINT_CLOSED, ERR_QUIC_OPEN_STREAM_FAILED, + ERR_QUIC_TRANSPORT_ERROR, + ERR_QUIC_VERSION_NEGOTIATION_ERROR, }, } = require('internal/errors'); @@ -180,34 +87,74 @@ const { isCryptoKey, } = require('internal/crypto/keys'); -const { - kHandle: kKeyObjectHandle, - kKeyObject: kKeyObjectInner, -} = require('internal/crypto/util'); - const { validateFunction, validateObject, + validateString, + validateBoolean, } = require('internal/validators'); const kEmptyObject = { __proto__: null }; -const kEnumerable = { __proto__: null, enumerable: true }; - -const kBlocked = Symbol('kBlocked'); -const kDatagram = Symbol('kDatagram'); -const kDatagramStatus = Symbol('kDatagramStatus'); -const kError = Symbol('kError'); -const kFinishClose = Symbol('kFinishClose'); -const kHandshake = Symbol('kHandshake'); -const kHeaders = Symbol('kHeaders'); -const kOwner = Symbol('kOwner'); -const kNewSession = Symbol('kNewSession'); -const kNewStream = Symbol('kNewStream'); -const kPathValidation = Symbol('kPathValidation'); -const kReset = Symbol('kReset'); -const kSessionTicket = Symbol('kSessionTicket'); -const kTrailers = Symbol('kTrailers'); -const kVersionNegotiation = Symbol('kVersionNegotiation'); + +const { + kBlocked, + kDatagram, + kDatagramStatus, + kError, + kFinishClose, + kHandshake, + kHeaders, + kOwner, + kRemoveSession, + kNewSession, + kRemoveStream, + kNewStream, + kPathValidation, + kReset, + kSessionTicket, + kTrailers, + kVersionNegotiation, + kInspect, + kKeyObjectHandle, + kKeyObjectInner, + kPrivateConstructor, +} = require('internal/quic/symbols'); + +const { + QuicEndpointStats, + QuicStreamStats, + QuicSessionStats, +} = require('internal/quic/stats'); + +const { + QuicEndpointState, + QuicSessionState, + QuicStreamState, +} = require('internal/quic/state'); + +const { assert } = require('internal/assert'); + +const dc = require('diagnostics_channel'); +const onEndpointCreatedChannel = dc.channel('quic.endpoint.created'); +const onEndpointListeningChannel = dc.channel('quic.endpoint.listen'); +const onEndpointClosingChannel = dc.channel('quic.endpoint.closing'); +const onEndpointClosedChannel = dc.channel('quic.endpoint.closed'); +const onEndpointErrorChannel = dc.channel('quic.endpoint.error'); +const onEndpointBusyChangeChannel = dc.channel('quic.endpoint.busy.change'); +const onEndpointClientSessionChannel = dc.channel('quic.session.created.client'); +const onEndpointServerSessionChannel = dc.channel('quic.session.created.server'); +const onSessionOpenStreamChannel = dc.channel('quic.session.open.stream'); +const onSessionReceivedStreamChannel = dc.channel('quic.session.received.stream'); +const onSessionSendDatagramChannel = dc.channel('quic.session.send.datagram'); +const onSessionUpdateKeyChannel = dc.channel('quic.session.update.key'); +const onSessionClosingChannel = dc.channel('quic.session.closing'); +const onSessionClosedChannel = dc.channel('quic.session.closed'); +const onSessionReceiveDatagramChannel = dc.channel('quic.session.receive.datagram'); +const onSessionReceiveDatagramStatusChannel = dc.channel('quic.session.receive.datagram.status'); +const onSessionPathValidationChannel = dc.channel('quic.session.path.validation'); +const onSessionTicketChannel = dc.channel('quic.session.ticket'); +const onSessionVersionNegotiationChannel = dc.channel('quic.session.version.negotiation'); +const onSessionHandshakeChannel = dc.channel('quic.session.handshake'); /** * @typedef {import('../socketaddress.js').SocketAddress} SocketAddress @@ -230,8 +177,8 @@ const kVersionNegotiation = Symbol('kVersionNegotiation'); * @property {bigint|number} [handshakeTimeout] The handshake timeout * @property {bigint|number} [maxStreamWindow] The maximum stream window * @property {bigint|number} [maxWindow] The maximum window - * @property {number} [rxDiagnosticLoss] The receive diagnostic loss (range 0.0-1.0) - * @property {number} [txDiagnosticLoss] The transmit diagnostic loss (range 0.0-1.0) + * @property {number} [rxDiagnosticLoss] The receive diagnostic loss probability (range 0.0-1.0) + * @property {number} [txDiagnosticLoss] The transmit diagnostic loss probability (range 0.0-1.0) * @property {number} [udpReceiveBufferSize] The UDP receive buffer size * @property {number} [udpSendBufferSize] The UDP send buffer size * @property {number} [udpTTL] The UDP TTL @@ -317,64 +264,65 @@ const kVersionNegotiation = Symbol('kVersionNegotiation'); /** * Called when the Endpoint receives a new server-side Session. * @callback OnSessionCallback - * @param {Session} session - * @param {Endpoint} endpoint + * @this {QuicEndpoint} + * @param {QuicSession} session * @returns {void} */ /** * @callback OnStreamCallback + * @this {QuicSession} * @param {QuicStream} stream - * @param {Session} session * @returns {void} */ /** * @callback OnDatagramCallback + * @this {QuicSession} * @param {Uint8Array} datagram - * @param {Session} session - * @param {boolean} early + * @param {boolean} early A datagram is early if it was received before the TLS handshake completed * @returns {void} */ /** * @callback OnDatagramStatusCallback + * @this {QuicSession} * @param {bigint} id * @param {'lost'|'acknowledged'} status - * @param {Session} session * @returns {void} */ /** * @callback OnPathValidationCallback + * @this {QuicSession} * @param {'aborted'|'failure'|'success'} result * @param {SocketAddress} newLocalAddress * @param {SocketAddress} newRemoteAddress * @param {SocketAddress} oldLocalAddress * @param {SocketAddress} oldRemoteAddress * @param {boolean} preferredAddress - * @param {Session} session * @returns {void} */ /** * @callback OnSessionTicketCallback + * @this {QuicSession} * @param {object} ticket - * @param {Session} session * @returns {void} */ /** * @callback OnVersionNegotiationCallback + * @this {QuicSession} * @param {number} version * @param {number[]} requestedVersions * @param {number[]} supportedVersions - * @param {Session} session * @returns {void} */ /** * @callback OnHandshakeCallback + * @this {QuicSession} * @param {string} sni * @param {string} alpn * @param {string} cipher @@ -382,7 +330,6 @@ const kVersionNegotiation = Symbol('kVersionNegotiation'); * @param {string} validationErrorReason * @param {number} validationErrorCode * @param {boolean} earlyDataAccepted - * @param {Session} session * @returns {void} */ @@ -413,6 +360,14 @@ const kVersionNegotiation = Symbol('kVersionNegotiation'); * @returns {void} */ +/** + * @typedef {object} StreamCallbackConfiguration + * @property {OnBlockedCallback} [onblocked] The blocked callback + * @property {OnStreamErrorCallback} [onreset] The reset callback + * @property {OnHeadersCallback} [onheaders] The headers callback + * @property {OnTrailersCallback} [ontrailers] The trailers callback + */ + /** * Provdes the callback configuration for Sessions. * @typedef {object} SessionCallbackConfiguration @@ -426,38 +381,126 @@ const kVersionNegotiation = Symbol('kVersionNegotiation'); */ /** - * @typedef {object} StreamCallbackConfiguration - * @property {OnStreamErrorCallback} onerror The error callback + * @typedef {object} ProcessedSessionCallbackConfiguration + * @property {OnStreamCallback} onstream The stream callback + * @property {OnDatagramCallback} [ondatagram] The datagram callback + * @property {OnDatagramStatusCallback} [ondatagramstatus] The datagram status callback + * @property {OnPathValidationCallback} [onpathvalidation] The path validation callback + * @property {OnSessionTicketCallback} [onsessionticket] The session ticket callback + * @property {OnVersionNegotiationCallback} [onversionnegotiation] The version negotation callback + * @property {OnHandshakeCallback} [onhandshake] The handshake callback + * @property {StreamCallbackConfiguration} stream The processed stream callbacks + */ + +/** + * Provides the callback configuration for the Endpoint. + * @typedef {object} EndpointCallbackConfiguration + * @property {OnSessionCallback} onsession The session callback + * @property {OnStreamCallback} onstream The stream callback + * @property {OnDatagramCallback} [ondatagram] The datagram callback + * @property {OnDatagramStatusCallback} [ondatagramstatus] The datagram status callback + * @property {OnPathValidationCallback} [onpathvalidation] The path validation callback + * @property {OnSessionTicketCallback} [onsessionticket] The session ticket callback + * @property {OnVersionNegotiationCallback} [onversionnegotiation] The version negotiation callback + * @property {OnHandshakeCallback} [onhandshake] The handshake callback * @property {OnBlockedCallback} [onblocked] The blocked callback * @property {OnStreamErrorCallback} [onreset] The reset callback * @property {OnHeadersCallback} [onheaders] The headers callback * @property {OnTrailersCallback} [ontrailers] The trailers callback + * @property {SocketAddress} [address] The local address to bind to + * @property {bigint|number} [retryTokenExpiration] The retry token expiration + * @property {bigint|number} [tokenExpiration] The token expiration + * @property {bigint|number} [maxConnectionsPerHost] The maximum number of connections per host + * @property {bigint|number} [maxConnectionsTotal] The maximum number of total connections + * @property {bigint|number} [maxStatelessResetsPerHost] The maximum number of stateless resets per host + * @property {bigint|number} [addressLRUSize] The size of the address LRU cache + * @property {bigint|number} [maxRetries] The maximum number of retries + * @property {bigint|number} [maxPayloadSize] The maximum payload size + * @property {bigint|number} [unacknowledgedPacketThreshold] The unacknowledged packet threshold + * @property {bigint|number} [handshakeTimeout] The handshake timeout + * @property {bigint|number} [maxStreamWindow] The maximum stream window + * @property {bigint|number} [maxWindow] The maximum window + * @property {number} [rxDiagnosticLoss] The receive diagnostic loss probability (range 0.0-1.0) + * @property {number} [txDiagnosticLoss] The transmit diagnostic loss probability (range 0.0-1.0) + * @property {number} [udpReceiveBufferSize] The UDP receive buffer size + * @property {number} [udpSendBufferSize] The UDP send buffer size + * @property {number} [udpTTL] The UDP TTL + * @property {boolean} [noUdpPayloadSizeShaping] Disable UDP payload size shaping + * @property {boolean} [validateAddress] Validate the address + * @property {boolean} [disableActiveMigration] Disable active migration + * @property {boolean} [ipv6Only] Use IPv6 only + * @property {'reno'|'cubic'|'bbr'|number} [cc] The congestion control algorithm + * @property {ArrayBufferView} [resetTokenSecret] The reset token secret + * @property {ArrayBufferView} [tokenSecret] The token secret */ /** - * Provides the callback configuration for the Endpoint. - * @typedef {object} EndpointCallbackConfiguration + * @typedef {object} ProcessedEndpointCallbackConfiguration * @property {OnSessionCallback} onsession The session callback - * @property {SessionCallbackConfiguration} session The session callback configuration - * @property {StreamCallbackConfiguration} stream The stream callback configuration + * @property {SessionCallbackConfiguration} session The processesd session callbacks */ setCallbacks({ + // QuicEndpoint callbacks + + /** + * Called when the QuicEndpoint C++ handle has closed and we need to finish + * cleaning up the JS side. + * @param {number} context Identifies the reason the endpoint was closed. + * @param {number} status If context indicates an error, provides the error code. + */ onEndpointClose(context, status) { this[kOwner][kFinishClose](context, status); }, + /** + * Called when the QuicEndpoint C++ handle receives a new server-side session + * @param {*} session The QuicSession C++ handle + */ onSessionNew(session) { this[kOwner][kNewSession](session); }, + + // QuicSession callbacks + + /** + * Called when the underlying session C++ handle is closed either normally + * or with an error. + * @param {number} errorType + * @param {number} code + * @param {string} [reason] + */ onSessionClose(errorType, code, reason) { this[kOwner][kFinishClose](errorType, code, reason); }, + + /** + * Called when a datagram is received on this session. + * @param {Uint8Array} uint8Array + * @param {boolean} early + */ onSessionDatagram(uint8Array, early) { this[kOwner][kDatagram](uint8Array, early); }, + + /** + * Called when the status of a datagram is received. + * @param {bigint} id + * @param {'lost' | 'acknowledged'} status + */ onSessionDatagramStatus(id, status) { this[kOwner][kDatagramStatus](id, status); }, + + /** + * Called when the session handshake completes. + * @param {string} sni + * @param {string} alpn + * @param {string} cipher + * @param {string} cipherVersion + * @param {string} validationErrorReason + * @param {number} validationErrorCode + * @param {boolean} earlyDataAccepted + */ onSessionHandshake(sni, alpn, cipher, cipherVersion, validationErrorReason, validationErrorCode, @@ -466,12 +509,37 @@ setCallbacks({ validationErrorReason, validationErrorCode, earlyDataAccepted); }, - onSessionPathValidation(...args) { - this[kOwner][kPathValidation](...args); + + /** + * Called when the session path validation completes. + * @param {'aborted'|'failure'|'success'} result + * @param {SocketAddress} newLocalAddress + * @param {SocketAddress} newRemoteAddress + * @param {SocketAddress} oldLocalAddress + * @param {SocketAddress} oldRemoteAddress + * @param {boolean} preferredAddress + */ + onSessionPathValidation(result, newLocalAddress, newRemoteAddress, + oldLocalAddress, oldRemoteAddress, preferredAddress) { + this[kOwner][kPathValidation](result, newLocalAddress, newRemoteAddress, + oldLocalAddress, oldRemoteAddress, + preferredAddress); }, + + /** + * Called when the session generates a new TLS session ticket + * @param {object} ticket An opaque session ticket + */ onSessionTicket(ticket) { this[kOwner][kSessionTicket](ticket); }, + + /** + * Called when the session receives a session version negotiation request + * @param {*} version + * @param {*} requestedVersions + * @param {*} supportedVersions + */ onSessionVersionNegotiation(version, requestedVersions, supportedVersions) { @@ -479,185 +547,177 @@ setCallbacks({ // Note that immediately following a version negotiation event, the // session will be destroyed. }, + + /** + * Called when a new stream has been received for the session + * @param {object} stream The QuicStream C++ handle + * @param {number} direction The stream direction (0 == bidi, 1 == uni) + */ onStreamCreated(stream, direction) { const session = this[kOwner]; - // The event is ignored if the session has been destroyed. + // The event is ignored and the stream destroyed if the session has been destroyed. if (session.destroyed) { stream.destroy(); return; }; session[kNewStream](stream, direction); }, + + // QuicStream callbacks onStreamBlocked() { + // Called when the stream C++ handle has been blocked by flow control. this[kOwner][kBlocked](); }, onStreamClose(error) { + // Called when the stream C++ handle has been closed. this[kOwner][kError](error); }, onStreamReset(error) { + // Called when the stream C++ handle has received a stream reset. this[kOwner][kReset](error); }, onStreamHeaders(headers, kind) { + // Called when the stream C++ handle has received a full block of headers. this[kOwner][kHeaders](headers, kind); }, onStreamTrailers() { + // Called when the stream C++ handle is ready to receive trailing headers. this[kOwner][kTrailers](); }, }); -/** - * @param {'use'|'ignore'|'default'} policy - * @returns {number} - */ -function getPreferredAddressPolicy(policy = 'default') { - switch (policy) { - case 'use': return PREFERRED_ADDRESS_USE; - case 'ignore': return PREFERRED_ADDRESS_IGNORE; - case 'default': return DEFAULT_PREFERRED_ADDRESS_POLICY; - } - throw new ERR_INVALID_ARG_VALUE('options.preferredAddressPolicy', policy); -} - -/** - * @param {TlsOptions} tls - */ -function processTlsOptions(tls) { - validateObject(tls, 'options.tls'); - const { - sni, - alpn, - ciphers, - groups, - keylog, - verifyClient, - tlsTrace, - verifyPrivateKey, - keys, - certs, - ca, - crl, - } = tls; - - const keyHandles = []; - if (keys !== undefined) { - const keyInputs = ArrayIsArray(keys) ? keys : [keys]; - for (const key of keyInputs) { - if (isKeyObject(key)) { - if (key.type !== 'private') { - throw new ERR_INVALID_ARG_VALUE('options.tls.keys', key, 'must be a private key'); - } - ArrayPrototypePush(keyHandles, key[kKeyObjectHandle]); - } else if (isCryptoKey(key)) { - if (key.type !== 'private') { - throw new ERR_INVALID_ARG_VALUE('options.tls.keys', key, 'must be a private key'); - } - ArrayPrototypePush(keyHandles, key[kKeyObjectInner][kKeyObjectHandle]); - } else { - throw new ERR_INVALID_ARG_TYPE('options.tls.keys', ['KeyObject', 'CryptoKey'], key); - } - } - } - - return { - sni, - alpn, - ciphers, - groups, - keylog, - verifyClient, - tlsTrace, - verifyPrivateKey, - keys: keyHandles, - certs, - ca, - crl, - }; -} - -class QuicStreamStats { - /** @type {BigUint64Array} */ +class QuicStream { + /** @type {object} */ #handle; + /** @type {QuicSession} */ + #session; + /** @type {QuicStreamStats} */ + #stats; + /** @type {QuicStreamState} */ + #state; + /** @type {number} */ + #direction; + /** @type {OnBlockedCallback|undefined} */ + #onblocked; + /** @type {OnStreamErrorCallback|undefined} */ + #onreset; + /** @type {OnHeadersCallback|undefined} */ + #onheaders; + /** @type {OnTrailersCallback|undefined} */ + #ontrailers; /** - * @param {ArrayBuffer} buffer + * @param {symbol} privateSymbol + * @param {StreamCallbackConfiguration} config + * @param {object} handle + * @param {QuicSession} session */ - constructor(buffer) { - if (!isArrayBuffer(buffer)) { - throw new ERR_INVALID_ARG_TYPE('buffer', ['ArrayBuffer'], buffer); + constructor(privateSymbol, config, handle, session, direction) { + if (privateSymbol !== kPrivateConstructor) { + throw new ERR_ILLEGAL_CONSTRUCTOR(); } - this.#handle = new BigUint64Array(buffer); - } - /** @type {bigint} */ - get createdAt() { - return this.#handle[IDX_STATS_STREAM_CREATED_AT]; - } + const { + onblocked, + onreset, + onheaders, + ontrailers, + } = config; - /** @type {bigint} */ - get receivedAt() { - return this.#handle[IDX_STATS_STREAM_RECEIVED_AT]; - } + if (onblocked !== undefined) { + this.#onblocked = onblocked.bind(this); + } + if (onreset !== undefined) { + this.#onreset = onreset.bind(this); + } + if (onheaders !== undefined) { + this.#onheaders = onheaders.bind(this); + } + if (ontrailers !== undefined) { + this.#ontrailers = ontrailers.bind(this); + } + this.#handle = handle; + this.#handle[kOwner] = true; - /** @type {bigint} */ - get ackedAt() { - return this.#handle[IDX_STATS_STREAM_ACKED_AT]; - } + this.#session = session; + this.#direction = direction; - /** @type {bigint} */ - get closingAt() { - return this.#handle[IDX_STATS_STREAM_CLOSING_AT]; + this.#stats = new QuicStreamStats(kPrivateConstructor, this.#handle.stats); + + this.#state = new QuicStreamState(kPrivateConstructor, this.#handle.stats); + this.#state.wantsBlock = !!this.#onblocked; + this.#state.wantsReset = !!this.#onreset; + this.#state.wantsHeaders = !!this.#onheaders; + this.#state.wantsTrailers = !!this.#ontrailers; } + /** @type {QuicStreamStats} */ + get stats() { return this.#stats; } + + /** @type {QuicStreamState} */ + get state() { return this.#state; } + + /** @type {QuicSession} */ + get session() { return this.#session; } + /** @type {bigint} */ - get destroyedAt() { - return this.#handle[IDX_STATS_STREAM_DESTROYED_AT]; + get id() { return this.#state.id; } + + /** @type {'bidi'|'uni'} */ + get direction() { + return this.#direction === 0 ? 'bidi' : 'uni'; } - /** @type {bigint} */ - get bytesReceived() { - return this.#handle[IDX_STATS_STREAM_BYTES_RECEIVED]; + /** @returns {boolean} */ + get destroyed() { + return this.#handle === undefined; } - /** @type {bigint} */ - get bytesSent() { - return this.#handle[IDX_STATS_STREAM_BYTES_SENT]; + destroy(error) { + if (this.destroyed) return; + // TODO(@jasnell): pass an error code + this.#stats[kFinishClose](); + this.#state[kFinishClose](); + this.#onblocked = undefined; + this.#onreset = undefined; + this.#onheaders = undefined; + this.#ontrailers = undefined; + this.#session[kRemoveStream](this); + this.#session = undefined; + this.#handle.destroy(); + this.#handle = undefined; } - /** @type {bigint} */ - get maxOffset() { - return this.#handle[IDX_STATS_STREAM_MAX_OFFSET]; + [kBlocked]() { + // The blocked event should only be called if the stream was created with + // an onblocked callback. The callback should always exist here. + assert(this.#onblocked, 'Unexpected stream blocked event'); + this.#onblocked(this); } - /** @type {bigint} */ - get maxOffsetAcknowledged() { - return this.#handle[IDX_STATS_STREAM_MAX_OFFSET_ACK]; + [kError](error) { + this.destroy(error); } - /** @type {bigint} */ - get maxOffsetReceived() { - return this.#handle[IDX_STATS_STREAM_MAX_OFFSET_RECV]; + [kReset](error) { + // The reset event should only be called if the stream was created with + // an onreset callback. The callback should always exist here. + assert(this.#onreset, 'Unexpected stream reset event'); + this.#onreset(error, this); } - /** @type {bigint} */ - get finalSize() { - return this.#handle[IDX_STATS_STREAM_FINAL_SIZE]; + [kHeaders](headers, kind) { + // The headers event should only be called if the stream was created with + // an onheaders callback. The callback should always exist here. + assert(this.#onheaders, 'Unexpected stream headers event'); + this.#onheaders(headers, kind, this); } - toJSON() { - return { - __proto__: null, - createdAt: `${this.createdAt}`, - receivedAt: `${this.receivedAt}`, - ackedAt: `${this.ackedAt}`, - closingAt: `${this.closingAt}`, - destroyedAt: `${this.destroyedAt}`, - bytesReceived: `${this.bytesReceived}`, - bytesSent: `${this.bytesSent}`, - maxOffset: `${this.maxOffset}`, - maxOffsetAcknowledged: `${this.maxOffsetAcknowledged}`, - maxOffsetReceived: `${this.maxOffsetReceived}`, - finalSize: `${this.finalSize}`, - }; + [kTrailers]() { + // The trailers event should only be called if the stream was created with + // an ontrailers callback. The callback should always exist here. + assert(this.#ontrailers, 'Unexpected stream trailers event'); + this.#ontrailers(this); } [kInspect](depth, options) { @@ -669,731 +729,62 @@ class QuicStreamStats { depth: options.depth == null ? null : options.depth - 1, }; - return `StreamStats ${inspect({ - createdAt: this.createdAt, - receivedAt: this.receivedAt, - ackedAt: this.ackedAt, - closingAt: this.closingAt, - destroyedAt: this.destroyedAt, - bytesReceived: this.bytesReceived, - bytesSent: this.bytesSent, - maxOffset: this.maxOffset, - maxOffsetAcknowledged: this.maxOffsetAcknowledged, - maxOffsetReceived: this.maxOffsetReceived, - finalSize: this.finalSize, + return `Stream ${inspect({ + id: this.id, + direction: this.direction, + stats: this.stats, + state: this.state, + session: this.session, }, opts)}`; } - - [kFinishClose]() { - // Snapshot the stats into a new BigUint64Array since the underlying - // buffer will be destroyed. - this.#handle = new BigUint64Array(this.#handle); - } } -class QuicStreamState { - /** @type {DataView} */ +class QuicSession { + /** @type {QuicEndpoint} */ + #endpoint = undefined; + /** @type {boolean} */ + #isPendingClose = false; + /** @type {object|undefined} */ #handle; + /** @type {PromiseWithResolvers} */ + #pendingClose = Promise.withResolvers(); // eslint-disable-line node-core/prefer-primordials + /** @type {SocketAddress|undefined} */ + #remoteAddress = undefined; + /** @type {QuicSessionState} */ + #state; + /** @type {QuicSessionStats} */ + #stats; + /** @type {Set} */ + #streams = new SafeSet(); + /** @type {OnStreamCallback} */ + #onstream; + /** @type {OnDatagramCallback|undefined} */ + #ondatagram; + /** @type {OnDatagramStatusCallback|undefined} */ + #ondatagramstatus; + /** @type {OnPathValidationCallback|undefined} */ + #onpathvalidation; + /** @type {OnSessionTicketCallback|undefined} */ + #onsessionticket; + /** @type {OnVersionNegotiationCallback|undefined} */ + #onversionnegotiation; + /** @type {OnHandshakeCallback} */ + #onhandshake; + /** @type {StreamCallbackConfiguration} */ + #streamConfig; /** - * @param {ArrayBuffer} buffer + * @param {symbol} privateSymbol + * @param {ProcessedSessionCallbackConfiguration} config + * @param {object} handle + * @param {QuicEndpoint} endpoint */ - constructor(buffer) { - if (!isArrayBuffer(buffer)) { - throw new ERR_INVALID_ARG_TYPE('buffer', ['ArrayBuffer'], buffer); + constructor(privateSymbol, config, handle, endpoint) { + // Instances of QuicSession can only be created internally. + if (privateSymbol !== kPrivateConstructor) { + throw new ERR_ILLEGAL_CONSTRUCTOR(); } - this.#handle = new DataView(buffer); - } - - /** @type {bigint} */ - get id() { - return DataViewPrototypeGetBigInt64(this.#handle, IDX_STATE_STREAM_ID); - } - - /** @type {boolean} */ - get finSent() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_FIN_SENT); - } - - /** @type {boolean} */ - get finReceived() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_FIN_RECEIVED); - } - - /** @type {boolean} */ - get readEnded() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_READ_ENDED); - } - - /** @type {boolean} */ - get writeEnded() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_WRITE_ENDED); - } - - /** @type {boolean} */ - get destroyed() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_DESTROYED); - } - - /** @type {boolean} */ - get paused() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_PAUSED); - } - - /** @type {boolean} */ - get reset() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_RESET); - } - - /** @type {boolean} */ - get hasReader() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_HAS_READER); - } - - /** @type {boolean} */ - get wantsBlock() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_WANTS_BLOCK); - } - - /** @type {boolean} */ - set wantsBlock(val) { - DataViewPrototypeSetUint8(this.#handle, IDX_STATE_STREAM_WANTS_BLOCK, val ? 1 : 0); - } - - /** @type {boolean} */ - get wantsHeaders() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_WANTS_HEADERS); - } - - /** @type {boolean} */ - set wantsHeaders(val) { - DataViewPrototypeSetUint8(this.#handle, IDX_STATE_STREAM_WANTS_HEADERS, val ? 1 : 0); - } - - /** @type {boolean} */ - get wantsReset() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_WANTS_RESET); - } - - /** @type {boolean} */ - set wantsReset(val) { - DataViewPrototypeSetUint8(this.#handle, IDX_STATE_STREAM_WANTS_RESET, val ? 1 : 0); - } - - /** @type {boolean} */ - get wantsTrailers() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_WANTS_TRAILERS); - } - - /** @type {boolean} */ - set wantsTrailers(val) { - DataViewPrototypeSetUint8(this.#handle, IDX_STATE_STREAM_WANTS_TRAILERS, val ? 1 : 0); - } - - toJSON() { - return { - __proto__: null, - id: `${this.id}`, - finSent: this.finSent, - finReceived: this.finReceived, - readEnded: this.readEnded, - writeEnded: this.writeEnded, - destroyed: this.destroyed, - paused: this.paused, - reset: this.reset, - hasReader: this.hasReader, - wantsBlock: this.wantsBlock, - wantsHeaders: this.wantsHeaders, - wantsReset: this.wantsReset, - wantsTrailers: this.wantsTrailers, - }; - } - - [kInspect](depth, options) { - if (depth < 0) - return this; - - const opts = { - ...options, - depth: options.depth == null ? null : options.depth - 1, - }; - - return `StreamState ${inspect({ - id: this.id, - finSent: this.finSent, - finReceived: this.finReceived, - readEnded: this.readEnded, - writeEnded: this.writeEnded, - destroyed: this.destroyed, - paused: this.paused, - reset: this.reset, - hasReader: this.hasReader, - wantsBlock: this.wantsBlock, - wantsHeaders: this.wantsHeaders, - wantsReset: this.wantsReset, - wantsTrailers: this.wantsTrailers, - }, opts)}`; - } - - [kFinishClose]() { - // When the stream is destroyed, the state is no longer available. - this.#handle = new DataView(new ArrayBuffer(0)); - } -} - -class QuicStream { - /** @type {object} */ - #handle; - /** @type {Session} */ - #session; - /** @type {QuicStreamStats} */ - #stats; - /** @type {QuicStreamState} */ - #state; - /** @type {number} */ - #direction; - /** @type {OnStreamErrorCallback} */ - #onerror; - /** @type {OnBlockedCallback|undefined} */ - #onblocked; - /** @type {OnStreamErrorCallback|undefined} */ - #onreset; - /** @type {OnHeadersCallback|undefined} */ - #onheaders; - /** @type {OnTrailersCallback|undefined} */ - #ontrailers; - - /** - * @param {StreamCallbackConfiguration} config - * @param {object} handle - * @param {Session} session - */ - constructor(config, handle, session, direction) { - validateObject(config, 'config'); - this.#stats = new QuicStreamStats(handle.stats); - this.#state = new QuicStreamState(handle.stats); - const { - onblocked, - onerror, - onreset, - onheaders, - ontrailers, - } = config; - if (onblocked !== undefined) { - validateFunction(onblocked, 'config.onblocked'); - this.#state.wantsBlock = true; - this.#onblocked = onblocked; - } - if (onreset !== undefined) { - validateFunction(onreset, 'config.onreset'); - this.#state.wantsReset = true; - this.#onreset = onreset; - } - if (onheaders !== undefined) { - validateFunction(onheaders, 'config.onheaders'); - this.#state.wantsHeaders = true; - this.#onheaders = onheaders; - } - if (ontrailers !== undefined) { - validateFunction(ontrailers, 'config.ontrailers'); - this.#state.wantsTrailers = true; - this.#ontrailers = ontrailers; - } - validateFunction(onerror, 'config.onerror'); - this.#onerror = onerror; - this.#handle = handle; - this.#session = session; - this.#direction = direction; - this.#handle[kOwner] = true; - } - - /** @type {QuicStreamStats} */ - get stats() { return this.#stats; } - - /** @type {QuicStreamState} */ - get state() { return this.#state; } - - /** @type {Session} */ - get session() { return this.#session; } - - /** @type {bigint} */ - get id() { return this.#state.id; } - - /** @type {'bidi'|'uni'} */ - get direction() { - return this.#direction === 0 ? 'bidi' : 'uni'; - } - - [kBlocked]() { - this.#onblocked(this); - } - - [kError](error) { - this.#onerror(error, this); - } - - [kReset](error) { - this.#onreset(error, this); - } - - [kHeaders](headers, kind) { - this.#onheaders(headers, kind, this); - } - - [kTrailers]() { - this.#ontrailers(this); - } - - [kInspect](depth, options) { - if (depth < 0) - return this; - - const opts = { - ...options, - depth: options.depth == null ? null : options.depth - 1, - }; - - return `Stream ${inspect({ - id: this.id, - direction: this.direction, - stats: this.stats, - state: this.state, - session: this.session, - }, opts)}`; - } -} - -class SessionStats { - /** @type {BigUint64Array} */ - #handle; - - /** - * @param {BigUint64Array} buffer - */ - constructor(buffer) { - if (!isArrayBuffer(buffer)) { - throw new ERR_INVALID_ARG_TYPE('buffer', ['ArrayBuffer'], buffer); - } - this.#handle = new BigUint64Array(buffer); - } - - /** @type {bigint} */ - get createdAt() { - return this.#handle[IDX_STATS_SESSION_CREATED_AT]; - } - - /** @type {bigint} */ - get closingAt() { - return this.#handle[IDX_STATS_SESSION_CLOSING_AT]; - } - - /** @type {bigint} */ - get destroyedAt() { - return this.#handle[IDX_STATS_SESSION_DESTROYED_AT]; - } - - /** @type {bigint} */ - get handshakeCompletedAt() { - return this.#handle[IDX_STATS_SESSION_HANDSHAKE_COMPLETED_AT]; - } - - /** @type {bigint} */ - get handshakeConfirmedAt() { - return this.#handle[IDX_STATS_SESSION_HANDSHAKE_CONFIRMED_AT]; - } - - /** @type {bigint} */ - get gracefulClosingAt() { - return this.#handle[IDX_STATS_SESSION_GRACEFUL_CLOSING_AT]; - } - - /** @type {bigint} */ - get bytesReceived() { - return this.#handle[IDX_STATS_SESSION_BYTES_RECEIVED]; - } - - /** @type {bigint} */ - get bytesSent() { - return this.#handle[IDX_STATS_SESSION_BYTES_SENT]; - } - - /** @type {bigint} */ - get bidiInStreamCount() { - return this.#handle[IDX_STATS_SESSION_BIDI_IN_STREAM_COUNT]; - } - - /** @type {bigint} */ - get bidiOutStreamCount() { - return this.#handle[IDX_STATS_SESSION_BIDI_OUT_STREAM_COUNT]; - } - - /** @type {bigint} */ - get uniInStreamCount() { - return this.#handle[IDX_STATS_SESSION_UNI_IN_STREAM_COUNT]; - } - - /** @type {bigint} */ - get uniOutStreamCount() { - return this.#handle[IDX_STATS_SESSION_UNI_OUT_STREAM_COUNT]; - } - - /** @type {bigint} */ - get lossRetransmitCount() { - return this.#handle[IDX_STATS_SESSION_LOSS_RETRANSMIT_COUNT]; - } - - /** @type {bigint} */ - get maxBytesInFlights() { - return this.#handle[IDX_STATS_SESSION_MAX_BYTES_IN_FLIGHT]; - } - - /** @type {bigint} */ - get bytesInFlight() { - return this.#handle[IDX_STATS_SESSION_BYTES_IN_FLIGHT]; - } - - /** @type {bigint} */ - get blockCount() { - return this.#handle[IDX_STATS_SESSION_BLOCK_COUNT]; - } - - /** @type {bigint} */ - get cwnd() { - return this.#handle[IDX_STATS_SESSION_CWND]; - } - - /** @type {bigint} */ - get latestRtt() { - return this.#handle[IDX_STATS_SESSION_LATEST_RTT]; - } - - /** @type {bigint} */ - get minRtt() { - return this.#handle[IDX_STATS_SESSION_MIN_RTT]; - } - - /** @type {bigint} */ - get rttVar() { - return this.#handle[IDX_STATS_SESSION_RTTVAR]; - } - - /** @type {bigint} */ - get smoothedRtt() { - return this.#handle[IDX_STATS_SESSION_SMOOTHED_RTT]; - } - - /** @type {bigint} */ - get ssthresh() { - return this.#handle[IDX_STATS_SESSION_SSTHRESH]; - } - - /** @type {bigint} */ - get datagramsReceived() { - return this.#handle[IDX_STATS_SESSION_DATAGRAMS_RECEIVED]; - } - - /** @type {bigint} */ - get datagramsSent() { - return this.#handle[IDX_STATS_SESSION_DATAGRAMS_SENT]; - } - - /** @type {bigint} */ - get datagramsAcknowledged() { - return this.#handle[IDX_STATS_SESSION_DATAGRAMS_ACKNOWLEDGED]; - } - - /** @type {bigint} */ - get datagramsLost() { - return this.#handle[IDX_STATS_SESSION_DATAGRAMS_LOST]; - } - - toJSON() { - return { - __proto__: null, - createdAt: `${this.createdAt}`, - closingAt: `${this.closingAt}`, - destroyedAt: `${this.destroyedAt}`, - handshakeCompletedAt: `${this.handshakeCompletedAt}`, - handshakeConfirmedAt: `${this.handshakeConfirmedAt}`, - gracefulClosingAt: `${this.gracefulClosingAt}`, - bytesReceived: `${this.bytesReceived}`, - bytesSent: `${this.bytesSent}`, - bidiInStreamCount: `${this.bidiInStreamCount}`, - bidiOutStreamCount: `${this.bidiOutStreamCount}`, - uniInStreamCount: `${this.uniInStreamCount}`, - uniOutStreamCount: `${this.uniOutStreamCount}`, - lossRetransmitCount: `${this.lossRetransmitCount}`, - maxBytesInFlights: `${this.maxBytesInFlights}`, - bytesInFlight: `${this.bytesInFlight}`, - blockCount: `${this.blockCount}`, - cwnd: `${this.cwnd}`, - latestRtt: `${this.latestRtt}`, - minRtt: `${this.minRtt}`, - rttVar: `${this.rttVar}`, - smoothedRtt: `${this.smoothedRtt}`, - ssthresh: `${this.ssthresh}`, - datagramsReceived: `${this.datagramsReceived}`, - datagramsSent: `${this.datagramsSent}`, - datagramsAcknowledged: `${this.datagramsAcknowledged}`, - datagramsLost: `${this.datagramsLost}`, - }; - } - - [kInspect](depth, options) { - if (depth < 0) - return this; - - const opts = { - ...options, - depth: options.depth == null ? null : options.depth - 1, - }; - - return `SessionStats ${inspect({ - createdAt: this.createdAt, - closingAt: this.closingAt, - destroyedAt: this.destroyedAt, - handshakeCompletedAt: this.handshakeCompletedAt, - handshakeConfirmedAt: this.handshakeConfirmedAt, - gracefulClosingAt: this.gracefulClosingAt, - bytesReceived: this.bytesReceived, - bytesSent: this.bytesSent, - bidiInStreamCount: this.bidiInStreamCount, - bidiOutStreamCount: this.bidiOutStreamCount, - uniInStreamCount: this.uniInStreamCount, - uniOutStreamCount: this.uniOutStreamCount, - lossRetransmitCount: this.lossRetransmitCount, - maxBytesInFlights: this.maxBytesInFlights, - bytesInFlight: this.bytesInFlight, - blockCount: this.blockCount, - cwnd: this.cwnd, - latestRtt: this.latestRtt, - minRtt: this.minRtt, - rttVar: this.rttVar, - smoothedRtt: this.smoothedRtt, - ssthresh: this.ssthresh, - datagramsReceived: this.datagramsReceived, - datagramsSent: this.datagramsSent, - datagramsAcknowledged: this.datagramsAcknowledged, - datagramsLost: this.datagramsLost, - }, opts)}`; - } - - [kFinishClose]() { - // Snapshot the stats into a new BigUint64Array since the underlying - // buffer will be destroyed. - this.#handle = new BigUint64Array(this.#handle); - } -} - -class SessionState { - /** @type {DataView} */ - #handle; - - /** - * @param {ArrayBuffer} buffer - */ - constructor(buffer) { - if (!isArrayBuffer(buffer)) { - throw new ERR_INVALID_ARG_TYPE('buffer', ['ArrayBuffer'], buffer); - } - this.#handle = new DataView(buffer); - } - - /** @type {boolean} */ - get hasPathValidationListener() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_PATH_VALIDATION); - } - - /** @type {boolean} */ - set hasPathValidationListener(val) { - DataViewPrototypeSetUint8(this.#handle, IDX_STATE_SESSION_PATH_VALIDATION, val ? 1 : 0); - } - - /** @type {boolean} */ - get hasVersionNegotiationListener() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_VERSION_NEGOTIATION); - } - - /** @type {boolean} */ - set hasVersionNegotiationListener(val) { - DataViewPrototypeSetUint8(this.#handle, IDX_STATE_SESSION_VERSION_NEGOTIATION, val ? 1 : 0); - } - - /** @type {boolean} */ - get hasDatagramListener() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_DATAGRAM); - } - - /** @type {boolean} */ - set hasDatagramListener(val) { - DataViewPrototypeSetUint8(this.#handle, IDX_STATE_SESSION_DATAGRAM, val ? 1 : 0); - } - - /** @type {boolean} */ - get hasSessionTicketListener() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_SESSION_TICKET); - } - - /** @type {boolean} */ - set hasSessionTicketListener(val) { - DataViewPrototypeSetUint8(this.#handle, IDX_STATE_SESSION_SESSION_TICKET, val ? 1 : 0); - } - - /** @type {boolean} */ - get isClosing() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_CLOSING); - } - - /** @type {boolean} */ - get isGracefulClose() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_GRACEFUL_CLOSE); - } - - /** @type {boolean} */ - get isSilentClose() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_SILENT_CLOSE); - } - - /** @type {boolean} */ - get isStatelessReset() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_STATELESS_RESET); - } - - /** @type {boolean} */ - get isDestroyed() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_DESTROYED); - } - - /** @type {boolean} */ - get isHandshakeCompleted() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_HANDSHAKE_COMPLETED); - } - - /** @type {boolean} */ - get isHandshakeConfirmed() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_HANDSHAKE_CONFIRMED); - } - - /** @type {boolean} */ - get isStreamOpenAllowed() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_STREAM_OPEN_ALLOWED); - } - - /** @type {boolean} */ - get isPrioritySupported() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_PRIORITY_SUPPORTED); - } - - /** @type {boolean} */ - get isWrapped() { - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_WRAPPED); - } - - /** @type {bigint} */ - get lastDatagramId() { - return DataViewPrototypeGetBigUint64(this.#handle, IDX_STATE_SESSION_LAST_DATAGRAM_ID); - } - - toJSON() { - return { - __proto__: null, - hasPathValidationListener: this.hasPathValidationListener, - hasVersionNegotiationListener: this.hasVersionNegotiationListener, - hasDatagramListener: this.hasDatagramListener, - hasSessionTicketListener: this.hasSessionTicketListener, - isClosing: this.isClosing, - isGracefulClose: this.isGracefulClose, - isSilentClose: this.isSilentClose, - isStatelessReset: this.isStatelessReset, - isDestroyed: this.isDestroyed, - isHandshakeCompleted: this.isHandshakeCompleted, - isHandshakeConfirmed: this.isHandshakeConfirmed, - isStreamOpenAllowed: this.isStreamOpenAllowed, - isPrioritySupported: this.isPrioritySupported, - isWrapped: this.isWrapped, - lastDatagramId: `${this.lastDatagramId}`, - }; - } - - [kInspect](depth, options) { - if (depth < 0) - return this; - - const opts = { - ...options, - depth: options.depth == null ? null : options.depth - 1, - }; - - return `SessionState ${inspect({ - hasPathValidationListener: this.hasPathValidationListener, - hasVersionNegotiationListener: this.hasVersionNegotiationListener, - hasDatagramListener: this.hasDatagramListener, - hasSessionTicketListener: this.hasSessionTicketListener, - isClosing: this.isClosing, - isGracefulClose: this.isGracefulClose, - isSilentClose: this.isSilentClose, - isStatelessReset: this.isStatelessReset, - isDestroyed: this.isDestroyed, - isHandshakeCompleted: this.isHandshakeCompleted, - isHandshakeConfirmed: this.isHandshakeConfirmed, - isStreamOpenAllowed: this.isStreamOpenAllowed, - isPrioritySupported: this.isPrioritySupported, - isWrapped: this.isWrapped, - lastDatagramId: this.lastDatagramId, - }, opts)}`; - } - - [kFinishClose]() { - // Snapshot the state into a new DataView since the underlying - // buffer will be destroyed. - this.#handle = new DataView(new ArrayBuffer(0)); - } -} - -class Session { - /** @type {Endpoint} */ - #endpoint = undefined; - /** @type {boolean} */ - #isPendingClose = false; - /** @type {object|undefined} */ - #handle; - /** @type {PromiseWithResolvers} */ - #pendingClose; - /** @type {SocketAddress|undefined} */ - #remoteAddress = undefined; - /** @type {SessionState} */ - #state; - /** @type {SessionStats} */ - #stats; - /** @type {QuicStream[]} */ - #streams = []; - /** @type {OnStreamCallback} */ - #onstream; - /** @type {OnDatagramCallback|undefined} */ - #ondatagram; - /** @type {OnDatagramStatusCallback|undefined} */ - #ondatagramstatus; - /** @type {OnPathValidationCallback|undefined} */ - #onpathvalidation; - /** @type {OnSessionTicketCallback|undefined} */ - #onsessionticket; - /** @type {OnVersionNegotiationCallback|undefined} */ - #onversionnegotiation; - /** @type {OnHandshakeCallback} */ - #onhandshake; - /** @type {StreamCallbackConfiguration} */ - #streamConfig; - - /** - * @param {SessionCallbackConfiguration} config - * @param {StreamCallbackConfiguration} streamConfig - * @param {object} [handle] - * @param {Endpoint} [endpoint] - */ - constructor(config, streamConfig, handle, endpoint) { - validateObject(config, 'config'); - this.#stats = new SessionStats(handle.stats); - this.#state = new SessionState(handle.state); + // The config should have already been validated by the QuicEndpoing const { ondatagram, ondatagramstatus, @@ -1402,37 +793,47 @@ class Session { onsessionticket, onstream, onversionnegotiation, + stream, } = config; + if (ondatagram !== undefined) { - validateFunction(ondatagram, 'config.ondatagram'); - validateFunction(ondatagramstatus, 'config.ondatagramstatus'); - this.#state.hasDatagramListener = true; - this.#ondatagram = ondatagram; - this.#ondatagramstatus = ondatagramstatus; + this.#ondatagram = ondatagram.bind(this); + } + if (ondatagramstatus !== undefined) { + this.#ondatagramstatus = ondatagramstatus.bind(this); } - validateFunction(onhandshake, 'config.onhandshake'); if (onpathvalidation !== undefined) { - validateFunction(onpathvalidation, 'config.onpathvalidation'); - this.#state.hasPathValidationListener = true; - this.#onpathvalidation = onpathvalidation; + this.#onpathvalidation = onpathvalidation.bind(this); } if (onsessionticket !== undefined) { - validateFunction(onsessionticket, 'config.onsessionticket'); - this.#state.hasSessionTicketListener = true; - this.#onsessionticket = onsessionticket; + this.#onsessionticket = onsessionticket.bind(this); } - validateFunction(onstream, 'config.onstream'); if (onversionnegotiation !== undefined) { - validateFunction(onversionnegotiation, 'config.onversionnegotiation'); - this.#state.hasVersionNegotiationListener = true; - this.#onversionnegotiation = onversionnegotiation; + this.#onversionnegotiation = onversionnegotiation.bind(this); + } + if (onhandshake !== undefined) { + this.#onhandshake = onhandshake.bind(this); + } + + // It is ok for onstream to be undefined if the session is not expecting + // or wanting to receive incoming streams. If a stream is received and + // no onstream callback is specified, a warning will be emitted and the + // stream will just be immediately destroyed. + if (onstream !== undefined) { + this.#onstream = onstream.bind(this); } - this.#onhandshake = onhandshake; - this.#onstream = onstream; - this.#handle = handle; this.#endpoint = endpoint; - this.#pendingClose = Promise.withResolvers(); // eslint-disable-line node-core/prefer-primordials + this.#streamConfig = stream; + + this.#handle = handle; this.#handle[kOwner] = this; + this.#stats = new QuicSessionStats(kPrivateConstructor, handle.stats); + + this.#state = new QuicSessionState(kPrivateConstructor, handle.state); + this.#state.hasDatagramListener = !!ondatagram; + this.#state.hasPathValidationListener = !!onpathvalidation; + this.#state.hasSessionTicketListener = !!onsessionticket; + this.#state.hasVersionNegotiationListener = !!onversionnegotiation; } /** @type {boolean} */ @@ -1440,18 +841,21 @@ class Session { return this.#handle === undefined || this.#isPendingClose; } - /** @type {SessionStats} */ + /** @type {QuicSessionStats} */ get stats() { return this.#stats; } - /** @type {SessionState} */ + /** @type {QuicSessionState} */ get state() { return this.#state; } - /** @type {Endpoint} */ + /** @type {QuicEndpoint} */ get endpoint() { return this.#endpoint; } - /** @type {Path} */ + /** + * The path is the local and remote addresses of the session. + * @type {Path} + */ get path() { - if (this.#isClosedOrClosing) return undefined; + if (this.destroyed) return undefined; if (this.#remoteAddress === undefined) { const addr = this.#handle.getRemoteAddress(); if (addr !== undefined) { @@ -1478,28 +882,85 @@ class Session { if (handle === undefined) { throw new ERR_QUIC_OPEN_STREAM_FAILED(); } - const stream = new QuicStream(this.#streamConfig, handle, this, 0); - ArrayPrototypePush(this.#streams, stream); + const stream = new QuicStream(kPrivateConstructor, this.#streamConfig, handle, + this, 0 /* Bidirectional */); + this.#streams.add(stream); + + if (onSessionOpenStreamChannel.hasSubscribers) { + onSessionOpenStreamChannel.publish({ + stream, + session: this, + }); + } + return stream; + } + + /** + * @returns {QuicStream} + */ + openUnidirectionalStream() { + if (this.#isClosedOrClosing) { + throw new ERR_INVALID_STATE('Session is closed'); + } + if (!this.state.isStreamOpenAllowed) { + throw new ERR_QUIC_OPEN_STREAM_FAILED(); + } + const handle = this.#handle.openStream(STREAM_DIRECTION_UNIDIRECTIONAL); + if (handle === undefined) { + throw new ERR_QUIC_OPEN_STREAM_FAILED(); + } + const stream = new QuicStream(kPrivateConstructor, this.#streamConfig, handle, + this, 1 /* Unidirectional */); + this.#streams.add(stream); + + if (onSessionOpenStreamChannel.hasSubscribers) { + onSessionOpenStreamChannel.publish({ + stream, + session: this, + }); + } + return stream; } /** - * @returns {QuicStream} + * Send a datagram. The id of the sent datagram will be returned. The status + * of the sent datagram will be reported via the datagram-status event if + * possible. + * + * If a string is given it will be encoded as UTF-8. + * + * If an ArrayBufferView is given, the view will be copied. + * @param {ArrayBufferView|string} datagram The datagram payload + * @returns {bigint} The datagram ID */ - openUnidirectionalStream() { + sendDatagram(datagram) { if (this.#isClosedOrClosing) { throw new ERR_INVALID_STATE('Session is closed'); } - if (!this.state.isStreamOpenAllowed) { - throw new ERR_QUIC_OPEN_STREAM_FAILED(); + if (typeof datagram === 'string') { + datagram = Buffer.from(datagram, 'utf8'); + } else { + if (!isArrayBufferView(datagram)) { + throw new ERR_INVALID_ARG_TYPE('datagram', + ['ArrayBufferView', 'string'], + datagram); + } + datagram = new Uint8Array(ArrayBufferPrototypeTransfer(datagram.buffer), + datagram.byteOffset, + datagram.byteLength); } - const handle = this.#handle.openStream(STREAM_DIRECTION_UNIDIRECTIONAL); - if (handle === undefined) { - throw new ERR_QUIC_OPEN_STREAM_FAILED(); + const id = this.#handle.sendDatagram(datagram); + + if (onSessionSendDatagramChannel.hasSubscribers) { + onSessionSendDatagramChannel.publish({ + id, + length: datagram.byteLength, + session: this, + }); } - const stream = new QuicStream(this.#streamConfig, handle, this, 1); - ArrayPrototypePush(this.#streams, stream); - return stream; + + return id; } /** @@ -1510,6 +971,11 @@ class Session { throw new ERR_INVALID_STATE('Session is closed'); } this.#handle.updateKey(); + if (onSessionUpdateKeyChannel.hasSubscribers) { + onSessionUpdateKeyChannel.publish({ + session: this, + }); + } } /** @@ -1526,6 +992,11 @@ class Session { if (!this.#isClosedOrClosing) { this.#isPendingClose = true; this.#handle?.gracefulClose(); + if (onSessionClosingChannel.hasSubscribers) { + onSessionClosingChannel.publish({ + session: this, + }); + } } return this.closed; } @@ -1547,39 +1018,98 @@ class Session { * the closed promise will be rejected with that error. If no error is given, * the closed promise will be resolved. * @param {any} error + * @return {Promise} Returns this.closed */ destroy(error) { - // TODO(@jasnell): Implement. + if (this.destroyed) return; + // First, forcefully and immediately destroy all open streams, if any. + for (const stream of this.#streams) { + stream.destroy(error); + } + // The streams should remove themselves when they are destroyed but let's + // be doubly sure. + if (this.#streams.size) { + process.emitWarning( + `The session is destroyed with ${this.#streams.size} active streams. ` + + 'This should not happen and indicates a bug in Node.js. Please open an ' + + 'issue in the Node.js GitHub repository at https://github.com/nodejs/node ' + + 'to report the problem.', + ); + } + this.#streams.clear(); + + // Remove this session immediately from the endpoint + this.#endpoint[kRemoveSession](this); + this.#endpoint = undefined; + this.#isPendingClose = false; + + if (error) { + // If the session is still waiting to be closed, and error + // is specified, reject the closed promise. + this.#pendingClose.reject?.(error); + } else { + this.#pendingClose.resolve?.(); + } + this.#pendingClose.reject = undefined; + this.#pendingClose.resolve = undefined; + + this.#remoteAddress = undefined; + this.#state[kFinishClose](); + this.#stats[kFinishClose](); + + this.#onstream = undefined; + this.#ondatagram = undefined; + this.#ondatagramstatus = undefined; + this.#onpathvalidation = undefined; + this.#onsessionticket = undefined; + this.#onversionnegotiation = undefined; + this.#onhandshake = undefined; + this.#streamConfig = undefined; + + // Destroy the underlying C++ handle + this.#handle.destroy(); + this.#handle = undefined; + + if (onSessionClosedChannel.hasSubscribers) { + onSessionClosedChannel.publish({ + session: this, + }); + } + + return this.closed; } /** - * Send a datagram. The id of the sent datagram will be returned. The status - * of the sent datagram will be reported via the datagram-status event if - * possible. - * - * If a string is given it will be encoded as UTF-8. - * - * If an ArrayBufferView is given, the view will be copied. - * @param {ArrayBufferView|string} datagram The datagram payload - * @returns {bigint} The datagram ID + * @param {number} errorType + * @param {number} code + * @param {string} [reason] */ - sendDatagram(datagram) { - if (this.#isClosedOrClosing) { - throw new ERR_INVALID_STATE('Session is closed'); + [kFinishClose](errorType, code, reason) { + // If code is zero, then we closed without an error. Yay! We can destroy + // safely without specifying an error. + if (code === 0) { + this.destroy(); + return; } - if (typeof datagram === 'string') { - datagram = Buffer.from(datagram, 'utf8'); - } else { - if (!isArrayBufferView(datagram)) { - throw new ERR_INVALID_ARG_TYPE('datagram', - ['ArrayBufferView', 'string'], - datagram); + + // Otherwise, errorType indicates the type of error that occurred, code indicates + // the specific error, and reason is an optional string describing the error. + switch (errorType) { + case 0: /* Transport Error */ + this.destroy(new ERR_QUIC_TRANSPORT_ERROR(code, reason)); + break; + case 1: /* Application Error */ + this.destroy(new ERR_QUIC_APPLICATION_ERROR(code, reason)); + break; + case 2: /* Version Negotiation Error */ + this.destroy(new ERR_QUIC_VERSION_NEGOTIATION_ERROR()); + break; + case 3: /* Idle close */ { + // An idle close is not really an error. We can just destroy. + this.destroy(); + break; } - datagram = new Uint8Array(datagram.buffer.transfer(), - datagram.byteOffset, - datagram.byteLength); } - return this.#handle.sendDatagram(datagram); } /** @@ -1587,8 +1117,19 @@ class Session { * @param {boolean} early */ [kDatagram](u8, early) { + // The datagram event should only be called if the session was created with + // an ondatagram callback. The callback should always exist here. + assert(this.#ondatagram, 'Unexpected datagram event'); if (this.destroyed) return; - this.#ondatagram(u8, this, early); + this.#ondatagram(u8, early); + + if (onSessionReceiveDatagramChannel.hasSubscribers) { + onSessionReceiveDatagramChannel.publish({ + length: u8.byteLength, + early, + session: this, + }); + } } /** @@ -1597,7 +1138,17 @@ class Session { */ [kDatagramStatus](id, status) { if (this.destroyed) return; - this.#ondatagramstatus(id, status, this); + // The ondatagramstatus callback may not have been specified. That's ok. + // We'll just ignore the event in that case. + this.#ondatagramstatus?.(id, status); + + if (onSessionReceiveDatagramStatusChannel.hasSubscribers) { + onSessionReceiveDatagramStatusChannel.publish({ + id, + status, + session: this, + }); + } } /** @@ -1610,18 +1161,41 @@ class Session { */ [kPathValidation](result, newLocalAddress, newRemoteAddress, oldLocalAddress, oldRemoteAddress, preferredAddress) { + // The path validation event should only be called if the session was created + // with an onpathvalidation callback. The callback should always exist here. + assert(this.#onpathvalidation, 'Unexpected path validation event'); if (this.destroyed) return; this.#onpathvalidation(result, newLocalAddress, newRemoteAddress, - oldLocalAddress, oldRemoteAddress, preferredAddress, - this); + oldLocalAddress, oldRemoteAddress, preferredAddress); + + if (onSessionPathValidationChannel.hasSubscribers) { + onSessionPathValidationChannel.publish({ + result, + newLocalAddress, + newRemoteAddress, + oldLocalAddress, + oldRemoteAddress, + preferredAddress, + session: this, + }); + } } /** * @param {object} ticket */ [kSessionTicket](ticket) { + // The session ticket event should only be called if the session was created + // with an onsessionticket callback. The callback should always exist here. + assert(this.#onsessionticket, 'Unexpected session ticket event'); if (this.destroyed) return; - this.#onsessionticket(ticket, this); + this.#onsessionticket(ticket); + if (onSessionTicketChannel.hasSubscribers) { + onSessionTicketChannel.publish({ + ticket, + session: this, + }); + } } /** @@ -1630,8 +1204,21 @@ class Session { * @param {number[]} supportedVersions */ [kVersionNegotiation](version, requestedVersions, supportedVersions) { + // The version negotiation event should only be called if the session was + // created with an onversionnegotiation callback. The callback should always + // exist here. if (this.destroyed) return; - this.#onversionnegotiation(version, requestedVersions, supportedVersions, this); + this.#onversionnegotiation(version, requestedVersions, supportedVersions); + this.destroy(new ERR_QUIC_VERSION_NEGOTIATION_ERROR()); + + if (onSessionVersionNegotiationChannel.hasSubscribers) { + onSessionVersionNegotiationChannel.publish({ + version, + requestedVersions, + supportedVersions, + session: this, + }); + } } /** @@ -1646,8 +1233,23 @@ class Session { [kHandshake](sni, alpn, cipher, cipherVersion, validationErrorReason, validationErrorCode, earlyDataAccepted) { if (this.destroyed) return; - this.#onhandshake(sni, alpn, cipher, cipherVersion, validationErrorReason, - validationErrorCode, earlyDataAccepted, this); + // The onhandshake callback may not have been specified. That's ok. + // We'll just ignore the event in that case. + this.#onhandshake?.(sni, alpn, cipher, cipherVersion, validationErrorReason, + validationErrorCode, earlyDataAccepted); + + if (onSessionHandshakeChannel.hasSubscribers) { + onSessionHandshakeChannel.publish({ + sni, + alpn, + cipher, + cipherVersion, + validationErrorReason, + validationErrorCode, + earlyDataAccepted, + session: this, + }); + } } /** @@ -1655,9 +1257,30 @@ class Session { * @param {number} direction */ [kNewStream](handle, direction) { - const stream = new QuicStream(this.#streamConfig, handle, this, direction); - ArrayPrototypePush(this.#streams, stream); - this.#onstream(stream, this); + const stream = new QuicStream(kPrivateConstructor, this.#streamConfig, handle, + this, direction); + + // A new stream was received. If we don't have an onstream callback, then + // there's nothing we can do about it. Destroy the stream in this case. + if (this.#onstream === undefined) { + process.emitWarning('A new stream was received but no onstream callback was provided'); + stream.destroy(); + return; + } + this.#streams.add(stream); + + this.#onstream(stream); + + if (onSessionReceivedStreamChannel.hasSubscribers) { + onSessionReceivedStreamChannel.publish({ + stream, + session: this, + }); + } + } + + [kRemoveStream](stream) { + this.#streams.delete(stream); } [kInspect](depth, options) { @@ -1669,7 +1292,7 @@ class Session { depth: options.depth == null ? null : options.depth - 1, }; - return `Session ${inspect({ + return `QuicSession ${inspect({ closed: this.closed, closing: this.#isPendingClose, destroyed: this.destroyed, @@ -1681,331 +1304,189 @@ class Session { }, opts)}`; } - [kFinishClose](errorType, code, reason) { - // TODO(@jasnell): Finish the implementation - } - async [SymbolAsyncDispose]() { await this.close(); } } -class EndpointStats { - /** @type {BigUint64Array} */ +class QuicEndpoint { + /** + * The local socket address on which the endpoint is listening (lazily created) + * @type {SocketAddress|undefined} + */ + #address = undefined; + /** + * When true, the endpoint has been marked busy and is temporarily not accepting + * new sessions (only used when the Endpoint is acting as a server) + * @type {boolean} + */ + #busy = false; + /** + * The underlying C++ handle for the endpoint. When undefined the endpoint is + * considered to be closed. + * @type {object} + */ #handle; - /** - * @param {ArrayBuffer} buffer + * True if endpoint.close() has been called and the [kFinishClose] method has + * not yet been called. + * @type {boolean} */ - constructor(buffer) { - if (!isArrayBuffer(buffer)) { - throw new ERR_INVALID_ARG_TYPE('buffer', ['ArrayBuffer'], buffer); - } - this.#handle = new BigUint64Array(buffer); - } - - /** @type {bigint} */ - get createdAt() { - return this.#handle[IDX_STATS_ENDPOINT_CREATED_AT]; - } - - /** @type {bigint} */ - get destroyedAt() { - return this.#handle[IDX_STATS_ENDPOINT_DESTROYED_AT]; - } - - /** @type {bigint} */ - get bytesReceived() { - return this.#handle[IDX_STATS_ENDPOINT_BYTES_RECEIVED]; - } - - /** @type {bigint} */ - get bytesSent() { - return this.#handle[IDX_STATS_ENDPOINT_BYTES_SENT]; - } - - /** @type {bigint} */ - get packetsReceived() { - return this.#handle[IDX_STATS_ENDPOINT_PACKETS_RECEIVED]; - } - - /** @type {bigint} */ - get packetsSent() { - return this.#handle[IDX_STATS_ENDPOINT_PACKETS_SENT]; - } - - /** @type {bigint} */ - get serverSessions() { - return this.#handle[IDX_STATS_ENDPOINT_SERVER_SESSIONS]; - } - - /** @type {bigint} */ - get clientSessions() { - return this.#handle[IDX_STATS_ENDPOINT_CLIENT_SESSIONS]; - } - - /** @type {bigint} */ - get serverBusyCount() { - return this.#handle[IDX_STATS_ENDPOINT_SERVER_BUSY_COUNT]; - } - - /** @type {bigint} */ - get retryCount() { - return this.#handle[IDX_STATS_ENDPOINT_RETRY_COUNT]; - } - - /** @type {bigint} */ - get versionNegotiationCount() { - return this.#handle[IDX_STATS_ENDPOINT_VERSION_NEGOTIATION_COUNT]; - } + #isPendingClose = false; + /** + * True if the endpoint is acting as a server and actively listening for connections. + * @type {boolean} + */ + #listening = false; + /** + * A promise that is resolved when the endpoint has been closed (or rejected if + * the endpoint closes abruptly due to an error). + * @type {PromiseWithResolvers} + */ + #pendingClose = Promise.withResolvers(); // eslint-disable-line node-core/prefer-primordials + /** + * If destroy() is called with an error, the error is stored here and used to reject + * the pendingClose promise when [kFinishClose] is called. + * @type {any} + */ + #pendingError = undefined; + /** + * The collection of active sessions. + * @type {Set} + */ + #sessions = new SafeSet(); + /** + * The internal state of the endpoint. Used to efficiently track and update the + * state of the underlying c++ endpoint handle. + * @type {QuicEndpointState} + */ + #state; + /** + * The collected statistics for the endpoint. + * @type {QuicEndpointStats} + */ + #stats; + /** + * The user provided callback that is invoked when a new session is received. + * (used only when the endpoint is acting as a server) + * @type {OnSessionCallback} + */ + #onsession; + /** + * The callback configuration used for new sessions (client or server) + * @type {ProcessedSessionCallbackConfiguration} + */ + #sessionConfig; - /** @type {bigint} */ - get statelessResetCount() { - return this.#handle[IDX_STATS_ENDPOINT_STATELESS_RESET_COUNT]; - } + /** + * @param {EndpointCallbackConfiguration} config + * @returns {StreamCallbackConfiguration} + */ + #processStreamConfig(config) { + validateObject(config, 'config'); + const { + onblocked, + onreset, + onheaders, + ontrailers, + } = config; - /** @type {bigint} */ - get immediateCloseCount() { - return this.#handle[IDX_STATS_ENDPOINT_IMMEDIATE_CLOSE_COUNT]; - } + if (onblocked !== undefined) { + validateFunction(onblocked, 'config.onblocked'); + } + if (onreset !== undefined) { + validateFunction(onreset, 'config.onreset'); + } + if (onheaders !== undefined) { + validateFunction(onheaders, 'config.onheaders'); + } + if (ontrailers !== undefined) { + validateFunction(ontrailers, 'ontrailers'); + } - toJSON() { return { __proto__: null, - createdAt: `${this.createdAt}`, - destroyedAt: `${this.destroyedAt}`, - bytesReceived: `${this.bytesReceived}`, - bytesSent: `${this.bytesSent}`, - packetsReceived: `${this.packetsReceived}`, - packetsSent: `${this.packetsSent}`, - serverSessions: `${this.serverSessions}`, - clientSessions: `${this.clientSessions}`, - serverBusyCount: `${this.serverBusyCount}`, - retryCount: `${this.retryCount}`, - versionNegotiationCount: `${this.versionNegotiationCount}`, - statelessResetCount: `${this.statelessResetCount}`, - immediateCloseCount: `${this.immediateCloseCount}`, - }; - } - - [kInspect](depth, options) { - if (depth < 0) - return this; - - const opts = { - ...options, - depth: options.depth == null ? null : options.depth - 1, + onblocked, + onreset, + onheaders, + ontrailers, }; - - return `EndpointStats ${inspect({ - createdAt: this.createdAt, - destroyedAt: this.destroyedAt, - bytesReceived: this.bytesReceived, - bytesSent: this.bytesSent, - packetsReceived: this.packetsReceived, - packetsSent: this.packetsSent, - serverSessions: this.serverSessions, - clientSessions: this.clientSessions, - serverBusyCount: this.serverBusyCount, - retryCount: this.retryCount, - versionNegotiationCount: this.versionNegotiationCount, - statelessResetCount: this.statelessResetCount, - immediateCloseCount: this.immediateCloseCount, - }, opts)}`; - } - - [kFinishClose]() { - // Snapshot the stats into a new BigUint64Array since the underlying - // buffer will be destroyed. - this.#handle = new BigUint64Array(this.#handle); } -} - -class EndpointState { - /** @type {DataView} */ - #handle; /** - * @param {ArrayBuffer} buffer + * + * @param {EndpointCallbackConfiguration} config + * @returns {ProcessedSessionCallbackConfiguration} */ - constructor(buffer) { - if (!isArrayBuffer(buffer)) { - throw new ERR_INVALID_ARG_TYPE('buffer', ['ArrayBuffer'], buffer); + #processSessionConfig(config) { + validateObject(config, 'config'); + const { + onstream, + ondatagram, + ondatagramstatus, + onpathvalidation, + onsessionticket, + onversionnegotiation, + onhandshake, + } = config; + if (onstream !== undefined) { + validateFunction(onstream, 'config.onstream'); } - this.#handle = new DataView(buffer); - } - - #assertNotClosed() { - if (this.#handle.byteLength === 0) { - throw new ERR_INVALID_STATE('Endpoint is closed'); + if (ondatagram !== undefined) { + validateFunction(ondatagram, 'config.ondatagram'); + } + if (ondatagramstatus !== undefined) { + validateFunction(ondatagramstatus, 'config.ondatagramstatus'); + } + if (onpathvalidation !== undefined) { + validateFunction(onpathvalidation, 'config.onpathvalidation'); + } + if (onsessionticket !== undefined) { + validateFunction(onsessionticket, 'config.onsessionticket'); + } + if (onversionnegotiation !== undefined) { + validateFunction(onversionnegotiation, 'config.onversionnegotiation'); + } + if (onhandshake !== undefined) { + validateFunction(onhandshake, 'config.onhandshake'); } - } - - /** @type {boolean} */ - get isBound() { - this.#assertNotClosed(); - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_ENDPOINT_BOUND); - } - - /** @type {boolean} */ - get isReceiving() { - this.#assertNotClosed(); - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_ENDPOINT_RECEIVING); - } - - /** @type {boolean} */ - get isListening() { - this.#assertNotClosed(); - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_ENDPOINT_LISTENING); - } - - /** @type {boolean} */ - get isClosing() { - this.#assertNotClosed(); - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_ENDPOINT_CLOSING); - } - - /** @type {boolean} */ - get isBusy() { - this.#assertNotClosed(); - return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_ENDPOINT_BUSY); - } - - /** - * The number of underlying callbacks that are pending. If the session - * is closing, these are the number of callbacks that the session is - * waiting on before it can be closed. - * @type {bigint} - */ - get pendingCallbacks() { - this.#assertNotClosed(); - return DataViewPrototypeGetBigUint64(this.#handle, IDX_STATE_ENDPOINT_PENDING_CALLBACKS); - } - - toJSON() { - if (this.#handle.byteLength === 0) return {}; return { __proto__: null, - isBound: this.isBound, - isReceiving: this.isReceiving, - isListening: this.isListening, - isClosing: this.isClosing, - isBusy: this.isBusy, - pendingCallbacks: `${this.pendingCallbacks}`, + onstream, + ondatagram, + ondatagramstatus, + onpathvalidation, + onsessionticket, + onversionnegotiation, + onhandshake, + stream: this.#processStreamConfig(config), }; } - [kInspect](depth, options) { - if (depth < 0) - return this; + /** + * @param {EndpointCallbackConfiguration} config + * @returns {ProcessedEndpointCallbackConfiguration} + */ + #processEndpointConfig(config) { + validateObject(config, 'config'); + const { + onsession, + } = config; - if (this.#handle.byteLength === 0) { - return 'EndpointState { }'; + if (onsession !== undefined) { + validateFunction(config.onsession, 'config.onsession'); } - const opts = { - ...options, - depth: options.depth == null ? null : options.depth - 1, + return { + __proto__: null, + onsession, + session: this.#processSessionConfig(config), }; - - return `EndpointState ${inspect({ - isBound: this.isBound, - isReceiving: this.isReceiving, - isListening: this.isListening, - isClosing: this.isClosing, - isBusy: this.isBusy, - pendingCallbacks: this.pendingCallbacks, - }, opts)}`; - } - - [kFinishClose]() { - // Snapshot the state into a new DataView since the underlying - // buffer will be destroyed. - if (this.#handle.byteLength === 0) return; - this.#handle = new DataView(new ArrayBuffer(0)); } -} - -function validateStreamConfig(config, name = 'config') { - validateObject(config, name); - if (config.onerror !== undefined) - validateFunction(config.onerror, `${name}.onerror`); - if (config.onblocked !== undefined) - validateFunction(config.onblocked, `${name}.onblocked`); - if (config.onreset !== undefined) - validateFunction(config.onreset, `${name}.onreset`); - if (config.onheaders !== undefined) - validateFunction(config.onheaders, `${name}.onheaders`); - if (config.ontrailers !== undefined) - validateFunction(config.ontrailers, `${name}.ontrailers`); - return config; -} - -function validateSessionConfig(config, name = 'config') { - validateObject(config, name); - if (config.onstream !== undefined) - validateFunction(config.onstream, `${name}.onstream`); - if (config.ondatagram !== undefined) - validateFunction(config.ondatagram, `${name}.ondatagram`); - if (config.ondatagramstatus !== undefined) - validateFunction(config.ondatagramstatus, `${name}.ondatagramstatus`); - if (config.onpathvalidation !== undefined) - validateFunction(config.onpathvalidation, `${name}.onpathvalidation`); - if (config.onsessionticket !== undefined) - validateFunction(config.onsessionticket, `${name}.onsessionticket`); - if (config.onversionnegotiation !== undefined) - validateFunction(config.onversionnegotiation, `${name}.onversionnegotiation`); - if (config.onhandshake !== undefined) - validateFunction(config.onhandshake, `${name}.onhandshake`); - return config; -} - -function validateEndpointConfig(config) { - validateObject(config, 'config'); - validateFunction(config.onsession, 'config.onsession'); - if (config.session !== undefined) - validateSessionConfig(config.session, 'config.session'); - if (config.stream !== undefined) - validateStreamConfig(config.stream, 'config.stream'); - return config; -} - -class Endpoint { - /** @type {SocketAddress|undefined} */ - #address = undefined; - /** @type {boolean} */ - #busy = false; - /** @type {object} */ - #handle; - /** @type {boolean} */ - #isPendingClose = false; - /** @type {boolean} */ - #listening = false; - /** @type {PromiseWithResolvers} */ - #pendingClose; - /** @type {any} */ - #pendingError = undefined; - /** @type {Session[]} */ - #sessions = []; - /** @type {EndpointState} */ - #state; - /** @type {EndpointStats} */ - #stats; - /** @type {OnSessionCallback} */ - #onsession; - /** @type {SessionCallbackConfiguration} */ - #sessionConfig; - /** @type {StreamCallbackConfiguration */ - #streamConfig; /** - * @param {EndpointCallbackConfiguration} config - * @param {EndpointOptions} [options] + * @param {EndpointCallbackConfiguration} options + * @returns {EndpointOptions} */ - constructor(config, options = kEmptyObject) { - validateEndpointConfig(config); - this.#onsession = config.onsession; - this.#sessionConfig = config.session; - this.#streamConfig = config.stream; - + #processEndpointOptions(options) { validateObject(options, 'options'); let { address } = options; const { @@ -2044,8 +1525,7 @@ class Endpoint { } } - this.#pendingClose = Promise.withResolvers(); // eslint-disable-line node-core/prefer-primordials - this.#handle = new Endpoint_({ + return { __proto__: null, address: address?.[kSocketAddressHandle], retryTokenExpiration, @@ -2072,16 +1552,49 @@ class Endpoint { cc, resetTokenSecret, tokenSecret, - }); + }; + } + + #newSession(handle) { + const session = new QuicSession(kPrivateConstructor, this.#sessionConfig, handle, this); + this.#sessions.add(session); + return session; + } + + /** + * @param {EndpointCallbackConfiguration} config + */ + constructor(config = kEmptyObject) { + const { + onsession, + session, + } = this.#processEndpointConfig(config); + + // Note that the onsession callback is only used for server sessions. + // If the callback is not specified, calling listen() will fail but + // connect() can still be called. + if (onsession !== undefined) { + this.#onsession = onsession.bind(this); + } + this.#sessionConfig = session; + + this.#handle = new Endpoint_(this.#processEndpointOptions(config)); this.#handle[kOwner] = this; - this.#stats = new EndpointStats(this.#handle.stats); - this.#state = new EndpointState(this.#handle.state); + this.#stats = new QuicEndpointStats(kPrivateConstructor, this.#handle.stats); + this.#state = new QuicEndpointState(kPrivateConstructor, this.#handle.state); + + if (onEndpointCreatedChannel.hasSubscribers) { + onEndpointCreatedChannel.publish({ + endpoint: this, + config, + }); + } } - /** @type {EndpointStats} */ + /** @type {QuicEndpointStats} */ get stats() { return this.#stats; } - /** @type {EndpointState} */ + /** @type {QuicEndpointState} */ get state() { return this.#state; } get #isClosedOrClosing() { @@ -2103,11 +1616,16 @@ class Endpoint { throw new ERR_INVALID_STATE('Endpoint is closed'); } // The val is allowed to be any truthy value - val = !!val; // Non-op if there is no change - if (val !== this.#busy) { - this.#busy = val; + if (!!val !== this.#busy) { + this.#busy = !this.#busy; this.#handle.markBusy(this.#busy); + if (onEndpointBusyChangeChannel.hasSubscribers) { + onEndpointBusyChangeChannel.publish({ + endpoint: this, + busy: this.#busy, + }); + } } } @@ -2125,19 +1643,127 @@ class Endpoint { } /** - * Configures the endpoint to listen for incoming connections. - * @param {SessionOptions} [options] + * @param {TlsOptions} tls */ - listen(options = kEmptyObject) { - if (this.#isClosedOrClosing) { - throw new ERR_INVALID_STATE('Endpoint is closed'); + #processTlsOptions(tls) { + validateObject(tls, 'options.tls'); + const { + sni, + alpn, + ciphers = DEFAULT_CIPHERS, + groups = DEFAULT_GROUPS, + keylog = false, + verifyClient = false, + tlsTrace = false, + verifyPrivateKey = false, + keys, + certs, + ca, + crl, + } = tls; + + if (sni !== undefined) { + validateString(sni, 'options.tls.sni'); } - if (this.#listening) { - throw new ERR_INVALID_STATE('Endpoint is already listening'); + if (alpn !== undefined) { + validateString(alpn, 'options.tls.alpn'); + } + if (ciphers !== undefined) { + validateString(ciphers, 'options.tls.ciphers'); + } + if (groups !== undefined) { + validateString(groups, 'options.tls.groups'); + } + validateBoolean(keylog, 'options.tls.keylog'); + validateBoolean(verifyClient, 'options.tls.verifyClient'); + validateBoolean(tlsTrace, 'options.tls.tlsTrace'); + validateBoolean(verifyPrivateKey, 'options.tls.verifyPrivateKey'); + + if (certs !== undefined) { + const certInputs = ArrayIsArray(certs) ? certs : [certs]; + for (const cert of certInputs) { + if (!isArrayBufferView(cert) && !isArrayBuffer(cert)) { + throw new ERR_INVALID_ARG_TYPE('options.tls.certs', + ['ArrayBufferView', 'ArrayBuffer'], cert); + } + } } - validateObject(options, 'options'); + if (ca !== undefined) { + const caInputs = ArrayIsArray(ca) ? ca : [ca]; + for (const caCert of caInputs) { + if (!isArrayBufferView(caCert) && !isArrayBuffer(caCert)) { + throw new ERR_INVALID_ARG_TYPE('options.tls.ca', + ['ArrayBufferView', 'ArrayBuffer'], caCert); + } + } + } + + if (crl !== undefined) { + const crlInputs = ArrayIsArray(crl) ? crl : [crl]; + for (const crlCert of crlInputs) { + if (!isArrayBufferView(crlCert) && !isArrayBuffer(crlCert)) { + throw new ERR_INVALID_ARG_TYPE('options.tls.crl', + ['ArrayBufferView', 'ArrayBuffer'], crlCert); + } + } + } + const keyHandles = []; + if (keys !== undefined) { + const keyInputs = ArrayIsArray(keys) ? keys : [keys]; + for (const key of keyInputs) { + if (isKeyObject(key)) { + if (key.type !== 'private') { + throw new ERR_INVALID_ARG_VALUE('options.tls.keys', key, 'must be a private key'); + } + ArrayPrototypePush(keyHandles, key[kKeyObjectHandle]); + } else if (isCryptoKey(key)) { + if (key.type !== 'private') { + throw new ERR_INVALID_ARG_VALUE('options.tls.keys', key, 'must be a private key'); + } + ArrayPrototypePush(keyHandles, key[kKeyObjectInner][kKeyObjectHandle]); + } else { + throw new ERR_INVALID_ARG_TYPE('options.tls.keys', ['KeyObject', 'CryptoKey'], key); + } + } + } + + return { + __proto__: null, + sni, + alpn, + ciphers, + groups, + keylog, + verifyClient, + tlsTrace, + verifyPrivateKey, + keys: keyHandles, + certs, + ca, + crl, + }; + } + + /** + * @param {'use'|'ignore'|'default'} policy + * @returns {number} + */ + #getPreferredAddressPolicy(policy = 'default') { + switch (policy) { + case 'use': return PREFERRED_ADDRESS_USE; + case 'ignore': return PREFERRED_ADDRESS_IGNORE; + case 'default': return DEFAULT_PREFERRED_ADDRESS_POLICY; + } + throw new ERR_INVALID_ARG_VALUE('options.preferredAddressPolicy', policy); + } + + /** + * @param {SessionOptions} options + */ + #processSessionOptions(options) { + validateObject(options, 'options'); const { version, minVersion, @@ -2146,69 +1772,88 @@ class Endpoint { transportParams = kEmptyObject, tls = kEmptyObject, qlog = false, + sessionTicket, } = options; - this.#handle.listen({ + return { + __proto__: null, version, minVersion, - preferredAddressPolicy: getPreferredAddressPolicy(preferredAddressPolicy), + preferredAddressPolicy: this.#getPreferredAddressPolicy(preferredAddressPolicy), application, transportParams, - tls: processTlsOptions(tls), + tls: this.#processTlsOptions(tls), qlog, - }); + sessionTicket, + }; + } + + /** + * Configures the endpoint to listen for incoming connections. + * @param {SessionOptions} [options] + */ + listen(options = kEmptyObject) { + if (this.#isClosedOrClosing) { + throw new ERR_INVALID_STATE('Endpoint is closed'); + } + if (this.#onsession === undefined) { + throw new ERR_INVALID_STATE( + 'Endpoint is not configured to accept sessions. Specify an onsession ' + + 'callback when creating the endpoint', + ); + } + if (this.#listening) { + throw new ERR_INVALID_STATE('Endpoint is already listening'); + } + this.#handle.listen(this.#processSessionOptions(options)); this.#listening = true; + + if (onEndpointListeningChannel.hasSubscribers) { + onEndpointListeningChannel.publish({ + endpoint: this, + options, + }); + } } /** * Initiates a session with a remote endpoint. * @param {SocketAddress} address * @param {SessionOptions} [options] - * @returns {Session} + * @returns {QuicSession} */ connect(address, options = kEmptyObject) { if (this.#isClosedOrClosing) { throw new ERR_INVALID_STATE('Endpoint is closed'); } - if (this.#busy) { - throw new ERR_INVALID_STATE('Endpoint is busy'); - } - if (address === undefined || !SocketAddress.isSocketAddress(address)) { - if (typeof address === 'object' && address !== null) { - address = new SocketAddress(address); - } else { + if (!SocketAddress.isSocketAddress(address)) { + if (address == null || typeof address !== 'object') { throw new ERR_INVALID_ARG_TYPE('address', 'SocketAddress', address); } + address = new SocketAddress(address); } - validateObject(options, 'options'); - const { - version, - minVersion, - preferredAddressPolicy = 'default', - application = kEmptyObject, - transportParams = kEmptyObject, - tls = kEmptyObject, - qlog = false, - sessionTicket, - } = options; + const processedOptions = this.#processSessionOptions(options); + const { sessionTicket } = processedOptions; - const handle = this.#handle.connect(address[kSocketAddressHandle], { - version, - minVersion, - preferredAddressPolicy: getPreferredAddressPolicy(preferredAddressPolicy), - application, - transportParams, - tls: processTlsOptions(tls), - qlog, - }, sessionTicket); + const handle = this.#handle.connect(address[kSocketAddressHandle], + processedOptions, sessionTicket); if (handle === undefined) { throw new ERR_QUIC_CONNECTION_FAILED(); } - const session = new Session(this.#sessionConfig, this.#streamConfig, handle, this); - ArrayPrototypePush(this.#sessions, session); + const session = this.#newSession(handle); + + if (onEndpointClientSessionChannel.hasSubscribers) { + onEndpointClientSessionChannel.publish({ + endpoint: this, + session, + address, + options, + }); + } + return session; } @@ -2218,10 +1863,16 @@ class Endpoint { * not be accepted or created. The returned promise will be resolved when * closing is complete, or will be rejected if the endpoint is closed abruptly * due to an error. - * @returns {Promise} + * @returns {Promise} Returns this.closed */ close() { if (!this.#isClosedOrClosing) { + if (onEndpointClosingChannel.hasSubscribers) { + onEndpointClosingChannel.publish({ + endpoint: this, + hasPendingError: this.#pendingError !== undefined, + }); + } this.#isPendingClose = true; this.#handle?.closeGracefully(); } @@ -2239,12 +1890,22 @@ class Endpoint { /** @type {boolean} */ get destroyed() { return this.#handle === undefined; } + /** + * Return an iterator over all currently active sessions associated + * with this endpoint. + * @type {SetIterator} + */ + get sessions() { + return this.#sessions[SymbolIterator](); + } + /** * Forcefully terminates the endpoint by immediately destroying all sessions * after calling close. If an error is given, the closed promise will be * rejected with that error. If no error is given, the closed promise will * be resolved. - * @param {any} error + * @param {any} [error] + * @returns {Promise} Returns this.closed */ destroy(error) { if (!this.#isClosedOrClosing) { @@ -2258,6 +1919,7 @@ class Endpoint { for (const session of this.#sessions) { session.destroy(error); } + return this.closed; } ref() { @@ -2268,63 +1930,100 @@ class Endpoint { if (this.#handle !== undefined) this.#handle.ref(false); } - [kFinishClose](context, status) { - if (this.#handle === undefined) return; - this.#isPendingClose = false; - this.#address = undefined; - this.#busy = false; - this.#listening = false; - this.#isPendingClose = false; - this.#stats[kFinishClose](); - this.#state[kFinishClose](); - this.#sessions = []; - + #maybeGetCloseError(context, status) { switch (context) { case kCloseContextClose: { - if (this.#pendingError !== undefined) { - this.#pendingClose.reject(this.#pendingError); - } else { - this.#pendingClose.resolve(); - } - break; + return this.#pendingError; } case kCloseContextBindFailure: { - this.#pendingClose.reject( - new ERR_QUIC_ENDPOINT_CLOSED('Bind failure', status)); - break; + return new ERR_QUIC_ENDPOINT_CLOSED('Bind failure', status); } case kCloseContextListenFailure: { - this.#pendingClose.reject( - new ERR_QUIC_ENDPOINT_CLOSED('Listen failure', status)); - break; + return new ERR_QUIC_ENDPOINT_CLOSED('Listen failure', status); } case kCloseContextReceiveFailure: { - this.#pendingClose.reject( - new ERR_QUIC_ENDPOINT_CLOSED('Receive failure', status)); - break; + return new ERR_QUIC_ENDPOINT_CLOSED('Receive failure', status); } case kCloseContextSendFailure: { - this.#pendingClose.reject( - new ERR_QUIC_ENDPOINT_CLOSED('Send failure', status)); - break; + return new ERR_QUIC_ENDPOINT_CLOSED('Send failure', status); } case kCloseContextStartFailure: { - this.#pendingClose.reject( - new ERR_QUIC_ENDPOINT_CLOSED('Start failure', status)); - break; + return new ERR_QUIC_ENDPOINT_CLOSED('Start failure', status); } } + // Otherwise return undefined. + } - this.#pendingError = undefined; + [kFinishClose](context, status) { + if (this.#handle === undefined) return; + this.#handle = undefined; + this.#stats[kFinishClose](); + this.#state[kFinishClose](); + this.#address = undefined; + this.#busy = false; + this.#listening = false; + this.#isPendingClose = false; + + // As QuicSessions are closed they are expected to remove themselves + // from the sessions collection. Just in case they don't, let's force + // it by resetting the set so we don't leak memory. Let's emit a warning, + // tho, if the set is not empty at this point as that would indicate a + // bug in Node.js that should be fixed. + if (this.#sessions.size > 0) { + process.emitWarning( + `The endpoint is closed with ${this.#sessions.size} active sessions. ` + + 'This should not happen and indicates a bug in Node.js. Please open an ' + + 'issue in the Node.js GitHub repository at https://github.com/nodejs/node ' + + 'to report the problem.', + ); + } + this.#sessions.clear(); + + // If destroy was called with an error, then the this.#pendingError will be + // set. Or, if context indicates an error condition that caused the endpoint + // to be closed, the status will indicate the error code. In either case, + // we will reject the pending close promise at this point. + const maybeCloseError = this.#maybeGetCloseError(context, status); + if (maybeCloseError !== undefined) { + if (onEndpointErrorChannel.hasSubscribers) { + onEndpointErrorChannel.publish({ + endpoint: this, + error: maybeCloseError, + }); + } + this.#pendingClose.reject(maybeCloseError); + } else { + // Otherwise we are good to resolve the pending close promise! + this.#pendingClose.resolve(); + } + if (onEndpointClosedChannel.hasSubscribers) { + onEndpointClosedChannel.publish({ + endpoint: this, + }); + } + + // Note that we are intentionally not clearing the + // this.#pendingClose.promise here. this.#pendingClose.resolve = undefined; this.#pendingClose.reject = undefined; - this.#handle = undefined; + this.#pendingError = undefined; } [kNewSession](handle) { - const session = new Session(this.#sessionConfig, this.#streamConfig, handle, this); - ArrayPrototypePush(this.#sessions, session); - this.#onsession(session, this); + const session = this.#newSession(handle); + if (onEndpointServerSessionChannel.hasSubscribers) { + onEndpointServerSessionChannel.publish({ + endpoint: this, + session, + }); + } + this.#onsession(session); + } + + // Called by the QuicSession when it closes to remove itself from + // the active sessions tracked by the QuicEndpoint. + [kRemoveSession](session) { + this.#sessions.delete(session); } async [SymbolAsyncDispose]() { await this.close(); } @@ -2338,7 +2037,7 @@ class Endpoint { depth: options.depth == null ? null : options.depth - 1, }; - return `Endpoint ${inspect({ + return `QuicEndpoint ${inspect({ address: this.address, busy: this.busy, closed: this.closed, @@ -2352,197 +2051,39 @@ class Endpoint { } }; -ObjectDefineProperties(QuicStreamStats.prototype, { - createdAt: kEnumerable, - receivedAt: kEnumerable, - ackedAt: kEnumerable, - closingAt: kEnumerable, - destroyedAt: kEnumerable, - bytesReceived: kEnumerable, - bytesSent: kEnumerable, - maxOffset: kEnumerable, - maxOffsetAcknowledged: kEnumerable, - maxOffsetReceived: kEnumerable, - finalSize: kEnumerable, -}); -ObjectDefineProperties(QuicStreamState.prototype, { - id: kEnumerable, - finSent: kEnumerable, - finReceived: kEnumerable, - readEnded: kEnumerable, - writeEnded: kEnumerable, - destroyed: kEnumerable, - paused: kEnumerable, - reset: kEnumerable, - hasReader: kEnumerable, - wantsBlock: kEnumerable, - wantsHeaders: kEnumerable, - wantsReset: kEnumerable, - wantsTrailers: kEnumerable, -}); -ObjectDefineProperties(QuicStream.prototype, { - stats: kEnumerable, - state: kEnumerable, - session: kEnumerable, - id: kEnumerable, -}); -ObjectDefineProperties(SessionStats.prototype, { - createdAt: kEnumerable, - closingAt: kEnumerable, - destroyedAt: kEnumerable, - handshakeCompletedAt: kEnumerable, - handshakeConfirmedAt: kEnumerable, - gracefulClosingAt: kEnumerable, - bytesReceived: kEnumerable, - bytesSent: kEnumerable, - bidiInStreamCount: kEnumerable, - bidiOutStreamCount: kEnumerable, - uniInStreamCount: kEnumerable, - uniOutStreamCount: kEnumerable, - lossRetransmitCount: kEnumerable, - maxBytesInFlights: kEnumerable, - bytesInFlight: kEnumerable, - blockCount: kEnumerable, - cwnd: kEnumerable, - latestRtt: kEnumerable, - minRtt: kEnumerable, - rttVar: kEnumerable, - smoothedRtt: kEnumerable, - ssthresh: kEnumerable, - datagramsReceived: kEnumerable, - datagramsSent: kEnumerable, - datagramsAcknowledged: kEnumerable, - datagramsLost: kEnumerable, -}); -ObjectDefineProperties(SessionState.prototype, { - hasPathValidationListener: kEnumerable, - hasVersionNegotiationListener: kEnumerable, - hasDatagramListener: kEnumerable, - hasSessionTicketListener: kEnumerable, - isClosing: kEnumerable, - isGracefulClose: kEnumerable, - isSilentClose: kEnumerable, - isStatelessReset: kEnumerable, - isDestroyed: kEnumerable, - isHandshakeCompleted: kEnumerable, - isHandshakeConfirmed: kEnumerable, - isStreamOpenAllowed: kEnumerable, - isPrioritySupported: kEnumerable, - isWrapped: kEnumerable, - lastDatagramId: kEnumerable, -}); -ObjectDefineProperties(Session.prototype, { - closed: kEnumerable, - destroyed: kEnumerable, - endpoint: kEnumerable, - path: kEnumerable, - state: kEnumerable, - stats: kEnumerable, -}); -ObjectDefineProperties(EndpointStats.prototype, { - createdAt: kEnumerable, - destroyedAt: kEnumerable, - bytesReceived: kEnumerable, - bytesSent: kEnumerable, - packetsReceived: kEnumerable, - packetsSent: kEnumerable, - serverSessions: kEnumerable, - clientSessions: kEnumerable, - serverBusyCount: kEnumerable, - retryCount: kEnumerable, - versionNegotiationCount: kEnumerable, - statelessResetCount: kEnumerable, - immediateCloseCount: kEnumerable, -}); -ObjectDefineProperties(EndpointState.prototype, { - isBound: kEnumerable, - isReceiving: kEnumerable, - isListening: kEnumerable, - isClosing: kEnumerable, - isBusy: kEnumerable, - pendingCallbacks: kEnumerable, -}); -ObjectDefineProperties(Endpoint.prototype, { - address: kEnumerable, - busy: kEnumerable, - closed: kEnumerable, - destroyed: kEnumerable, - state: kEnumerable, - stats: kEnumerable, -}); -ObjectDefineProperties(Endpoint, { - CC_ALGO_RENO: { - __proto__: null, - value: CC_ALGO_RENO, - writable: false, - configurable: false, - enumerable: true, - }, - CC_ALGO_CUBIC: { - __proto__: null, - value: CC_ALGO_CUBIC, - writable: false, - configurable: false, - enumerable: true, - }, - CC_ALGO_BBR: { - __proto__: null, - value: CC_ALGO_BBR, - writable: false, - configurable: false, - enumerable: true, - }, - CC_ALGO_RENO_STR: { - __proto__: null, - value: CC_ALGO_RENO_STR, - writable: false, - configurable: false, - enumerable: true, - }, - CC_ALGO_CUBIC_STR: { - __proto__: null, - value: CC_ALGO_CUBIC_STR, - writable: false, - configurable: false, - enumerable: true, - }, - CC_ALGO_BBR_STR: { +function readOnlyConstant(value) { + return { __proto__: null, - value: CC_ALGO_BBR_STR, + value, writable: false, configurable: false, enumerable: true, - }, + }; +} + +ObjectDefineProperties(QuicEndpoint, { + CC_ALGO_RENO: readOnlyConstant(CC_ALGO_RENO), + CC_ALGO_CUBIC: readOnlyConstant(CC_ALGO_CUBIC), + CC_ALGO_BBR: readOnlyConstant(CC_ALGO_BBR), + CC_ALGP_RENO_STR: readOnlyConstant(CC_ALGO_RENO_STR), + CC_ALGO_CUBIC_STR: readOnlyConstant(CC_ALGO_CUBIC_STR), + CC_ALGO_BBR_STR: readOnlyConstant(CC_ALGO_BBR_STR), }); -ObjectDefineProperties(Session, { - DEFAULT_CIPHERS: { - __proto__: null, - value: DEFAULT_CIPHERS, - writable: false, - configurable: false, - enumerable: true, - }, - DEFAULT_GROUPS: { - __proto__: null, - value: DEFAULT_GROUPS, - writable: false, - configurable: false, - enumerable: true, - }, +ObjectDefineProperties(QuicSession, { + DEFAULT_CIPHERS: readOnlyConstant(DEFAULT_CIPHERS), + DEFAULT_GROUPS: readOnlyConstant(DEFAULT_GROUPS), }); module.exports = { - Endpoint, - Session, + QuicEndpoint, + QuicSession, QuicStream, - // Exported for testing - kFinishClose, - SessionState, - SessionStats, + QuicSessionState, + QuicSessionStats, QuicStreamState, QuicStreamStats, - EndpointState, - EndpointStats, + QuicEndpointState, + QuicEndpointStats, }; /* c8 ignore stop */ diff --git a/lib/internal/quic/state.js b/lib/internal/quic/state.js new file mode 100644 index 00000000000000..8bfb2ac83302fb --- /dev/null +++ b/lib/internal/quic/state.js @@ -0,0 +1,566 @@ +'use strict'; + +const { + ArrayBuffer, + DataView, + DataViewPrototypeGetBigInt64, + DataViewPrototypeGetBigUint64, + DataViewPrototypeGetUint8, + DataViewPrototypeSetUint8, + JSONStringify, +} = primordials; + +const { + codes: { + ERR_ILLEGAL_CONSTRUCTOR, + ERR_INVALID_ARG_TYPE, + ERR_INVALID_STATE, + }, +} = require('internal/errors'); + +const { + isArrayBuffer, +} = require('util/types'); + +const { inspect } = require('internal/util/inspect'); + +const { + kFinishClose, + kInspect, + kPrivateConstructor, +} = require('internal/quic/symbols'); + +// This file defines the helper objects for accessing state for +// various QUIC objects. Each of these wraps a DataView. +// Some of the state properties are read only, others are mutable. +// An ArrayBuffer is shared with the C++ level to allow for more +// efficient communication of state across the C++/JS boundary. +// When the state object is no longer needed, it is closed to +// prevent further updates to the buffer. + +const { + IDX_STATE_SESSION_PATH_VALIDATION, + IDX_STATE_SESSION_VERSION_NEGOTIATION, + IDX_STATE_SESSION_DATAGRAM, + IDX_STATE_SESSION_SESSION_TICKET, + IDX_STATE_SESSION_CLOSING, + IDX_STATE_SESSION_GRACEFUL_CLOSE, + IDX_STATE_SESSION_SILENT_CLOSE, + IDX_STATE_SESSION_STATELESS_RESET, + IDX_STATE_SESSION_DESTROYED, + IDX_STATE_SESSION_HANDSHAKE_COMPLETED, + IDX_STATE_SESSION_HANDSHAKE_CONFIRMED, + IDX_STATE_SESSION_STREAM_OPEN_ALLOWED, + IDX_STATE_SESSION_PRIORITY_SUPPORTED, + IDX_STATE_SESSION_WRAPPED, + IDX_STATE_SESSION_LAST_DATAGRAM_ID, + + IDX_STATE_ENDPOINT_BOUND, + IDX_STATE_ENDPOINT_RECEIVING, + IDX_STATE_ENDPOINT_LISTENING, + IDX_STATE_ENDPOINT_CLOSING, + IDX_STATE_ENDPOINT_BUSY, + IDX_STATE_ENDPOINT_PENDING_CALLBACKS, + + IDX_STATE_STREAM_ID, + IDX_STATE_STREAM_FIN_SENT, + IDX_STATE_STREAM_FIN_RECEIVED, + IDX_STATE_STREAM_READ_ENDED, + IDX_STATE_STREAM_WRITE_ENDED, + IDX_STATE_STREAM_DESTROYED, + IDX_STATE_STREAM_PAUSED, + IDX_STATE_STREAM_RESET, + IDX_STATE_STREAM_HAS_READER, + IDX_STATE_STREAM_WANTS_BLOCK, + IDX_STATE_STREAM_WANTS_HEADERS, + IDX_STATE_STREAM_WANTS_RESET, + IDX_STATE_STREAM_WANTS_TRAILERS, +} = internalBinding('quic'); + +class QuicEndpointState { + /** @type {DataView} */ + #handle; + + /** + * @param {symbol} privateSymbol + * @param {ArrayBuffer} buffer + */ + constructor(privateSymbol, buffer) { + if (privateSymbol !== kPrivateConstructor) { + throw new ERR_ILLEGAL_CONSTRUCTOR(); + } + if (!isArrayBuffer(buffer)) { + throw new ERR_INVALID_ARG_TYPE('buffer', ['ArrayBuffer'], buffer); + } + this.#handle = new DataView(buffer); + } + + #assertNotClosed() { + if (this.#handle.byteLength === 0) { + throw new ERR_INVALID_STATE('Endpoint is closed'); + } + } + + /** @type {boolean} */ + get isBound() { + this.#assertNotClosed(); + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_ENDPOINT_BOUND); + } + + /** @type {boolean} */ + get isReceiving() { + this.#assertNotClosed(); + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_ENDPOINT_RECEIVING); + } + + /** @type {boolean} */ + get isListening() { + this.#assertNotClosed(); + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_ENDPOINT_LISTENING); + } + + /** @type {boolean} */ + get isClosing() { + this.#assertNotClosed(); + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_ENDPOINT_CLOSING); + } + + /** @type {boolean} */ + get isBusy() { + this.#assertNotClosed(); + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_ENDPOINT_BUSY); + } + + /** + * The number of underlying callbacks that are pending. If the session + * is closing, these are the number of callbacks that the session is + * waiting on before it can be closed. + * @type {bigint} + */ + get pendingCallbacks() { + this.#assertNotClosed(); + return DataViewPrototypeGetBigUint64(this.#handle, IDX_STATE_ENDPOINT_PENDING_CALLBACKS); + } + + toString() { + return JSONStringify(this.toJSON()); + } + + toJSON() { + if (this.#handle.byteLength === 0) return {}; + return { + __proto__: null, + isBound: this.isBound, + isReceiving: this.isReceiving, + isListening: this.isListening, + isClosing: this.isClosing, + isBusy: this.isBusy, + pendingCallbacks: `${this.pendingCallbacks}`, + }; + } + + [kInspect](depth, options) { + if (depth < 0) + return this; + + if (this.#handle.byteLength === 0) { + return 'QuicEndpointState { }'; + } + + const opts = { + ...options, + depth: options.depth == null ? null : options.depth - 1, + }; + + return `QuicEndpointState ${inspect({ + isBound: this.isBound, + isReceiving: this.isReceiving, + isListening: this.isListening, + isClosing: this.isClosing, + isBusy: this.isBusy, + pendingCallbacks: this.pendingCallbacks, + }, opts)}`; + } + + [kFinishClose]() { + // Snapshot the state into a new DataView since the underlying + // buffer will be destroyed. + if (this.#handle.byteLength === 0) return; + this.#handle = new DataView(new ArrayBuffer(0)); + } +} + +class QuicSessionState { + /** @type {DataView} */ + #handle; + + /** + * @param {symbol} privateSymbol + * @param {ArrayBuffer} buffer + */ + constructor(privateSymbol, buffer) { + if (privateSymbol !== kPrivateConstructor) { + throw new ERR_ILLEGAL_CONSTRUCTOR(); + } + if (!isArrayBuffer(buffer)) { + throw new ERR_INVALID_ARG_TYPE('buffer', ['ArrayBuffer'], buffer); + } + this.#handle = new DataView(buffer); + } + + #assertNotClosed() { + if (this.#handle.byteLength === 0) { + throw new ERR_INVALID_STATE('Session is closed'); + } + } + + /** @type {boolean} */ + get hasPathValidationListener() { + this.#assertNotClosed(); + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_PATH_VALIDATION); + } + + /** @type {boolean} */ + set hasPathValidationListener(val) { + this.#assertNotClosed(); + DataViewPrototypeSetUint8(this.#handle, IDX_STATE_SESSION_PATH_VALIDATION, val ? 1 : 0); + } + + /** @type {boolean} */ + get hasVersionNegotiationListener() { + this.#assertNotClosed(); + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_VERSION_NEGOTIATION); + } + + /** @type {boolean} */ + set hasVersionNegotiationListener(val) { + this.#assertNotClosed(); + DataViewPrototypeSetUint8(this.#handle, IDX_STATE_SESSION_VERSION_NEGOTIATION, val ? 1 : 0); + } + + /** @type {boolean} */ + get hasDatagramListener() { + this.#assertNotClosed(); + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_DATAGRAM); + } + + /** @type {boolean} */ + set hasDatagramListener(val) { + this.#assertNotClosed(); + DataViewPrototypeSetUint8(this.#handle, IDX_STATE_SESSION_DATAGRAM, val ? 1 : 0); + } + + /** @type {boolean} */ + get hasSessionTicketListener() { + this.#assertNotClosed(); + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_SESSION_TICKET); + } + + /** @type {boolean} */ + set hasSessionTicketListener(val) { + this.#assertNotClosed(); + DataViewPrototypeSetUint8(this.#handle, IDX_STATE_SESSION_SESSION_TICKET, val ? 1 : 0); + } + + /** @type {boolean} */ + get isClosing() { + this.#assertNotClosed(); + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_CLOSING); + } + + /** @type {boolean} */ + get isGracefulClose() { + this.#assertNotClosed(); + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_GRACEFUL_CLOSE); + } + + /** @type {boolean} */ + get isSilentClose() { + this.#assertNotClosed(); + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_SILENT_CLOSE); + } + + /** @type {boolean} */ + get isStatelessReset() { + this.#assertNotClosed(); + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_STATELESS_RESET); + } + + /** @type {boolean} */ + get isDestroyed() { + this.#assertNotClosed(); + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_DESTROYED); + } + + /** @type {boolean} */ + get isHandshakeCompleted() { + this.#assertNotClosed(); + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_HANDSHAKE_COMPLETED); + } + + /** @type {boolean} */ + get isHandshakeConfirmed() { + this.#assertNotClosed(); + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_HANDSHAKE_CONFIRMED); + } + + /** @type {boolean} */ + get isStreamOpenAllowed() { + this.#assertNotClosed(); + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_STREAM_OPEN_ALLOWED); + } + + /** @type {boolean} */ + get isPrioritySupported() { + this.#assertNotClosed(); + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_PRIORITY_SUPPORTED); + } + + /** @type {boolean} */ + get isWrapped() { + this.#assertNotClosed(); + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_SESSION_WRAPPED); + } + + /** @type {bigint} */ + get lastDatagramId() { + this.#assertNotClosed(); + return DataViewPrototypeGetBigUint64(this.#handle, IDX_STATE_SESSION_LAST_DATAGRAM_ID); + } + + toString() { + return JSONStringify(this.toJSON()); + } + + toJSON() { + if (this.#handle.byteLength === 0) return {}; + return { + __proto__: null, + hasPathValidationListener: this.hasPathValidationListener, + hasVersionNegotiationListener: this.hasVersionNegotiationListener, + hasDatagramListener: this.hasDatagramListener, + hasSessionTicketListener: this.hasSessionTicketListener, + isClosing: this.isClosing, + isGracefulClose: this.isGracefulClose, + isSilentClose: this.isSilentClose, + isStatelessReset: this.isStatelessReset, + isDestroyed: this.isDestroyed, + isHandshakeCompleted: this.isHandshakeCompleted, + isHandshakeConfirmed: this.isHandshakeConfirmed, + isStreamOpenAllowed: this.isStreamOpenAllowed, + isPrioritySupported: this.isPrioritySupported, + isWrapped: this.isWrapped, + lastDatagramId: `${this.lastDatagramId}`, + }; + } + + [kInspect](depth, options) { + if (depth < 0) + return this; + + if (this.#handle.byteLength === 0) { + return 'QuicSessionState { }'; + } + + const opts = { + ...options, + depth: options.depth == null ? null : options.depth - 1, + }; + + return `QuicSessionState ${inspect({ + hasPathValidationListener: this.hasPathValidationListener, + hasVersionNegotiationListener: this.hasVersionNegotiationListener, + hasDatagramListener: this.hasDatagramListener, + hasSessionTicketListener: this.hasSessionTicketListener, + isClosing: this.isClosing, + isGracefulClose: this.isGracefulClose, + isSilentClose: this.isSilentClose, + isStatelessReset: this.isStatelessReset, + isDestroyed: this.isDestroyed, + isHandshakeCompleted: this.isHandshakeCompleted, + isHandshakeConfirmed: this.isHandshakeConfirmed, + isStreamOpenAllowed: this.isStreamOpenAllowed, + isPrioritySupported: this.isPrioritySupported, + isWrapped: this.isWrapped, + lastDatagramId: this.lastDatagramId, + }, opts)}`; + } + + [kFinishClose]() { + // Snapshot the state into a new DataView since the underlying + // buffer will be destroyed. + if (this.#handle.byteLength === 0) return; + this.#handle = new DataView(new ArrayBuffer(0)); + } +} + +class QuicStreamState { + /** @type {DataView} */ + #handle; + + /** + * @param {symbol} privateSymbol + * @param {ArrayBuffer} buffer + */ + constructor(privateSymbol, buffer) { + if (privateSymbol !== kPrivateConstructor) { + throw new ERR_ILLEGAL_CONSTRUCTOR(); + } + if (!isArrayBuffer(buffer)) { + throw new ERR_INVALID_ARG_TYPE('buffer', ['ArrayBuffer'], buffer); + } + this.#handle = new DataView(buffer); + } + + /** @type {bigint} */ + get id() { + return DataViewPrototypeGetBigInt64(this.#handle, IDX_STATE_STREAM_ID); + } + + /** @type {boolean} */ + get finSent() { + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_FIN_SENT); + } + + /** @type {boolean} */ + get finReceived() { + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_FIN_RECEIVED); + } + + /** @type {boolean} */ + get readEnded() { + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_READ_ENDED); + } + + /** @type {boolean} */ + get writeEnded() { + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_WRITE_ENDED); + } + + /** @type {boolean} */ + get destroyed() { + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_DESTROYED); + } + + /** @type {boolean} */ + get paused() { + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_PAUSED); + } + + /** @type {boolean} */ + get reset() { + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_RESET); + } + + /** @type {boolean} */ + get hasReader() { + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_HAS_READER); + } + + /** @type {boolean} */ + get wantsBlock() { + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_WANTS_BLOCK); + } + + /** @type {boolean} */ + set wantsBlock(val) { + DataViewPrototypeSetUint8(this.#handle, IDX_STATE_STREAM_WANTS_BLOCK, val ? 1 : 0); + } + + /** @type {boolean} */ + get wantsHeaders() { + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_WANTS_HEADERS); + } + + /** @type {boolean} */ + set wantsHeaders(val) { + DataViewPrototypeSetUint8(this.#handle, IDX_STATE_STREAM_WANTS_HEADERS, val ? 1 : 0); + } + + /** @type {boolean} */ + get wantsReset() { + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_WANTS_RESET); + } + + /** @type {boolean} */ + set wantsReset(val) { + DataViewPrototypeSetUint8(this.#handle, IDX_STATE_STREAM_WANTS_RESET, val ? 1 : 0); + } + + /** @type {boolean} */ + get wantsTrailers() { + return !!DataViewPrototypeGetUint8(this.#handle, IDX_STATE_STREAM_WANTS_TRAILERS); + } + + /** @type {boolean} */ + set wantsTrailers(val) { + DataViewPrototypeSetUint8(this.#handle, IDX_STATE_STREAM_WANTS_TRAILERS, val ? 1 : 0); + } + + toString() { + return JSONStringify(this.toJSON()); + } + + toJSON() { + if (this.#handle.byteLength === 0) return {}; + return { + __proto__: null, + id: `${this.id}`, + finSent: this.finSent, + finReceived: this.finReceived, + readEnded: this.readEnded, + writeEnded: this.writeEnded, + destroyed: this.destroyed, + paused: this.paused, + reset: this.reset, + hasReader: this.hasReader, + wantsBlock: this.wantsBlock, + wantsHeaders: this.wantsHeaders, + wantsReset: this.wantsReset, + wantsTrailers: this.wantsTrailers, + }; + } + + [kInspect](depth, options) { + if (depth < 0) + return this; + + if (this.#handle.byteLength === 0) { + return 'QuicStreamState { }'; + } + + const opts = { + ...options, + depth: options.depth == null ? null : options.depth - 1, + }; + + return `QuicStreamState ${inspect({ + id: this.id, + finSent: this.finSent, + finReceived: this.finReceived, + readEnded: this.readEnded, + writeEnded: this.writeEnded, + destroyed: this.destroyed, + paused: this.paused, + reset: this.reset, + hasReader: this.hasReader, + wantsBlock: this.wantsBlock, + wantsHeaders: this.wantsHeaders, + wantsReset: this.wantsReset, + wantsTrailers: this.wantsTrailers, + }, opts)}`; + } + + [kFinishClose]() { + // Snapshot the state into a new DataView since the underlying + // buffer will be destroyed. + if (this.#handle.byteLength === 0) return; + this.#handle = new DataView(new ArrayBuffer(0)); + } +} + +module.exports = { + QuicEndpointState, + QuicSessionState, + QuicStreamState, +}; diff --git a/lib/internal/quic/stats.js b/lib/internal/quic/stats.js new file mode 100644 index 00000000000000..3ac9523d9aeca4 --- /dev/null +++ b/lib/internal/quic/stats.js @@ -0,0 +1,646 @@ +'use strict'; + +const { + BigUint64Array, + JSONStringify, +} = primordials; + +const { + isArrayBuffer, +} = require('util/types'); + +const { + codes: { + ERR_ILLEGAL_CONSTRUCTOR, + ERR_INVALID_ARG_TYPE, + }, +} = require('internal/errors'); + +const { inspect } = require('internal/util/inspect'); + +const { + kFinishClose, + kInspect, + kPrivateConstructor, +} = require('internal/quic/symbols'); + +// This file defines the helper objects for accessing statistics collected +// by various QUIC objects. Each of these wraps a BigUint64Array. Every +// stats object is read-only via the API, and the underlying buffer is +// only updated by the QUIC internals. When the stats object is no longer +// needed, it is closed to prevent further updates to the buffer. + +const { + // All of the IDX_STATS_* constants are the index positions of the stats + // fields in the relevant BigUint64Array's that underlie the *Stats objects. + // These are not exposed to end users. + IDX_STATS_ENDPOINT_CREATED_AT, + IDX_STATS_ENDPOINT_DESTROYED_AT, + IDX_STATS_ENDPOINT_BYTES_RECEIVED, + IDX_STATS_ENDPOINT_BYTES_SENT, + IDX_STATS_ENDPOINT_PACKETS_RECEIVED, + IDX_STATS_ENDPOINT_PACKETS_SENT, + IDX_STATS_ENDPOINT_SERVER_SESSIONS, + IDX_STATS_ENDPOINT_CLIENT_SESSIONS, + IDX_STATS_ENDPOINT_SERVER_BUSY_COUNT, + IDX_STATS_ENDPOINT_RETRY_COUNT, + IDX_STATS_ENDPOINT_VERSION_NEGOTIATION_COUNT, + IDX_STATS_ENDPOINT_STATELESS_RESET_COUNT, + IDX_STATS_ENDPOINT_IMMEDIATE_CLOSE_COUNT, + + IDX_STATS_SESSION_CREATED_AT, + IDX_STATS_SESSION_CLOSING_AT, + IDX_STATS_SESSION_DESTROYED_AT, + IDX_STATS_SESSION_HANDSHAKE_COMPLETED_AT, + IDX_STATS_SESSION_HANDSHAKE_CONFIRMED_AT, + IDX_STATS_SESSION_GRACEFUL_CLOSING_AT, + IDX_STATS_SESSION_BYTES_RECEIVED, + IDX_STATS_SESSION_BYTES_SENT, + IDX_STATS_SESSION_BIDI_IN_STREAM_COUNT, + IDX_STATS_SESSION_BIDI_OUT_STREAM_COUNT, + IDX_STATS_SESSION_UNI_IN_STREAM_COUNT, + IDX_STATS_SESSION_UNI_OUT_STREAM_COUNT, + IDX_STATS_SESSION_LOSS_RETRANSMIT_COUNT, + IDX_STATS_SESSION_MAX_BYTES_IN_FLIGHT, + IDX_STATS_SESSION_BYTES_IN_FLIGHT, + IDX_STATS_SESSION_BLOCK_COUNT, + IDX_STATS_SESSION_CWND, + IDX_STATS_SESSION_LATEST_RTT, + IDX_STATS_SESSION_MIN_RTT, + IDX_STATS_SESSION_RTTVAR, + IDX_STATS_SESSION_SMOOTHED_RTT, + IDX_STATS_SESSION_SSTHRESH, + IDX_STATS_SESSION_DATAGRAMS_RECEIVED, + IDX_STATS_SESSION_DATAGRAMS_SENT, + IDX_STATS_SESSION_DATAGRAMS_ACKNOWLEDGED, + IDX_STATS_SESSION_DATAGRAMS_LOST, + + IDX_STATS_STREAM_CREATED_AT, + IDX_STATS_STREAM_RECEIVED_AT, + IDX_STATS_STREAM_ACKED_AT, + IDX_STATS_STREAM_CLOSING_AT, + IDX_STATS_STREAM_DESTROYED_AT, + IDX_STATS_STREAM_BYTES_RECEIVED, + IDX_STATS_STREAM_BYTES_SENT, + IDX_STATS_STREAM_MAX_OFFSET, + IDX_STATS_STREAM_MAX_OFFSET_ACK, + IDX_STATS_STREAM_MAX_OFFSET_RECV, + IDX_STATS_STREAM_FINAL_SIZE, +} = internalBinding('quic'); + +class QuicEndpointStats { + /** @type {BigUint64Array} */ + #handle; + /** @type {boolean} */ + #disconnected = false; + + /** + * @param {symbol} privateSymbol + * @param {ArrayBuffer} buffer + */ + constructor(privateSymbol, buffer) { + // We use the kPrivateConstructor symbol to restrict the ability to + // create new instances of QuicEndpointStats to internal code. + if (privateSymbol !== kPrivateConstructor) { + throw new ERR_ILLEGAL_CONSTRUCTOR(); + } + if (!isArrayBuffer(buffer)) { + throw new ERR_INVALID_ARG_TYPE('buffer', ['ArrayBuffer'], buffer); + } + this.#handle = new BigUint64Array(buffer); + } + + /** @type {bigint} */ + get createdAt() { + return this.#handle[IDX_STATS_ENDPOINT_CREATED_AT]; + } + + /** @type {bigint} */ + get destroyedAt() { + return this.#handle[IDX_STATS_ENDPOINT_DESTROYED_AT]; + } + + /** @type {bigint} */ + get bytesReceived() { + return this.#handle[IDX_STATS_ENDPOINT_BYTES_RECEIVED]; + } + + /** @type {bigint} */ + get bytesSent() { + return this.#handle[IDX_STATS_ENDPOINT_BYTES_SENT]; + } + + /** @type {bigint} */ + get packetsReceived() { + return this.#handle[IDX_STATS_ENDPOINT_PACKETS_RECEIVED]; + } + + /** @type {bigint} */ + get packetsSent() { + return this.#handle[IDX_STATS_ENDPOINT_PACKETS_SENT]; + } + + /** @type {bigint} */ + get serverSessions() { + return this.#handle[IDX_STATS_ENDPOINT_SERVER_SESSIONS]; + } + + /** @type {bigint} */ + get clientSessions() { + return this.#handle[IDX_STATS_ENDPOINT_CLIENT_SESSIONS]; + } + + /** @type {bigint} */ + get serverBusyCount() { + return this.#handle[IDX_STATS_ENDPOINT_SERVER_BUSY_COUNT]; + } + + /** @type {bigint} */ + get retryCount() { + return this.#handle[IDX_STATS_ENDPOINT_RETRY_COUNT]; + } + + /** @type {bigint} */ + get versionNegotiationCount() { + return this.#handle[IDX_STATS_ENDPOINT_VERSION_NEGOTIATION_COUNT]; + } + + /** @type {bigint} */ + get statelessResetCount() { + return this.#handle[IDX_STATS_ENDPOINT_STATELESS_RESET_COUNT]; + } + + /** @type {bigint} */ + get immediateCloseCount() { + return this.#handle[IDX_STATS_ENDPOINT_IMMEDIATE_CLOSE_COUNT]; + } + + toString() { + return JSONStringify(this.toJSON()); + } + + toJSON() { + return { + __proto__: null, + connected: this.isConnected, + // We need to convert the values to strings because JSON does not + // support BigInts. + createdAt: `${this.createdAt}`, + destroyedAt: `${this.destroyedAt}`, + bytesReceived: `${this.bytesReceived}`, + bytesSent: `${this.bytesSent}`, + packetsReceived: `${this.packetsReceived}`, + packetsSent: `${this.packetsSent}`, + serverSessions: `${this.serverSessions}`, + clientSessions: `${this.clientSessions}`, + serverBusyCount: `${this.serverBusyCount}`, + retryCount: `${this.retryCount}`, + versionNegotiationCount: `${this.versionNegotiationCount}`, + statelessResetCount: `${this.statelessResetCount}`, + immediateCloseCount: `${this.immediateCloseCount}`, + }; + } + + [kInspect](depth, options) { + if (depth < 0) + return this; + + const opts = { + ...options, + depth: options.depth == null ? null : options.depth - 1, + }; + + return `QuicEndpointStats ${inspect({ + connected: this.isConnected, + createdAt: this.createdAt, + destroyedAt: this.destroyedAt, + bytesReceived: this.bytesReceived, + bytesSent: this.bytesSent, + packetsReceived: this.packetsReceived, + packetsSent: this.packetsSent, + serverSessions: this.serverSessions, + clientSessions: this.clientSessions, + serverBusyCount: this.serverBusyCount, + retryCount: this.retryCount, + versionNegotiationCount: this.versionNegotiationCount, + statelessResetCount: this.statelessResetCount, + immediateCloseCount: this.immediateCloseCount, + }, opts)}`; + } + + /** + * True if this QuicEndpointStats object is still connected to the underlying + * Endpoint stats source. If this returns false, then the stats object is + * no longer being updated and should be considered stale. + * @returns {boolean} + */ + get isConnected() { + return !this.#disconnected; + } + + [kFinishClose]() { + // Snapshot the stats into a new BigUint64Array since the underlying + // buffer will be destroyed. + this.#handle = new BigUint64Array(this.#handle); + this.#disconnected = true; + } +} + +class QuicSessionStats { + /** @type {BigUint64Array} */ + #handle; + /** @type {boolean} */ + #disconnected = false; + + /** + * @param {symbol} privateSynbol + * @param {BigUint64Array} buffer + */ + constructor(privateSynbol, buffer) { + // We use the kPrivateConstructor symbol to restrict the ability to + // create new instances of QuicSessionStats to internal code. + if (privateSynbol !== kPrivateConstructor) { + throw new ERR_ILLEGAL_CONSTRUCTOR(); + } + if (!isArrayBuffer(buffer)) { + throw new ERR_INVALID_ARG_TYPE('buffer', ['ArrayBuffer'], buffer); + } + this.#handle = new BigUint64Array(buffer); + } + + /** @type {bigint} */ + get createdAt() { + return this.#handle[IDX_STATS_SESSION_CREATED_AT]; + } + + /** @type {bigint} */ + get closingAt() { + return this.#handle[IDX_STATS_SESSION_CLOSING_AT]; + } + + /** @type {bigint} */ + get destroyedAt() { + return this.#handle[IDX_STATS_SESSION_DESTROYED_AT]; + } + + /** @type {bigint} */ + get handshakeCompletedAt() { + return this.#handle[IDX_STATS_SESSION_HANDSHAKE_COMPLETED_AT]; + } + + /** @type {bigint} */ + get handshakeConfirmedAt() { + return this.#handle[IDX_STATS_SESSION_HANDSHAKE_CONFIRMED_AT]; + } + + /** @type {bigint} */ + get gracefulClosingAt() { + return this.#handle[IDX_STATS_SESSION_GRACEFUL_CLOSING_AT]; + } + + /** @type {bigint} */ + get bytesReceived() { + return this.#handle[IDX_STATS_SESSION_BYTES_RECEIVED]; + } + + /** @type {bigint} */ + get bytesSent() { + return this.#handle[IDX_STATS_SESSION_BYTES_SENT]; + } + + /** @type {bigint} */ + get bidiInStreamCount() { + return this.#handle[IDX_STATS_SESSION_BIDI_IN_STREAM_COUNT]; + } + + /** @type {bigint} */ + get bidiOutStreamCount() { + return this.#handle[IDX_STATS_SESSION_BIDI_OUT_STREAM_COUNT]; + } + + /** @type {bigint} */ + get uniInStreamCount() { + return this.#handle[IDX_STATS_SESSION_UNI_IN_STREAM_COUNT]; + } + + /** @type {bigint} */ + get uniOutStreamCount() { + return this.#handle[IDX_STATS_SESSION_UNI_OUT_STREAM_COUNT]; + } + + /** @type {bigint} */ + get lossRetransmitCount() { + return this.#handle[IDX_STATS_SESSION_LOSS_RETRANSMIT_COUNT]; + } + + /** @type {bigint} */ + get maxBytesInFlights() { + return this.#handle[IDX_STATS_SESSION_MAX_BYTES_IN_FLIGHT]; + } + + /** @type {bigint} */ + get bytesInFlight() { + return this.#handle[IDX_STATS_SESSION_BYTES_IN_FLIGHT]; + } + + /** @type {bigint} */ + get blockCount() { + return this.#handle[IDX_STATS_SESSION_BLOCK_COUNT]; + } + + /** @type {bigint} */ + get cwnd() { + return this.#handle[IDX_STATS_SESSION_CWND]; + } + + /** @type {bigint} */ + get latestRtt() { + return this.#handle[IDX_STATS_SESSION_LATEST_RTT]; + } + + /** @type {bigint} */ + get minRtt() { + return this.#handle[IDX_STATS_SESSION_MIN_RTT]; + } + + /** @type {bigint} */ + get rttVar() { + return this.#handle[IDX_STATS_SESSION_RTTVAR]; + } + + /** @type {bigint} */ + get smoothedRtt() { + return this.#handle[IDX_STATS_SESSION_SMOOTHED_RTT]; + } + + /** @type {bigint} */ + get ssthresh() { + return this.#handle[IDX_STATS_SESSION_SSTHRESH]; + } + + /** @type {bigint} */ + get datagramsReceived() { + return this.#handle[IDX_STATS_SESSION_DATAGRAMS_RECEIVED]; + } + + /** @type {bigint} */ + get datagramsSent() { + return this.#handle[IDX_STATS_SESSION_DATAGRAMS_SENT]; + } + + /** @type {bigint} */ + get datagramsAcknowledged() { + return this.#handle[IDX_STATS_SESSION_DATAGRAMS_ACKNOWLEDGED]; + } + + /** @type {bigint} */ + get datagramsLost() { + return this.#handle[IDX_STATS_SESSION_DATAGRAMS_LOST]; + } + + toString() { + return JSONStringify(this.toJSON()); + } + + toJSON() { + return { + __proto__: null, + connected: this.isConnected, + // We need to convert the values to strings because JSON does not + // support BigInts. + createdAt: `${this.createdAt}`, + closingAt: `${this.closingAt}`, + destroyedAt: `${this.destroyedAt}`, + handshakeCompletedAt: `${this.handshakeCompletedAt}`, + handshakeConfirmedAt: `${this.handshakeConfirmedAt}`, + gracefulClosingAt: `${this.gracefulClosingAt}`, + bytesReceived: `${this.bytesReceived}`, + bytesSent: `${this.bytesSent}`, + bidiInStreamCount: `${this.bidiInStreamCount}`, + bidiOutStreamCount: `${this.bidiOutStreamCount}`, + uniInStreamCount: `${this.uniInStreamCount}`, + uniOutStreamCount: `${this.uniOutStreamCount}`, + lossRetransmitCount: `${this.lossRetransmitCount}`, + maxBytesInFlights: `${this.maxBytesInFlights}`, + bytesInFlight: `${this.bytesInFlight}`, + blockCount: `${this.blockCount}`, + cwnd: `${this.cwnd}`, + latestRtt: `${this.latestRtt}`, + minRtt: `${this.minRtt}`, + rttVar: `${this.rttVar}`, + smoothedRtt: `${this.smoothedRtt}`, + ssthresh: `${this.ssthresh}`, + datagramsReceived: `${this.datagramsReceived}`, + datagramsSent: `${this.datagramsSent}`, + datagramsAcknowledged: `${this.datagramsAcknowledged}`, + datagramsLost: `${this.datagramsLost}`, + }; + } + + [kInspect](depth, options) { + if (depth < 0) + return this; + + const opts = { + ...options, + depth: options.depth == null ? null : options.depth - 1, + }; + + return `QuicSessionStats ${inspect({ + connected: this.isConnected, + createdAt: this.createdAt, + closingAt: this.closingAt, + destroyedAt: this.destroyedAt, + handshakeCompletedAt: this.handshakeCompletedAt, + handshakeConfirmedAt: this.handshakeConfirmedAt, + gracefulClosingAt: this.gracefulClosingAt, + bytesReceived: this.bytesReceived, + bytesSent: this.bytesSent, + bidiInStreamCount: this.bidiInStreamCount, + bidiOutStreamCount: this.bidiOutStreamCount, + uniInStreamCount: this.uniInStreamCount, + uniOutStreamCount: this.uniOutStreamCount, + lossRetransmitCount: this.lossRetransmitCount, + maxBytesInFlights: this.maxBytesInFlights, + bytesInFlight: this.bytesInFlight, + blockCount: this.blockCount, + cwnd: this.cwnd, + latestRtt: this.latestRtt, + minRtt: this.minRtt, + rttVar: this.rttVar, + smoothedRtt: this.smoothedRtt, + ssthresh: this.ssthresh, + datagramsReceived: this.datagramsReceived, + datagramsSent: this.datagramsSent, + datagramsAcknowledged: this.datagramsAcknowledged, + datagramsLost: this.datagramsLost, + }, opts)}`; + } + + /** + * True if this QuicSessionStats object is still connected to the underlying + * Session stats source. If this returns false, then the stats object is + * no longer being updated and should be considered stale. + * @returns {boolean} + */ + get isConnected() { + return !this.#disconnected; + } + + [kFinishClose]() { + // Snapshot the stats into a new BigUint64Array since the underlying + // buffer will be destroyed. + this.#handle = new BigUint64Array(this.#handle); + this.#disconnected = true; + } +} + +class QuicStreamStats { + /** @type {BigUint64Array} */ + #handle; + /** type {boolean} */ + #disconnected = false; + + /** + * @param {symbol} privateSymbol + * @param {ArrayBuffer} buffer + */ + constructor(privateSymbol, buffer) { + // We use the kPrivateConstructor symbol to restrict the ability to + // create new instances of QuicStreamStats to internal code. + if (privateSymbol !== kPrivateConstructor) { + throw new ERR_ILLEGAL_CONSTRUCTOR(); + } + if (!isArrayBuffer(buffer)) { + throw new ERR_INVALID_ARG_TYPE('buffer', ['ArrayBuffer'], buffer); + } + this.#handle = new BigUint64Array(buffer); + } + + /** @type {bigint} */ + get createdAt() { + return this.#handle[IDX_STATS_STREAM_CREATED_AT]; + } + + /** @type {bigint} */ + get receivedAt() { + return this.#handle[IDX_STATS_STREAM_RECEIVED_AT]; + } + + /** @type {bigint} */ + get ackedAt() { + return this.#handle[IDX_STATS_STREAM_ACKED_AT]; + } + + /** @type {bigint} */ + get closingAt() { + return this.#handle[IDX_STATS_STREAM_CLOSING_AT]; + } + + /** @type {bigint} */ + get destroyedAt() { + return this.#handle[IDX_STATS_STREAM_DESTROYED_AT]; + } + + /** @type {bigint} */ + get bytesReceived() { + return this.#handle[IDX_STATS_STREAM_BYTES_RECEIVED]; + } + + /** @type {bigint} */ + get bytesSent() { + return this.#handle[IDX_STATS_STREAM_BYTES_SENT]; + } + + /** @type {bigint} */ + get maxOffset() { + return this.#handle[IDX_STATS_STREAM_MAX_OFFSET]; + } + + /** @type {bigint} */ + get maxOffsetAcknowledged() { + return this.#handle[IDX_STATS_STREAM_MAX_OFFSET_ACK]; + } + + /** @type {bigint} */ + get maxOffsetReceived() { + return this.#handle[IDX_STATS_STREAM_MAX_OFFSET_RECV]; + } + + /** @type {bigint} */ + get finalSize() { + return this.#handle[IDX_STATS_STREAM_FINAL_SIZE]; + } + + toString() { + return JSONStringify(this.toJSON()); + } + + toJSON() { + return { + __proto__: null, + connected: this.isConnected, + // We need to convert the values to strings because JSON does not + // support BigInts. + createdAt: `${this.createdAt}`, + receivedAt: `${this.receivedAt}`, + ackedAt: `${this.ackedAt}`, + closingAt: `${this.closingAt}`, + destroyedAt: `${this.destroyedAt}`, + bytesReceived: `${this.bytesReceived}`, + bytesSent: `${this.bytesSent}`, + maxOffset: `${this.maxOffset}`, + maxOffsetAcknowledged: `${this.maxOffsetAcknowledged}`, + maxOffsetReceived: `${this.maxOffsetReceived}`, + finalSize: `${this.finalSize}`, + }; + } + + [kInspect](depth, options) { + if (depth < 0) + return this; + + const opts = { + ...options, + depth: options.depth == null ? null : options.depth - 1, + }; + + return `StreamStats ${inspect({ + connected: this.isConnected, + createdAt: this.createdAt, + receivedAt: this.receivedAt, + ackedAt: this.ackedAt, + closingAt: this.closingAt, + destroyedAt: this.destroyedAt, + bytesReceived: this.bytesReceived, + bytesSent: this.bytesSent, + maxOffset: this.maxOffset, + maxOffsetAcknowledged: this.maxOffsetAcknowledged, + maxOffsetReceived: this.maxOffsetReceived, + finalSize: this.finalSize, + }, opts)}`; + } + + /** + * True if this QuicStreamStats object is still connected to the underlying + * Stream stats source. If this returns false, then the stats object is + * no longer being updated and should be considered stale. + * @returns {boolean} + */ + get isConnected() { + return !this.#disconnected; + } + + [kFinishClose]() { + // Snapshot the stats into a new BigUint64Array since the underlying + // buffer will be destroyed. + this.#handle = new BigUint64Array(this.#handle); + this.#disconnected = true; + } +} + +module.exports = { + QuicEndpointStats, + QuicSessionStats, + QuicStreamStats, +}; diff --git a/lib/internal/quic/symbols.js b/lib/internal/quic/symbols.js new file mode 100644 index 00000000000000..c436b5c4b787ff --- /dev/null +++ b/lib/internal/quic/symbols.js @@ -0,0 +1,60 @@ +'use strict'; + +const { + Symbol, +} = primordials; + +const { + customInspectSymbol: kInspect, +} = require('internal/util'); + +const { + kHandle: kKeyObjectHandle, + kKeyObject: kKeyObjectInner, +} = require('internal/crypto/util'); + +// Symbols used to hide various private properties and methods from the +// public API. + +const kBlocked = Symbol('kBlocked'); +const kDatagram = Symbol('kDatagram'); +const kDatagramStatus = Symbol('kDatagramStatus'); +const kError = Symbol('kError'); +const kFinishClose = Symbol('kFinishClose'); +const kHandshake = Symbol('kHandshake'); +const kHeaders = Symbol('kHeaders'); +const kOwner = Symbol('kOwner'); +const kRemoveSession = Symbol('kRemoveSession'); +const kNewSession = Symbol('kNewSession'); +const kRemoveStream = Symbol('kRemoveStream'); +const kNewStream = Symbol('kNewStream'); +const kPathValidation = Symbol('kPathValidation'); +const kReset = Symbol('kReset'); +const kSessionTicket = Symbol('kSessionTicket'); +const kTrailers = Symbol('kTrailers'); +const kVersionNegotiation = Symbol('kVersionNegotiation'); +const kPrivateConstructor = Symbol('kPrivateConstructor'); + +module.exports = { + kBlocked, + kDatagram, + kDatagramStatus, + kError, + kFinishClose, + kHandshake, + kHeaders, + kOwner, + kRemoveSession, + kNewSession, + kRemoveStream, + kNewStream, + kPathValidation, + kReset, + kSessionTicket, + kTrailers, + kVersionNegotiation, + kInspect, + kKeyObjectHandle, + kKeyObjectInner, + kPrivateConstructor, +}; diff --git a/lib/internal/socketaddress.js b/lib/internal/socketaddress.js index e432c8a7d7593a..7fbe63980a0226 100644 --- a/lib/internal/socketaddress.js +++ b/lib/internal/socketaddress.js @@ -37,6 +37,8 @@ const { kDeserialize, } = require('internal/worker/js_transferable'); +const { URL } = require('internal/url'); + const kHandle = Symbol('kHandle'); const kDetail = Symbol('kDetail'); @@ -74,7 +76,7 @@ class SocketAddress { validatePort(port, 'options.port'); validateUint32(flowlabel, 'options.flowlabel', false); - this[kHandle] = new _SocketAddress(address, port, type, flowlabel); + this[kHandle] = new _SocketAddress(address, port | 0, type, flowlabel | 0); this[kDetail] = this[kHandle].detail({ address: undefined, port: undefined, @@ -138,6 +140,36 @@ class SocketAddress { flowlabel: this.flowlabel, }; } + + /** + * Parse an "${ip}:${port}" formatted string into a SocketAddress. + * Returns undefined if the input cannot be successfully parsed. + * @param {string} input + * @returns {SocketAddress|undefined} + */ + static parse(input) { + validateString(input, 'input'); + // While URL.parse is not expected to throw, there are several + // other pieces here that do... the destucturing, the SocketAddress + // constructor, etc. So we wrap this in a try/catch to be safe. + try { + const { + hostname: address, + port, + } = URL.parse(`http://${input}`); + if (address.startsWith('[') && address.endsWith(']')) { + return new SocketAddress({ + address: address.slice(1, -1), + port: port | 0, + family: 'ipv6', + }); + } + return new SocketAddress({ address, port: port | 0 }); + } catch { + // Ignore errors here. Return undefined if the input cannot + // be successfully parsed or is not a proper socket address. + } + } } class InternalSocketAddress { diff --git a/lib/internal/test_runner/runner.js b/lib/internal/test_runner/runner.js index 9fd10dbb5f4b1e..de30789ba4b79f 100644 --- a/lib/internal/test_runner/runner.js +++ b/lib/internal/test_runner/runner.js @@ -17,7 +17,6 @@ const { ArrayPrototypeSort, ObjectAssign, PromisePrototypeThen, - PromiseResolve, PromiseWithResolvers, SafeMap, SafePromiseAll, @@ -798,9 +797,17 @@ function run(options = kEmptyObject) { } } - const setupPromise = PromiseResolve(setup?.(root.reporter)); - PromisePrototypeThen(PromisePrototypeThen(PromisePrototypeThen(setupPromise, runFiles), postRun), teardown); + const runChain = async () => { + if (typeof setup === 'function') { + await setup(root.reporter); + } + + await runFiles(); + postRun?.(); + teardown?.(); + }; + runChain(); return root.reporter; } diff --git a/lib/internal/test_runner/test.js b/lib/internal/test_runner/test.js index 99b123c7e00d1d..1c0b1e6223b4bf 100644 --- a/lib/internal/test_runner/test.js +++ b/lib/internal/test_runner/test.js @@ -2,7 +2,6 @@ const { ArrayPrototypePush, ArrayPrototypePushApply, - ArrayPrototypeReduce, ArrayPrototypeShift, ArrayPrototypeSlice, ArrayPrototypeSome, @@ -24,7 +23,6 @@ const { SafeMap, SafePromiseAll, SafePromiseAllReturnVoid, - SafePromisePrototypeFinally, SafePromiseRace, SafeSet, StringPrototypeStartsWith, @@ -104,6 +102,7 @@ function lazyAssertObject(harness) { if (assertObj === undefined) { assertObj = new SafeMap(); const assert = require('assert'); + const { SnapshotManager } = require('internal/test_runner/snapshot'); const methodsToCopy = [ 'deepEqual', 'deepStrictEqual', @@ -118,6 +117,7 @@ function lazyAssertObject(harness) { 'notDeepStrictEqual', 'notEqual', 'notStrictEqual', + 'partialDeepStrictEqual', 'rejects', 'strictEqual', 'throws', @@ -126,12 +126,8 @@ function lazyAssertObject(harness) { assertObj.set(methodsToCopy[i], assert[methodsToCopy[i]]); } - const { getOptionValue } = require('internal/options'); - if (getOptionValue('--experimental-test-snapshots')) { - const { SnapshotManager } = require('internal/test_runner/snapshot'); - harness.snapshotManager = new SnapshotManager(harness.config.updateSnapshots); - assertObj.set('snapshot', harness.snapshotManager.createAssert()); - } + harness.snapshotManager = new SnapshotManager(harness.config.updateSnapshots); + assertObj.set('snapshot', harness.snapshotManager.createAssert()); } return assertObj; } @@ -848,13 +844,14 @@ class Test extends AsyncResource { async runHook(hook, args) { validateOneOf(hook, 'hook name', kHookNames); try { - await ArrayPrototypeReduce(this.hooks[hook], async (prev, hook) => { - await prev; + const hooks = this.hooks[hook]; + for (let i = 0; i < hooks.length; ++i) { + const hook = hooks[i]; await hook.run(args); if (hook.error) { throw hook.error; } - }, PromiseResolve()); + } } catch (err) { const error = new ERR_TEST_FAILURE(`failed running ${hook} hook`, kHookFailure); error.cause = isTestFailureError(err) ? err.cause : err; @@ -1261,27 +1258,20 @@ class Suite extends Test { this.skipped = false; } + this.buildSuite = this.createBuild(); + this.fn = noop; + } + + async createBuild() { try { const { ctx, args } = this.getRunArgs(); const runArgs = [this.fn, ctx]; ArrayPrototypePushApply(runArgs, args); - this.buildSuite = SafePromisePrototypeFinally( - PromisePrototypeThen( - PromiseResolve(ReflectApply(this.runInAsyncScope, this, runArgs)), - undefined, - (err) => { - this.fail(new ERR_TEST_FAILURE(err, kTestCodeFailure)); - }), - () => this.postBuild(), - ); + await ReflectApply(this.runInAsyncScope, this, runArgs); } catch (err) { this.fail(new ERR_TEST_FAILURE(err, kTestCodeFailure)); - this.postBuild(); } - this.fn = noop; - } - postBuild() { this.buildPhaseFinished = true; } diff --git a/lib/internal/util/debuglog.js b/lib/internal/util/debuglog.js index 271c9d1497d88f..96b10c4bbac767 100644 --- a/lib/internal/util/debuglog.js +++ b/lib/internal/util/debuglog.js @@ -173,7 +173,7 @@ function formatTime(ms) { } function safeTraceLabel(label) { - return label.replace(/\\/g, '\\\\').replaceAll('"', '\\"'); + return label.replaceAll('\\', '\\\\').replaceAll('"', '\\"'); } /** diff --git a/lib/internal/util/inspect.js b/lib/internal/util/inspect.js index a697459468d7b9..57586a888e191f 100644 --- a/lib/internal/util/inspect.js +++ b/lib/internal/util/inspect.js @@ -2,7 +2,10 @@ const { Array, + ArrayBuffer, + ArrayBufferPrototype, ArrayIsArray, + ArrayPrototype, ArrayPrototypeFilter, ArrayPrototypeForEach, ArrayPrototypeIncludes, @@ -22,10 +25,15 @@ const { DatePrototypeToISOString, DatePrototypeToString, ErrorPrototypeToString, + Function, + FunctionPrototype, FunctionPrototypeBind, FunctionPrototypeCall, + FunctionPrototypeSymbolHasInstance, FunctionPrototypeToString, JSONStringify, + Map, + MapPrototype, MapPrototypeEntries, MapPrototypeGetSize, MathFloor, @@ -50,6 +58,7 @@ const { ObjectGetPrototypeOf, ObjectIs, ObjectKeys, + ObjectPrototype, ObjectPrototypeHasOwnProperty, ObjectPrototypePropertyIsEnumerable, ObjectSeal, @@ -64,6 +73,8 @@ const { SafeMap, SafeSet, SafeStringIterator, + Set, + SetPrototype, SetPrototypeGetSize, SetPrototypeValues, String, @@ -89,6 +100,8 @@ const { SymbolPrototypeValueOf, SymbolToPrimitive, SymbolToStringTag, + TypedArray, + TypedArrayPrototype, TypedArrayPrototypeGetLength, TypedArrayPrototypeGetSymbolToStringTag, Uint8Array, @@ -593,10 +606,31 @@ function isInstanceof(object, proto) { } } +// Special-case for some builtin prototypes in case their `constructor` property has been tampered. +const wellKnownPrototypes = new SafeMap(); +wellKnownPrototypes.set(ArrayPrototype, { name: 'Array', constructor: Array }); +wellKnownPrototypes.set(ArrayBufferPrototype, { name: 'ArrayBuffer', constructor: ArrayBuffer }); +wellKnownPrototypes.set(FunctionPrototype, { name: 'Function', constructor: Function }); +wellKnownPrototypes.set(MapPrototype, { name: 'Map', constructor: Map }); +wellKnownPrototypes.set(ObjectPrototype, { name: 'Object', constructor: Object }); +wellKnownPrototypes.set(SetPrototype, { name: 'Set', constructor: Set }); +wellKnownPrototypes.set(TypedArrayPrototype, { name: 'TypedArray', constructor: TypedArray }); + function getConstructorName(obj, ctx, recurseTimes, protoProps) { let firstProto; const tmp = obj; while (obj || isUndetectableObject(obj)) { + const wellKnownPrototypeNameAndConstructor = wellKnownPrototypes.get(obj); + if (wellKnownPrototypeNameAndConstructor != null) { + const { name, constructor } = wellKnownPrototypeNameAndConstructor; + if (FunctionPrototypeSymbolHasInstance(constructor, tmp)) { + if (protoProps !== undefined && firstProto !== obj) { + addPrototypeProperties( + ctx, tmp, firstProto || tmp, recurseTimes, protoProps); + } + return name; + } + } const descriptor = ObjectGetOwnPropertyDescriptor(obj, 'constructor'); if (descriptor !== undefined && typeof descriptor.value === 'function' && @@ -805,12 +839,12 @@ function formatValue(ctx, value, recurseTimes, typedArray) { // Filter out the util module, its inspect function is special. maybeCustom !== inspect && // Also filter out any prototype objects using the circular check. - !(value.constructor && value.constructor.prototype === value)) { + ObjectGetOwnPropertyDescriptor(value, 'constructor')?.value?.prototype !== value) { // This makes sure the recurseTimes are reported as before while using // a counter internally. const depth = ctx.depth === null ? null : ctx.depth - recurseTimes; const isCrossContext = - proxy !== undefined || !(context instanceof Object); + proxy !== undefined || !FunctionPrototypeSymbolHasInstance(Object, context); const ret = FunctionPrototypeCall( maybeCustom, context, @@ -954,7 +988,11 @@ function formatRaw(ctx, value, recurseTimes, typedArray) { if (noIterator) { keys = getKeys(value, ctx.showHidden); braces = ['{', '}']; - if (constructor === 'Object') { + if (typeof value === 'function') { + base = getFunctionBase(value, constructor, tag); + if (keys.length === 0 && protoProps === undefined) + return ctx.stylize(base, 'special'); + } else if (constructor === 'Object') { if (isArgumentsObject(value)) { braces[0] = '[Arguments] {'; } else if (tag !== '') { @@ -963,10 +1001,6 @@ function formatRaw(ctx, value, recurseTimes, typedArray) { if (keys.length === 0 && protoProps === undefined) { return `${braces[0]}}`; } - } else if (typeof value === 'function') { - base = getFunctionBase(value, constructor, tag); - if (keys.length === 0 && protoProps === undefined) - return ctx.stylize(base, 'special'); } else if (isRegExp(value)) { // Make RegExps say that they are RegExps base = RegExpPrototypeToString( diff --git a/lib/internal/webstreams/readablestream.js b/lib/internal/webstreams/readablestream.js index 3805526e3fd169..885932894dafcc 100644 --- a/lib/internal/webstreams/readablestream.js +++ b/lib/internal/webstreams/readablestream.js @@ -94,6 +94,7 @@ const { ArrayBufferViewGetByteLength, ArrayBufferViewGetByteOffset, AsyncIterator, + canCopyArrayBuffer, cloneAsUint8Array, copyArrayBuffer, createPromiseCallback, @@ -2552,6 +2553,15 @@ function readableByteStreamControllerCommitPullIntoDescriptor(stream, desc) { } } +function readableByteStreamControllerCommitPullIntoDescriptors(stream, descriptors) { + for (let i = 0; i < descriptors.length; ++i) { + readableByteStreamControllerCommitPullIntoDescriptor( + stream, + descriptors[i], + ); + } +} + function readableByteStreamControllerInvalidateBYOBRequest(controller) { if (controller[kState].byobRequest === null) return; @@ -2758,11 +2768,11 @@ function readableByteStreamControllerRespondInClosedState(controller, desc) { stream, } = controller[kState]; if (readableStreamHasBYOBReader(stream)) { - while (readableStreamGetNumReadIntoRequests(stream) > 0) { - readableByteStreamControllerCommitPullIntoDescriptor( - stream, - readableByteStreamControllerShiftPendingPullInto(controller)); + const filledPullIntos = []; + for (let i = 0; i < readableStreamGetNumReadIntoRequests(stream); ++i) { + ArrayPrototypePush(filledPullIntos, readableByteStreamControllerShiftPendingPullInto(controller)); } + readableByteStreamControllerCommitPullIntoDescriptors(stream, filledPullIntos); } } @@ -2843,8 +2853,9 @@ function readableByteStreamControllerEnqueue(controller, chunk) { transferredBuffer, byteOffset, byteLength); - readableByteStreamControllerProcessPullIntoDescriptorsUsingQueue( + const filledPullIntos = readableByteStreamControllerProcessPullIntoDescriptorsUsingQueue( controller); + readableByteStreamControllerCommitPullIntoDescriptors(stream, filledPullIntos); } else { assert(!isReadableStreamLocked(stream)); readableByteStreamControllerEnqueueChunkToQueue( @@ -2937,6 +2948,7 @@ function readableByteStreamControllerFillPullIntoDescriptorFromQueue( const maxAlignedBytes = maxBytesFilled - (maxBytesFilled % elementSize); let totalBytesToCopyRemaining = maxBytesToCopy; let ready = false; + assert(!ArrayBufferPrototypeGetDetached(buffer)); assert(bytesFilled < minimumFill); if (maxAlignedBytes >= minimumFill) { totalBytesToCopyRemaining = maxAlignedBytes - bytesFilled; @@ -2952,12 +2964,12 @@ function readableByteStreamControllerFillPullIntoDescriptorFromQueue( totalBytesToCopyRemaining, headOfQueue.byteLength); const destStart = byteOffset + desc.bytesFilled; - const arrayBufferByteLength = ArrayBufferPrototypeGetByteLength(buffer); - if (arrayBufferByteLength - destStart < bytesToCopy) { - throw new ERR_INVALID_STATE.RangeError( - 'view ArrayBuffer size is invalid'); - } - assert(arrayBufferByteLength - destStart >= bytesToCopy); + assert(canCopyArrayBuffer( + buffer, + destStart, + headOfQueue.buffer, + headOfQueue.byteOffset, + bytesToCopy)); copyArrayBuffer( buffer, destStart, @@ -2991,26 +3003,30 @@ function readableByteStreamControllerProcessPullIntoDescriptorsUsingQueue( const { closeRequested, pendingPullIntos, - stream, } = controller[kState]; assert(!closeRequested); + const filledPullIntos = []; while (pendingPullIntos.length) { if (!controller[kState].queueTotalSize) - return; + break; const desc = pendingPullIntos[0]; if (readableByteStreamControllerFillPullIntoDescriptorFromQueue( controller, desc)) { readableByteStreamControllerShiftPendingPullInto(controller); - readableByteStreamControllerCommitPullIntoDescriptor(stream, desc); + ArrayPrototypePush(filledPullIntos, desc); } } + return filledPullIntos; } function readableByteStreamControllerRespondInReadableState( controller, bytesWritten, desc) { + const { + stream, + } = controller[kState]; const { buffer, bytesFilled, @@ -3031,9 +3047,10 @@ function readableByteStreamControllerRespondInReadableState( controller, desc, ); - readableByteStreamControllerProcessPullIntoDescriptorsUsingQueue( + const filledPullIntos = readableByteStreamControllerProcessPullIntoDescriptorsUsingQueue( controller, ); + readableByteStreamControllerCommitPullIntoDescriptors(stream, filledPullIntos); return; } @@ -3059,10 +3076,10 @@ function readableByteStreamControllerRespondInReadableState( ArrayBufferPrototypeGetByteLength(remainder)); } desc.bytesFilled -= remainderSize; - readableByteStreamControllerCommitPullIntoDescriptor( - controller[kState].stream, - desc); - readableByteStreamControllerProcessPullIntoDescriptorsUsingQueue(controller); + const filledPullIntos = readableByteStreamControllerProcessPullIntoDescriptorsUsingQueue(controller); + + readableByteStreamControllerCommitPullIntoDescriptor(stream, desc); + readableByteStreamControllerCommitPullIntoDescriptors(stream, filledPullIntos); } function readableByteStreamControllerRespondWithNewView(controller, view) { diff --git a/lib/internal/webstreams/util.js b/lib/internal/webstreams/util.js index 2c70ef7acdfe66..5bf016f73b7af5 100644 --- a/lib/internal/webstreams/util.js +++ b/lib/internal/webstreams/util.js @@ -1,6 +1,8 @@ 'use strict'; const { + ArrayBufferPrototypeGetByteLength, + ArrayBufferPrototypeGetDetached, ArrayBufferPrototypeSlice, ArrayPrototypePush, ArrayPrototypeShift, @@ -107,6 +109,14 @@ function cloneAsUint8Array(view) { ); } +function canCopyArrayBuffer(toBuffer, toIndex, fromBuffer, fromIndex, count) { + return toBuffer !== fromBuffer && + !ArrayBufferPrototypeGetDetached(toBuffer) && + !ArrayBufferPrototypeGetDetached(fromBuffer) && + toIndex + count <= ArrayBufferPrototypeGetByteLength(toBuffer) && + fromIndex + count <= ArrayBufferPrototypeGetByteLength(fromBuffer); +} + function isBrandCheck(brand) { return (value) => { return value != null && @@ -261,6 +271,7 @@ module.exports = { ArrayBufferViewGetByteLength, ArrayBufferViewGetByteOffset, AsyncIterator, + canCopyArrayBuffer, createPromiseCallback, cloneAsUint8Array, copyArrayBuffer, diff --git a/lib/module.js b/lib/module.js index 48ba9215b081eb..92c26fb548c02f 100644 --- a/lib/module.js +++ b/lib/module.js @@ -10,13 +10,18 @@ const { flushCompileCache, getCompileCacheDir, } = require('internal/modules/helpers'); +const { + findPackageJSON, +} = require('internal/modules/package_json_reader'); +const { stripTypeScriptTypes } = require('internal/modules/typescript'); Module.findSourceMap = findSourceMap; Module.register = register; Module.SourceMap = SourceMap; Module.constants = constants; Module.enableCompileCache = enableCompileCache; +Module.findPackageJSON = findPackageJSON; Module.flushCompileCache = flushCompileCache; - +Module.stripTypeScriptTypes = stripTypeScriptTypes; Module.getCompileCacheDir = getCompileCacheDir; module.exports = Module; diff --git a/lib/net.js b/lib/net.js index e438c1c8e781b3..bbf24145ad8c12 100644 --- a/lib/net.js +++ b/lib/net.js @@ -103,6 +103,7 @@ const { ERR_INVALID_FD_TYPE, ERR_INVALID_HANDLE_TYPE, ERR_INVALID_IP_ADDRESS, + ERR_IP_BLOCKED, ERR_MISSING_ARGS, ERR_SERVER_ALREADY_LISTEN, ERR_SERVER_NOT_RUNNING, @@ -510,6 +511,12 @@ function Socket(options) { // Used after `.destroy()` this[kBytesRead] = 0; this[kBytesWritten] = 0; + if (options.blockList) { + if (!module.exports.BlockList.isBlockList(options.blockList)) { + throw new ERR_INVALID_ARG_TYPE('options.blockList', 'net.BlockList', options.blockList); + } + this.blockList = options.blockList; + } } ObjectSetPrototypeOf(Socket.prototype, stream.Duplex.prototype); ObjectSetPrototypeOf(Socket, stream.Duplex); @@ -1073,6 +1080,10 @@ function internalConnect( self.emit('connectionAttempt', address, port, addressType); if (addressType === 6 || addressType === 4) { + if (self.blockList?.check(address, `ipv${addressType}`)) { + self.destroy(new ERR_IP_BLOCKED(address)); + return; + } const req = new TCPConnectWrap(); req.oncomplete = afterConnect; req.address = address; @@ -1162,6 +1173,14 @@ function internalConnectMultiple(context, canceled) { } } + if (self.blockList?.check(address, `ipv${addressType}`)) { + const ex = new ERR_IP_BLOCKED(address); + ArrayPrototypePush(context.errors, ex); + self.emit('connectionAttemptFailed', address, port, addressType, ex); + internalConnectMultiple(context); + return; + } + debug('connect/multiple: attempting to connect to %s:%d (addressType: %d)', address, port, addressType); self.emit('connectionAttempt', address, port, addressType); @@ -1791,6 +1810,12 @@ function Server(options, connectionListener) { this.keepAlive = Boolean(options.keepAlive); this.keepAliveInitialDelay = ~~(options.keepAliveInitialDelay / 1000); this.highWaterMark = options.highWaterMark ?? getDefaultHighWaterMark(); + if (options.blockList) { + if (!module.exports.BlockList.isBlockList(options.blockList)) { + throw new ERR_INVALID_ARG_TYPE('options.blockList', 'net.BlockList', options.blockList); + } + this.blockList = options.blockList; + } } ObjectSetPrototypeOf(Server.prototype, EventEmitter.prototype); ObjectSetPrototypeOf(Server, EventEmitter); @@ -2239,7 +2264,15 @@ function onconnection(err, clientHandle) { clientHandle.close(); return; } - + if (self.blockList && typeof clientHandle.getpeername === 'function') { + const remoteInfo = { __proto__: null }; + clientHandle.getpeername(remoteInfo); + const addressType = isIP(remoteInfo.address); + if (addressType && self.blockList.check(remoteInfo.address, `ipv${addressType}`)) { + clientHandle.close(); + return; + } + } const socket = new Socket({ handle: clientHandle, allowHalfOpen: self.allowHalfOpen, diff --git a/lib/test.js b/lib/test.js index ba72d6e4ecbdbf..8e7c6295a37225 100644 --- a/lib/test.js +++ b/lib/test.js @@ -7,7 +7,6 @@ const { const { test, suite, before, after, beforeEach, afterEach } = require('internal/test_runner/harness'); const { run } = require('internal/test_runner/runner'); -const { getOptionValue } = require('internal/options'); module.exports = test; ObjectAssign(module.exports, { @@ -39,28 +38,26 @@ ObjectDefineProperty(module.exports, 'mock', { }, }); -if (getOptionValue('--experimental-test-snapshots')) { - let lazySnapshot; +let lazySnapshot; - ObjectDefineProperty(module.exports, 'snapshot', { - __proto__: null, - configurable: true, - enumerable: true, - get() { - if (lazySnapshot === undefined) { - const { - setDefaultSnapshotSerializers, - setResolveSnapshotPath, - } = require('internal/test_runner/snapshot'); - - lazySnapshot = { - __proto__: null, - setDefaultSnapshotSerializers, - setResolveSnapshotPath, - }; - } +ObjectDefineProperty(module.exports, 'snapshot', { + __proto__: null, + configurable: true, + enumerable: true, + get() { + if (lazySnapshot === undefined) { + const { + setDefaultSnapshotSerializers, + setResolveSnapshotPath, + } = require('internal/test_runner/snapshot'); + + lazySnapshot = { + __proto__: null, + setDefaultSnapshotSerializers, + setResolveSnapshotPath, + }; + } - return lazySnapshot; - }, - }); -} + return lazySnapshot; + }, +}); diff --git a/lib/url.js b/lib/url.js index ef1b1a23d9a5c8..8acec11816f88e 100644 --- a/lib/url.js +++ b/lib/url.js @@ -705,7 +705,7 @@ Url.prototype.format = function format() { } } - search = search.replace(/#/g, '%23'); + search = search.replaceAll('#', '%23'); if (hash && hash.charCodeAt(0) !== CHAR_HASH) hash = '#' + hash; diff --git a/lib/util.js b/lib/util.js index e4a65acaa70515..78e5c1555449f8 100644 --- a/lib/util.js +++ b/lib/util.js @@ -25,6 +25,7 @@ const { ArrayIsArray, ArrayPrototypeJoin, ArrayPrototypePop, + ArrayPrototypePush, Date, DatePrototypeGetDate, DatePrototypeGetHours, @@ -70,6 +71,7 @@ const { validateNumber, validateString, validateOneOf, + validateObject, } = require('internal/validators'); const { isBuffer } = require('buffer').Buffer; const { @@ -84,11 +86,13 @@ function lazyUtilColors() { utilColors ??= require('internal/util/colors'); return utilColors; } +const { getOptionValue } = require('internal/options'); const binding = internalBinding('util'); const { deprecate, + getLazy, getSystemErrorMap, getSystemErrorName: internalErrorName, getSystemErrorMessage: internalErrorMessage, @@ -472,14 +476,90 @@ function parseEnv(content) { return binding.parseEnv(content); } +const lazySourceMap = getLazy(() => require('internal/source_map/source_map_cache')); + +/** + * @typedef {object} CallSite // The call site + * @property {string} scriptName // The name of the resource that contains the + * script for the function for this StackFrame + * @property {string} functionName // The name of the function associated with this stack frame + * @property {number} lineNumber // The number, 1-based, of the line for the associate function call + * @property {number} columnNumber // The 1-based column offset on the line for the associated function call + */ + +/** + * @param {CallSite} callSite // The call site object to reconstruct from source map + * @returns {CallSite | undefined} // The reconstructed call site object + */ +function reconstructCallSite(callSite) { + const { scriptName, lineNumber, column } = callSite; + const sourceMap = lazySourceMap().findSourceMap(scriptName); + if (!sourceMap) return; + const entry = sourceMap.findEntry(lineNumber - 1, column - 1); + if (!entry?.originalSource) return; + return { + __proto__: null, + // If the name is not found, it is an empty string to match the behavior of `util.getCallSite()` + functionName: entry.name ?? '', + scriptName: entry.originalSource, + lineNumber: entry.originalLine + 1, + column: entry.originalColumn + 1, + }; +} + +/** + * + * The call site array to map + * @param {CallSite[]} callSites + * Array of objects with the reconstructed call site + * @returns {CallSite[]} + */ +function mapCallSite(callSites) { + const result = []; + for (let i = 0; i < callSites.length; ++i) { + const callSite = callSites[i]; + const found = reconstructCallSite(callSite); + ArrayPrototypePush(result, found ?? callSite); + } + return result; +} + +/** + * @typedef {object} CallSiteOptions // The call site options + * @property {boolean} sourceMap // Enable source map support + */ + /** * Returns the callSite * @param {number} frameCount - * @returns {object} + * @param {CallSiteOptions} options + * @returns {CallSite[]} */ -function getCallSites(frameCount = 10) { +function getCallSites(frameCount = 10, options) { + // If options is not provided check if frameCount is an object + if (options === undefined) { + if (typeof frameCount === 'object') { + // If frameCount is an object, it is the options object + options = frameCount; + validateObject(options, 'options'); + validateBoolean(options.sourceMap, 'options.sourceMap'); + frameCount = 10; + } else { + // If options is not provided, set it to an empty object + options = {}; + }; + } else { + // If options is provided, validate it + validateObject(options, 'options'); + validateBoolean(options.sourceMap, 'options.sourceMap'); + } + // Using kDefaultMaxCallStackSizeToCapture as reference validateNumber(frameCount, 'frameCount', 1, 200); + // If options.sourceMaps is true or if sourceMaps are enabled but the option.sourceMaps is not set explictly to false + if (options.sourceMap === true || (getOptionValue('--enable-source-maps') && options.sourceMap !== false)) { + return mapCallSite(binding.getCallSites(frameCount)); + } return binding.getCallSites(frameCount); }; diff --git a/lib/v8.js b/lib/v8.js index 7a8979887bab49..e379233ea49e0d 100644 --- a/lib/v8.js +++ b/lib/v8.js @@ -31,6 +31,9 @@ const { Uint32Array, Uint8Array, Uint8ClampedArray, + globalThis: { + Float16Array, + }, } = primordials; const { Buffer } = require('buffer'); @@ -63,6 +66,7 @@ const { } = require('internal/heap_utils'); const promiseHooks = require('internal/promise_hooks'); const { getOptionValue } = require('internal/options'); + /** * Generates a snapshot of the current V8 heap * and writes it to a JSON file. @@ -289,6 +293,7 @@ function arrayBufferViewTypeToIndex(abView) { // Index 10 is FastBuffer. if (type === '[object BigInt64Array]') return 11; if (type === '[object BigUint64Array]') return 12; + if (type === '[object Float16Array]') return 13; return -1; } @@ -306,6 +311,7 @@ function arrayBufferViewIndexToType(index) { if (index === 10) return FastBuffer; if (index === 11) return BigInt64Array; if (index === 12) return BigUint64Array; + if (index === 13) return Float16Array; return undefined; } diff --git a/lib/vm.js b/lib/vm.js index 3eea66b3f07437..ae710806201893 100644 --- a/lib/vm.js +++ b/lib/vm.js @@ -319,6 +319,7 @@ function runInThisContext(code, options) { function compileFunction(code, params, options = kEmptyObject) { validateString(code, 'code'); + validateObject(options, 'options'); if (params !== undefined) { validateStringArray(params, 'params'); } diff --git a/node.gni b/node.gni index 057686f0046e5e..a2123cc6c6d21c 100644 --- a/node.gni +++ b/node.gni @@ -7,9 +7,15 @@ declare_args() { # The location of Node.js in source code tree. node_path = "//node" - # The location of V8, use the one from node's deps by default. + # The location of V8 - use the one from node's deps by default. node_v8_path = "$node_path/deps/v8" + # The location of OpenSSL - use the one from node's deps by default. + node_openssl_path = "$node_path/deps/openssl" + + # The location of simdutf - use the one from node's deps by default. + node_simdutf_path = "$node_path/deps/simdutf" + # The NODE_MODULE_VERSION defined in node_version.h. node_module_version = exec_script("$node_path/tools/getmoduleversion.py", [], "value") diff --git a/node.gyp b/node.gyp index 195fe541bcb9f4..b21cfbf2fad024 100644 --- a/node.gyp +++ b/node.gyp @@ -481,6 +481,7 @@ '-Wno-unused-parameter', '-Werror=undefined-inline', '-Werror=extra-semi', + '-Werror=ctad-maybe-unsupported', ], }, @@ -857,9 +858,6 @@ 'dependencies': [ 'deps/googletest/googletest.gyp:gtest_prod', 'deps/histogram/histogram.gyp:histogram', - 'deps/simdjson/simdjson.gyp:simdjson', - 'deps/simdutf/simdutf.gyp:simdutf', - 'deps/ada/ada.gyp:ada', 'deps/nbytes/nbytes.gyp:nbytes', 'node_js2c#host', ], @@ -1135,7 +1133,6 @@ 'deps/googletest/googletest.gyp:gtest_prod', 'deps/histogram/histogram.gyp:histogram', 'deps/uvwasi/uvwasi.gyp:uvwasi', - 'deps/ada/ada.gyp:ada', 'deps/nbytes/nbytes.gyp:nbytes', ], 'includes': [ @@ -1182,9 +1179,6 @@ 'deps/googletest/googletest.gyp:gtest', 'deps/googletest/googletest.gyp:gtest_main', 'deps/histogram/histogram.gyp:histogram', - 'deps/simdjson/simdjson.gyp:simdjson', - 'deps/simdutf/simdutf.gyp:simdutf', - 'deps/ada/ada.gyp:ada', 'deps/nbytes/nbytes.gyp:nbytes', ], @@ -1262,7 +1256,6 @@ 'dependencies': [ '<(node_lib_target_name)', 'deps/histogram/histogram.gyp:histogram', - 'deps/ada/ada.gyp:ada', 'deps/nbytes/nbytes.gyp:nbytes', ], @@ -1312,6 +1305,26 @@ ], }, # embedtest + { + 'target_name': 'sqlite_extension', + 'type': 'shared_library', + 'sources': [ + 'test/sqlite/extension.c' + ], + + 'include_dirs': [ + 'test/sqlite', + 'deps/sqlite', + ], + + 'cflags': [ + '-fPIC', + '-Wall', + '-Wextra', + '-O3', + ], + }, # sqlitetest + { 'target_name': 'overlapped-checker', 'type': 'executable', @@ -1337,9 +1350,6 @@ 'target_name': 'node_js2c', 'type': 'executable', 'toolsets': ['host'], - 'dependencies': [ - 'deps/simdutf/simdutf.gyp:simdutf#host', - ], 'include_dirs': [ 'tools', 'src', @@ -1351,6 +1361,9 @@ 'src/embedded_data.cc', ], 'conditions': [ + [ 'node_shared_simdutf=="false"', { + 'dependencies': [ 'deps/simdutf/simdutf.gyp:simdutf#host' ], + }], [ 'node_shared_libuv=="false"', { 'dependencies': [ 'deps/uv/uv.gyp:libuv#host' ], }], @@ -1376,10 +1389,7 @@ 'dependencies': [ '<(node_lib_target_name)', 'deps/histogram/histogram.gyp:histogram', - 'deps/ada/ada.gyp:ada', 'deps/nbytes/nbytes.gyp:nbytes', - 'deps/simdjson/simdjson.gyp:simdjson', - 'deps/simdutf/simdutf.gyp:simdutf', ], 'includes': [ diff --git a/node.gypi b/node.gypi index 9c989022a9ad36..7ae62bd7e7134a 100644 --- a/node.gypi +++ b/node.gypi @@ -27,7 +27,11 @@ 'conditions': [ [ 'clang==1', { - 'cflags': [ '-Werror=undefined-inline', '-Werror=extra-semi'] + 'cflags': [ + '-Werror=undefined-inline', + '-Werror=extra-semi', + '-Werror=ctad-maybe-unsupported', + ], }], [ '"<(_type)"=="executable"', { 'msvs_settings': { @@ -212,6 +216,18 @@ 'dependencies': [ 'deps/nghttp2/nghttp2.gyp:nghttp2' ], }], + [ 'node_shared_ada=="false"', { + 'dependencies': [ 'deps/ada/ada.gyp:ada' ], + }], + + [ 'node_shared_simdjson=="false"', { + 'dependencies': [ 'deps/simdjson/simdjson.gyp:simdjson' ], + }], + + [ 'node_shared_simdutf=="false"', { + 'dependencies': [ 'deps/simdutf/simdutf.gyp:simdutf' ], + }], + [ 'node_shared_brotli=="false"', { 'dependencies': [ 'deps/brotli/brotli.gyp:brotli' ], }], diff --git a/src/async_wrap.cc b/src/async_wrap.cc index 8868245403d658..30490b90e77b64 100644 --- a/src/async_wrap.cc +++ b/src/async_wrap.cc @@ -488,31 +488,15 @@ AsyncWrap::AsyncWrap(Environment* env, Local object, ProviderType provider, double execution_async_id) - : AsyncWrap(env, object, provider, execution_async_id, false) {} - -AsyncWrap::AsyncWrap(Environment* env, - Local object, - ProviderType provider, - double execution_async_id, - bool silent) : AsyncWrap(env, object) { CHECK_NE(provider, PROVIDER_NONE); provider_type_ = provider; // Use AsyncReset() call to execute the init() callbacks. - AsyncReset(object, execution_async_id, silent); + AsyncReset(object, execution_async_id); init_hook_ran_ = true; } -AsyncWrap::AsyncWrap(Environment* env, - Local object, - ProviderType provider, - double execution_async_id, - double trigger_async_id) - : AsyncWrap(env, object, provider, execution_async_id, true) { - trigger_async_id_ = trigger_async_id; -} - AsyncWrap::AsyncWrap(Environment* env, Local object) : BaseObject(env, object), context_frame_(env->isolate(), @@ -592,8 +576,7 @@ void AsyncWrap::EmitDestroy(Environment* env, double async_id) { // Generalized call for both the constructor and for handles that are pooled // and reused over their lifetime. This way a new uid can be assigned when // the resource is pulled out of the pool and put back into use. -void AsyncWrap::AsyncReset(Local resource, double execution_async_id, - bool silent) { +void AsyncWrap::AsyncReset(Local resource, double execution_async_id) { CHECK_NE(provider_type(), PROVIDER_NONE); if (async_id_ != kInvalidAsyncId) { @@ -642,8 +625,6 @@ void AsyncWrap::AsyncReset(Local resource, double execution_async_id, context_frame_.Reset(isolate, async_context_frame::current(isolate)); - if (silent) return; - EmitAsyncInit(env(), resource, env()->async_hooks()->provider_string(provider_type()), async_id_, trigger_async_id_); diff --git a/src/async_wrap.h b/src/async_wrap.h index d656f72fdabd62..5b33290b4bb2d0 100644 --- a/src/async_wrap.h +++ b/src/async_wrap.h @@ -190,8 +190,7 @@ class AsyncWrap : public BaseObject { inline v8::Local context_frame() const; void AsyncReset(v8::Local resource, - double execution_async_id = kInvalidAsyncId, - bool silent = false); + double execution_async_id = kInvalidAsyncId); // Only call these within a valid HandleScope. v8::MaybeLocal MakeCallback(const v8::Local cb, @@ -224,18 +223,6 @@ class AsyncWrap : public BaseObject { bool IsDoneInitializing() const override; private: - friend class PromiseWrap; - - AsyncWrap(Environment* env, - v8::Local promise, - ProviderType provider, - double execution_async_id, - bool silent); - AsyncWrap(Environment* env, - v8::Local promise, - ProviderType provider, - double execution_async_id, - double trigger_async_id); ProviderType provider_type_ = PROVIDER_NONE; bool init_hook_ran_ = false; // Because the values may be Reset(), cannot be made const. diff --git a/src/debug_utils.cc b/src/debug_utils.cc index 9d36082db672ce..c8b3b11ee34d7a 100644 --- a/src/debug_utils.cc +++ b/src/debug_utils.cc @@ -62,9 +62,9 @@ EnabledDebugList enabled_debug_list; using v8::Local; using v8::StackTrace; -void EnabledDebugList::Parse(std::shared_ptr env_vars) { +void EnabledDebugList::Parse(Environment* env) { std::string cats; - credentials::SafeGetenv("NODE_DEBUG_NATIVE", &cats, env_vars); + credentials::SafeGetenv("NODE_DEBUG_NATIVE", &cats, env); Parse(cats); } diff --git a/src/debug_utils.h b/src/debug_utils.h index 359b8d6b421020..d4391ac987ba5b 100644 --- a/src/debug_utils.h +++ b/src/debug_utils.h @@ -74,10 +74,10 @@ class NODE_EXTERN_PRIVATE EnabledDebugList { return enabled_[static_cast(category)]; } - // Uses NODE_DEBUG_NATIVE to initialize the categories. The env_vars variable + // Uses NODE_DEBUG_NATIVE to initialize the categories. env->env_vars() // is parsed if it is not a nullptr, otherwise the system environment // variables are parsed. - void Parse(std::shared_ptr env_vars); + void Parse(Environment* env); private: // Enable all categories matching cats. diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc index 97ddd59fb661c8..885a0d072312e9 100644 --- a/src/encoding_binding.cc +++ b/src/encoding_binding.cc @@ -1,6 +1,7 @@ #include "encoding_binding.h" #include "ada.h" #include "env-inl.h" +#include "node_buffer.h" #include "node_errors.h" #include "node_external_reference.h" #include "simdutf.h" @@ -226,6 +227,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data, SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8); SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII); SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode); + SetMethodNoSideEffect(isolate, target, "decodeLatin1", DecodeLatin1); } void BindingData::CreatePerContextProperties(Local target, @@ -243,6 +245,52 @@ void BindingData::RegisterTimerExternalReferences( registry->Register(DecodeUTF8); registry->Register(ToASCII); registry->Register(ToUnicode); + registry->Register(DecodeLatin1); +} + +void BindingData::DecodeLatin1(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + + CHECK_GE(args.Length(), 1); + if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() || + args[0]->IsArrayBufferView())) { + return node::THROW_ERR_INVALID_ARG_TYPE( + env->isolate(), + "The \"input\" argument must be an instance of ArrayBuffer, " + "SharedArrayBuffer, or ArrayBufferView."); + } + + bool ignore_bom = args[1]->IsTrue(); + bool has_fatal = args[2]->IsTrue(); + + ArrayBufferViewContents buffer(args[0]); + const uint8_t* data = buffer.data(); + size_t length = buffer.length(); + + if (ignore_bom && length > 0 && data[0] == 0xFF) { + data++; + length--; + } + + if (length == 0) { + return args.GetReturnValue().SetEmptyString(); + } + + std::string result(length * 2, '\0'); + + size_t written = simdutf::convert_latin1_to_utf8( + reinterpret_cast(data), length, result.data()); + + if (has_fatal && written == 0) { + return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA( + env->isolate(), "The encoded data was not valid for encoding latin1"); + } + + Local output = + String::NewFromUtf8( + env->isolate(), result.c_str(), v8::NewStringType::kNormal, written) + .ToLocalChecked(); + args.GetReturnValue().Set(output); } } // namespace encoding_binding diff --git a/src/encoding_binding.h b/src/encoding_binding.h index 2690cb74f8a05b..97f55394d27641 100644 --- a/src/encoding_binding.h +++ b/src/encoding_binding.h @@ -31,6 +31,7 @@ class BindingData : public SnapshotableObject { static void EncodeInto(const v8::FunctionCallbackInfo& args); static void EncodeUtf8String(const v8::FunctionCallbackInfo& args); static void DecodeUTF8(const v8::FunctionCallbackInfo& args); + static void DecodeLatin1(const v8::FunctionCallbackInfo& args); static void ToASCII(const v8::FunctionCallbackInfo& args); static void ToUnicode(const v8::FunctionCallbackInfo& args); diff --git a/src/env-inl.h b/src/env-inl.h index 3b041dd28ba32f..c460018af954e6 100644 --- a/src/env-inl.h +++ b/src/env-inl.h @@ -915,6 +915,10 @@ inline bool Environment::is_in_heapsnapshot_heap_limit_callback() const { return is_in_heapsnapshot_heap_limit_callback_; } +inline bool Environment::report_exclude_env() const { + return options_->report_exclude_env; +} + inline void Environment::AddHeapSnapshotNearHeapLimitCallback() { DCHECK(!heapsnapshot_near_heap_limit_callback_added_); heapsnapshot_near_heap_limit_callback_added_ = true; diff --git a/src/env.cc b/src/env.cc index 130524cad713e8..adb6cc1c2f1c01 100644 --- a/src/env.cc +++ b/src/env.cc @@ -87,13 +87,13 @@ void AsyncHooks::ResetPromiseHooks(Local init, js_promise_hooks_[3].Reset(env()->isolate(), resolve); } -Local AsyncHooks::GetPromiseHooks(Isolate* isolate) { - std::vector> values; +Local AsyncHooks::GetPromiseHooks(Isolate* isolate) const { + v8::LocalVector values(isolate, js_promise_hooks_.size()); for (size_t i = 0; i < js_promise_hooks_.size(); ++i) { if (js_promise_hooks_[i].IsEmpty()) { - values.push_back(Undefined(isolate)); + values[i] = Undefined(isolate); } else { - values.push_back(js_promise_hooks_[i].Get(isolate)); + values[i] = js_promise_hooks_[i].Get(isolate); } } return Array::New(isolate, values.data(), values.size()); @@ -236,13 +236,10 @@ void Environment::TrackContext(Local context) { void Environment::UntrackContext(Local context) { HandleScope handle_scope(isolate_); - contexts_.erase(std::remove_if(contexts_.begin(), - contexts_.end(), - [&](auto&& el) { return el.IsEmpty(); }), - contexts_.end()); + std::erase_if(contexts_, [&](auto&& el) { return el.IsEmpty(); }); for (auto it = contexts_.begin(); it != contexts_.end(); it++) { - Local saved_context = PersistentToLocal::Weak(isolate_, *it); - if (saved_context == context) { + if (Local saved_context = PersistentToLocal::Weak(isolate_, *it); + saved_context == context) { it->Reset(); contexts_.erase(it); break; @@ -351,9 +348,11 @@ IsolateDataSerializeInfo IsolateData::Serialize(SnapshotCreator* creator) { #undef VS #undef VP - for (size_t i = 0; i < AsyncWrap::PROVIDERS_LENGTH; i++) + info.primitive_values.reserve(info.primitive_values.size() + + AsyncWrap::PROVIDERS_LENGTH); + for (size_t i = 0; i < AsyncWrap::PROVIDERS_LENGTH; i++) { info.primitive_values.push_back(creator->AddData(async_wrap_provider(i))); - + } uint32_t id = 0; #define VM(PropertyName) V(PropertyName##_binding_template, ObjectTemplate) #define V(PropertyName, TypeName) \ @@ -375,7 +374,7 @@ IsolateDataSerializeInfo IsolateData::Serialize(SnapshotCreator* creator) { void IsolateData::DeserializeProperties(const IsolateDataSerializeInfo* info) { size_t i = 0; - v8::Isolate::Scope isolate_scope(isolate_); + Isolate::Scope isolate_scope(isolate_); HandleScope handle_scope(isolate_); if (per_process::enabled_debug_list.enabled(DebugCategory::MKSNAPSHOT)) { @@ -732,9 +731,8 @@ void Environment::TryLoadAddon( std::string Environment::GetCwd(const std::string& exec_path) { char cwd[PATH_MAX_BYTES]; size_t size = PATH_MAX_BYTES; - const int err = uv_cwd(cwd, &size); - if (err == 0) { + if (uv_cwd(cwd, &size) == 0) { CHECK_GT(size, 0); return cwd; } @@ -780,7 +778,7 @@ std::string Environment::GetExecPath(const std::vector& argv) { std::string exec_path; if (uv_exepath(exec_path_buf, &exec_path_len) == 0) { exec_path = std::string(exec_path_buf, exec_path_len); - } else if (argv.size() > 0) { + } else if (!argv.empty()) { exec_path = argv[0]; } @@ -875,9 +873,6 @@ Environment::Environment(IsolateData* isolate_data, EnvironmentFlags::kOwnsInspector; } - set_env_vars(per_process::system_environment); - enabled_debug_list_.Parse(env_vars()); - // We create new copies of the per-Environment option sets, so that it is // easier to modify them after Environment creation. The defaults are // part of the per-Isolate option set, for which in turn the defaults are @@ -887,6 +882,13 @@ Environment::Environment(IsolateData* isolate_data, inspector_host_port_ = std::make_shared>( options_->debug_options().host_port); + set_env_vars(per_process::system_environment); + // This should be done after options is created, so that --trace-env can be + // checked when parsing NODE_DEBUG_NATIVE. It should also be done after + // env_vars() is set so that the parser uses values from env->env_vars() + // which may or may not be the system environment variable store. + enabled_debug_list_.Parse(this); + heap_snapshot_near_heap_limit_ = static_cast(options_->heap_snapshot_near_heap_limit); @@ -929,7 +931,7 @@ Environment::Environment(IsolateData* isolate_data, std::move(traced_value)); } - if (options_->experimental_permission) { + if (options_->permission) { permission()->EnablePermissions(); // The process shouldn't be able to neither // spawn/worker nor use addons or enable inspector @@ -1115,8 +1117,7 @@ void Environment::InitializeLibuv() { void Environment::InitializeCompileCache() { std::string dir_from_env; - if (!credentials::SafeGetenv( - "NODE_COMPILE_CACHE", &dir_from_env, env_vars()) || + if (!credentials::SafeGetenv("NODE_COMPILE_CACHE", &dir_from_env, this) || dir_from_env.empty()) { return; } @@ -1128,7 +1129,7 @@ CompileCacheEnableResult Environment::EnableCompileCache( CompileCacheEnableResult result; std::string disable_env; if (credentials::SafeGetenv( - "NODE_DISABLE_COMPILE_CACHE", &disable_env, env_vars())) { + "NODE_DISABLE_COMPILE_CACHE", &disable_env, this)) { result.status = CompileCacheEnableStatus::DISABLED; result.message = "Disabled by NODE_DISABLE_COMPILE_CACHE"; Debug(this, @@ -1247,7 +1248,8 @@ void Environment::StartProfilerIdleNotifier() { } void Environment::PrintSyncTrace() const { - if (!trace_sync_io_) return; + if (!trace_sync_io_) [[likely]] + return; HandleScope handle_scope(isolate()); @@ -1322,7 +1324,7 @@ void Environment::AtExit(void (*cb)(void* arg), void* arg) { at_exit_functions_.push_front(ExitCallback{cb, arg}); } -Maybe Environment::CheckUnsettledTopLevelAwait() { +Maybe Environment::CheckUnsettledTopLevelAwait() const { HandleScope scope(isolate_); Local ctx = context(); Local value; @@ -1524,7 +1526,7 @@ void Environment::RunTimers(uv_timer_t* handle) { int64_t expiry_ms = ret.ToLocalChecked()->IntegerValue(env->context()).FromJust(); - uv_handle_t* h = reinterpret_cast(handle); + auto* h = reinterpret_cast(handle); if (expiry_ms != 0) { int64_t duration_ms = @@ -1590,8 +1592,7 @@ Local Environment::GetNow() { uint64_t now = GetNowUint64(); if (now <= 0xffffffff) return Integer::NewFromUnsigned(isolate(), static_cast(now)); - else - return Number::New(isolate(), static_cast(now)); + return Number::New(isolate(), static_cast(now)); } void CollectExceptionInfo(Environment* env, @@ -1650,8 +1651,8 @@ void Environment::CollectUVExceptionInfo(Local object, message = uv_strerror(errorno); } - node::CollectExceptionInfo(this, obj, errorno, err_string, - syscall, message, path, dest); + CollectExceptionInfo( + this, obj, errorno, err_string, syscall, message, path, dest); } ImmediateInfo::ImmediateInfo(Isolate* isolate, const SerializeInfo* info) @@ -1981,7 +1982,7 @@ void Environment::BuildEmbedderGraph(Isolate* isolate, EmbedderGraph* graph, void* data) { MemoryTracker tracker(isolate, graph); - Environment* env = static_cast(data); + auto* env = static_cast(data); // Start traversing embedder objects from the root Environment object. tracker.Track(env); } @@ -2043,7 +2044,7 @@ void Environment::TracePromises(PromiseHookType type, size_t Environment::NearHeapLimitCallback(void* data, size_t current_heap_limit, size_t initial_heap_limit) { - Environment* env = static_cast(data); + auto* env = static_cast(data); Debug(env, DebugCategory::DIAGNOSTICS, @@ -2089,8 +2090,8 @@ size_t Environment::NearHeapLimitCallback(void* data, DebugCategory::DIAGNOSTICS, "Estimated available memory=%" PRIu64 ", " "estimated overhead=%" PRIu64 "\n", - static_cast(available), - static_cast(estimated_overhead)); + available, + estimated_overhead); // This might be hit when the snapshot is being taken in another // NearHeapLimitCallback invocation. diff --git a/src/env.h b/src/env.h index 55124cd38e75ab..16312e548e526b 100644 --- a/src/env.h +++ b/src/env.h @@ -333,7 +333,7 @@ class AsyncHooks : public MemoryRetainer { v8::Local resolve); // Used for testing since V8 doesn't provide API for retrieving configured // JS promise hooks. - v8::Local GetPromiseHooks(v8::Isolate* isolate); + v8::Local GetPromiseHooks(v8::Isolate* isolate) const; inline v8::Local provider_string(int idx); inline void no_force_checks(); @@ -852,7 +852,7 @@ class Environment final : public MemoryRetainer { void AtExit(void (*cb)(void* arg), void* arg); void RunAtExitCallbacks(); - v8::Maybe CheckUnsettledTopLevelAwait(); + v8::Maybe CheckUnsettledTopLevelAwait() const; void RunWeakRefCleanup(); v8::MaybeLocal RunSnapshotSerializeCallback() const; @@ -1048,6 +1048,8 @@ class Environment final : public MemoryRetainer { inline void set_heap_snapshot_near_heap_limit(uint32_t limit); inline bool is_in_heapsnapshot_heap_limit_callback() const; + inline bool report_exclude_env() const; + inline void AddHeapSnapshotNearHeapLimitCallback(); inline void RemoveHeapSnapshotNearHeapLimitCallback(size_t heap_limit); diff --git a/src/env_properties.h b/src/env_properties.h index 84bb1d492b512b..592e95b0584a87 100644 --- a/src/env_properties.h +++ b/src/env_properties.h @@ -194,8 +194,10 @@ V(ipv4_string, "IPv4") \ V(ipv6_string, "IPv6") \ V(isclosing_string, "isClosing") \ + V(isfinished_string, "isFinished") \ V(issuer_string, "issuer") \ V(issuercert_string, "issuerCertificate") \ + V(iterator_string, "Iterator") \ V(jwk_crv_string, "crv") \ V(jwk_d_string, "d") \ V(jwk_dp_string, "dp") \ @@ -241,6 +243,7 @@ V(nistcurve_string, "nistCurve") \ V(node_string, "node") \ V(nsname_string, "nsname") \ + V(num_cols_string, "num_cols") \ V(object_string, "Object") \ V(ocsp_request_string, "OCSPRequest") \ V(oncertcb_string, "oncertcb") \ @@ -288,6 +291,7 @@ V(priority_string, "priority") \ V(process_string, "process") \ V(promise_string, "promise") \ + V(prototype_string, "prototype") \ V(psk_string, "psk") \ V(pubkey_string, "pubkey") \ V(public_exponent_string, "publicExponent") \ @@ -309,6 +313,7 @@ V(require_string, "require") \ V(resource_string, "resource") \ V(retry_string, "retry") \ + V(return_string, "return") \ V(salt_length_string, "saltLength") \ V(scheme_string, "scheme") \ V(scopeid_string, "scopeid") \ @@ -332,6 +337,7 @@ V(standard_name_string, "standardName") \ V(start_time_string, "startTime") \ V(state_string, "state") \ + V(statement_string, "statement") \ V(stats_string, "stats") \ V(status_string, "status") \ V(stdio_string, "stdio") \ diff --git a/src/js_native_api_v8.cc b/src/js_native_api_v8.cc index bcb9e5ca8d2926..97a59e4a17f548 100644 --- a/src/js_native_api_v8.cc +++ b/src/js_native_api_v8.cc @@ -2769,10 +2769,12 @@ napi_status NAPI_CDECL napi_create_reference(napi_env env, // Deletes a reference. The referenced value is released, and may be GC'd unless // there are other references to it. +// For a napi_reference returned from `napi_wrap`, this must be called in the +// finalizer. napi_status NAPI_CDECL napi_delete_reference(napi_env env, napi_ref ref) { // Omit NAPI_PREAMBLE and GET_RETURN_STATUS because V8 calls here cannot throw // JS exceptions. - CHECK_ENV_NOT_IN_GC(env); + CHECK_ENV(env); CHECK_ARG(env, ref); delete reinterpret_cast(ref); diff --git a/src/module_wrap.cc b/src/module_wrap.cc index 6f8c9d10955b88..8be6dbd1d0262c 100644 --- a/src/module_wrap.cc +++ b/src/module_wrap.cc @@ -663,6 +663,22 @@ void ModuleWrap::EvaluateSync(const FunctionCallbackInfo& args) { CHECK(result->IsPromise()); Local promise = result.As(); if (promise->State() == Promise::PromiseState::kRejected) { + // The rejected promise is created by V8, so we don't get a chance to mark + // it as resolved before the rejection happens from evaluation. But we can + // tell the promise rejection callback to treat it as a promise rejected + // before handler was added which would remove it from the unhandled + // rejection handling, since we are converting it into an error and throw + // from here directly. + Local type = v8::Integer::New( + isolate, + static_cast( + v8::PromiseRejectEvent::kPromiseHandlerAddedAfterReject)); + Local args[] = {type, promise, Undefined(isolate)}; + if (env->promise_reject_callback() + ->Call(context, Undefined(isolate), arraysize(args), args) + .IsEmpty()) { + return; + } Local exception = promise->Result(); Local message = v8::Exception::CreateMessage(isolate, exception); diff --git a/src/node.cc b/src/node.cc index 1a2a43bdd37441..f4365c0eda7330 100644 --- a/src/node.cc +++ b/src/node.cc @@ -369,7 +369,8 @@ MaybeLocal StartExecution(Environment* env, StartExecutionCallback cb) { CHECK(!env->isolate_data()->is_building_snapshot()); #ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION - if (sea::IsSingleExecutable()) { + // Snapshot in SEA is only loaded for the main thread. + if (sea::IsSingleExecutable() && env->is_main_thread()) { sea::SeaResource sea = sea::FindSingleExecutableResource(); // The SEA preparation blob building process should already enforce this, // this check is just here to guard against the unlikely case where @@ -391,6 +392,9 @@ MaybeLocal StartExecution(Environment* env, StartExecutionCallback cb) { // move the pre-execution part into a different file that can be // reused when dealing with user-defined main functions. if (!env->snapshot_deserialize_main().IsEmpty()) { + // Custom worker snapshot is not supported yet, + // so workers can't have deserialize main functions. + CHECK(env->is_main_thread()); return env->RunSnapshotDeserializeMain(); } @@ -1045,7 +1049,7 @@ InitializeOncePerProcessInternal(const std::vector& args, if (!(flags & ProcessInitializationFlags::kNoParseGlobalDebugVariables)) { // Initialized the enabled list for Debug() calls with system // environment variables. - per_process::enabled_debug_list.Parse(per_process::system_environment); + per_process::enabled_debug_list.Parse(nullptr); } PlatformInit(flags); diff --git a/src/node_builtins.cc b/src/node_builtins.cc index 1bec44f6f29b0b..9aaf5626fcfe4a 100644 --- a/src/node_builtins.cc +++ b/src/node_builtins.cc @@ -133,7 +133,8 @@ BuiltinLoader::BuiltinCategories BuiltinLoader::GetBuiltinCategories() const { "internal/streams/lazy_transform", #endif // !HAVE_OPENSSL #if !NODE_OPENSSL_HAS_QUIC - "internal/quic/quic", + "internal/quic/quic", "internal/quic/symbols", "internal/quic/stats", + "internal/quic/state", #endif // !NODE_OPENSSL_HAS_QUIC "sqlite", // Experimental. "sys", // Deprecated. diff --git a/src/node_builtins.h b/src/node_builtins.h index 1cb85b9058d065..a73de23a1debfd 100644 --- a/src/node_builtins.h +++ b/src/node_builtins.h @@ -71,7 +71,7 @@ using BuiltinSourceMap = std::map; using BuiltinCodeCacheMap = std::unordered_map; -// Generated by tools/js2c.py as node_javascript.cc +// Generated by tools/js2c.cc as node_javascript.cc void RegisterExternalReferencesForInternalizedBuiltinCode( ExternalReferenceRegistry* registry); @@ -134,7 +134,7 @@ class NODE_EXTERN_PRIVATE BuiltinLoader { // Only allow access from friends. friend class CodeCacheBuilder; - // Generated by tools/js2c.py as node_javascript.cc + // Generated by tools/js2c.cc as node_javascript.cc void LoadJavaScriptSource(); // Loads data into source_ UnionBytes GetConfig(); // Return data for config.gypi diff --git a/src/node_credentials.cc b/src/node_credentials.cc index 2a7f2e878bc953..0152f6269c83ef 100644 --- a/src/node_credentials.cc +++ b/src/node_credentials.cc @@ -72,9 +72,7 @@ static bool HasOnly(int capability) { // process only has the capability CAP_NET_BIND_SERVICE set. If the current // process does not have any capabilities set and the process is running as // setuid root then lookup will not be allowed. -bool SafeGetenv(const char* key, - std::string* text, - std::shared_ptr env_vars) { +bool SafeGetenv(const char* key, std::string* text, Environment* env) { #if !defined(__CloudABI__) && !defined(_WIN32) #if defined(__linux__) if ((!HasOnly(CAP_NET_BIND_SERVICE) && linux_at_secure()) || @@ -87,14 +85,31 @@ bool SafeGetenv(const char* key, // Fallback to system environment which reads the real environment variable // through uv_os_getenv. - if (env_vars == nullptr) { + std::shared_ptr env_vars; + if (env == nullptr) { env_vars = per_process::system_environment; + } else { + env_vars = env->env_vars(); } std::optional value = env_vars->Get(key); - if (!value.has_value()) return false; - *text = value.value(); - return true; + + bool has_env = value.has_value(); + if (has_env) { + *text = value.value(); + } + + auto options = + (env != nullptr ? env->options() + : per_process::cli_options->per_isolate->per_env); + + if (options->trace_env) { + fprintf(stderr, "[--trace-env] get environment variable \"%s\"\n", key); + + PrintTraceEnvStack(options); + } + + return has_env; } static void SafeGetenv(const FunctionCallbackInfo& args) { @@ -103,7 +118,7 @@ static void SafeGetenv(const FunctionCallbackInfo& args) { Isolate* isolate = env->isolate(); Utf8Value strenvtag(isolate, args[0]); std::string text; - if (!SafeGetenv(*strenvtag, &text, env->env_vars())) return; + if (!SafeGetenv(*strenvtag, &text, env)) return; Local result = ToV8Value(isolate->GetCurrentContext(), text).ToLocalChecked(); args.GetReturnValue().Set(result); @@ -117,7 +132,7 @@ static void GetTempDir(const FunctionCallbackInfo& args) { // Let's wrap SafeGetEnv since it returns true for empty string. auto get_env = [&dir, &env](std::string_view key) { - USE(SafeGetenv(key.data(), &dir, env->env_vars())); + USE(SafeGetenv(key.data(), &dir, env)); return !dir.empty(); }; diff --git a/src/node_debug.cc b/src/node_debug.cc index d87cdf61d2d92d..0f254b1fbfe820 100644 --- a/src/node_debug.cc +++ b/src/node_debug.cc @@ -12,8 +12,7 @@ #include #endif // DEBUG -namespace node { -namespace debug { +namespace node::debug { #ifdef DEBUG using v8::Context; @@ -41,7 +40,7 @@ void GetV8FastApiCallCount(const FunctionCallbackInfo& args) { return; } Utf8Value utf8_key(env->isolate(), args[0]); - args.GetReturnValue().Set(GetV8FastApiCallCount(utf8_key.ToString())); + args.GetReturnValue().Set(GetV8FastApiCallCount(utf8_key.ToStringView())); } void SlowIsEven(const FunctionCallbackInfo& args) { @@ -93,8 +92,7 @@ void Initialize(Local target, } #endif // DEBUG -} // namespace debug -} // namespace node +} // namespace node::debug #ifdef DEBUG NODE_BINDING_CONTEXT_AWARE_INTERNAL(debug, node::debug::Initialize) diff --git a/src/node_dotenv.cc b/src/node_dotenv.cc index f594df875d7a0c..049f5cfcb77b9c 100644 --- a/src/node_dotenv.cc +++ b/src/node_dotenv.cc @@ -182,7 +182,10 @@ void Dotenv::ParseContent(const std::string_view input) { } store_.insert_or_assign(std::string(key), multi_line_value); - content.remove_prefix(content.find('\n', closing_quote + 1)); + auto newline = content.find('\n', closing_quote + 1); + if (newline != std::string_view::npos) { + content.remove_prefix(newline); + } continue; } } @@ -210,7 +213,10 @@ void Dotenv::ParseContent(const std::string_view input) { store_.insert_or_assign(std::string(key), value); // Select the first newline after the closing quotation mark // since there could be newline characters inside the value. - content.remove_prefix(content.find('\n', closing_quote + 1)); + auto newline = content.find('\n', closing_quote + 1); + if (newline != std::string_view::npos) { + content.remove_prefix(newline); + } } } else { // Regular key value pair. diff --git a/src/node_env_var.cc b/src/node_env_var.cc index d19d11dc714e08..82b8231c435775 100644 --- a/src/node_env_var.cc +++ b/src/node_env_var.cc @@ -337,6 +337,19 @@ Maybe KVStore::AssignToObject(v8::Isolate* isolate, return JustVoid(); } +void PrintTraceEnvStack(Environment* env) { + PrintTraceEnvStack(env->options()); +} + +void PrintTraceEnvStack(std::shared_ptr options) { + if (options->trace_env_native_stack) { + DumpNativeBacktrace(stderr); + } + if (options->trace_env_js_stack) { + DumpJavaScriptBacktrace(stderr); + } +} + static Intercepted EnvGetter(Local property, const PropertyCallbackInfo& info) { Environment* env = Environment::GetCurrent(info); @@ -348,7 +361,18 @@ static Intercepted EnvGetter(Local property, CHECK(property->IsString()); MaybeLocal value_string = env->env_vars()->Get(env->isolate(), property.As()); - if (!value_string.IsEmpty()) { + + bool has_env = !value_string.IsEmpty(); + if (env->options()->trace_env) { + Utf8Value key(env->isolate(), property.As()); + fprintf(stderr, + "[--trace-env] get environment variable \"%.*s\"\n", + static_cast(key.length()), + key.out()); + PrintTraceEnvStack(env); + } + + if (has_env) { info.GetReturnValue().Set(value_string.ToLocalChecked()); return Intercepted::kYes; } @@ -386,6 +410,14 @@ static Intercepted EnvSetter(Local property, } env->env_vars()->Set(env->isolate(), key, value_string); + if (env->options()->trace_env) { + Utf8Value key_utf8(env->isolate(), key); + fprintf(stderr, + "[--trace-env] set environment variable \"%.*s\"\n", + static_cast(key_utf8.length()), + key_utf8.out()); + PrintTraceEnvStack(env); + } return Intercepted::kYes; } @@ -396,7 +428,18 @@ static Intercepted EnvQuery(Local property, CHECK(env->has_run_bootstrapping_code()); if (property->IsString()) { int32_t rc = env->env_vars()->Query(env->isolate(), property.As()); - if (rc != -1) { + bool has_env = (rc != -1); + + if (env->options()->trace_env) { + Utf8Value key_utf8(env->isolate(), property.As()); + fprintf(stderr, + "[--trace-env] query environment variable \"%.*s\": %s\n", + static_cast(key_utf8.length()), + key_utf8.out(), + has_env ? "is set" : "is not set"); + PrintTraceEnvStack(env); + } + if (has_env) { // Return attributes for the property. info.GetReturnValue().Set(v8::None); return Intercepted::kYes; @@ -411,6 +454,15 @@ static Intercepted EnvDeleter(Local property, CHECK(env->has_run_bootstrapping_code()); if (property->IsString()) { env->env_vars()->Delete(env->isolate(), property.As()); + + if (env->options()->trace_env) { + Utf8Value key_utf8(env->isolate(), property.As()); + fprintf(stderr, + "[--trace-env] delete environment variable \"%.*s\"\n", + static_cast(key_utf8.length()), + key_utf8.out()); + PrintTraceEnvStack(env); + } } // process.env never has non-configurable properties, so always @@ -423,6 +475,12 @@ static void EnvEnumerator(const PropertyCallbackInfo& info) { Environment* env = Environment::GetCurrent(info); CHECK(env->has_run_bootstrapping_code()); + if (env->options()->trace_env) { + fprintf(stderr, "[--trace-env] enumerate environment variables\n"); + + PrintTraceEnvStack(env); + } + info.GetReturnValue().Set( env->env_vars()->Enumerate(env->isolate())); } diff --git a/src/node_errors.h b/src/node_errors.h index d333422ae8cd74..15d7143299fe3f 100644 --- a/src/node_errors.h +++ b/src/node_errors.h @@ -91,6 +91,7 @@ void OOMErrorHandler(const char* location, const v8::OOMDetails& details); V(ERR_INVALID_THIS, TypeError) \ V(ERR_INVALID_URL, TypeError) \ V(ERR_INVALID_URL_SCHEME, TypeError) \ + V(ERR_LOAD_SQLITE_EXTENSION, Error) \ V(ERR_MEMORY_ALLOCATION_FAILED, Error) \ V(ERR_MESSAGE_TARGET_CONTEXT_UNAVAILABLE, Error) \ V(ERR_MISSING_ARGS, TypeError) \ @@ -191,6 +192,7 @@ ERRORS_WITH_CODE(V) V(ERR_INVALID_STATE, "Invalid state") \ V(ERR_INVALID_THIS, "Value of \"this\" is the wrong type") \ V(ERR_INVALID_URL_SCHEME, "The URL must be of scheme file:") \ + V(ERR_LOAD_SQLITE_EXTENSION, "Failed to load SQLite extension") \ V(ERR_MEMORY_ALLOCATION_FAILED, "Failed to allocate memory") \ V(ERR_OSSL_EVP_INVALID_DIGEST, "Invalid digest used") \ V(ERR_MESSAGE_TARGET_CONTEXT_UNAVAILABLE, \ diff --git a/src/node_file.cc b/src/node_file.cc index 90b0c9f1fe0332..4bdbfa1be2c22f 100644 --- a/src/node_file.cc +++ b/src/node_file.cc @@ -1031,6 +1031,8 @@ static void ExistsSync(const FunctionCallbackInfo& args) { // Used to speed up module loading. Returns 0 if the path refers to // a file, 1 when it's a directory or < 0 on error (usually -ENOENT.) // The speedup comes from not creating thousands of Stat and Error objects. +// Do not expose this function through public API as it doesn't hold +// Permission Model checks. static void InternalModuleStat(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); @@ -1039,8 +1041,6 @@ static void InternalModuleStat(const FunctionCallbackInfo& args) { BufferValue path(env->isolate(), args[1]); CHECK_NOT_NULL(*path); ToNamespacedPath(env, &path); - THROW_IF_INSUFFICIENT_PERMISSIONS( - env, permission::PermissionScope::kFileSystemRead, path.ToStringView()); uv_fs_t req; int rc = uv_fs_stat(env->event_loop(), &req, *path, nullptr); @@ -1067,15 +1067,8 @@ static int32_t FastInternalModuleStat( } HandleScope scope(isolate); - Environment* env = Environment::GetCurrent(recv->GetCreationContextChecked()); auto path = std::filesystem::path(input.data, input.data + input.length); - if (!env->permission()->is_granted( - env, permission::PermissionScope::kFileSystemRead, path.string())) - [[unlikely]] { - options.fallback = true; - return -1; - } switch (std::filesystem::status(path).type()) { case std::filesystem::file_type::directory: @@ -1894,8 +1887,29 @@ static void ReadDir(const FunctionCallbackInfo& args) { BufferValue path(isolate, args[0]); CHECK_NOT_NULL(*path); +#ifdef _WIN32 + // On Windows, some API functions accept paths with trailing slashes, + // while others do not. This code checks if the input path ends with + // a slash (either '/' or '\\') and, if so, ensures that the processed + // path also ends with a trailing backslash ('\\'). + bool slashCheck = false; + if (path.ToStringView().ends_with("/") || + path.ToStringView().ends_with("\\")) { + slashCheck = true; + } +#endif + ToNamespacedPath(env, &path); +#ifdef _WIN32 + if (slashCheck) { + size_t new_length = path.length() + 1; + path.AllocateSufficientStorage(new_length + 1); + path.SetLengthAndZeroTerminate(new_length); + path.out()[new_length - 1] = '\\'; + } +#endif + const enum encoding encoding = ParseEncoding(isolate, args[1], UTF8); bool with_types = args[2]->IsTrue(); diff --git a/src/node_http2.cc b/src/node_http2.cc index 593c8b5f07a2a4..f9b5226aea50dc 100644 --- a/src/node_http2.cc +++ b/src/node_http2.cc @@ -803,13 +803,15 @@ void Http2Session::Close(uint32_t code, bool socket_closed) { CHECK_EQ(nghttp2_session_terminate_session(session_.get(), code), 0); SendPendingData(); } else if (stream_ != nullptr) { + // so that the previous listener of the socket, typically, JS code of a + // (tls) socket will be notified of any activity later stream_->RemoveStreamListener(this); } set_destroyed(); // If we are writing we will get to make the callback in OnStreamAfterWrite. - if (!is_write_in_progress()) { + if (!is_write_in_progress() || !stream_) { Debug(this, "make done session callback"); HandleScope scope(env()->isolate()); MakeCallback(env()->ondone_string(), 0, nullptr); @@ -1314,11 +1316,7 @@ int Http2Session::OnDataChunkReceived(nghttp2_session* handle, } else { memcpy(buf.base, data, avail); } - if (buf.base == nullptr) [[likely]] { - buf.base = reinterpret_cast(const_cast(data)); - } else { - memcpy(buf.base, data, avail); - } + data += avail; len -= avail; stream->EmitRead(avail, buf); diff --git a/src/node_internals.h b/src/node_internals.h index 85b666e11f5654..000ba16303740d 100644 --- a/src/node_internals.h +++ b/src/node_internals.h @@ -321,11 +321,12 @@ class ThreadPoolWork { #endif // defined(__POSIX__) && !defined(__ANDROID__) && !defined(__CloudABI__) namespace credentials { -bool SafeGetenv(const char* key, - std::string* text, - std::shared_ptr env_vars = nullptr); +bool SafeGetenv(const char* key, std::string* text, Environment* env = nullptr); } // namespace credentials +void PrintTraceEnvStack(Environment* env); +void PrintTraceEnvStack(std::shared_ptr options); + void DefineZlibConstants(v8::Local target); v8::Isolate* NewIsolate(v8::Isolate::CreateParams* params, uv_loop_t* event_loop, diff --git a/src/node_options.cc b/src/node_options.cc index 7d1db6bcdcc40a..a03daec2bd74d7 100644 --- a/src/node_options.cc +++ b/src/node_options.cc @@ -142,6 +142,11 @@ void EnvironmentOptions::CheckOptions(std::vector* errors, errors->push_back("--heapsnapshot-near-heap-limit must not be negative"); } + if (!trace_require_module.empty() && trace_require_module != "all" && + trace_require_module != "no-node-modules") { + errors->push_back("invalid value for --trace-require-module"); + } + if (test_runner) { if (test_isolation == "none") { debug_options_.allow_attaching_debugger = true; @@ -430,7 +435,8 @@ EnvironmentOptionsParser::EnvironmentOptionsParser() { AddOption("--experimental-sqlite", "experimental node:sqlite module", &EnvironmentOptions::experimental_sqlite, - kAllowedInEnvvar); + kAllowedInEnvvar, + true); AddOption("--experimental-webstorage", "experimental Web Storage API", &EnvironmentOptions::experimental_webstorage, @@ -469,11 +475,12 @@ EnvironmentOptionsParser::EnvironmentOptionsParser() { "experimental ES Module import.meta.resolve() parentURL support", &EnvironmentOptions::experimental_import_meta_resolve, kAllowedInEnvvar); - AddOption("--experimental-permission", + AddOption("--permission", "enable the permission system", - &EnvironmentOptions::experimental_permission, + &EnvironmentOptions::permission, kAllowedInEnvvar, false); + AddAlias("--experimental-permission", "--permission"); AddOption("--allow-fs-read", "allow permissions to read the filesystem", &EnvironmentOptions::allow_fs_read, @@ -702,9 +709,7 @@ EnvironmentOptionsParser::EnvironmentOptionsParser() { AddOption("--experimental-test-module-mocks", "enable module mocking in the test runner", &EnvironmentOptions::test_runner_module_mocks); - AddOption("--experimental-test-snapshots", - "enable snapshot testing in the test runner", - &EnvironmentOptions::test_runner_snapshots); + AddOption("--experimental-test-snapshots", "", NoOp{}); AddOption("--test-name-pattern", "run tests whose name matches this regular expression", &EnvironmentOptions::test_name_pattern, @@ -776,10 +781,36 @@ EnvironmentOptionsParser::EnvironmentOptionsParser() { "show stack traces on promise initialization and resolution", &EnvironmentOptions::trace_promises, kAllowedInEnvvar); + + AddOption("--trace-env", + "Print accesses to the environment variables", + &EnvironmentOptions::trace_env, + kAllowedInEnvvar); + Implies("--trace-env-js-stack", "--trace-env"); + Implies("--trace-env-native-stack", "--trace-env"); + AddOption("--trace-env-js-stack", + "Print accesses to the environment variables and the JavaScript " + "stack trace", + &EnvironmentOptions::trace_env_js_stack, + kAllowedInEnvvar); + AddOption( + "--trace-env-native-stack", + "Print accesses to the environment variables and the native stack trace", + &EnvironmentOptions::trace_env_native_stack, + kAllowedInEnvvar); + AddOption("--experimental-default-type", "set module system to use by default", &EnvironmentOptions::type, kAllowedInEnvvar); + + AddOption( + "--trace-require-module", + "Print access to require(esm). Options are 'all' (print all usage) and " + "'no-node-modules' (excluding usage from the node_modules folder)", + &EnvironmentOptions::trace_require_module, + kAllowedInEnvvar); + AddOption("--extra-info-on-fatal-exception", "hide extra information on fatal exception that causes exit", &EnvironmentOptions::extra_info_on_fatal_exception, @@ -895,6 +926,11 @@ EnvironmentOptionsParser::EnvironmentOptionsParser() { &EnvironmentOptions::tls_max_v1_3, kAllowedInEnvvar); + AddOption("--report-exclude-env", + "Exclude environment variables when generating report" + " (default: false)", + &EnvironmentOptions::report_exclude_env, + kAllowedInEnvvar); AddOption("--report-exclude-network", "exclude network interface diagnostics." " (default: false)", diff --git a/src/node_options.h b/src/node_options.h index b3246f5dcb3ab1..ab6ea77b2e9ce5 100644 --- a/src/node_options.h +++ b/src/node_options.h @@ -123,7 +123,7 @@ class EnvironmentOptions : public Options { bool experimental_eventsource = false; bool experimental_fetch = true; bool experimental_websocket = true; - bool experimental_sqlite = false; + bool experimental_sqlite = true; bool experimental_webstorage = false; std::string localstorage_file; bool experimental_global_customevent = true; @@ -134,7 +134,7 @@ class EnvironmentOptions : public Options { std::string input_type; // Value of --input-type std::string type; // Value of --experimental-default-type bool entry_is_url = false; - bool experimental_permission = false; + bool permission = false; std::vector allow_fs_read; std::vector allow_fs_write; bool allow_addons = false; @@ -190,7 +190,6 @@ class EnvironmentOptions : public Options { uint64_t test_coverage_functions = 0; uint64_t test_coverage_lines = 0; bool test_runner_module_mocks = false; - bool test_runner_snapshots = false; bool test_runner_update_snapshots = false; std::vector test_name_pattern; std::vector test_reporter; @@ -211,6 +210,10 @@ class EnvironmentOptions : public Options { bool trace_uncaught = false; bool trace_warnings = false; bool trace_promises = false; + bool trace_env = false; + bool trace_env_js_stack = false; + bool trace_env_native_stack = false; + std::string trace_require_module; bool extra_info_on_fatal_exception = true; std::string unhandled_rejections; std::vector userland_loaders; @@ -251,6 +254,7 @@ class EnvironmentOptions : public Options { std::vector user_argv; + bool report_exclude_env = false; bool report_exclude_network = false; inline DebugOptions* get_debug_options() { return &debug_options_; } diff --git a/src/node_process.h b/src/node_process.h index d4f1c0d45dec50..461a92161ab2e5 100644 --- a/src/node_process.h +++ b/src/node_process.h @@ -24,25 +24,27 @@ void CreateEnvProxyTemplate(IsolateData* isolate_data); void RawDebug(const v8::FunctionCallbackInfo& args); v8::MaybeLocal ProcessEmit(Environment* env, - const char* event, + std::string_view event, v8::Local message); v8::Maybe ProcessEmitWarningGeneric(Environment* env, - const char* warning, - const char* type = nullptr, - const char* code = nullptr); + std::string_view warning, + std::string_view type = "", + std::string_view code = ""); template inline v8::Maybe ProcessEmitWarning(Environment* env, const char* fmt, Args&&... args); -v8::Maybe ProcessEmitWarningSync(Environment* env, const char* message); +v8::Maybe ProcessEmitWarningSync(Environment* env, + std::string_view message); v8::Maybe ProcessEmitExperimentalWarning(Environment* env, - const char* warning); -v8::Maybe ProcessEmitDeprecationWarning(Environment* env, - const char* warning, - const char* deprecation_code); + const std::string& warning); +v8::Maybe ProcessEmitDeprecationWarning( + Environment* env, + const std::string& warning, + std::string_view deprecation_code); v8::MaybeLocal CreateProcessObject(Realm* env); void PatchProcessObject(const v8::FunctionCallbackInfo& args); diff --git a/src/node_process_events.cc b/src/node_process_events.cc index 19774607830a93..128ad9fbb1f257 100644 --- a/src/node_process_events.cc +++ b/src/node_process_events.cc @@ -18,10 +18,11 @@ using v8::Object; using v8::String; using v8::Value; -Maybe ProcessEmitWarningSync(Environment* env, const char* message) { +Maybe ProcessEmitWarningSync(Environment* env, std::string_view message) { Isolate* isolate = env->isolate(); Local context = env->context(); - Local message_string = OneByteString(isolate, message); + Local message_string = + OneByteString(isolate, message.data(), message.size()); Local argv[] = {message_string}; Local emit_function = env->process_emit_warning_sync(); @@ -37,13 +38,14 @@ Maybe ProcessEmitWarningSync(Environment* env, const char* message) { } MaybeLocal ProcessEmit(Environment* env, - const char* event, + std::string_view event, Local message) { Isolate* isolate = env->isolate(); - Local event_string; - if (!String::NewFromOneByte(isolate, reinterpret_cast(event)) - .ToLocal(&event_string)) return MaybeLocal(); + Local event_string; + if (!ToV8Value(env->context(), event).ToLocal(&event_string)) { + return MaybeLocal(); + } Local process = env->process_object(); Local argv[] = {event_string, message}; @@ -51,10 +53,12 @@ MaybeLocal ProcessEmit(Environment* env, } Maybe ProcessEmitWarningGeneric(Environment* env, - const char* warning, - const char* type, - const char* code) { - if (!env->can_call_into_js()) return Just(false); + std::string_view warning, + std::string_view type, + std::string_view code) { + if (!env->can_call_into_js()) { + return Just(false); + } HandleScope handle_scope(env->isolate()); Context::Scope context_scope(env->context()); @@ -73,19 +77,16 @@ Maybe ProcessEmitWarningGeneric(Environment* env, // The caller has to be able to handle a failure anyway, so we might as well // do proper error checking for string creation. - if (!String::NewFromUtf8(env->isolate(), warning).ToLocal(&args[argc++])) + if (!ToV8Value(env->context(), warning).ToLocal(&args[argc++])) { return Nothing(); + } - if (type != nullptr) { - if (!String::NewFromOneByte(env->isolate(), - reinterpret_cast(type)) - .ToLocal(&args[argc++])) { + if (!type.empty()) { + if (!ToV8Value(env->context(), type).ToLocal(&args[argc++])) { return Nothing(); } - if (code != nullptr && - !String::NewFromOneByte(env->isolate(), - reinterpret_cast(code)) - .ToLocal(&args[argc++])) { + if (!code.empty() && + !ToV8Value(env->context(), code).ToLocal(&args[argc++])) { return Nothing(); } } @@ -100,13 +101,11 @@ Maybe ProcessEmitWarningGeneric(Environment* env, return Just(true); } - std::set experimental_warnings; Maybe ProcessEmitExperimentalWarning(Environment* env, - const char* warning) { - if (experimental_warnings.find(warning) != experimental_warnings.end()) - return Nothing(); + const std::string& warning) { + if (experimental_warnings.contains(warning)) return Nothing(); experimental_warnings.insert(warning); std::string message(warning); @@ -115,8 +114,8 @@ Maybe ProcessEmitExperimentalWarning(Environment* env, } Maybe ProcessEmitDeprecationWarning(Environment* env, - const char* warning, - const char* deprecation_code) { + const std::string& warning, + std::string_view deprecation_code) { return ProcessEmitWarningGeneric( env, warning, "DeprecationWarning", deprecation_code); } diff --git a/src/node_report.cc b/src/node_report.cc index 2fd4915f3d7e03..9ab66162ec32a6 100644 --- a/src/node_report.cc +++ b/src/node_report.cc @@ -23,7 +23,7 @@ #include #include -constexpr int NODE_REPORT_VERSION = 4; +constexpr int NODE_REPORT_VERSION = 5; constexpr int NANOS_PER_SEC = 1000 * 1000 * 1000; constexpr double SEC_PER_MICROS = 1e-6; constexpr int MAX_FRAME_COUNT = node::kMaxFrameCountForLogging; @@ -61,7 +61,8 @@ static void WriteNodeReport(Isolate* isolate, std::ostream& out, Local error, bool compact, - bool exclude_network = false); + bool exclude_network = false, + bool exclude_env = false); static void PrintVersionInformation(JSONWriter* writer, bool exclude_network = false); static void PrintJavaScriptErrorStack(JSONWriter* writer, @@ -78,6 +79,7 @@ static void PrintJavaScriptErrorProperties(JSONWriter* writer, static void PrintNativeStack(JSONWriter* writer); static void PrintResourceUsage(JSONWriter* writer); static void PrintGCStatistics(JSONWriter* writer, Isolate* isolate); +static void PrintEnvironmentVariables(JSONWriter* writer); static void PrintSystemInformation(JSONWriter* writer); static void PrintLoadedLibraries(JSONWriter* writer); static void PrintComponentVersions(JSONWriter* writer); @@ -95,7 +97,8 @@ static void WriteNodeReport(Isolate* isolate, std::ostream& out, Local error, bool compact, - bool exclude_network) { + bool exclude_network, + bool exclude_env) { // Obtain the current time and the pid. TIME_TYPE tm_struct; DiagnosticFilename::LocalTime(&tm_struct); @@ -251,6 +254,9 @@ static void WriteNodeReport(Isolate* isolate, writer.json_arrayend(); // Report operating system information + if (exclude_env == false) { + PrintEnvironmentVariables(&writer); + } PrintSystemInformation(&writer); writer.json_objectend(); @@ -696,8 +702,7 @@ static void PrintResourceUsage(JSONWriter* writer) { #endif // RUSAGE_THREAD } -// Report operating system information. -static void PrintSystemInformation(JSONWriter* writer) { +static void PrintEnvironmentVariables(JSONWriter* writer) { uv_env_item_t* envitems; int envcount; int r; @@ -717,20 +722,23 @@ static void PrintSystemInformation(JSONWriter* writer) { } writer->json_objectend(); +} +// Report operating system information. +static void PrintSystemInformation(JSONWriter* writer) { #ifndef _WIN32 static struct { const char* description; int id; } rlimit_strings[] = { {"core_file_size_blocks", RLIMIT_CORE}, - {"data_seg_size_kbytes", RLIMIT_DATA}, + {"data_seg_size_bytes", RLIMIT_DATA}, {"file_size_blocks", RLIMIT_FSIZE}, #if !(defined(_AIX) || defined(__sun)) {"max_locked_memory_bytes", RLIMIT_MEMLOCK}, #endif #ifndef __sun - {"max_memory_size_kbytes", RLIMIT_RSS}, + {"max_memory_size_bytes", RLIMIT_RSS}, #endif {"open_files", RLIMIT_NOFILE}, {"stack_size_bytes", RLIMIT_STACK}, @@ -739,7 +747,7 @@ static void PrintSystemInformation(JSONWriter* writer) { {"max_user_processes", RLIMIT_NPROC}, #endif #ifndef __OpenBSD__ - {"virtual_memory_kbytes", RLIMIT_AS} + {"virtual_memory_bytes", RLIMIT_AS} #endif }; @@ -917,6 +925,10 @@ std::string TriggerNodeReport(Isolate* isolate, bool exclude_network = env != nullptr ? env->options()->report_exclude_network : per_process::cli_options->per_isolate ->per_env->report_exclude_network; + bool exclude_env = + env != nullptr + ? env->report_exclude_env() + : per_process::cli_options->per_isolate->per_env->report_exclude_env; report::WriteNodeReport(isolate, env, @@ -926,7 +938,8 @@ std::string TriggerNodeReport(Isolate* isolate, *outstream, error, compact, - exclude_network); + exclude_network, + exclude_env); // Do not close stdout/stderr, only close files we opened. if (outfile.is_open()) { @@ -980,8 +993,20 @@ void GetNodeReport(Isolate* isolate, bool exclude_network = env != nullptr ? env->options()->report_exclude_network : per_process::cli_options->per_isolate ->per_env->report_exclude_network; - report::WriteNodeReport( - isolate, env, message, trigger, "", out, error, false, exclude_network); + bool exclude_env = + env != nullptr + ? env->report_exclude_env() + : per_process::cli_options->per_isolate->per_env->report_exclude_env; + report::WriteNodeReport(isolate, + env, + message, + trigger, + "", + out, + error, + false, + exclude_network, + exclude_env); } // External function to trigger a report, writing to a supplied stream. @@ -997,8 +1022,20 @@ void GetNodeReport(Environment* env, bool exclude_network = env != nullptr ? env->options()->report_exclude_network : per_process::cli_options->per_isolate ->per_env->report_exclude_network; - report::WriteNodeReport( - isolate, env, message, trigger, "", out, error, false, exclude_network); + bool exclude_env = + env != nullptr + ? env->report_exclude_env() + : per_process::cli_options->per_isolate->per_env->report_exclude_env; + report::WriteNodeReport(isolate, + env, + message, + trigger, + "", + out, + error, + false, + exclude_network, + exclude_env); } } // namespace node diff --git a/src/node_report_module.cc b/src/node_report_module.cc index 09ac89fc528d95..7a87a53203dcc7 100644 --- a/src/node_report_module.cc +++ b/src/node_report_module.cc @@ -95,6 +95,17 @@ static void SetExcludeNetwork(const FunctionCallbackInfo& info) { env->options()->report_exclude_network = info[0]->IsTrue(); } +static void GetExcludeEnv(const FunctionCallbackInfo& info) { + Environment* env = Environment::GetCurrent(info); + info.GetReturnValue().Set(env->report_exclude_env()); +} + +static void SetExcludeEnv(const FunctionCallbackInfo& info) { + Environment* env = Environment::GetCurrent(info); + CHECK(info[0]->IsBoolean()); + env->options()->report_exclude_env = info[0]->IsTrue(); +} + static void GetDirectory(const FunctionCallbackInfo& info) { Mutex::ScopedLock lock(per_process::cli_options_mutex); Environment* env = Environment::GetCurrent(info); @@ -187,6 +198,8 @@ static void Initialize(Local exports, SetMethod(context, exports, "setCompact", SetCompact); SetMethod(context, exports, "getExcludeNetwork", GetExcludeNetwork); SetMethod(context, exports, "setExcludeNetwork", SetExcludeNetwork); + SetMethod(context, exports, "getExcludeEnv", GetExcludeEnv); + SetMethod(context, exports, "setExcludeEnv", SetExcludeEnv); SetMethod(context, exports, "getDirectory", GetDirectory); SetMethod(context, exports, "setDirectory", SetDirectory); SetMethod(context, exports, "getFilename", GetFilename); @@ -215,6 +228,8 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) { registry->Register(SetCompact); registry->Register(GetExcludeNetwork); registry->Register(SetExcludeNetwork); + registry->Register(GetExcludeEnv); + registry->Register(SetExcludeEnv); registry->Register(GetDirectory); registry->Register(SetDirectory); registry->Register(GetFilename); diff --git a/src/node_sockaddr-inl.h b/src/node_sockaddr-inl.h index e16a09b04c7d6f..87ff9b62268657 100644 --- a/src/node_sockaddr-inl.h +++ b/src/node_sockaddr-inl.h @@ -172,22 +172,9 @@ bool SocketAddress::operator!=(const SocketAddress& other) const { return !(*this == other); } -bool SocketAddress::operator<(const SocketAddress& other) const { - return compare(other) == CompareResult::LESS_THAN; -} - -bool SocketAddress::operator>(const SocketAddress& other) const { - return compare(other) == CompareResult::GREATER_THAN; -} - -bool SocketAddress::operator<=(const SocketAddress& other) const { - CompareResult c = compare(other); - return c == CompareResult::NOT_COMPARABLE ? false : - c <= CompareResult::SAME; -} - -bool SocketAddress::operator>=(const SocketAddress& other) const { - return compare(other) >= CompareResult::SAME; +std::partial_ordering SocketAddress::operator<=>( + const SocketAddress& other) const { + return compare(other); } template diff --git a/src/node_sockaddr.cc b/src/node_sockaddr.cc index e1572187437f1b..19fcc6b89ac145 100644 --- a/src/node_sockaddr.cc +++ b/src/node_sockaddr.cc @@ -154,9 +154,8 @@ bool is_match_ipv4_ipv6( sizeof(uint32_t)) == 0; } -SocketAddress::CompareResult compare_ipv4( - const SocketAddress& one, - const SocketAddress& two) { +std::partial_ordering compare_ipv4(const SocketAddress& one, + const SocketAddress& two) { const sockaddr_in* one_in = reinterpret_cast(one.data()); const sockaddr_in* two_in = @@ -165,31 +164,29 @@ SocketAddress::CompareResult compare_ipv4( const uint32_t s_addr_two = ntohl(two_in->sin_addr.s_addr); if (s_addr_one < s_addr_two) - return SocketAddress::CompareResult::LESS_THAN; + return std::partial_ordering::less; else if (s_addr_one == s_addr_two) - return SocketAddress::CompareResult::SAME; + return std::partial_ordering::equivalent; else - return SocketAddress::CompareResult::GREATER_THAN; + return std::partial_ordering::greater; } -SocketAddress::CompareResult compare_ipv6( - const SocketAddress& one, - const SocketAddress& two) { +std::partial_ordering compare_ipv6(const SocketAddress& one, + const SocketAddress& two) { const sockaddr_in6* one_in = reinterpret_cast(one.data()); const sockaddr_in6* two_in = reinterpret_cast(two.data()); int ret = memcmp(&one_in->sin6_addr, &two_in->sin6_addr, 16); if (ret < 0) - return SocketAddress::CompareResult::LESS_THAN; + return std::partial_ordering::less; else if (ret > 0) - return SocketAddress::CompareResult::GREATER_THAN; - return SocketAddress::CompareResult::SAME; + return std::partial_ordering::greater; + return std::partial_ordering::equivalent; } -SocketAddress::CompareResult compare_ipv4_ipv6( - const SocketAddress& ipv4, - const SocketAddress& ipv6) { +std::partial_ordering compare_ipv4_ipv6(const SocketAddress& ipv4, + const SocketAddress& ipv6) { const sockaddr_in* ipv4_in = reinterpret_cast(ipv4.data()); const sockaddr_in6 * ipv6_in = @@ -199,7 +196,7 @@ SocketAddress::CompareResult compare_ipv4_ipv6( reinterpret_cast(&ipv6_in->sin6_addr); if (memcmp(ptr, mask, sizeof(mask)) != 0) - return SocketAddress::CompareResult::NOT_COMPARABLE; + return std::partial_ordering::unordered; int ret = memcmp( &ipv4_in->sin_addr, @@ -207,10 +204,10 @@ SocketAddress::CompareResult compare_ipv4_ipv6( sizeof(uint32_t)); if (ret < 0) - return SocketAddress::CompareResult::LESS_THAN; + return std::partial_ordering::less; else if (ret > 0) - return SocketAddress::CompareResult::GREATER_THAN; - return SocketAddress::CompareResult::SAME; + return std::partial_ordering::greater; + return std::partial_ordering::equivalent; } bool in_network_ipv4( @@ -235,7 +232,7 @@ bool in_network_ipv6( // Special case, if prefix == 128, then just do a // straight comparison. if (prefix == 128) - return compare_ipv6(ip, net) == SocketAddress::CompareResult::SAME; + return compare_ipv6(ip, net) == std::partial_ordering::equivalent; uint8_t r = prefix % 8; int len = (prefix - r) / 8; @@ -263,7 +260,7 @@ bool in_network_ipv4_ipv6( int prefix) { if (prefix == 128) - return compare_ipv4_ipv6(ip, net) == SocketAddress::CompareResult::SAME; + return compare_ipv4_ipv6(ip, net) == std::partial_ordering::equivalent; uint8_t r = prefix % 8; int len = (prefix - r) / 8; @@ -293,7 +290,7 @@ bool in_network_ipv6_ipv4( const SocketAddress& net, int prefix) { if (prefix == 32) - return compare_ipv4_ipv6(net, ip) == SocketAddress::CompareResult::SAME; + return compare_ipv4_ipv6(net, ip) == std::partial_ordering::equivalent; uint32_t m = ((1ull << prefix) - 1) << (32 - prefix); @@ -337,8 +334,7 @@ bool SocketAddress::is_match(const SocketAddress& other) const { return false; } -SocketAddress::CompareResult SocketAddress::compare( - const SocketAddress& other) const { +std::partial_ordering SocketAddress::compare(const SocketAddress& other) const { switch (family()) { case AF_INET: switch (other.family()) { @@ -349,16 +345,15 @@ SocketAddress::CompareResult SocketAddress::compare( case AF_INET6: switch (other.family()) { case AF_INET: { - CompareResult c = compare_ipv4_ipv6(other, *this); - switch (c) { - case SocketAddress::CompareResult::NOT_COMPARABLE: - // Fall through - case SocketAddress::CompareResult::SAME: - return c; - case SocketAddress::CompareResult::GREATER_THAN: - return SocketAddress::CompareResult::LESS_THAN; - case SocketAddress::CompareResult::LESS_THAN: - return SocketAddress::CompareResult::GREATER_THAN; + auto c = compare_ipv4_ipv6(other, *this); + if (c == std::partial_ordering::unordered) { + return std::partial_ordering::unordered; + } else if (c == std::partial_ordering::equivalent) { + return std::partial_ordering::equivalent; + } else if (c == std::partial_ordering::less) { + return std::partial_ordering::greater; + } else if (c == std::partial_ordering::greater) { + return std::partial_ordering::less; } break; } @@ -366,7 +361,7 @@ SocketAddress::CompareResult SocketAddress::compare( } break; } - return SocketAddress::CompareResult::NOT_COMPARABLE; + return std::partial_ordering::unordered; } bool SocketAddress::is_in_network( diff --git a/src/node_sockaddr.h b/src/node_sockaddr.h index 84aa3adf5fc72a..b822e186969917 100644 --- a/src/node_sockaddr.h +++ b/src/node_sockaddr.h @@ -11,9 +11,10 @@ #include "uv.h" #include "v8.h" +#include +#include #include #include -#include #include namespace node { @@ -22,13 +23,6 @@ class Environment; class SocketAddress : public MemoryRetainer { public: - enum class CompareResult { - NOT_COMPARABLE = -2, - LESS_THAN, - SAME, - GREATER_THAN - }; - struct Hash { size_t operator()(const SocketAddress& addr) const; }; @@ -36,10 +30,7 @@ class SocketAddress : public MemoryRetainer { inline bool operator==(const SocketAddress& other) const; inline bool operator!=(const SocketAddress& other) const; - inline bool operator<(const SocketAddress& other) const; - inline bool operator>(const SocketAddress& other) const; - inline bool operator<=(const SocketAddress& other) const; - inline bool operator>=(const SocketAddress& other) const; + inline std::partial_ordering operator<=>(const SocketAddress& other) const; inline static bool is_numeric_host(const char* hostname); inline static bool is_numeric_host(const char* hostname, int family); @@ -102,7 +93,7 @@ class SocketAddress : public MemoryRetainer { bool is_match(const SocketAddress& other) const; // Compares this SocketAddress to the given other SocketAddress. - CompareResult compare(const SocketAddress& other) const; + std::partial_ordering compare(const SocketAddress& other) const; // Returns true if this SocketAddress is within the subnet // identified by the given network address and CIDR prefix. diff --git a/src/node_sqlite.cc b/src/node_sqlite.cc index e138d120197e33..7f5e2f89ce9dba 100644 --- a/src/node_sqlite.cc +++ b/src/node_sqlite.cc @@ -1,4 +1,5 @@ #include "node_sqlite.h" +#include #include "base_object-inl.h" #include "debug_utils-inl.h" #include "env-inl.h" @@ -22,10 +23,13 @@ using v8::ConstructorBehavior; using v8::Context; using v8::DontDelete; using v8::Exception; +using v8::External; using v8::Function; using v8::FunctionCallback; using v8::FunctionCallbackInfo; using v8::FunctionTemplate; +using v8::Global; +using v8::Int32; using v8::Integer; using v8::Isolate; using v8::Local; @@ -110,13 +114,133 @@ inline void THROW_ERR_SQLITE_ERROR(Isolate* isolate, const char* message) { } } +class UserDefinedFunction { + public: + explicit UserDefinedFunction(Environment* env, + Local fn, + bool use_bigint_args) + : env_(env), fn_(env->isolate(), fn), use_bigint_args_(use_bigint_args) {} + virtual ~UserDefinedFunction() {} + + static void xFunc(sqlite3_context* ctx, int argc, sqlite3_value** argv) { + UserDefinedFunction* self = + static_cast(sqlite3_user_data(ctx)); + Environment* env = self->env_; + Isolate* isolate = env->isolate(); + auto recv = Undefined(isolate); + auto fn = self->fn_.Get(isolate); + LocalVector js_argv(isolate); + + for (int i = 0; i < argc; ++i) { + sqlite3_value* value = argv[i]; + MaybeLocal js_val; + + switch (sqlite3_value_type(value)) { + case SQLITE_INTEGER: { + sqlite3_int64 val = sqlite3_value_int64(value); + if (self->use_bigint_args_) { + js_val = BigInt::New(isolate, val); + } else if (std::abs(val) <= kMaxSafeJsInteger) { + js_val = Number::New(isolate, val); + } else { + THROW_ERR_OUT_OF_RANGE(isolate, + "Value is too large to be represented as a " + "JavaScript number: %" PRId64, + val); + return; + } + break; + } + case SQLITE_FLOAT: + js_val = Number::New(isolate, sqlite3_value_double(value)); + break; + case SQLITE_TEXT: { + const char* v = + reinterpret_cast(sqlite3_value_text(value)); + js_val = String::NewFromUtf8(isolate, v).As(); + break; + } + case SQLITE_NULL: + js_val = Null(isolate); + break; + case SQLITE_BLOB: { + size_t size = static_cast(sqlite3_value_bytes(value)); + auto data = + reinterpret_cast(sqlite3_value_blob(value)); + auto store = ArrayBuffer::NewBackingStore(isolate, size); + memcpy(store->Data(), data, size); + auto ab = ArrayBuffer::New(isolate, std::move(store)); + js_val = Uint8Array::New(ab, 0, size); + break; + } + default: + UNREACHABLE("Bad SQLite value"); + } + + Local local; + if (!js_val.ToLocal(&local)) { + return; + } + + js_argv.emplace_back(local); + } + + MaybeLocal retval = + fn->Call(env->context(), recv, argc, js_argv.data()); + Local result; + if (!retval.ToLocal(&result)) { + return; + } + + if (result->IsUndefined() || result->IsNull()) { + sqlite3_result_null(ctx); + } else if (result->IsNumber()) { + sqlite3_result_double(ctx, result.As()->Value()); + } else if (result->IsString()) { + Utf8Value val(isolate, result.As()); + sqlite3_result_text(ctx, *val, val.length(), SQLITE_TRANSIENT); + } else if (result->IsUint8Array()) { + ArrayBufferViewContents buf(result); + sqlite3_result_blob(ctx, buf.data(), buf.length(), SQLITE_TRANSIENT); + } else if (result->IsBigInt()) { + bool lossless; + int64_t as_int = result.As()->Int64Value(&lossless); + if (!lossless) { + sqlite3_result_error(ctx, "BigInt value is too large for SQLite", -1); + return; + } + sqlite3_result_int64(ctx, as_int); + } else if (result->IsPromise()) { + sqlite3_result_error( + ctx, "Asynchronous user-defined functions are not supported", -1); + } else { + sqlite3_result_error( + ctx, + "Returned JavaScript value cannot be converted to a SQLite value", + -1); + } + } + + static void xDestroy(void* self) { + delete static_cast(self); + } + + private: + Environment* env_; + Global fn_; + bool use_bigint_args_; +}; + DatabaseSync::DatabaseSync(Environment* env, Local object, DatabaseOpenConfiguration&& open_config, - bool open) + bool open, + bool allow_load_extension) : BaseObject(env, object), open_config_(std::move(open_config)) { MakeWeak(); connection_ = nullptr; + allow_load_extension_ = allow_load_extension; + enable_load_extension_ = allow_load_extension; if (open) { Open(); @@ -181,6 +305,19 @@ bool DatabaseSync::Open() { CHECK_ERROR_OR_THROW(env()->isolate(), connection_, r, SQLITE_OK, false); CHECK_EQ(foreign_keys_enabled, open_config_.get_enable_foreign_keys()); + if (allow_load_extension_) { + if (env()->permission()->enabled()) [[unlikely]] { + THROW_ERR_LOAD_SQLITE_EXTENSION(env(), + "Cannot load SQLite extensions when the " + "permission model is enabled."); + return false; + } + const int load_extension_ret = sqlite3_db_config( + connection_, SQLITE_DBCONFIG_ENABLE_LOAD_EXTENSION, 1, nullptr); + CHECK_ERROR_OR_THROW( + env()->isolate(), connection_, load_extension_ret, SQLITE_OK, false); + } + return true; } @@ -226,6 +363,7 @@ void DatabaseSync::New(const FunctionCallbackInfo& args) { DatabaseOpenConfiguration open_config(std::move(location)); bool open = true; + bool allow_load_extension = false; if (args.Length() > 1) { if (!args[1]->IsObject()) { @@ -301,9 +439,28 @@ void DatabaseSync::New(const FunctionCallbackInfo& args) { } open_config.set_enable_dqs(enable_dqs_v.As()->Value()); } + + Local allow_extension_string = + FIXED_ONE_BYTE_STRING(env->isolate(), "allowExtension"); + Local allow_extension_v; + if (!options->Get(env->context(), allow_extension_string) + .ToLocal(&allow_extension_v)) { + return; + } + + if (!allow_extension_v->IsUndefined()) { + if (!allow_extension_v->IsBoolean()) { + THROW_ERR_INVALID_ARG_TYPE( + env->isolate(), + "The \"options.allowExtension\" argument must be a boolean."); + return; + } + allow_load_extension = allow_extension_v.As()->Value(); + } } - new DatabaseSync(env, args.This(), std::move(open_config), open); + new DatabaseSync( + env, args.This(), std::move(open_config), open, allow_load_extension); } void DatabaseSync::Open(const FunctionCallbackInfo& args) { @@ -362,6 +519,151 @@ void DatabaseSync::Exec(const FunctionCallbackInfo& args) { CHECK_ERROR_OR_THROW(env->isolate(), db->connection_, r, SQLITE_OK, void()); } +void DatabaseSync::CustomFunction(const FunctionCallbackInfo& args) { + DatabaseSync* db; + ASSIGN_OR_RETURN_UNWRAP(&db, args.This()); + Environment* env = Environment::GetCurrent(args); + THROW_AND_RETURN_ON_BAD_STATE(env, !db->IsOpen(), "database is not open"); + + if (!args[0]->IsString()) { + THROW_ERR_INVALID_ARG_TYPE(env->isolate(), + "The \"name\" argument must be a string."); + return; + } + + int fn_index = args.Length() < 3 ? 1 : 2; + bool use_bigint_args = false; + bool varargs = false; + bool deterministic = false; + bool direct_only = false; + + if (fn_index > 1) { + if (!args[1]->IsObject()) { + THROW_ERR_INVALID_ARG_TYPE(env->isolate(), + "The \"options\" argument must be an object."); + return; + } + + Local options = args[1].As(); + Local use_bigint_args_v; + if (!options + ->Get(env->context(), + FIXED_ONE_BYTE_STRING(env->isolate(), "useBigIntArguments")) + .ToLocal(&use_bigint_args_v)) { + return; + } + + if (!use_bigint_args_v->IsUndefined()) { + if (!use_bigint_args_v->IsBoolean()) { + THROW_ERR_INVALID_ARG_TYPE( + env->isolate(), + "The \"options.useBigIntArguments\" argument must be a boolean."); + return; + } + use_bigint_args = use_bigint_args_v.As()->Value(); + } + + Local varargs_v; + if (!options + ->Get(env->context(), + FIXED_ONE_BYTE_STRING(env->isolate(), "varargs")) + .ToLocal(&varargs_v)) { + return; + } + + if (!varargs_v->IsUndefined()) { + if (!varargs_v->IsBoolean()) { + THROW_ERR_INVALID_ARG_TYPE( + env->isolate(), + "The \"options.varargs\" argument must be a boolean."); + return; + } + varargs = varargs_v.As()->Value(); + } + + Local deterministic_v; + if (!options + ->Get(env->context(), + FIXED_ONE_BYTE_STRING(env->isolate(), "deterministic")) + .ToLocal(&deterministic_v)) { + return; + } + + if (!deterministic_v->IsUndefined()) { + if (!deterministic_v->IsBoolean()) { + THROW_ERR_INVALID_ARG_TYPE( + env->isolate(), + "The \"options.deterministic\" argument must be a boolean."); + return; + } + deterministic = deterministic_v.As()->Value(); + } + + Local direct_only_v; + if (!options + ->Get(env->context(), + FIXED_ONE_BYTE_STRING(env->isolate(), "directOnly")) + .ToLocal(&direct_only_v)) { + return; + } + + if (!direct_only_v->IsUndefined()) { + if (!direct_only_v->IsBoolean()) { + THROW_ERR_INVALID_ARG_TYPE( + env->isolate(), + "The \"options.directOnly\" argument must be a boolean."); + return; + } + direct_only = direct_only_v.As()->Value(); + } + } + + if (!args[fn_index]->IsFunction()) { + THROW_ERR_INVALID_ARG_TYPE(env->isolate(), + "The \"function\" argument must be a function."); + return; + } + + Utf8Value name(env->isolate(), args[0].As()); + Local fn = args[fn_index].As(); + + int argc = 0; + if (varargs) { + argc = -1; + } else { + Local js_len; + if (!fn->Get(env->context(), + FIXED_ONE_BYTE_STRING(env->isolate(), "length")) + .ToLocal(&js_len)) { + return; + } + argc = js_len.As()->Value(); + } + + UserDefinedFunction* user_data = + new UserDefinedFunction(env, fn, use_bigint_args); + int text_rep = SQLITE_UTF8; + + if (deterministic) { + text_rep |= SQLITE_DETERMINISTIC; + } + + if (direct_only) { + text_rep |= SQLITE_DIRECTONLY; + } + + int r = sqlite3_create_function_v2(db->connection_, + *name, + argc, + text_rep, + user_data, + UserDefinedFunction::xFunc, + nullptr, + nullptr, + UserDefinedFunction::xDestroy); + CHECK_ERROR_OR_THROW(env->isolate(), db->connection_, r, SQLITE_OK, void()); +} + void DatabaseSync::CreateSession(const FunctionCallbackInfo& args) { std::string table; std::string db_name = "main"; @@ -525,6 +827,70 @@ void DatabaseSync::ApplyChangeset(const FunctionCallbackInfo& args) { args.GetReturnValue().Set(true); } +void DatabaseSync::EnableLoadExtension( + const FunctionCallbackInfo& args) { + DatabaseSync* db; + ASSIGN_OR_RETURN_UNWRAP(&db, args.This()); + Environment* env = Environment::GetCurrent(args); + if (!args[0]->IsBoolean()) { + THROW_ERR_INVALID_ARG_TYPE(env->isolate(), + "The \"allow\" argument must be a boolean."); + return; + } + + const int enable = args[0].As()->Value(); + auto isolate = env->isolate(); + + if (db->allow_load_extension_ == false && enable == true) { + THROW_ERR_INVALID_STATE( + isolate, + "Cannot enable extension loading because it was disabled at database " + "creation."); + return; + } + db->enable_load_extension_ = enable; + const int load_extension_ret = sqlite3_db_config( + db->connection_, SQLITE_DBCONFIG_ENABLE_LOAD_EXTENSION, enable, nullptr); + CHECK_ERROR_OR_THROW( + isolate, db->connection_, load_extension_ret, SQLITE_OK, void()); +} + +void DatabaseSync::LoadExtension(const FunctionCallbackInfo& args) { + DatabaseSync* db; + ASSIGN_OR_RETURN_UNWRAP(&db, args.This()); + Environment* env = Environment::GetCurrent(args); + THROW_AND_RETURN_ON_BAD_STATE( + env, db->connection_ == nullptr, "database is not open"); + THROW_AND_RETURN_ON_BAD_STATE( + env, !db->allow_load_extension_, "extension loading is not allowed"); + THROW_AND_RETURN_ON_BAD_STATE( + env, !db->enable_load_extension_, "extension loading is not allowed"); + + if (!args[0]->IsString()) { + THROW_ERR_INVALID_ARG_TYPE(env->isolate(), + "The \"path\" argument must be a string."); + return; + } + + auto isolate = env->isolate(); + + BufferValue path(isolate, args[0]); + BufferValue entryPoint(isolate, args[1]); + CHECK_NOT_NULL(*path); + ToNamespacedPath(env, &path); + if (*entryPoint == nullptr) { + ToNamespacedPath(env, &entryPoint); + } + THROW_IF_INSUFFICIENT_PERMISSIONS( + env, permission::PermissionScope::kFileSystemRead, path.ToStringView()); + char* errmsg = nullptr; + const int r = + sqlite3_load_extension(db->connection_, *path, *entryPoint, &errmsg); + if (r != SQLITE_OK) { + isolate->ThrowException(ERR_LOAD_SQLITE_EXTENSION(isolate, errmsg)); + } +} + StatementSync::StatementSync(Environment* env, Local object, DatabaseSync* db, @@ -790,6 +1156,180 @@ void StatementSync::All(const FunctionCallbackInfo& args) { args.GetReturnValue().Set(Array::New(isolate, rows.data(), rows.size())); } +void StatementSync::IterateReturnCallback( + const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + auto isolate = env->isolate(); + auto context = isolate->GetCurrentContext(); + + auto self = args.This(); + // iterator has fetch all result or break, prevent next func to return result + self->Set(context, env->isfinished_string(), Boolean::New(isolate, true)) + .ToChecked(); + + auto external_stmt = Local::Cast( + self->Get(context, env->statement_string()).ToLocalChecked()); + auto stmt = static_cast(external_stmt->Value()); + if (!stmt->IsFinalized()) { + sqlite3_reset(stmt->statement_); + } + + LocalVector keys(isolate, {env->done_string(), env->value_string()}); + LocalVector values(isolate, + {Boolean::New(isolate, true), Null(isolate)}); + + DCHECK_EQ(keys.size(), values.size()); + Local result = Object::New( + isolate, Null(isolate), keys.data(), values.data(), keys.size()); + args.GetReturnValue().Set(result); +} + +void StatementSync::IterateNextCallback( + const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + auto isolate = env->isolate(); + auto context = isolate->GetCurrentContext(); + + auto self = args.This(); + + // skip iteration if is_finished + auto is_finished = Local::Cast( + self->Get(context, env->isfinished_string()).ToLocalChecked()); + if (is_finished->Value()) { + LocalVector keys(isolate, {env->done_string(), env->value_string()}); + LocalVector values(isolate, + {Boolean::New(isolate, true), Null(isolate)}); + + DCHECK_EQ(keys.size(), values.size()); + Local result = Object::New( + isolate, Null(isolate), keys.data(), values.data(), keys.size()); + args.GetReturnValue().Set(result); + return; + } + + auto external_stmt = Local::Cast( + self->Get(context, env->statement_string()).ToLocalChecked()); + auto stmt = static_cast(external_stmt->Value()); + auto num_cols = + Local::Cast( + self->Get(context, env->num_cols_string()).ToLocalChecked()) + ->Value(); + + THROW_AND_RETURN_ON_BAD_STATE( + env, stmt->IsFinalized(), "statement has been finalized"); + + int r = sqlite3_step(stmt->statement_); + if (r != SQLITE_ROW) { + CHECK_ERROR_OR_THROW( + env->isolate(), stmt->db_->Connection(), r, SQLITE_DONE, void()); + + // cleanup when no more rows to fetch + sqlite3_reset(stmt->statement_); + self->Set(context, env->isfinished_string(), Boolean::New(isolate, true)) + .ToChecked(); + + LocalVector keys(isolate, {env->done_string(), env->value_string()}); + LocalVector values(isolate, + {Boolean::New(isolate, true), Null(isolate)}); + + DCHECK_EQ(keys.size(), values.size()); + Local result = Object::New( + isolate, Null(isolate), keys.data(), values.data(), keys.size()); + args.GetReturnValue().Set(result); + return; + } + + LocalVector row_keys(isolate); + row_keys.reserve(num_cols); + LocalVector row_values(isolate); + row_values.reserve(num_cols); + for (int i = 0; i < num_cols; ++i) { + Local key; + if (!stmt->ColumnNameToName(i).ToLocal(&key)) return; + Local val; + if (!stmt->ColumnToValue(i).ToLocal(&val)) return; + row_keys.emplace_back(key); + row_values.emplace_back(val); + } + + Local row = Object::New( + isolate, Null(isolate), row_keys.data(), row_values.data(), num_cols); + + LocalVector keys(isolate, {env->done_string(), env->value_string()}); + LocalVector values(isolate, {Boolean::New(isolate, false), row}); + + DCHECK_EQ(keys.size(), values.size()); + Local result = Object::New( + isolate, Null(isolate), keys.data(), values.data(), keys.size()); + args.GetReturnValue().Set(result); +} + +void StatementSync::Iterate(const FunctionCallbackInfo& args) { + StatementSync* stmt; + ASSIGN_OR_RETURN_UNWRAP(&stmt, args.This()); + Environment* env = Environment::GetCurrent(args); + THROW_AND_RETURN_ON_BAD_STATE( + env, stmt->IsFinalized(), "statement has been finalized"); + auto isolate = env->isolate(); + auto context = env->context(); + int r = sqlite3_reset(stmt->statement_); + CHECK_ERROR_OR_THROW( + env->isolate(), stmt->db_->Connection(), r, SQLITE_OK, void()); + + if (!stmt->BindParams(args)) { + return; + } + + Local next_func = + Function::New(context, StatementSync::IterateNextCallback) + .ToLocalChecked(); + Local return_func = + Function::New(context, StatementSync::IterateReturnCallback) + .ToLocalChecked(); + + LocalVector keys(isolate, {env->next_string(), env->return_string()}); + LocalVector values(isolate, {next_func, return_func}); + + Local global = context->Global(); + Local js_iterator; + Local js_iterator_prototype; + if (!global->Get(context, env->iterator_string()).ToLocal(&js_iterator)) + return; + if (!js_iterator.As() + ->Get(context, env->prototype_string()) + .ToLocal(&js_iterator_prototype)) + return; + + DCHECK_EQ(keys.size(), values.size()); + Local iterable_iterator = Object::New( + isolate, js_iterator_prototype, keys.data(), values.data(), keys.size()); + + auto num_cols_pd = v8::PropertyDescriptor( + v8::Integer::New(isolate, sqlite3_column_count(stmt->statement_)), false); + num_cols_pd.set_enumerable(false); + num_cols_pd.set_configurable(false); + iterable_iterator + ->DefineProperty(context, env->num_cols_string(), num_cols_pd) + .ToChecked(); + + auto stmt_pd = + v8::PropertyDescriptor(v8::External::New(isolate, stmt), false); + stmt_pd.set_enumerable(false); + stmt_pd.set_configurable(false); + iterable_iterator->DefineProperty(context, env->statement_string(), stmt_pd) + .ToChecked(); + + auto is_finished_pd = + v8::PropertyDescriptor(v8::Boolean::New(isolate, false), true); + stmt_pd.set_enumerable(false); + stmt_pd.set_configurable(false); + iterable_iterator + ->DefineProperty(context, env->isfinished_string(), is_finished_pd) + .ToChecked(); + + args.GetReturnValue().Set(iterable_iterator); +} + void StatementSync::Get(const FunctionCallbackInfo& args) { StatementSync* stmt; ASSIGN_OR_RETURN_UNWRAP(&stmt, args.This()); @@ -987,6 +1527,7 @@ Local StatementSync::GetConstructorTemplate( tmpl->SetClassName(FIXED_ONE_BYTE_STRING(isolate, "StatementSync")); tmpl->InstanceTemplate()->SetInternalFieldCount( StatementSync::kInternalFieldCount); + SetProtoMethod(isolate, tmpl, "iterate", StatementSync::Iterate); SetProtoMethod(isolate, tmpl, "all", StatementSync::All); SetProtoMethod(isolate, tmpl, "get", StatementSync::Get); SetProtoMethod(isolate, tmpl, "run", StatementSync::Run); @@ -1117,6 +1658,12 @@ void Session::Delete() { session_ = nullptr; } +void DefineConstants(Local target) { + NODE_DEFINE_CONSTANT(target, SQLITE_CHANGESET_OMIT); + NODE_DEFINE_CONSTANT(target, SQLITE_CHANGESET_REPLACE); + NODE_DEFINE_CONSTANT(target, SQLITE_CHANGESET_ABORT); +} + static void Initialize(Local target, Local unused, Local context, @@ -1127,24 +1674,32 @@ static void Initialize(Local target, NewFunctionTemplate(isolate, DatabaseSync::New); db_tmpl->InstanceTemplate()->SetInternalFieldCount( DatabaseSync::kInternalFieldCount); + Local constants = Object::New(isolate); + + DefineConstants(constants); SetProtoMethod(isolate, db_tmpl, "open", DatabaseSync::Open); SetProtoMethod(isolate, db_tmpl, "close", DatabaseSync::Close); SetProtoMethod(isolate, db_tmpl, "prepare", DatabaseSync::Prepare); SetProtoMethod(isolate, db_tmpl, "exec", DatabaseSync::Exec); + SetProtoMethod(isolate, db_tmpl, "function", DatabaseSync::CustomFunction); SetProtoMethod( isolate, db_tmpl, "createSession", DatabaseSync::CreateSession); SetProtoMethod( isolate, db_tmpl, "applyChangeset", DatabaseSync::ApplyChangeset); + SetProtoMethod(isolate, + db_tmpl, + "enableLoadExtension", + DatabaseSync::EnableLoadExtension); + SetProtoMethod( + isolate, db_tmpl, "loadExtension", DatabaseSync::LoadExtension); SetConstructorFunction(context, target, "DatabaseSync", db_tmpl); SetConstructorFunction(context, target, "StatementSync", StatementSync::GetConstructorTemplate(env)); - NODE_DEFINE_CONSTANT(target, SQLITE_CHANGESET_OMIT); - NODE_DEFINE_CONSTANT(target, SQLITE_CHANGESET_REPLACE); - NODE_DEFINE_CONSTANT(target, SQLITE_CHANGESET_ABORT); + target->Set(context, OneByteString(isolate, "constants"), constants).Check(); } } // namespace sqlite diff --git a/src/node_sqlite.h b/src/node_sqlite.h index d778d28e9ac289..e78aa39abb3ba5 100644 --- a/src/node_sqlite.h +++ b/src/node_sqlite.h @@ -49,15 +49,20 @@ class DatabaseSync : public BaseObject { DatabaseSync(Environment* env, v8::Local object, DatabaseOpenConfiguration&& open_config, - bool open); + bool open, + bool allow_load_extension); void MemoryInfo(MemoryTracker* tracker) const override; static void New(const v8::FunctionCallbackInfo& args); static void Open(const v8::FunctionCallbackInfo& args); static void Close(const v8::FunctionCallbackInfo& args); static void Prepare(const v8::FunctionCallbackInfo& args); static void Exec(const v8::FunctionCallbackInfo& args); + static void CustomFunction(const v8::FunctionCallbackInfo& args); static void CreateSession(const v8::FunctionCallbackInfo& args); static void ApplyChangeset(const v8::FunctionCallbackInfo& args); + static void EnableLoadExtension( + const v8::FunctionCallbackInfo& args); + static void LoadExtension(const v8::FunctionCallbackInfo& args); void FinalizeStatements(); void UntrackStatement(StatementSync* statement); bool IsOpen(); @@ -72,6 +77,8 @@ class DatabaseSync : public BaseObject { ~DatabaseSync() override; DatabaseOpenConfiguration open_config_; + bool allow_load_extension_; + bool enable_load_extension_; sqlite3* connection_; std::set sessions_; @@ -93,6 +100,7 @@ class StatementSync : public BaseObject { DatabaseSync* db, sqlite3_stmt* stmt); static void All(const v8::FunctionCallbackInfo& args); + static void Iterate(const v8::FunctionCallbackInfo& args); static void Get(const v8::FunctionCallbackInfo& args); static void Run(const v8::FunctionCallbackInfo& args); static void SourceSQLGetter(const v8::FunctionCallbackInfo& args); @@ -118,6 +126,11 @@ class StatementSync : public BaseObject { bool BindValue(const v8::Local& value, const int index); v8::MaybeLocal ColumnToValue(const int column); v8::MaybeLocal ColumnNameToName(const int column); + + static void IterateNextCallback( + const v8::FunctionCallbackInfo& args); + static void IterateReturnCallback( + const v8::FunctionCallbackInfo& args); }; using Sqlite3ChangesetGenFunc = int (*)(sqlite3_session*, int*, void**); diff --git a/src/node_version.h b/src/node_version.h index 5e33f94d33c44f..51d1bdd99eed74 100644 --- a/src/node_version.h +++ b/src/node_version.h @@ -23,13 +23,13 @@ #define SRC_NODE_VERSION_H_ #define NODE_MAJOR_VERSION 22 -#define NODE_MINOR_VERSION 12 -#define NODE_PATCH_VERSION 1 +#define NODE_MINOR_VERSION 13 +#define NODE_PATCH_VERSION 0 #define NODE_VERSION_IS_LTS 1 #define NODE_VERSION_LTS_CODENAME "Jod" -#define NODE_VERSION_IS_RELEASE 0 +#define NODE_VERSION_IS_RELEASE 1 #ifndef NODE_STRINGIFY #define NODE_STRINGIFY(n) NODE_STRINGIFY_HELPER(n) diff --git a/src/node_worker.cc b/src/node_worker.cc index e8026fe24c7021..f64609cf045441 100644 --- a/src/node_worker.cc +++ b/src/node_worker.cc @@ -449,6 +449,9 @@ void Worker::JoinThread() { env()->remove_sub_worker_context(this); + // Join may happen after the worker exits and disposes the isolate + if (!env()->can_call_into_js()) return; + { HandleScope handle_scope(env()->isolate()); Context::Scope context_scope(env()->context()); diff --git a/src/path.cc b/src/path.cc index 4068e1d892d6b7..5fb745832e492d 100644 --- a/src/path.cc +++ b/src/path.cc @@ -114,7 +114,7 @@ std::string PathResolve(Environment* env, // a UNC path at this points, because UNC paths are always absolute. std::string resolvedDevicePath; const std::string envvar = "=" + resolvedDevice; - credentials::SafeGetenv(envvar.c_str(), &resolvedDevicePath); + credentials::SafeGetenv(envvar.c_str(), &resolvedDevicePath, env); path = resolvedDevicePath.empty() ? cwd : resolvedDevicePath; // Verify that a cwd was found and that it actually points diff --git a/src/process_wrap.cc b/src/process_wrap.cc index 14c9e99934e27b..27a294eb384960 100644 --- a/src/process_wrap.cc +++ b/src/process_wrap.cc @@ -314,6 +314,12 @@ class ProcessWrap : public HandleWrap { ProcessWrap* wrap; ASSIGN_OR_RETURN_UNWRAP(&wrap, args.This()); int signal = args[0]->Int32Value(env->context()).FromJust(); +#ifdef _WIN32 + if (signal != SIGKILL && signal != SIGTERM && signal != SIGINT && + signal != SIGQUIT) { + signal = SIGKILL; + } +#endif int err = uv_process_kill(&wrap->process_, signal); args.GetReturnValue().Set(err); } diff --git a/src/quic/session.cc b/src/quic/session.cc index b98ce4a132af60..4323c9268fdac2 100644 --- a/src/quic/session.cc +++ b/src/quic/session.cc @@ -1531,7 +1531,7 @@ void Session::EmitDatagram(Store&& datagram, DatagramReceivedFlags flag) { DCHECK(!is_destroyed()); if (!env()->can_call_into_js()) return; - CallbackScope cbv_scope(this); + CallbackScope cbv_scope(this); Local argv[] = {datagram.ToUint8Array(env()), v8::Boolean::New(env()->isolate(), flag.early)}; diff --git a/src/util.cc b/src/util.cc index 34263cc9f3dc06..3e9dfb4392fb3e 100644 --- a/src/util.cc +++ b/src/util.cc @@ -760,6 +760,16 @@ std::string DetermineSpecificErrorType(Environment* env, input.As()->GetConstructorName(); Utf8Value name(env->isolate(), constructor_name); return SPrintF("an instance of %s", name.out()); + } else if (input->IsSymbol()) { + v8::MaybeLocal str = + input.As()->ToDetailString(env->context()); + v8::Local js_str; + if (!str.ToLocal(&js_str)) { + return "Symbol"; + } + Utf8Value name(env->isolate(), js_str); + // Symbol(xxx) + return name.out(); } Utf8Value utf8_value(env->isolate(), diff --git a/src/zlib_version.h b/src/zlib_version.h index f9db56d4f51d37..3b53884aae2206 100644 --- a/src/zlib_version.h +++ b/src/zlib_version.h @@ -2,5 +2,5 @@ // Refer to tools/dep_updaters/update-zlib.sh #ifndef SRC_ZLIB_VERSION_H_ #define SRC_ZLIB_VERSION_H_ -#define ZLIB_VERSION "1.3.0.1-motley-71660e1" +#define ZLIB_VERSION "1.3.0.1-motley-82a5fec" #endif // SRC_ZLIB_VERSION_H_ diff --git a/test/addons/no-addons/permission.js b/test/addons/no-addons/permission.js index 0fbcd2bb1ee782..1d1bbf6e95468e 100644 --- a/test/addons/no-addons/permission.js +++ b/test/addons/no-addons/permission.js @@ -1,4 +1,4 @@ -// Flags: --experimental-permission --allow-fs-read=* +// Flags: --permission --allow-fs-read=* 'use strict'; diff --git a/test/cctest/test_encoding_binding.cc b/test/cctest/test_encoding_binding.cc new file mode 100644 index 00000000000000..d5d14c60fedf7e --- /dev/null +++ b/test/cctest/test_encoding_binding.cc @@ -0,0 +1,176 @@ +#include "encoding_binding.h" +#include "env-inl.h" +#include "gtest/gtest.h" +#include "node_test_fixture.h" +#include "v8.h" + +namespace node { +namespace encoding_binding { + +bool RunDecodeLatin1(Environment* env, + Local args[], + bool ignore_bom, + bool has_fatal, + Local* result) { + Isolate* isolate = env->isolate(); + TryCatch try_catch(isolate); + + Local ignoreBOMValue = Boolean::New(isolate, ignore_bom); + Local fatalValue = Boolean::New(isolate, has_fatal); + + Local updatedArgs[] = {args[0], ignoreBOMValue, fatalValue}; + + BindingData::DecodeLatin1(FunctionCallbackInfo(updatedArgs)); + + if (try_catch.HasCaught()) { + return false; + } + + *result = args[0]; + return true; +} + +class EncodingBindingTest : public NodeTestFixture {}; + +TEST_F(EncodingBindingTest, DecodeLatin1_ValidInput) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + const uint8_t latin1_data[] = {0xC1, 0xE9, 0xF3}; + Local ab = ArrayBuffer::New(isolate, sizeof(latin1_data)); + memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data)); + + Local array = Uint8Array::New(ab, 0, sizeof(latin1_data)); + Local args[] = {array}; + + Local result; + EXPECT_TRUE(RunDecodeLatin1(env, args, false, false, &result)); + + String::Utf8Value utf8_result(isolate, result); + EXPECT_STREQ(*utf8_result, "Áéó"); +} + +TEST_F(EncodingBindingTest, DecodeLatin1_EmptyInput) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + Local ab = ArrayBuffer::New(isolate, 0); + Local array = Uint8Array::New(ab, 0, 0); + Local args[] = {array}; + + Local result; + EXPECT_TRUE(RunDecodeLatin1(env, args, false, false, &result)); + + String::Utf8Value utf8_result(isolate, result); + EXPECT_STREQ(*utf8_result, ""); +} + +TEST_F(EncodingBindingTest, DecodeLatin1_InvalidInput) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + Local args[] = {String::NewFromUtf8Literal(isolate, "Invalid input")}; + + Local result; + EXPECT_FALSE(RunDecodeLatin1(env, args, false, false, &result)); +} + +TEST_F(EncodingBindingTest, DecodeLatin1_IgnoreBOM) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + const uint8_t latin1_data[] = {0xFE, 0xFF, 0xC1, 0xE9, 0xF3}; + Local ab = ArrayBuffer::New(isolate, sizeof(latin1_data)); + memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data)); + + Local array = Uint8Array::New(ab, 0, sizeof(latin1_data)); + Local args[] = {array}; + + Local result; + EXPECT_TRUE(RunDecodeLatin1(env, args, true, false, &result)); + + String::Utf8Value utf8_result(isolate, result); + EXPECT_STREQ(*utf8_result, "Áéó"); +} + +TEST_F(EncodingBindingTest, DecodeLatin1_FatalInvalidInput) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + const uint8_t invalid_data[] = {0xFF, 0xFF, 0xFF}; + Local ab = ArrayBuffer::New(isolate, sizeof(invalid_data)); + memcpy(ab->GetBackingStore()->Data(), invalid_data, sizeof(invalid_data)); + + Local array = Uint8Array::New(ab, 0, sizeof(invalid_data)); + Local args[] = {array}; + + Local result; + EXPECT_FALSE(RunDecodeLatin1(env, args, false, true, &result)); +} + +TEST_F(EncodingBindingTest, DecodeLatin1_IgnoreBOMAndFatal) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + const uint8_t latin1_data[] = {0xFE, 0xFF, 0xC1, 0xE9, 0xF3}; + Local ab = ArrayBuffer::New(isolate, sizeof(latin1_data)); + memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data)); + + Local array = Uint8Array::New(ab, 0, sizeof(latin1_data)); + Local args[] = {array}; + + Local result; + EXPECT_TRUE(RunDecodeLatin1(env, args, true, true, &result)); + + String::Utf8Value utf8_result(isolate, result); + EXPECT_STREQ(*utf8_result, "Áéó"); +} + +TEST_F(EncodingBindingTest, DecodeLatin1_BOMPresent) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + const uint8_t latin1_data[] = {0xFF, 0xC1, 0xE9, 0xF3}; + Local ab = ArrayBuffer::New(isolate, sizeof(latin1_data)); + memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data)); + + Local array = Uint8Array::New(ab, 0, sizeof(latin1_data)); + Local args[] = {array}; + + Local result; + EXPECT_TRUE(RunDecodeLatin1(env, args, true, false, &result)); + + String::Utf8Value utf8_result(isolate, result); + EXPECT_STREQ(*utf8_result, "Áéó"); +} + +TEST_F(EncodingBindingTest, DecodeLatin1_ReturnsString) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + const uint8_t latin1_data[] = {0xC1, 0xE9, 0xF3}; + Local ab = ArrayBuffer::New(isolate, sizeof(latin1_data)); + memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data)); + + Local array = Uint8Array::New(ab, 0, sizeof(latin1_data)); + Local args[] = {array}; + + Local result; + ASSERT_TRUE(RunDecodeLatin1(env, args, false, false, &result)); + + ASSERT_TRUE(result->IsString()); + + String::Utf8Value utf8_result(isolate, result); + EXPECT_STREQ(*utf8_result, "Áéó"); +} + +} // namespace encoding_binding +} // namespace node diff --git a/test/cctest/test_sockaddr.cc b/test/cctest/test_sockaddr.cc index bd80d59f821e5d..68b8739f97e1fc 100644 --- a/test/cctest/test_sockaddr.cc +++ b/test/cctest/test_sockaddr.cc @@ -145,9 +145,9 @@ TEST(SocketAddress, Comparison) { SocketAddress addr5(reinterpret_cast(&storage[4])); SocketAddress addr6(reinterpret_cast(&storage[5])); - CHECK_EQ(addr1.compare(addr1), SocketAddress::CompareResult::SAME); - CHECK_EQ(addr1.compare(addr2), SocketAddress::CompareResult::LESS_THAN); - CHECK_EQ(addr2.compare(addr1), SocketAddress::CompareResult::GREATER_THAN); + CHECK_EQ(addr1.compare(addr1), std::partial_ordering::equivalent); + CHECK_EQ(addr1.compare(addr2), std::partial_ordering::less); + CHECK_EQ(addr2.compare(addr1), std::partial_ordering::greater); CHECK(addr1 <= addr1); CHECK(addr1 < addr2); CHECK(addr1 <= addr2); @@ -155,9 +155,9 @@ TEST(SocketAddress, Comparison) { CHECK(addr2 > addr1); CHECK(addr2 >= addr1); - CHECK_EQ(addr3.compare(addr3), SocketAddress::CompareResult::SAME); - CHECK_EQ(addr3.compare(addr4), SocketAddress::CompareResult::LESS_THAN); - CHECK_EQ(addr4.compare(addr3), SocketAddress::CompareResult::GREATER_THAN); + CHECK_EQ(addr3.compare(addr3), std::partial_ordering::equivalent); + CHECK_EQ(addr3.compare(addr4), std::partial_ordering::less); + CHECK_EQ(addr4.compare(addr3), std::partial_ordering::greater); CHECK(addr3 <= addr3); CHECK(addr3 < addr4); CHECK(addr3 <= addr4); @@ -166,8 +166,8 @@ TEST(SocketAddress, Comparison) { CHECK(addr4 >= addr3); // Not comparable - CHECK_EQ(addr1.compare(addr3), SocketAddress::CompareResult::NOT_COMPARABLE); - CHECK_EQ(addr3.compare(addr1), SocketAddress::CompareResult::NOT_COMPARABLE); + CHECK_EQ(addr1.compare(addr3), std::partial_ordering::unordered); + CHECK_EQ(addr3.compare(addr1), std::partial_ordering::unordered); CHECK(!(addr1 < addr3)); CHECK(!(addr1 > addr3)); CHECK(!(addr1 >= addr3)); @@ -178,10 +178,10 @@ TEST(SocketAddress, Comparison) { CHECK(!(addr3 <= addr1)); // Comparable - CHECK_EQ(addr1.compare(addr5), SocketAddress::CompareResult::SAME); - CHECK_EQ(addr2.compare(addr6), SocketAddress::CompareResult::SAME); - CHECK_EQ(addr1.compare(addr6), SocketAddress::CompareResult::LESS_THAN); - CHECK_EQ(addr6.compare(addr1), SocketAddress::CompareResult::GREATER_THAN); + CHECK_EQ(addr1.compare(addr5), std::partial_ordering::equivalent); + CHECK_EQ(addr2.compare(addr6), std::partial_ordering::equivalent); + CHECK_EQ(addr1.compare(addr6), std::partial_ordering::less); + CHECK_EQ(addr6.compare(addr1), std::partial_ordering::greater); CHECK(addr1 <= addr5); CHECK(addr1 <= addr6); CHECK(addr1 < addr6); diff --git a/test/common/index.js b/test/common/index.js index 50595945b193f4..d1eaf6e69f603b 100644 --- a/test/common/index.js +++ b/test/common/index.js @@ -1051,14 +1051,6 @@ const common = { return hasOpenSSL(3); }, - get hasOpenSSL31() { - return hasOpenSSL(3, 1); - }, - - get hasOpenSSL32() { - return hasOpenSSL(3, 2); - }, - get inFreeBSDJail() { if (inFreeBSDJail !== null) return inFreeBSDJail; diff --git a/test/common/report.js b/test/common/report.js index f7093148ef844c..ade0f9aa94a023 100644 --- a/test/common/report.js +++ b/test/common/report.js @@ -59,7 +59,12 @@ function _validateContent(report, fields = []) { // Verify that all sections are present as own properties of the report. const sections = ['header', 'nativeStack', 'javascriptStack', 'libuv', - 'environmentVariables', 'sharedObjects', 'resourceUsage', 'workers']; + 'sharedObjects', 'resourceUsage', 'workers']; + + if (!process.report.excludeEnv) { + sections.push('environmentVariables'); + } + if (!isWindows) sections.push('userLimits'); @@ -105,7 +110,7 @@ function _validateContent(report, fields = []) { 'glibcVersionRuntime', 'glibcVersionCompiler', 'cwd', 'reportVersion', 'networkInterfaces', 'threadId']; checkForUnknownFields(header, headerFields); - assert.strictEqual(header.reportVersion, 4); // Increment as needed. + assert.strictEqual(header.reportVersion, 5); // Increment as needed. assert.strictEqual(typeof header.event, 'string'); assert.strictEqual(typeof header.trigger, 'string'); assert(typeof header.filename === 'string' || header.filename === null); @@ -294,19 +299,21 @@ function _validateContent(report, fields = []) { resource.type === 'loop' ? 'undefined' : 'boolean'); }); - // Verify the format of the environmentVariables section. - for (const [key, value] of Object.entries(report.environmentVariables)) { - assert.strictEqual(typeof key, 'string'); - assert.strictEqual(typeof value, 'string'); + if (!process.report.excludeEnv) { + // Verify the format of the environmentVariables section. + for (const [key, value] of Object.entries(report.environmentVariables)) { + assert.strictEqual(typeof key, 'string'); + assert.strictEqual(typeof value, 'string'); + } } // Verify the format of the userLimits section on non-Windows platforms. if (!isWindows) { - const userLimitsFields = ['core_file_size_blocks', 'data_seg_size_kbytes', + const userLimitsFields = ['core_file_size_blocks', 'data_seg_size_bytes', 'file_size_blocks', 'max_locked_memory_bytes', - 'max_memory_size_kbytes', 'open_files', + 'max_memory_size_bytes', 'open_files', 'stack_size_bytes', 'cpu_time_seconds', - 'max_user_processes', 'virtual_memory_kbytes']; + 'max_user_processes', 'virtual_memory_bytes']; checkForUnknownFields(report.userLimits, userLimitsFields); for (const [type, limits] of Object.entries(report.userLimits)) { assert.strictEqual(typeof type, 'string'); diff --git a/test/es-module/test-cjs-legacyMainResolve-permission.js b/test/es-module/test-cjs-legacyMainResolve-permission.js index 392bfb753d7764..fcebc22ccf2929 100644 --- a/test/es-module/test-cjs-legacyMainResolve-permission.js +++ b/test/es-module/test-cjs-legacyMainResolve-permission.js @@ -1,6 +1,6 @@ 'use strict'; -// Flags: --expose-internals --experimental-permission --allow-fs-read=* --allow-child-process +// Flags: --expose-internals --permission --allow-fs-read=* --allow-child-process require('../common'); @@ -40,7 +40,7 @@ describe('legacyMainResolve', () => { process.execPath, [ '--expose-internals', - '--experimental-permission', + '--permission', ...allowReadFiles, '-e', ` @@ -98,7 +98,7 @@ describe('legacyMainResolve', () => { process.execPath, [ '--expose-internals', - '--experimental-permission', + '--permission', ...allowReadFiles, '-e', ` diff --git a/test/es-module/test-esm-loader-hooks.mjs b/test/es-module/test-esm-loader-hooks.mjs index 4a4d15648a79b5..ed5c27cbc4b84f 100644 --- a/test/es-module/test-esm-loader-hooks.mjs +++ b/test/es-module/test-esm-loader-hooks.mjs @@ -182,7 +182,7 @@ describe('Loader hooks', { concurrency: !process.env.TEST_PARALLEL }, () => { it('should work without worker permission', async () => { const { code, signal, stdout, stderr } = await spawnPromisified(execPath, [ '--no-warnings', - '--experimental-permission', + '--permission', '--allow-fs-read', '*', '--experimental-loader', @@ -199,7 +199,7 @@ describe('Loader hooks', { concurrency: !process.env.TEST_PARALLEL }, () => { it('should allow loader hooks to spawn workers when allowed by the CLI flags', async () => { const { code, signal, stdout, stderr } = await spawnPromisified(execPath, [ '--no-warnings', - '--experimental-permission', + '--permission', '--allow-worker', '--allow-fs-read', '*', @@ -217,7 +217,7 @@ describe('Loader hooks', { concurrency: !process.env.TEST_PARALLEL }, () => { it('should not allow loader hooks to spawn workers if restricted by the CLI flags', async () => { const { code, signal, stdout, stderr } = await spawnPromisified(execPath, [ '--no-warnings', - '--experimental-permission', + '--permission', '--allow-fs-read', '*', '--experimental-loader', diff --git a/test/es-module/test-esm-loader-spawn-promisified.mjs b/test/es-module/test-esm-loader-spawn-promisified.mjs index 628ff3f0d423e5..2f27f7850f646e 100644 --- a/test/es-module/test-esm-loader-spawn-promisified.mjs +++ b/test/es-module/test-esm-loader-spawn-promisified.mjs @@ -285,4 +285,20 @@ describe('Loader hooks parsing modules', { concurrency: !process.env.TEST_PARALL assert.strictEqual(code, 0); assert.strictEqual(signal, null); }); + + it('throw maximum call stack error on the loader', async () => { + const { code, signal, stdout, stderr } = await spawnPromisified(execPath, [ + '--no-warnings', + '--experimental-loader', + fixtures.fileURL('/es-module-loaders/hooks-custom.mjs'), + '--input-type=module', + '--eval', + 'await import("esmHook/maximumCallStack.mjs")', + ]); + + assert(stderr.includes('Maximum call stack size exceeded')); + assert.strictEqual(stdout, ''); + assert.strictEqual(code, 1); + assert.strictEqual(signal, null); + }); }); diff --git a/test/es-module/test-require-module-error-catching.js b/test/es-module/test-require-module-error-catching.js new file mode 100644 index 00000000000000..c314513d9bbf04 --- /dev/null +++ b/test/es-module/test-require-module-error-catching.js @@ -0,0 +1,21 @@ +// This tests synchronous errors in ESM from require(esm) can be caught. + +'use strict'; + +require('../common'); +const assert = require('assert'); + +// Runtime errors from throw should be caught. +assert.throws(() => { + require('../fixtures/es-modules/runtime-error-esm.js'); +}, { + message: 'hello' +}); + +// References errors should be caught too. +assert.throws(() => { + require('../fixtures/es-modules/reference-error-esm.js'); +}, { + name: 'ReferenceError', + message: 'exports is not defined' +}); diff --git a/test/es-module/test-require-module-preload.js b/test/es-module/test-require-module-preload.js index 7a8a09486b43d7..0b8b79118ee8e6 100644 --- a/test/es-module/test-require-module-preload.js +++ b/test/es-module/test-require-module-preload.js @@ -5,8 +5,6 @@ const { spawnSyncAndAssert } = require('../common/child_process'); const { fixturesDir } = require('../common/fixtures'); function testPreload(preloadFlag) { - // The warning is only emitted when ESM is loaded by --require. - const isRequire = preloadFlag === '--require'; // Test named exports. { spawnSyncAndAssert( @@ -22,8 +20,6 @@ function testPreload(preloadFlag) { }, { stdout: 'A', - stderr: isRequire ? - /ExperimentalWarning: --require is loading ES Module .*module-named-exports\.mjs using require/ : undefined, trim: true, } ); @@ -43,8 +39,6 @@ function testPreload(preloadFlag) { cwd: fixturesDir }, { - stderr: isRequire ? - /ExperimentalWarning: --require is loading ES Module .*import-esm\.mjs using require/ : undefined, stdout: /^world\s+A$/, trim: true, } @@ -66,8 +60,6 @@ function testPreload(preloadFlag) { }, { stdout: /^ok\s+A$/, - stderr: isRequire ? - /ExperimentalWarning: --require is loading ES Module .*cjs-exports\.mjs using require/ : undefined, trim: true, } ); @@ -90,8 +82,6 @@ function testPreload(preloadFlag) { }, { stdout: /^world\s+A$/, - stderr: isRequire ? - /ExperimentalWarning: --require is loading ES Module .*require-cjs\.mjs using require/ : undefined, trim: true, } ); @@ -117,7 +107,6 @@ testPreload('--import'); }, { stdout: /^package-type-module\s+A$/, - stderr: /ExperimentalWarning: --require is loading ES Module .*package-type-module[\\/]index\.js using require/, trim: true, } ); diff --git a/test/es-module/test-require-module-synchronous-rejection-handling.js b/test/es-module/test-require-module-synchronous-rejection-handling.js new file mode 100644 index 00000000000000..76a25e742bb900 --- /dev/null +++ b/test/es-module/test-require-module-synchronous-rejection-handling.js @@ -0,0 +1,13 @@ +// This synchronous rejections from require(esm) still go to the unhandled rejection +// handler. + +'use strict'; + +const common = require('../common'); +const assert = require('assert'); + +process.on('unhandledRejection', common.mustCall((reason, promise) => { + assert.strictEqual(reason, 'reject!'); +})); + +require('../fixtures/es-modules/synchronous-rejection-esm.js'); diff --git a/test/es-module/test-require-module-warning.js b/test/es-module/test-require-module-warning.js index d5be2fc6da8755..4c651efd5c0cc7 100644 --- a/test/es-module/test-require-module-warning.js +++ b/test/es-module/test-require-module-warning.js @@ -1,8 +1,6 @@ 'use strict'; -// This checks the warning and the stack trace emitted by the require(esm) -// experimental warning. It can get removed when `require(esm)` becomes stable. - +// This checks the warning and the stack trace emitted by --trace-require-module=all. require('../common'); const { spawnSyncAndAssert } = require('../common/child_process'); const fixtures = require('../common/fixtures'); @@ -10,6 +8,7 @@ const assert = require('assert'); spawnSyncAndAssert(process.execPath, [ '--trace-warnings', + '--trace-require-module=all', fixtures.path('es-modules', 'require-module.js'), ], { trim: true, @@ -33,3 +32,12 @@ spawnSyncAndAssert(process.execPath, [ ); } }); + +spawnSyncAndAssert(process.execPath, [ + '--trace-require-module=1', + fixtures.path('es-modules', 'require-module.js'), +], { + status: 9, + trim: true, + stderr: /invalid value for --trace-require-module/ +}); diff --git a/test/es-module/test-require-module.js b/test/es-module/test-require-module.js index 4987fbf7b07d43..7e05fbd4a5bf88 100644 --- a/test/es-module/test-require-module.js +++ b/test/es-module/test-require-module.js @@ -3,16 +3,6 @@ const common = require('../common'); const assert = require('assert'); -const path = require('path'); - -// Only the first load will trigger the warning. -common.expectWarning( - 'ExperimentalWarning', - `CommonJS module ${__filename} is loading ES Module ` + - `${path.resolve(__dirname, '../fixtures/es-module-loaders/module-named-exports.mjs')} using require().\n` + - 'Support for loading ES Module in require() is an experimental feature ' + - 'and might change at any time' -); // Test named exports. { diff --git a/test/es-module/test-require-node-modules-warning.js b/test/es-module/test-require-node-modules-warning.js index 837f174fd28950..bb3e38b7d92071 100644 --- a/test/es-module/test-require-node-modules-warning.js +++ b/test/es-module/test-require-node-modules-warning.js @@ -1,7 +1,7 @@ 'use strict'; -// This checks the experimental warning for require(esm) is disabled when the -// require() comes from node_modules. +// This checks the warning and the stack trace emitted by +// --trace-require-module=no-node-modules. require('../common'); const { spawnSyncAndAssert } = require('../common/child_process'); const fixtures = require('../common/fixtures'); @@ -14,7 +14,10 @@ const warningRE = /Support for loading ES Module in require\(\)/; // require() in non-node_modules -> esm in node_modules should warn. spawnSyncAndAssert( process.execPath, - [fixtures.path('es-modules', 'test_node_modules', 'require-esm.js')], + [ + '--trace-require-module=no-node-modules', + fixtures.path('es-modules', 'test_node_modules', 'require-esm.js'), + ], { trim: true, stderr: warningRE, @@ -26,7 +29,10 @@ spawnSyncAndAssert( // should not warn. spawnSyncAndAssert( process.execPath, - [fixtures.path('es-modules', 'test_node_modules', 'require-require-esm.js')], + [ + '--trace-require-module=no-node-modules', + fixtures.path('es-modules', 'test_node_modules', 'require-require-esm.js'), + ], { trim: true, stderr: '', @@ -38,7 +44,10 @@ spawnSyncAndAssert( // should not warn. spawnSyncAndAssert( process.execPath, - [fixtures.path('es-modules', 'test_node_modules', 'import-require-esm.mjs')], + [ + '--trace-require-module=no-node-modules', + fixtures.path('es-modules', 'test_node_modules', 'import-require-esm.mjs'), + ], { trim: true, stderr: '', @@ -50,7 +59,10 @@ spawnSyncAndAssert( // require() in node_modules -> esm in node_modules should not warn. spawnSyncAndAssert( process.execPath, - [fixtures.path('es-modules', 'test_node_modules', 'import-import-require-esm.mjs')], + [ + '--trace-require-module=no-node-modules', + fixtures.path('es-modules', 'test_node_modules', 'import-import-require-esm.mjs'), + ], { trim: true, stderr: '', diff --git a/test/es-module/test-typescript-commonjs.mjs b/test/es-module/test-typescript-commonjs.mjs index c24576c4a0c392..b31f235b4e8e9a 100644 --- a/test/es-module/test-typescript-commonjs.mjs +++ b/test/es-module/test-typescript-commonjs.mjs @@ -116,7 +116,6 @@ test('execute a .cts file importing a .mts file export', async () => { fixtures.path('typescript/cts/test-require-mts-module.cts'), ]); - match(result.stderr, /Support for loading ES Module in require\(\) is an experimental feature and might change at any time/); match(result.stdout, /Hello, TypeScript!/); strictEqual(result.code, 0); }); diff --git a/test/es-module/test-typescript.mjs b/test/es-module/test-typescript.mjs index a7ca6d70dd5e10..44a5431c37b479 100644 --- a/test/es-module/test-typescript.mjs +++ b/test/es-module/test-typescript.mjs @@ -239,7 +239,6 @@ test('expect failure of a TypeScript file requiring ES module syntax', async () fixtures.path('typescript/ts/test-require-module.ts'), ]); - match(result.stderr, /Support for loading ES Module in require\(\) is an experimental feature and might change at any time/); match(result.stdout, /Hello, TypeScript!/); strictEqual(result.code, 0); }); @@ -319,7 +318,6 @@ test('execute a TypeScript file with CommonJS syntax requiring .mts using requir fixtures.path('typescript/ts/test-require-mts.ts'), ]); - match(result.stderr, /Support for loading ES Module in require\(\) is an experimental feature and might change at any time/); match(result.stdout, /Hello, TypeScript!/); strictEqual(result.code, 0); }); diff --git a/test/fixtures/dotenv/no-final-newline-single-quotes.env b/test/fixtures/dotenv/no-final-newline-single-quotes.env new file mode 100644 index 00000000000000..4f1b37d7741e29 --- /dev/null +++ b/test/fixtures/dotenv/no-final-newline-single-quotes.env @@ -0,0 +1 @@ +BASIC='basic' \ No newline at end of file diff --git a/test/fixtures/dotenv/no-final-newline.env b/test/fixtures/dotenv/no-final-newline.env new file mode 100644 index 00000000000000..ef996552bd9c90 --- /dev/null +++ b/test/fixtures/dotenv/no-final-newline.env @@ -0,0 +1 @@ +BASIC="basic" \ No newline at end of file diff --git a/test/fixtures/dotenv/node-options.env b/test/fixtures/dotenv/node-options.env index f74ac01bc28de7..bd3be820f64e2b 100644 --- a/test/fixtures/dotenv/node-options.env +++ b/test/fixtures/dotenv/node-options.env @@ -1,6 +1,6 @@ CUSTOM_VARIABLE=hello-world NODE_NO_WARNINGS=1 -NODE_OPTIONS="--experimental-permission --allow-fs-read=*" +NODE_OPTIONS="--permission --allow-fs-read=*" TZ=Pacific/Honolulu UV_THREADPOOL_SIZE=5 BASIC=overridden diff --git a/test/fixtures/es-module-loaders/hooks-custom.mjs b/test/fixtures/es-module-loaders/hooks-custom.mjs index 3c38649a88794f..5109d20f4d3711 100644 --- a/test/fixtures/es-module-loaders/hooks-custom.mjs +++ b/test/fixtures/es-module-loaders/hooks-custom.mjs @@ -105,5 +105,12 @@ export function load(url, context, next) { }; } + if (url.endsWith('esmHook/maximumCallStack.mjs')) { + function recurse() { + recurse(); + } + recurse(); + } + return next(url); } diff --git a/test/fixtures/es-modules/reference-error-esm.js b/test/fixtures/es-modules/reference-error-esm.js new file mode 100644 index 00000000000000..baf773c78970ad --- /dev/null +++ b/test/fixtures/es-modules/reference-error-esm.js @@ -0,0 +1,5 @@ +// This module is invalid in both ESM and CJS, because +// 'exports' are not defined in ESM, while require cannot be +// redeclared in CJS. +Object.defineProperty(exports, "__esModule", { value: true }); +const require = () => {}; diff --git a/test/fixtures/es-modules/runtime-error-esm.js b/test/fixtures/es-modules/runtime-error-esm.js new file mode 100644 index 00000000000000..1df3cc469adbfc --- /dev/null +++ b/test/fixtures/es-modules/runtime-error-esm.js @@ -0,0 +1,2 @@ +import 'node:fs'; // Forces it to be recognized as ESM. +throw new Error('hello'); diff --git a/test/fixtures/es-modules/synchronous-rejection-esm.js b/test/fixtures/es-modules/synchronous-rejection-esm.js new file mode 100644 index 00000000000000..34d066037e2140 --- /dev/null +++ b/test/fixtures/es-modules/synchronous-rejection-esm.js @@ -0,0 +1,2 @@ +import 'node:fs'; // Forces it to be recognized as ESM. +Promise.reject('reject!'); diff --git a/test/fixtures/icu/localizationData-v74.2.json b/test/fixtures/icu/localizationData-v74.2.json new file mode 100644 index 00000000000000..65671ba5acb299 --- /dev/null +++ b/test/fixtures/icu/localizationData-v74.2.json @@ -0,0 +1,128 @@ +{ + "dateStrings": { + "en": "Fri Jul 25 1980 01:35:33 GMT+0100 (Central European Standard Time)", + "zh": "Fri Jul 25 1980 01:35:33 GMT+0100 (中欧标准时间)", + "hi": "Fri Jul 25 1980 01:35:33 GMT+0100 (मध्य यूरोपीय मानक समय)", + "es": "Fri Jul 25 1980 01:35:33 GMT+0100 (hora estándar de Europa central)", + "fr": "Fri Jul 25 1980 01:35:33 GMT+0100 (heure normale d’Europe centrale)", + "ar": "Fri Jul 25 1980 01:35:33 GMT+0100 (توقيت وسط أوروبا الرسمي)", + "bn": "Fri Jul 25 1980 01:35:33 GMT+0100 (মধ্য ইউরোপীয় মানক সময়)", + "ru": "Fri Jul 25 1980 01:35:33 GMT+0100 (Центральная Европа, стандартное время)", + "pt": "Fri Jul 25 1980 01:35:33 GMT+0100 (Horário Padrão da Europa Central)", + "ur": "Fri Jul 25 1980 01:35:33 GMT+0100 (وسطی یورپ کا معیاری وقت)", + "id": "Fri Jul 25 1980 01:35:33 GMT+0100 (Waktu Standar Eropa Tengah)", + "de": "Fri Jul 25 1980 01:35:33 GMT+0100 (Mitteleuropäische Normalzeit)", + "ja": "Fri Jul 25 1980 01:35:33 GMT+0100 (中央ヨーロッパ標準時)", + "pcm": "Fri Jul 25 1980 01:35:33 GMT+0100 (Mídúl Yúrop Fíksd Taim)", + "mr": "Fri Jul 25 1980 01:35:33 GMT+0100 (मध्‍य युरोपियन प्रमाण वेळ)", + "te": "Fri Jul 25 1980 01:35:33 GMT+0100 (సెంట్రల్ యూరోపియన్ ప్రామాణిక సమయం)" + }, + "dateTimeFormats": { + "en": "7/25/1980, 1:35:33 AM", + "zh": "1980/7/25 01:35:33", + "hi": "25/7/1980, 1:35:33 am", + "es": "25/7/1980, 1:35:33", + "fr": "25/07/1980 01:35:33", + "ar": "٢٥‏/٧‏/١٩٨٠، ١:٣٥:٣٣ ص", + "bn": "২৫/৭/১৯৮০, ১:৩৫:৩৩ AM", + "ru": "25.07.1980, 01:35:33", + "pt": "25/07/1980, 01:35:33", + "ur": "25/7/1980، 1:35:33 AM", + "id": "25/7/1980, 01.35.33", + "de": "25.7.1980, 01:35:33", + "ja": "1980/7/25 1:35:33", + "pcm": "25/7/1980 01:35:33", + "mr": "२५/७/१९८०, १:३५:३३ AM", + "te": "25/7/1980 1:35:33 AM" + }, + "dateFormats": { + "en": "7/25/1980", + "zh": "1980/7/25", + "hi": "25/7/1980", + "es": "25/7/1980", + "fr": "25/07/1980", + "ar": "٢٥‏/٧‏/١٩٨٠", + "bn": "২৫/৭/১৯৮০", + "ru": "25.07.1980", + "pt": "25/07/1980", + "ur": "25/7/1980", + "id": "25/7/1980", + "de": "25.7.1980", + "ja": "1980/7/25", + "pcm": "25/7/1980", + "mr": "२५/७/१९८०", + "te": "25/7/1980" + }, + "displayNames": { + "en": "Switzerland", + "zh": "瑞士", + "hi": "स्विट्ज़रलैंड", + "es": "Suiza", + "fr": "Suisse", + "ar": "سويسرا", + "bn": "সুইজারল্যান্ড", + "ru": "Швейцария", + "pt": "Suíça", + "ur": "سوئٹزر لینڈ", + "id": "Swiss", + "de": "Schweiz", + "ja": "スイス", + "pcm": "Swítsaland", + "mr": "स्वित्झर्लंड", + "te": "స్విట్జర్లాండ్" + }, + "numberFormats": { + "en": "275,760.913", + "zh": "275,760.913", + "hi": "2,75,760.913", + "es": "275.760,913", + "fr": "275 760,913", + "ar": "٢٧٥٬٧٦٠٫٩١٣", + "bn": "২,৭৫,৭৬০.৯১৩", + "ru": "275 760,913", + "pt": "275.760,913", + "ur": "275,760.913", + "id": "275.760,913", + "de": "275.760,913", + "ja": "275,760.913", + "pcm": "275,760.913", + "mr": "२,७५,७६०.९१३", + "te": "2,75,760.913" + }, + "pluralRules": { + "en": "other", + "zh": "other", + "hi": "one", + "es": "other", + "fr": "one", + "ar": "zero", + "bn": "one", + "ru": "many", + "pt": "one", + "ur": "other", + "id": "other", + "de": "other", + "ja": "other", + "pcm": "one", + "mr": "other", + "te": "other" + }, + "relativeTime": { + "en": "586,920.617 hours ago", + "zh": "586,920.617小时前", + "hi": "5,86,920.617 घंटे पहले", + "es": "hace 586.920,617 horas", + "fr": "il y a 586 920,617 heures", + "ar": "قبل ٥٨٦٬٩٢٠٫٦١٧ ساعة", + "bn": "৫,৮৬,৯২০.৬১৭ ঘন্টা আগে", + "ru": "586 920,617 часа назад", + "pt": "há 586.920,617 horas", + "ur": "586,920.617 گھنٹے پہلے", + "id": "586.920,617 jam yang lalu", + "de": "vor 586.920,617 Stunden", + "ja": "586,920.617 時間前", + "pcm": "586,920.617 áwa wé dọ́n pas", + "mr": "५,८६,९२०.६१७ तासांपूर्वी", + "te": "5,86,920.617 గంటల క్రితం" + } +} diff --git a/test/fixtures/icu/localizationData-v75.1.json b/test/fixtures/icu/localizationData-v75.1.json new file mode 100644 index 00000000000000..65671ba5acb299 --- /dev/null +++ b/test/fixtures/icu/localizationData-v75.1.json @@ -0,0 +1,128 @@ +{ + "dateStrings": { + "en": "Fri Jul 25 1980 01:35:33 GMT+0100 (Central European Standard Time)", + "zh": "Fri Jul 25 1980 01:35:33 GMT+0100 (中欧标准时间)", + "hi": "Fri Jul 25 1980 01:35:33 GMT+0100 (मध्य यूरोपीय मानक समय)", + "es": "Fri Jul 25 1980 01:35:33 GMT+0100 (hora estándar de Europa central)", + "fr": "Fri Jul 25 1980 01:35:33 GMT+0100 (heure normale d’Europe centrale)", + "ar": "Fri Jul 25 1980 01:35:33 GMT+0100 (توقيت وسط أوروبا الرسمي)", + "bn": "Fri Jul 25 1980 01:35:33 GMT+0100 (মধ্য ইউরোপীয় মানক সময়)", + "ru": "Fri Jul 25 1980 01:35:33 GMT+0100 (Центральная Европа, стандартное время)", + "pt": "Fri Jul 25 1980 01:35:33 GMT+0100 (Horário Padrão da Europa Central)", + "ur": "Fri Jul 25 1980 01:35:33 GMT+0100 (وسطی یورپ کا معیاری وقت)", + "id": "Fri Jul 25 1980 01:35:33 GMT+0100 (Waktu Standar Eropa Tengah)", + "de": "Fri Jul 25 1980 01:35:33 GMT+0100 (Mitteleuropäische Normalzeit)", + "ja": "Fri Jul 25 1980 01:35:33 GMT+0100 (中央ヨーロッパ標準時)", + "pcm": "Fri Jul 25 1980 01:35:33 GMT+0100 (Mídúl Yúrop Fíksd Taim)", + "mr": "Fri Jul 25 1980 01:35:33 GMT+0100 (मध्‍य युरोपियन प्रमाण वेळ)", + "te": "Fri Jul 25 1980 01:35:33 GMT+0100 (సెంట్రల్ యూరోపియన్ ప్రామాణిక సమయం)" + }, + "dateTimeFormats": { + "en": "7/25/1980, 1:35:33 AM", + "zh": "1980/7/25 01:35:33", + "hi": "25/7/1980, 1:35:33 am", + "es": "25/7/1980, 1:35:33", + "fr": "25/07/1980 01:35:33", + "ar": "٢٥‏/٧‏/١٩٨٠، ١:٣٥:٣٣ ص", + "bn": "২৫/৭/১৯৮০, ১:৩৫:৩৩ AM", + "ru": "25.07.1980, 01:35:33", + "pt": "25/07/1980, 01:35:33", + "ur": "25/7/1980، 1:35:33 AM", + "id": "25/7/1980, 01.35.33", + "de": "25.7.1980, 01:35:33", + "ja": "1980/7/25 1:35:33", + "pcm": "25/7/1980 01:35:33", + "mr": "२५/७/१९८०, १:३५:३३ AM", + "te": "25/7/1980 1:35:33 AM" + }, + "dateFormats": { + "en": "7/25/1980", + "zh": "1980/7/25", + "hi": "25/7/1980", + "es": "25/7/1980", + "fr": "25/07/1980", + "ar": "٢٥‏/٧‏/١٩٨٠", + "bn": "২৫/৭/১৯৮০", + "ru": "25.07.1980", + "pt": "25/07/1980", + "ur": "25/7/1980", + "id": "25/7/1980", + "de": "25.7.1980", + "ja": "1980/7/25", + "pcm": "25/7/1980", + "mr": "२५/७/१९८०", + "te": "25/7/1980" + }, + "displayNames": { + "en": "Switzerland", + "zh": "瑞士", + "hi": "स्विट्ज़रलैंड", + "es": "Suiza", + "fr": "Suisse", + "ar": "سويسرا", + "bn": "সুইজারল্যান্ড", + "ru": "Швейцария", + "pt": "Suíça", + "ur": "سوئٹزر لینڈ", + "id": "Swiss", + "de": "Schweiz", + "ja": "スイス", + "pcm": "Swítsaland", + "mr": "स्वित्झर्लंड", + "te": "స్విట్జర్లాండ్" + }, + "numberFormats": { + "en": "275,760.913", + "zh": "275,760.913", + "hi": "2,75,760.913", + "es": "275.760,913", + "fr": "275 760,913", + "ar": "٢٧٥٬٧٦٠٫٩١٣", + "bn": "২,৭৫,৭৬০.৯১৩", + "ru": "275 760,913", + "pt": "275.760,913", + "ur": "275,760.913", + "id": "275.760,913", + "de": "275.760,913", + "ja": "275,760.913", + "pcm": "275,760.913", + "mr": "२,७५,७६०.९१३", + "te": "2,75,760.913" + }, + "pluralRules": { + "en": "other", + "zh": "other", + "hi": "one", + "es": "other", + "fr": "one", + "ar": "zero", + "bn": "one", + "ru": "many", + "pt": "one", + "ur": "other", + "id": "other", + "de": "other", + "ja": "other", + "pcm": "one", + "mr": "other", + "te": "other" + }, + "relativeTime": { + "en": "586,920.617 hours ago", + "zh": "586,920.617小时前", + "hi": "5,86,920.617 घंटे पहले", + "es": "hace 586.920,617 horas", + "fr": "il y a 586 920,617 heures", + "ar": "قبل ٥٨٦٬٩٢٠٫٦١٧ ساعة", + "bn": "৫,৮৬,৯২০.৬১৭ ঘন্টা আগে", + "ru": "586 920,617 часа назад", + "pt": "há 586.920,617 horas", + "ur": "586,920.617 گھنٹے پہلے", + "id": "586.920,617 jam yang lalu", + "de": "vor 586.920,617 Stunden", + "ja": "586,920.617 時間前", + "pcm": "586,920.617 áwa wé dọ́n pas", + "mr": "५,८६,९२०.६१७ तासांपूर्वी", + "te": "5,86,920.617 గంటల క్రితం" + } +} diff --git a/test/fixtures/icu/localizationData-v76.1.json b/test/fixtures/icu/localizationData-v76.1.json new file mode 100644 index 00000000000000..cb519d2bea2faa --- /dev/null +++ b/test/fixtures/icu/localizationData-v76.1.json @@ -0,0 +1,128 @@ +{ + "dateStrings": { + "en": "Fri Jul 25 1980 01:35:33 GMT+0100 (Central European Standard Time)", + "zh": "Fri Jul 25 1980 01:35:33 GMT+0100 (中欧标准时间)", + "hi": "Fri Jul 25 1980 01:35:33 GMT+0100 (मध्य यूरोपीय मानक समय)", + "es": "Fri Jul 25 1980 01:35:33 GMT+0100 (hora estándar de Europa central)", + "fr": "Fri Jul 25 1980 01:35:33 GMT+0100 (heure normale d’Europe centrale)", + "ar": "Fri Jul 25 1980 01:35:33 GMT+0100 (توقيت وسط أوروبا الرسمي)", + "bn": "Fri Jul 25 1980 01:35:33 GMT+0100 (মধ্য ইউরোপীয় মানক সময়)", + "ru": "Fri Jul 25 1980 01:35:33 GMT+0100 (Центральная Европа, стандартное время)", + "pt": "Fri Jul 25 1980 01:35:33 GMT+0100 (Horário Padrão da Europa Central)", + "ur": "Fri Jul 25 1980 01:35:33 GMT+0100 (وسطی یورپ کا معیاری وقت)", + "id": "Fri Jul 25 1980 01:35:33 GMT+0100 (Waktu Standar Eropa Tengah)", + "de": "Fri Jul 25 1980 01:35:33 GMT+0100 (Mitteleuropäische Normalzeit)", + "ja": "Fri Jul 25 1980 01:35:33 GMT+0100 (中央ヨーロッパ標準時)", + "pcm": "Fri Jul 25 1980 01:35:33 GMT+0100 (Mídúl Yúrop Fíksd Taim)", + "mr": "Fri Jul 25 1980 01:35:33 GMT+0100 (मध्‍य युरोपियन प्रमाण वेळ)", + "te": "Fri Jul 25 1980 01:35:33 GMT+0100 (సెంట్రల్ యూరోపియన్ ప్రామాణిక సమయం)" + }, + "dateTimeFormats": { + "en": "7/25/1980, 1:35:33 AM", + "zh": "1980/7/25 01:35:33", + "hi": "25/7/1980, 1:35:33 am", + "es": "25/7/1980, 1:35:33", + "fr": "25/07/1980 01:35:33", + "ar": "25‏/7‏/1980، 1:35:33 ص", + "bn": "২৫/৭/১৯৮০, ১:৩৫:৩৩ AM", + "ru": "25.07.1980, 01:35:33", + "pt": "25/07/1980, 01:35:33", + "ur": "25/7/1980، 1:35:33 AM", + "id": "25/7/1980, 01.35.33", + "de": "25.7.1980, 01:35:33", + "ja": "1980/7/25 1:35:33", + "pcm": "25/7/1980 01:35:33", + "mr": "२५/७/१९८०, १:३५:३३ AM", + "te": "25/7/1980 1:35:33 AM" + }, + "dateFormats": { + "en": "7/25/1980", + "zh": "1980/7/25", + "hi": "25/7/1980", + "es": "25/7/1980", + "fr": "25/07/1980", + "ar": "25‏/7‏/1980", + "bn": "২৫/৭/১৯৮০", + "ru": "25.07.1980", + "pt": "25/07/1980", + "ur": "25/7/1980", + "id": "25/7/1980", + "de": "25.7.1980", + "ja": "1980/7/25", + "pcm": "25/7/1980", + "mr": "२५/७/१९८०", + "te": "25/7/1980" + }, + "displayNames": { + "en": "Switzerland", + "zh": "瑞士", + "hi": "स्विट्ज़रलैंड", + "es": "Suiza", + "fr": "Suisse", + "ar": "سويسرا", + "bn": "সুইজারল্যান্ড", + "ru": "Швейцария", + "pt": "Suíça", + "ur": "سوئٹزر لینڈ", + "id": "Swiss", + "de": "Schweiz", + "ja": "スイス", + "pcm": "Swítsaland", + "mr": "स्वित्झर्लंड", + "te": "స్విట్జర్లాండ్" + }, + "numberFormats": { + "en": "275,760.913", + "zh": "275,760.913", + "hi": "2,75,760.913", + "es": "275.760,913", + "fr": "275 760,913", + "ar": "275,760.913", + "bn": "২,৭৫,৭৬০.৯১৩", + "ru": "275 760,913", + "pt": "275.760,913", + "ur": "275,760.913", + "id": "275.760,913", + "de": "275.760,913", + "ja": "275,760.913", + "pcm": "275,760.913", + "mr": "२,७५,७६०.९१३", + "te": "2,75,760.913" + }, + "pluralRules": { + "en": "other", + "zh": "other", + "hi": "one", + "es": "other", + "fr": "one", + "ar": "zero", + "bn": "one", + "ru": "many", + "pt": "one", + "ur": "other", + "id": "other", + "de": "other", + "ja": "other", + "pcm": "one", + "mr": "other", + "te": "other" + }, + "relativeTime": { + "en": "586,920.617 hours ago", + "zh": "586,920.617小时前", + "hi": "5,86,920.617 घंटे पहले", + "es": "hace 586.920,617 horas", + "fr": "il y a 586 920,617 heures", + "ar": "قبل 586,920.617 ساعة", + "bn": "৫,৮৬,৯২০.৬১৭ ঘন্টা আগে", + "ru": "586 920,617 часа назад", + "pt": "há 586.920,617 horas", + "ur": "586,920.617 گھنٹے پہلے", + "id": "586.920,617 jam yang lalu", + "de": "vor 586.920,617 Stunden", + "ja": "586,920.617 時間前", + "pcm": "586,920.617 áwa wé dọ́n pas", + "mr": "५,८६,९२०.६१७ तासांपूर्वी", + "te": "5,86,920.617 గంటల క్రితం" + } +} diff --git a/test/fixtures/packages/cjs-main-no-index/main.js b/test/fixtures/packages/cjs-main-no-index/main.js new file mode 100644 index 00000000000000..888cae37af95c5 --- /dev/null +++ b/test/fixtures/packages/cjs-main-no-index/main.js @@ -0,0 +1 @@ +module.exports = 42; diff --git a/test/fixtures/packages/cjs-main-no-index/other.js b/test/fixtures/packages/cjs-main-no-index/other.js new file mode 100644 index 00000000000000..8fa48276783484 --- /dev/null +++ b/test/fixtures/packages/cjs-main-no-index/other.js @@ -0,0 +1,3 @@ +const answer = require('./'); + +module.exports = answer+1; diff --git a/test/fixtures/packages/cjs-main-no-index/package.json b/test/fixtures/packages/cjs-main-no-index/package.json new file mode 100644 index 00000000000000..897d3f99daf7a2 --- /dev/null +++ b/test/fixtures/packages/cjs-main-no-index/package.json @@ -0,0 +1,5 @@ +{ + "name": "main-no-index", + "main": "./main.js", + "type":"commonjs" +} diff --git a/test/fixtures/packages/nested/package.json b/test/fixtures/packages/nested/package.json new file mode 100644 index 00000000000000..0ce6c71db42e8c --- /dev/null +++ b/test/fixtures/packages/nested/package.json @@ -0,0 +1 @@ +{"name": "package-with-sub-package"} diff --git a/test/fixtures/packages/nested/sub-pkg-cjs/index.js b/test/fixtures/packages/nested/sub-pkg-cjs/index.js new file mode 100644 index 00000000000000..679ad2d65ed570 --- /dev/null +++ b/test/fixtures/packages/nested/sub-pkg-cjs/index.js @@ -0,0 +1,3 @@ +const { findPackageJSON } = require('node:module'); + +module.exports = findPackageJSON('..', __filename); diff --git a/test/fixtures/packages/nested/sub-pkg-cjs/package.json b/test/fixtures/packages/nested/sub-pkg-cjs/package.json new file mode 100644 index 00000000000000..2dec7591cd6db8 --- /dev/null +++ b/test/fixtures/packages/nested/sub-pkg-cjs/package.json @@ -0,0 +1 @@ +{"name": "sub-package", "type": "commonjs"} diff --git a/test/fixtures/packages/nested/sub-pkg-esm/index.js b/test/fixtures/packages/nested/sub-pkg-esm/index.js new file mode 100644 index 00000000000000..546390de4bf3a5 --- /dev/null +++ b/test/fixtures/packages/nested/sub-pkg-esm/index.js @@ -0,0 +1,3 @@ +import { findPackageJSON } from 'node:module'; + +export default findPackageJSON('..', import.meta.url); diff --git a/test/fixtures/packages/nested/sub-pkg-esm/package.json b/test/fixtures/packages/nested/sub-pkg-esm/package.json new file mode 100644 index 00000000000000..c294ec5158824a --- /dev/null +++ b/test/fixtures/packages/nested/sub-pkg-esm/package.json @@ -0,0 +1 @@ +{"name": "sub-package", "type": "module"} diff --git a/test/fixtures/packages/root-types-field/index.js b/test/fixtures/packages/root-types-field/index.js new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/test/fixtures/packages/root-types-field/package.json b/test/fixtures/packages/root-types-field/package.json new file mode 100644 index 00000000000000..4af68442446dd8 --- /dev/null +++ b/test/fixtures/packages/root-types-field/package.json @@ -0,0 +1,5 @@ +{ + "name": "package-with-unrecognised-fields", + "type": "module", + "types": "./index.d.ts" +} diff --git a/test/fixtures/permission/fs-read.js b/test/fixtures/permission/fs-read.js index b189af295793e6..fa4ea1207f50bd 100644 --- a/test/fixtures/permission/fs-read.js +++ b/test/fixtures/permission/fs-read.js @@ -282,6 +282,18 @@ const regularFile = __filename; permission: 'FileSystemRead', resource: path.toNamespacedPath(blockedFolder), })); + assert.throws(() => { + fs.readdirSync(blockedFolder, { recursive: true }); + }, common.expectsError({ + code: 'ERR_ACCESS_DENIED', + permission: 'FileSystemRead', + resource: path.toNamespacedPath(blockedFolder), + })); + fs.readdir(blockedFolder, { recursive: true }, common.expectsError({ + code: 'ERR_ACCESS_DENIED', + permission: 'FileSystemRead', + resource: path.toNamespacedPath(blockedFolder), + })); assert.throws(() => { fs.readdirSync(blockedFolder); }, common.expectsError({ diff --git a/test/fixtures/permission/main-module.js b/test/fixtures/permission/main-module.js new file mode 100644 index 00000000000000..cac52e04dddd24 --- /dev/null +++ b/test/fixtures/permission/main-module.js @@ -0,0 +1 @@ +require('./required-module'); \ No newline at end of file diff --git a/test/fixtures/permission/main-module.mjs b/test/fixtures/permission/main-module.mjs new file mode 100644 index 00000000000000..e7c28f7f6cab19 --- /dev/null +++ b/test/fixtures/permission/main-module.mjs @@ -0,0 +1 @@ +import './required-module.mjs'; \ No newline at end of file diff --git a/test/fixtures/permission/required-module.js b/test/fixtures/permission/required-module.js new file mode 100644 index 00000000000000..e8dbf442c5b1a2 --- /dev/null +++ b/test/fixtures/permission/required-module.js @@ -0,0 +1 @@ +console.log('ok'); \ No newline at end of file diff --git a/test/fixtures/permission/required-module.mjs b/test/fixtures/permission/required-module.mjs new file mode 100644 index 00000000000000..e8dbf442c5b1a2 --- /dev/null +++ b/test/fixtures/permission/required-module.mjs @@ -0,0 +1 @@ +console.log('ok'); \ No newline at end of file diff --git a/test/fixtures/process-env/define.js b/test/fixtures/process-env/define.js new file mode 100644 index 00000000000000..59d5744157696a --- /dev/null +++ b/test/fixtures/process-env/define.js @@ -0,0 +1,6 @@ +Object.defineProperty(process.env, 'FOO', { + configurable: true, + enumerable: true, + writable: true, + value: 'FOO', +}); diff --git a/test/fixtures/process-env/delete.js b/test/fixtures/process-env/delete.js new file mode 100644 index 00000000000000..19ea5160513f08 --- /dev/null +++ b/test/fixtures/process-env/delete.js @@ -0,0 +1 @@ +delete process.env.FOO; diff --git a/test/fixtures/process-env/enumerate.js b/test/fixtures/process-env/enumerate.js new file mode 100644 index 00000000000000..ea6f3972bf9470 --- /dev/null +++ b/test/fixtures/process-env/enumerate.js @@ -0,0 +1,3 @@ +Object.keys(process.env); + +const env = { ...process.env }; diff --git a/test/fixtures/process-env/get.js b/test/fixtures/process-env/get.js new file mode 100644 index 00000000000000..e0257a022be4eb --- /dev/null +++ b/test/fixtures/process-env/get.js @@ -0,0 +1,2 @@ +const foo = process.env.FOO; +const bar = process.env.BAR; diff --git a/test/fixtures/process-env/query.js b/test/fixtures/process-env/query.js new file mode 100644 index 00000000000000..1e3fc9b79c3d04 --- /dev/null +++ b/test/fixtures/process-env/query.js @@ -0,0 +1,3 @@ +const foo = 'FOO' in process.env; +const bar = Object.hasOwn(process.env, 'BAR'); +const baz = process.env.hasOwnProperty('BAZ'); diff --git a/test/fixtures/process-env/set.js b/test/fixtures/process-env/set.js new file mode 100644 index 00000000000000..a9863742d187e2 --- /dev/null +++ b/test/fixtures/process-env/set.js @@ -0,0 +1 @@ +process.env.FOO = "FOO"; diff --git a/test/fixtures/test-runner/output/abort_hooks.snapshot b/test/fixtures/test-runner/output/abort_hooks.snapshot index 278b5e5fd36ca5..e318e36d9d56a4 100644 --- a/test/fixtures/test-runner/output/abort_hooks.snapshot +++ b/test/fixtures/test-runner/output/abort_hooks.snapshot @@ -101,7 +101,7 @@ not ok 2 - 2 after describe * * * - async Promise.all (index 0) + * ... # Subtest: test 2 not ok 2 - test 2 @@ -122,7 +122,7 @@ not ok 2 - 2 after describe * * * - async Promise.all (index 0) + * ... 1..2 not ok 3 - 3 beforeEach describe diff --git a/test/fixtures/test-runner/output/global_after_should_fail_the_test.snapshot b/test/fixtures/test-runner/output/global_after_should_fail_the_test.snapshot index 3196f377b3d4bf..ee4d5f71072ba5 100644 --- a/test/fixtures/test-runner/output/global_after_should_fail_the_test.snapshot +++ b/test/fixtures/test-runner/output/global_after_should_fail_the_test.snapshot @@ -21,7 +21,6 @@ not ok 2 - /test/fixtures/test-runner/output/global_after_should_fail_the_test.j * * * - * ... 1..1 # tests 1 diff --git a/test/fixtures/test-runner/output/hooks.snapshot b/test/fixtures/test-runner/output/hooks.snapshot index 6b9d6d26a90e39..be8d1b210c60e4 100644 --- a/test/fixtures/test-runner/output/hooks.snapshot +++ b/test/fixtures/test-runner/output/hooks.snapshot @@ -77,7 +77,6 @@ not ok 3 - before throws * * * - * ... # Subtest: before throws - no subtests not ok 4 - before throws - no subtests @@ -97,7 +96,6 @@ not ok 4 - before throws - no subtests * * * - * ... # Subtest: after throws # Subtest: 1 @@ -129,6 +127,7 @@ not ok 5 - after throws * * * + * ... # Subtest: after throws - no subtests not ok 6 - after throws - no subtests @@ -149,6 +148,7 @@ not ok 6 - after throws - no subtests * * * + * ... # Subtest: beforeEach throws # Subtest: 1 @@ -167,9 +167,9 @@ not ok 6 - after throws - no subtests * * * - async Promise.all (index 0) * * + new Promise () ... # Subtest: 2 not ok 2 - 2 @@ -188,6 +188,8 @@ not ok 6 - after throws - no subtests * * * + * + async Promise.all (index 0) ... 1..2 not ok 7 - beforeEach throws @@ -482,7 +484,6 @@ not ok 15 - t.after throws * * * - * ... # Subtest: t.after throws - no subtests not ok 16 - t.after throws - no subtests @@ -502,7 +503,6 @@ not ok 16 - t.after throws - no subtests * * * - * ... # Subtest: t.beforeEach throws # Subtest: 1 @@ -765,7 +765,6 @@ not ok 24 - run after when before throws * * * - * ... # Subtest: test hooks - async # Subtest: 1 diff --git a/test/fixtures/test-runner/output/hooks_spec_reporter.snapshot b/test/fixtures/test-runner/output/hooks_spec_reporter.snapshot index b5c5ab7d1965e5..ea916c2ee754c4 100644 --- a/test/fixtures/test-runner/output/hooks_spec_reporter.snapshot +++ b/test/fixtures/test-runner/output/hooks_spec_reporter.snapshot @@ -26,7 +26,6 @@ * * * - * before throws - no subtests (*ms) Error: before @@ -38,7 +37,6 @@ * * * - * after throws 1 (*ms) @@ -55,6 +53,7 @@ * * * + * after throws - no subtests (*ms) Error: after @@ -67,6 +66,7 @@ * * * + * beforeEach throws 1 (*ms) @@ -78,9 +78,9 @@ * * * - at async Promise.all (index 0) * * + at new Promise () 2 (*ms) Error: beforeEach @@ -92,6 +92,8 @@ * * * + * + at async Promise.all (index 0) beforeEach throws (*ms) afterEach throws @@ -242,7 +244,6 @@ * * * - * t.after throws - no subtests (*ms) Error: after @@ -255,7 +256,6 @@ * * * - * t.beforeEach throws 1 (*ms) @@ -390,7 +390,6 @@ * * * - * test hooks - async 1 (*ms) @@ -430,7 +429,6 @@ * * * - * * before throws - no subtests (*ms) @@ -443,7 +441,6 @@ * * * - * * after throws (*ms) @@ -457,6 +454,7 @@ * * * + * * after throws - no subtests (*ms) @@ -470,6 +468,7 @@ * * * + * * 1 (*ms) @@ -481,9 +480,9 @@ * * * - at async Promise.all (index 0) * * + at new Promise () * 2 (*ms) @@ -496,6 +495,8 @@ * * * + * + at async Promise.all (index 0) * 1 (*ms) @@ -633,7 +634,6 @@ * * * - * * t.after throws - no subtests (*ms) @@ -647,7 +647,6 @@ * * * - * * 1 (*ms) @@ -776,4 +775,3 @@ * * * - * diff --git a/test/fixtures/typescript/ts/test-get-callsite-explicit.ts b/test/fixtures/typescript/ts/test-get-callsite-explicit.ts new file mode 100644 index 00000000000000..331495419addae --- /dev/null +++ b/test/fixtures/typescript/ts/test-get-callsite-explicit.ts @@ -0,0 +1,10 @@ +const { getCallSites } = require('node:util'); + +interface CallSite { + A; + B; +} + +const callSite = getCallSites({ sourceMap: false })[0]; + +console.log('mapCallSite: ', callSite); diff --git a/test/fixtures/typescript/ts/test-get-callsite.ts b/test/fixtures/typescript/ts/test-get-callsite.ts new file mode 100644 index 00000000000000..e3186ec88939d8 --- /dev/null +++ b/test/fixtures/typescript/ts/test-get-callsite.ts @@ -0,0 +1,10 @@ +const { getCallSites } = require('node:util'); + +interface CallSite { + A; + B; +} + +const callSite = getCallSites()[0]; + +console.log('getCallSite: ', callSite); diff --git a/test/fixtures/wpt/README.md b/test/fixtures/wpt/README.md index 3f684bf1b32c5e..cc97c8787a2c98 100644 --- a/test/fixtures/wpt/README.md +++ b/test/fixtures/wpt/README.md @@ -27,12 +27,12 @@ Last update: - performance-timeline: https://github.com/web-platform-tests/wpt/tree/94caab7038/performance-timeline - resource-timing: https://github.com/web-platform-tests/wpt/tree/22d38586d0/resource-timing - resources: https://github.com/web-platform-tests/wpt/tree/1e140d63ec/resources -- streams: https://github.com/web-platform-tests/wpt/tree/2bd26e124c/streams -- url: https://github.com/web-platform-tests/wpt/tree/6a39784534/url +- streams: https://github.com/web-platform-tests/wpt/tree/bc9dcbbf1a/streams +- url: https://github.com/web-platform-tests/wpt/tree/6fa3fe8a92/url - user-timing: https://github.com/web-platform-tests/wpt/tree/5ae85bf826/user-timing - wasm/jsapi: https://github.com/web-platform-tests/wpt/tree/cde25e7e3c/wasm/jsapi - wasm/webapi: https://github.com/web-platform-tests/wpt/tree/fd1b23eeaa/wasm/webapi -- WebCryptoAPI: https://github.com/web-platform-tests/wpt/tree/b81831169b/WebCryptoAPI +- WebCryptoAPI: https://github.com/web-platform-tests/wpt/tree/3e3374efde/WebCryptoAPI - webidl/ecmascript-binding/es-exceptions: https://github.com/web-platform-tests/wpt/tree/a370aad338/webidl/ecmascript-binding/es-exceptions - webmessaging/broadcastchannel: https://github.com/web-platform-tests/wpt/tree/6495c91853/webmessaging/broadcastchannel - webstorage: https://github.com/web-platform-tests/wpt/tree/9dafa89214/webstorage diff --git a/test/fixtures/wpt/WebCryptoAPI/crypto_key_cached_slots.https.any.js b/test/fixtures/wpt/WebCryptoAPI/crypto_key_cached_slots.https.any.js new file mode 100644 index 00000000000000..f573fac1c96606 --- /dev/null +++ b/test/fixtures/wpt/WebCryptoAPI/crypto_key_cached_slots.https.any.js @@ -0,0 +1,44 @@ +// META: title=WebCryptoAPI: CryptoKey cached ECMAScript objects + +// https://w3c.github.io/webcrypto/#dom-cryptokey-algorithm +// https://github.com/servo/servo/issues/33908 + +promise_test(function() { + return self.crypto.subtle.generateKey( + { + name: "AES-CTR", + length: 256, + }, + true, + ["encrypt"], + ).then( + function(key) { + let a = key.algorithm; + let b = key.algorithm; + assert_true(a === b); + }, + function(err) { + assert_unreached("generateKey threw an unexpected error: " + err.toString()); + } + ); +}, "CryptoKey.algorithm getter returns cached object"); + +promise_test(function() { + return self.crypto.subtle.generateKey( + { + name: "AES-CTR", + length: 256, + }, + true, + ["encrypt"], + ).then( + function(key) { + let a = key.usages; + let b = key.usages; + assert_true(a === b); + }, + function(err) { + assert_unreached("generateKey threw an unexpected error: " + err.toString()); + } + ); +}, "CryptoKey.usages getter returns cached object"); diff --git a/test/fixtures/wpt/WebCryptoAPI/cryptokey_algorithm_returns_cached_object.https.any.js b/test/fixtures/wpt/WebCryptoAPI/cryptokey_algorithm_returns_cached_object.https.any.js deleted file mode 100644 index b2d73fbab78d64..00000000000000 --- a/test/fixtures/wpt/WebCryptoAPI/cryptokey_algorithm_returns_cached_object.https.any.js +++ /dev/null @@ -1,24 +0,0 @@ -// META: title=WebCryptoAPI: CryptoKey.algorithm getter returns cached object - -// https://w3c.github.io/webcrypto/#dom-cryptokey-algorithm -// https://github.com/servo/servo/issues/33908 - -promise_test(function() { - return self.crypto.subtle.generateKey( - { - name: "AES-CTR", - length: 256, - }, - true, - ["encrypt"], - ).then( - function(key) { - let a = key.algorithm; - let b = key.algorithm; - assert_true(a === b); - }, - function(err) { - assert_unreached("generateKey threw an unexpected error: " + err.toString()); - } - ); -}, "CryptoKey.algorithm getter returns cached object"); \ No newline at end of file diff --git a/test/fixtures/wpt/WebCryptoAPI/digest/digest.https.any.js b/test/fixtures/wpt/WebCryptoAPI/digest/digest.https.any.js index 379d9311f30247..3b0972b1f2bf7d 100644 --- a/test/fixtures/wpt/WebCryptoAPI/digest/digest.https.any.js +++ b/test/fixtures/wpt/WebCryptoAPI/digest/digest.https.any.js @@ -118,6 +118,20 @@ }); }); + // Call digest() with empty algorithm object + Object.keys(sourceData).forEach(function(size) { + promise_test(function(test) { + var promise = subtle.digest({}, sourceData[size]) + .then(function(result) { + assert_unreached("digest() with missing algorithm name should have thrown a TypeError"); + }, function(err) { + assert_equals(err.name, "TypeError", "Missing algorithm name should cause TypeError") + }); + + return promise; + }, "empty algorithm object with " + size); + }); + done(); diff --git a/test/fixtures/wpt/WebCryptoAPI/generateKey/failures.js b/test/fixtures/wpt/WebCryptoAPI/generateKey/failures.js index e0f0279a69bb88..deaac636a99be5 100644 --- a/test/fixtures/wpt/WebCryptoAPI/generateKey/failures.js +++ b/test/fixtures/wpt/WebCryptoAPI/generateKey/failures.js @@ -166,6 +166,14 @@ function run_test(algorithmNames) { }); }); + // Empty algorithm should fail with TypeError + allValidUsages(["decrypt", "sign", "deriveBits"], true, []) // Small search space, shouldn't matter because should fail before used + .forEach(function(usages) { + [false, true, "RED", 7].forEach(function(extractable){ + testError({}, extractable, usages, "TypeError", "Empty algorithm"); + }); + }); + // Algorithms normalize okay, but usages bad (though not empty). // It shouldn't matter what other extractable is. Should fail diff --git a/test/fixtures/wpt/WebCryptoAPI/import_export/ec_importKey_failures_fixtures.js b/test/fixtures/wpt/WebCryptoAPI/import_export/ec_importKey_failures_fixtures.js index 796db364c2e2dc..a2d25e816cbd73 100644 --- a/test/fixtures/wpt/WebCryptoAPI/import_export/ec_importKey_failures_fixtures.js +++ b/test/fixtures/wpt/WebCryptoAPI/import_export/ec_importKey_failures_fixtures.js @@ -19,6 +19,14 @@ function getMismatchedJWKKeyData(algorithm) { return []; } +function getMismatchedKtyField(algorithm) { + return mismatchedKtyField[algorithm.name]; +} + +function getMismatchedCrvField(algorithm) { + return mismatchedCrvField[algorithm.name]; +} + var validKeyData = { "P-521": [ { @@ -201,3 +209,17 @@ var missingJWKFieldKeyData = { } ] }; + +// The 'kty' field doesn't match the key algorithm. +var mismatchedKtyField = { + "P-521": "OKP", + "P-256": "OKP", + "P-384": "OKP", +} + +// The 'kty' field doesn't match the key algorithm. +var mismatchedCrvField = { + "P-521": "P-256", + "P-256": "P-384", + "P-384": "P-521", +} diff --git a/test/fixtures/wpt/WebCryptoAPI/import_export/importKey_failures.js b/test/fixtures/wpt/WebCryptoAPI/import_export/importKey_failures.js index bba48401e616a5..453461a8771f51 100644 --- a/test/fixtures/wpt/WebCryptoAPI/import_export/importKey_failures.js +++ b/test/fixtures/wpt/WebCryptoAPI/import_export/importKey_failures.js @@ -192,4 +192,79 @@ function run_test(algorithmNames) { }); }); }); + + // Missing mandatory "name" field on algorithm + testVectors.forEach(function(vector) { + var name = vector.name; + // We just need *some* valid keydata, so pick the first available algorithm. + var algorithm = allAlgorithmSpecifiersFor(name)[0]; + getValidKeyData(algorithm).forEach(function(test) { + validUsages(vector, test.format, test.data).forEach(function(usages) { + [true, false].forEach(function(extractable) { + testError(test.format, {}, test.data, name, usages, extractable, "TypeError", "Missing algorithm name"); + }); + }); + }); + }); + + // The 'kty' field is not correct. + testVectors.forEach(function(vector) { + var name = vector.name; + allAlgorithmSpecifiersFor(name).forEach(function(algorithm) { + getValidKeyData(algorithm).forEach(function(test) { + if (test.format === "jwk") { + var data = {crv: test.data.crv, kty: test.data.kty, d: test.data.d, x: test.data.x, d: test.data.d}; + data.kty = getMismatchedKtyField(algorithm); + var usages = validUsages(vector, 'jwk', test.data); + testError('jwk', algorithm, data, name, usages, true, "DataError", "Invalid 'kty' field"); + } + }); + }); + }); + + // The 'ext' field is not correct. + testVectors.forEach(function(vector) { + var name = vector.name; + allAlgorithmSpecifiersFor(name).forEach(function(algorithm) { + getValidKeyData(algorithm).forEach(function(test) { + if (test.format === "jwk") { + var data = {crv: test.data.crv, kty: test.data.kty, d: test.data.d, x: test.data.x, d: test.data.d}; + data.ext = false; + var usages = validUsages(vector, 'jwk', test.data); + testError('jwk', algorithm, data, name, usages, true, "DataError", "Import from a non-extractable"); + } + }); + }); + }); + + // The 'use' field is incorrect. + testVectors.forEach(function(vector) { + var name = vector.name; + allAlgorithmSpecifiersFor(name).forEach(function(algorithm) { + getValidKeyData(algorithm).forEach(function(test) { + if (test.format === "jwk") { + var data = {crv: test.data.crv, kty: test.data.kty, d: test.data.d, x: test.data.x, d: test.data.d}; + data.use = "invalid"; + var usages = validUsages(vector, 'jwk', test.data); + if (usages.length !== 0) + testError('jwk', algorithm, data, name, usages, true, "DataError", "Invalid 'use' field"); + } + }); + }); + }); + + // The 'crv' field is incorrect. + testVectors.forEach(function(vector) { + var name = vector.name; + allAlgorithmSpecifiersFor(name).forEach(function(algorithm) { + getValidKeyData(algorithm).forEach(function(test) { + if (test.format === "jwk") { + var data = {crv: test.data.crv, kty: test.data.kty, d: test.data.d, x: test.data.x, d: test.data.d}; + data.crv = getMismatchedCrvField(algorithm) + var usages = validUsages(vector, 'jwk', test.data); + testError('jwk', algorithm, data, name, usages, true, "DataError", "Invalid 'crv' field"); + } + }); + }); + }); } diff --git a/test/fixtures/wpt/WebCryptoAPI/import_export/okp_importKey_failures_fixtures.js b/test/fixtures/wpt/WebCryptoAPI/import_export/okp_importKey_failures_fixtures.js index 88f552291cdf6b..9bedddc5c5944a 100644 --- a/test/fixtures/wpt/WebCryptoAPI/import_export/okp_importKey_failures_fixtures.js +++ b/test/fixtures/wpt/WebCryptoAPI/import_export/okp_importKey_failures_fixtures.js @@ -17,6 +17,14 @@ function getMismatchedJWKKeyData(algorithm) { return mismatchedJWKKeyData[algorithm.name]; } +function getMismatchedKtyField(algorithm) { + return mismatchedKtyField[algorithm.name]; +} + +function getMismatchedCrvField(algorithm) { + return mismatchedCrvField[algorithm.name]; +} + var validKeyData = { "Ed25519": [ { @@ -412,3 +420,19 @@ var mismatchedJWKKeyData = { }, ], } + +// The 'kty' field doesn't match the key algorithm. +var mismatchedKtyField = { + "Ed25519": "EC", + "X25519": "EC", + "Ed448": "EC", + "X448": "EC", +} + +// The 'kty' field doesn't match the key algorithm. +var mismatchedCrvField = { + "Ed25519": "X25519", + "X25519": "Ed448", + "Ed448": "X25519", + "X448": "Ed25519", +} diff --git a/test/fixtures/wpt/WebCryptoAPI/wrapKey_unwrapKey/wrapKey_unwrapKey.https.any.js b/test/fixtures/wpt/WebCryptoAPI/wrapKey_unwrapKey/wrapKey_unwrapKey.https.any.js index edb67d9e30fdba..9dd837b3bf60a9 100644 --- a/test/fixtures/wpt/WebCryptoAPI/wrapKey_unwrapKey/wrapKey_unwrapKey.https.any.js +++ b/test/fixtures/wpt/WebCryptoAPI/wrapKey_unwrapKey/wrapKey_unwrapKey.https.any.js @@ -1,238 +1,256 @@ // META: title=WebCryptoAPI: wrapKey() and unwrapKey() // META: timeout=long // META: script=../util/helpers.js +// META: script=wrapKey_unwrapKey_vectors.js // Tests for wrapKey and unwrapKey round tripping var subtle = self.crypto.subtle; - var wrappers = []; // Things we wrap (and upwrap) keys with - var keys = []; // Things to wrap and unwrap - - // Generate all the keys needed, then iterate over all combinations + var wrappers = {}; // Things we wrap (and upwrap) keys with + var keys = {}; // Things to wrap and unwrap + + // There are five algorithms that can be used for wrapKey/unwrapKey. + // Generate one key with typical parameters for each kind. + // + // Note: we don't need cryptographically strong parameters for things + // like IV - just any legal value will do. + var wrappingKeysParameters = [ + { + name: "RSA-OAEP", + importParameters: {name: "RSA-OAEP", hash: "SHA-256"}, + wrapParameters: {name: "RSA-OAEP", label: new Uint8Array(8)} + }, + { + name: "AES-CTR", + importParameters: {name: "AES-CTR", length: 128}, + wrapParameters: {name: "AES-CTR", counter: new Uint8Array(16), length: 64} + }, + { + name: "AES-CBC", + importParameters: {name: "AES-CBC", length: 128}, + wrapParameters: {name: "AES-CBC", iv: new Uint8Array(16)} + }, + { + name: "AES-GCM", + importParameters: {name: "AES-GCM", length: 128}, + wrapParameters: {name: "AES-GCM", iv: new Uint8Array(16), additionalData: new Uint8Array(16), tagLength: 128} + }, + { + name: "AES-KW", + importParameters: {name: "AES-KW", length: 128}, + wrapParameters: {name: "AES-KW"} + } + ]; + + var keysToWrapParameters = [ + {algorithm: {name: "RSASSA-PKCS1-v1_5", hash: "SHA-256"}, privateUsages: ["sign"], publicUsages: ["verify"]}, + {algorithm: {name: "RSA-PSS", hash: "SHA-256"}, privateUsages: ["sign"], publicUsages: ["verify"]}, + {algorithm: {name: "RSA-OAEP", hash: "SHA-256"}, privateUsages: ["decrypt"], publicUsages: ["encrypt"]}, + {algorithm: {name: "ECDSA", namedCurve: "P-256"}, privateUsages: ["sign"], publicUsages: ["verify"]}, + {algorithm: {name: "ECDH", namedCurve: "P-256"}, privateUsages: ["deriveBits"], publicUsages: []}, + {algorithm: {name: "Ed25519" }, privateUsages: ["sign"], publicUsages: ["verify"]}, + {algorithm: {name: "Ed448" }, privateUsages: ["sign"], publicUsages: ["verify"]}, + {algorithm: {name: "X25519" }, privateUsages: ["deriveBits"], publicUsages: []}, + {algorithm: {name: "X448" }, privateUsages: ["deriveBits"], publicUsages: []}, + {algorithm: {name: "AES-CTR", length: 128}, usages: ["encrypt", "decrypt"]}, + {algorithm: {name: "AES-CBC", length: 128}, usages: ["encrypt", "decrypt"]}, + {algorithm: {name: "AES-GCM", length: 128}, usages: ["encrypt", "decrypt"]}, + {algorithm: {name: "AES-KW", length: 128}, usages: ["wrapKey", "unwrapKey"]}, + {algorithm: {name: "HMAC", length: 128, hash: "SHA-256"}, usages: ["sign", "verify"]} + ]; + + // Import all the keys needed, then iterate over all combinations // to test wrapping and unwrapping. promise_test(function() { - return Promise.all([generateWrappingKeys(), generateKeysToWrap()]) + return Promise.all([importWrappingKeys(), importKeysToWrap()]) .then(function(results) { - var promises = []; - wrappers.forEach(function(wrapper) { - keys.forEach(function(key) { - promises.push(testWrapping(wrapper, key)); - }) + wrappingKeysParameters.filter((param) => Object.keys(wrappers).includes(param.name)).forEach(function(wrapperParam) { + var wrapper = wrappers[wrapperParam.name]; + keysToWrapParameters.filter((param) => Object.keys(keys).includes(param.algorithm.name)).forEach(function(toWrapParam) { + var keyData = keys[toWrapParam.algorithm.name]; + ["raw", "spki", "pkcs8"].filter((fmt) => Object.keys(keyData).includes(fmt)).forEach(function(keyDataFormat) { + var toWrap = keyData[keyDataFormat]; + [keyDataFormat, "jwk"].forEach(function(format) { + if (wrappingIsPossible(toWrap.originalExport[format], wrapper.parameters.name)) { + testWrapping(wrapper, toWrap, format); + if (canCompareNonExtractableKeys(toWrap.key)) { + testWrappingNonExtractable(wrapper, toWrap, format); + if (format === "jwk") { + testWrappingNonExtractableAsExtractable(wrapper, toWrap); + } + } + } + }); + }); + }); }); - return Promise.allSettled(promises); + return Promise.resolve("setup done"); + }, function(err) { + return Promise.reject("setup failed: " + err.name + ': "' + err.message + '"'); }); }, "setup"); - function generateWrappingKeys() { - // There are five algorithms that can be used for wrapKey/unwrapKey. - // Generate one key with typical parameters for each kind. - // - // Note: we don't need cryptographically strong parameters for things - // like IV - just any legal value will do. - var parameters = [ - { - name: "RSA-OAEP", - generateParameters: {name: "RSA-OAEP", modulusLength: 4096, publicExponent: new Uint8Array([1,0,1]), hash: "SHA-256"}, - wrapParameters: {name: "RSA-OAEP", label: new Uint8Array(8)} - }, - { - name: "AES-CTR", - generateParameters: {name: "AES-CTR", length: 128}, - wrapParameters: {name: "AES-CTR", counter: new Uint8Array(16), length: 64} - }, - { - name: "AES-CBC", - generateParameters: {name: "AES-CBC", length: 128}, - wrapParameters: {name: "AES-CBC", iv: new Uint8Array(16)} - }, - { - name: "AES-GCM", - generateParameters: {name: "AES-GCM", length: 128}, - wrapParameters: {name: "AES-GCM", iv: new Uint8Array(16), additionalData: new Uint8Array(16), tagLength: 128} - }, - { - name: "AES-KW", - generateParameters: {name: "AES-KW", length: 128}, - wrapParameters: {name: "AES-KW"} + function importWrappingKeys() { + // Using allSettled to skip unsupported test cases. + var promises = []; + wrappingKeysParameters.forEach(function(params) { + if (params.name === "RSA-OAEP") { // we have a key pair, not just a key + var algorithm = {name: "RSA-OAEP", hash: "SHA-256"}; + wrappers[params.name] = {wrappingKey: undefined, unwrappingKey: undefined, parameters: params}; + promises.push(subtle.importKey("spki", wrappingKeyData["RSA"].spki, algorithm, true, ["wrapKey"]) + .then(function(key) { + wrappers["RSA-OAEP"].wrappingKey = key; + })); + promises.push(subtle.importKey("pkcs8", wrappingKeyData["RSA"].pkcs8, algorithm, true, ["unwrapKey"]) + .then(function(key) { + wrappers["RSA-OAEP"].unwrappingKey = key; + })); + } else { + var algorithm = {name: params.name}; + promises.push(subtle.importKey("raw", wrappingKeyData["SYMMETRIC"].raw, algorithm, true, ["wrapKey", "unwrapKey"]) + .then(function(key) { + wrappers[params.name] = {wrappingKey: key, unwrappingKey: key, parameters: params}; + })); } - ]; - + }); // Using allSettled to skip unsupported test cases. - return Promise.allSettled(parameters.map(function(params) { - return subtle.generateKey(params.generateParameters, true, ["wrapKey", "unwrapKey"]) - .then(function(key) { - var wrapper; - if (params.name === "RSA-OAEP") { // we have a key pair, not just a key - wrapper = {wrappingKey: key.publicKey, unwrappingKey: key.privateKey, parameters: params}; - } else { - wrapper = {wrappingKey: key, unwrappingKey: key, parameters: params}; - } - wrappers.push(wrapper); - return true; - }) - })); + return Promise.allSettled(promises); } + async function importAndExport(format, keyData, algorithm, keyUsages, keyType) { + var importedKey; + try { + importedKey = await subtle.importKey(format, keyData, algorithm, true, keyUsages); + keys[algorithm.name][format] = { name: algorithm.name + " " + keyType, algorithm: algorithm, usages: keyUsages, key: importedKey, originalExport: {} }; + } catch (err) { + delete keys[algorithm.name][format]; + throw("Error importing " + algorithm.name + " " + keyType + " key in '" + format + "' -" + err.name + ': "' + err.message + '"'); + }; + try { + var exportedKey = await subtle.exportKey(format, importedKey); + keys[algorithm.name][format].originalExport[format] = exportedKey; + } catch (err) { + delete keys[algorithm.name][format]; + throw("Error exporting " + algorithm.name + " '" + format + "' key -" + err.name + ': "' + err.message + '"'); + }; + try { + var jwkExportedKey = await subtle.exportKey("jwk", importedKey); + keys[algorithm.name][format].originalExport["jwk"] = jwkExportedKey; + } catch (err) { + delete keys[algorithm.name][format]; + throw("Error exporting " + algorithm.name + " '" + format + "' key to 'jwk' -" + err.name + ': "' + err.message + '"'); + }; + } - function generateKeysToWrap() { - var parameters = [ - {algorithm: {name: "RSASSA-PKCS1-v1_5", modulusLength: 1024, publicExponent: new Uint8Array([1,0,1]), hash: "SHA-256"}, privateUsages: ["sign"], publicUsages: ["verify"]}, - {algorithm: {name: "RSA-PSS", modulusLength: 1024, publicExponent: new Uint8Array([1,0,1]), hash: "SHA-256"}, privateUsages: ["sign"], publicUsages: ["verify"]}, - {algorithm: {name: "RSA-OAEP", modulusLength: 1024, publicExponent: new Uint8Array([1,0,1]), hash: "SHA-256"}, privateUsages: ["decrypt"], publicUsages: ["encrypt"]}, - {algorithm: {name: "ECDSA", namedCurve: "P-256"}, privateUsages: ["sign"], publicUsages: ["verify"]}, - {algorithm: {name: "ECDH", namedCurve: "P-256"}, privateUsages: ["deriveBits"], publicUsages: []}, - {algorithm: {name: "Ed25519" }, privateUsages: ["sign"], publicUsages: ["verify"]}, - {algorithm: {name: "Ed448" }, privateUsages: ["sign"], publicUsages: ["verify"]}, - {algorithm: {name: "X25519" }, privateUsages: ["deriveBits"], publicUsages: []}, - {algorithm: {name: "X448" }, privateUsages: ["deriveBits"], publicUsages: []}, - {algorithm: {name: "AES-CTR", length: 128}, usages: ["encrypt", "decrypt"]}, - {algorithm: {name: "AES-CBC", length: 128}, usages: ["encrypt", "decrypt"]}, - {algorithm: {name: "AES-GCM", length: 128}, usages: ["encrypt", "decrypt"]}, - {algorithm: {name: "AES-KW", length: 128}, usages: ["wrapKey", "unwrapKey"]}, - {algorithm: {name: "HMAC", length: 128, hash: "SHA-256"}, usages: ["sign", "verify"]} - ]; - - // Using allSettled to skip unsupported test cases. - return Promise.allSettled(parameters.map(function(params) { - var usages; - if ("usages" in params) { - usages = params.usages; + function importKeysToWrap() { + var promises = []; + keysToWrapParameters.forEach(function(params) { + if ("publicUsages" in params) { + keys[params.algorithm.name] = {}; + var keyData = toWrapKeyDataFromAlg(params.algorithm.name); + promises.push(importAndExport("spki", keyData.spki, params.algorithm, params.publicUsages, "public key ")); + promises.push(importAndExport("pkcs8", keyData.pkcs8, params.algorithm, params.privateUsages, "private key ")); } else { - usages = params.publicUsages.concat(params.privateUsages); + keys[params.algorithm.name] = {}; + promises.push(importAndExport("raw", toWrapKeyData["SYMMETRIC"].raw, params.algorithm, params.usages, "")); } - - return subtle.generateKey(params.algorithm, true, usages) - .then(function(result) { - if (result.constructor === CryptoKey) { - keys.push({name: params.algorithm.name, algorithm: params.algorithm, usages: params.usages, key: result}); - } else { - keys.push({name: params.algorithm.name + " public key", algorithm: params.algorithm, usages: params.publicUsages, key: result.publicKey}); - keys.push({name: params.algorithm.name + " private key", algorithm: params.algorithm, usages: params.privateUsages, key: result.privateKey}); - } - return true; - }); - })); + }); + // Using allSettled to skip unsupported test cases. + return Promise.allSettled(promises); } // Can we successfully "round-trip" (wrap, then unwrap, a key)? - function testWrapping(wrapper, toWrap) { - var formats; + function testWrapping(wrapper, toWrap, fmt) { + promise_test(async() => { + try { + var wrappedResult = await subtle.wrapKey(fmt, toWrap.key, wrapper.wrappingKey, wrapper.parameters.wrapParameters); + var unwrappedResult = await subtle.unwrapKey(fmt, wrappedResult, wrapper.unwrappingKey, wrapper.parameters.wrapParameters, toWrap.algorithm, true, toWrap.usages); + assert_goodCryptoKey(unwrappedResult, toWrap.algorithm, true, toWrap.usages, toWrap.key.type); + var roundTripExport = await subtle.exportKey(fmt, unwrappedResult); + assert_true(equalExport(toWrap.originalExport[fmt], roundTripExport), "Post-wrap export matches original export"); + } catch (err) { + if (err instanceof AssertionError) { + throw err; + } + assert_unreached("Round trip for extractable key threw an error - " + err.name + ': "' + err.message + '"'); + } + }, "Can wrap and unwrap " + toWrap.name + "keys using " + fmt + " and " + wrapper.parameters.name); + } - if (toWrap.name.includes("private")) { - formats = ["pkcs8", "jwk"]; - } else if (toWrap.name.includes("public")) { - formats = ["spki", "jwk"] - } else { - formats = ["raw", "jwk"] - } + function testWrappingNonExtractable(wrapper, toWrap, fmt) { + promise_test(async() => { + try { + var wrappedResult = await subtle.wrapKey(fmt, toWrap.key, wrapper.wrappingKey, wrapper.parameters.wrapParameters); + var unwrappedResult = await subtle.unwrapKey(fmt, wrappedResult, wrapper.unwrappingKey, wrapper.parameters.wrapParameters, toWrap.algorithm, false, toWrap.usages); + assert_goodCryptoKey(unwrappedResult, toWrap.algorithm, false, toWrap.usages, toWrap.key.type); + var result = await equalKeys(toWrap.key, unwrappedResult); + assert_true(result, "Unwrapped key matches original"); + } catch (err) { + if (err instanceof AssertionError) { + throw err; + } + assert_unreached("Round trip for key unwrapped non-extractable threw an error - " + err.name + ': "' + err.message + '"'); + }; + }, "Can wrap and unwrap " + toWrap.name + "keys as non-extractable using " + fmt + " and " + wrapper.parameters.name); + } - return Promise.all(formats.map(function(fmt) { - var originalExport; - return subtle.exportKey(fmt, toWrap.key).then(function(exportedKey) { - originalExport = exportedKey; - const isPossible = wrappingIsPossible(originalExport, wrapper.parameters.name); - promise_test(function(test) { - if (!isPossible) { - return Promise.resolve().then(() => { - assert_false(false, "Wrapping is not possible"); - }) - } - return subtle.wrapKey(fmt, toWrap.key, wrapper.wrappingKey, wrapper.parameters.wrapParameters) - .then(function(wrappedResult) { - return subtle.unwrapKey(fmt, wrappedResult, wrapper.unwrappingKey, wrapper.parameters.wrapParameters, toWrap.algorithm, true, toWrap.usages); - }).then(function(unwrappedResult) { - assert_goodCryptoKey(unwrappedResult, toWrap.algorithm, true, toWrap.usages, toWrap.key.type); - return subtle.exportKey(fmt, unwrappedResult) - }).then(function(roundTripExport) { - assert_true(equalExport(originalExport, roundTripExport), "Post-wrap export matches original export"); - }, function(err) { - assert_unreached("Round trip for extractable key threw an error - " + err.name + ': "' + err.message + '"'); - }); - }, "Can wrap and unwrap " + toWrap.name + " keys using " + fmt + " and " + wrapper.parameters.name); - - if (canCompareNonExtractableKeys(toWrap.key)) { - promise_test(function(test){ - if (!isPossible) { - return Promise.resolve().then(() => { - assert_false(false, "Wrapping is not possible"); - }) - } - return subtle.wrapKey(fmt, toWrap.key, wrapper.wrappingKey, wrapper.parameters.wrapParameters) - .then(function(wrappedResult) { - return subtle.unwrapKey(fmt, wrappedResult, wrapper.unwrappingKey, wrapper.parameters.wrapParameters, toWrap.algorithm, false, toWrap.usages); - }).then(function(unwrappedResult){ - assert_goodCryptoKey(unwrappedResult, toWrap.algorithm, false, toWrap.usages, toWrap.key.type); - return equalKeys(toWrap.key, unwrappedResult); - }).then(function(result){ - assert_true(result, "Unwrapped key matches original"); - }).catch(function(err){ - assert_unreached("Round trip for key unwrapped non-extractable threw an error - " + err.name + ': "' + err.message + '"'); - }); - }, "Can wrap and unwrap " + toWrap.name + " keys as non-extractable using " + fmt + " and " + wrapper.parameters.name); - - if (fmt === "jwk") { - promise_test(function(test){ - if (!isPossible) { - return Promise.resolve().then(() => { - assert_false(false, "Wrapping is not possible"); - }) - } - var wrappedKey; - return wrapAsNonExtractableJwk(toWrap.key,wrapper).then(function(wrappedResult){ - wrappedKey = wrappedResult; - return subtle.unwrapKey("jwk", wrappedKey, wrapper.unwrappingKey, wrapper.parameters.wrapParameters, toWrap.algorithm, false, toWrap.usages); - }).then(function(unwrappedResult){ - assert_false(unwrappedResult.extractable, "Unwrapped key is non-extractable"); - return equalKeys(toWrap.key,unwrappedResult); - }).then(function(result){ - assert_true(result, "Unwrapped key matches original"); - }).catch(function(err){ - assert_unreached("Round trip for non-extractable key threw an error - " + err.name + ': "' + err.message + '"'); - }).then(function(){ - return subtle.unwrapKey("jwk", wrappedKey, wrapper.unwrappingKey, wrapper.parameters.wrapParameters, toWrap.algorithm, true, toWrap.usages); - }).then(function(unwrappedResult){ - assert_unreached("Unwrapping a non-extractable JWK as extractable should fail"); - }).catch(function(err){ - assert_equals(err.name, "DataError", "Unwrapping a non-extractable JWK as extractable fails with DataError"); - }); - }, "Can unwrap " + toWrap.name + " non-extractable keys using jwk and " + wrapper.parameters.name); - } + function testWrappingNonExtractableAsExtractable(wrapper, toWrap) { + promise_test(async() => { + var wrappedKey; + try { + var wrappedResult = await wrapAsNonExtractableJwk(toWrap.key,wrapper); + wrappedKey = wrappedResult; + var unwrappedResult = await subtle.unwrapKey("jwk", wrappedKey, wrapper.unwrappingKey, wrapper.parameters.wrapParameters, toWrap.algorithm, false, toWrap.usages); + assert_false(unwrappedResult.extractable, "Unwrapped key is non-extractable"); + var result = await equalKeys(toWrap.key,unwrappedResult); + assert_true(result, "Unwrapped key matches original"); + } catch (err) { + if (err instanceof AssertionError) { + throw err; } - }); - })); + assert_unreached("Round trip for non-extractable key threw an error - " + err.name + ': "' + err.message + '"'); + }; + try { + var unwrappedResult = await subtle.unwrapKey("jwk", wrappedKey, wrapper.unwrappingKey, wrapper.parameters.wrapParameters, toWrap.algorithm, true, toWrap.usages); + assert_unreached("Unwrapping a non-extractable JWK as extractable should fail"); + } catch (err) { + if (err instanceof AssertionError) { + throw err; + } + assert_equals(err.name, "DataError", "Unwrapping a non-extractable JWK as extractable fails with DataError"); + } + }, "Can unwrap " + toWrap.name + "non-extractable keys using jwk and " + wrapper.parameters.name); } // Implement key wrapping by hand to wrap a key as non-extractable JWK - function wrapAsNonExtractableJwk(key, wrapper){ + async function wrapAsNonExtractableJwk(key, wrapper) { var wrappingKey = wrapper.wrappingKey, encryptKey; - return subtle.exportKey("jwk",wrappingKey) - .then(function(jwkWrappingKey){ - // Update the key generation parameters to work as key import parameters - var params = Object.create(wrapper.parameters.generateParameters); - if(params.name === "AES-KW") { - params.name = "AES-CBC"; - jwkWrappingKey.alg = "A"+params.length+"CBC"; - } else if (params.name === "RSA-OAEP") { - params.modulusLength = undefined; - params.publicExponent = undefined; - } - jwkWrappingKey.key_ops = ["encrypt"]; - return subtle.importKey("jwk", jwkWrappingKey, params, true, ["encrypt"]); - }).then(function(importedWrappingKey){ - encryptKey = importedWrappingKey; - return subtle.exportKey("jwk",key); - }).then(function(exportedKey){ - exportedKey.ext = false; - var jwk = JSON.stringify(exportedKey) - if (wrappingKey.algorithm.name === "AES-KW") { - return aeskw(encryptKey, str2ab(jwk.slice(0,-1) + " ".repeat(jwk.length%8 ? 8-jwk.length%8 : 0) + "}")); - } else { - return subtle.encrypt(wrapper.parameters.wrapParameters,encryptKey,str2ab(jwk)); - } - }); + var jwkWrappingKey = await subtle.exportKey("jwk",wrappingKey); + // Update the key generation parameters to work as key import parameters + var params = Object.create(wrapper.parameters.importParameters); + if(params.name === "AES-KW") { + params.name = "AES-CBC"; + jwkWrappingKey.alg = "A"+params.length+"CBC"; + } else if (params.name === "RSA-OAEP") { + params.modulusLength = undefined; + params.publicExponent = undefined; + } + jwkWrappingKey.key_ops = ["encrypt"]; + var importedWrappingKey = await subtle.importKey("jwk", jwkWrappingKey, params, true, ["encrypt"]); + encryptKey = importedWrappingKey; + var exportedKey = await subtle.exportKey("jwk",key); + exportedKey.ext = false; + var jwk = JSON.stringify(exportedKey) + var result; + if (wrappingKey.algorithm.name === "AES-KW") { + result = await aeskw(encryptKey, str2ab(jwk.slice(0,-1) + " ".repeat(jwk.length%8 ? 8-jwk.length%8 : 0) + "}")); + } else { + result = await subtle.encrypt(wrapper.parameters.wrapParameters,encryptKey,str2ab(jwk)); + } + return result; } @@ -365,9 +383,9 @@ } // Compare two keys by using them (works for non-extractable keys) - function equalKeys(expected, got){ + async function equalKeys(expected, got){ if ( expected.algorithm.name !== got.algorithm.name ) { - return Promise.resolve(false); + return false; } var cryptParams, signParams, wrapParams, deriveParams; @@ -419,75 +437,60 @@ } if (cryptParams) { - return subtle.exportKey("jwk",expected) - .then(function(jwkExpectedKey){ - if (expected.algorithm.name === "RSA-OAEP") { - ["d","p","q","dp","dq","qi","oth"].forEach(function(field){ delete jwkExpectedKey[field]; }); - } - jwkExpectedKey.key_ops = ["encrypt"]; - return subtle.importKey("jwk", jwkExpectedKey, expected.algorithm, true, ["encrypt"]); - }).then(function(expectedEncryptKey){ - return subtle.encrypt(cryptParams, expectedEncryptKey, new Uint8Array(32)); - }).then(function(encryptedData){ - return subtle.decrypt(cryptParams, got, encryptedData); - }).then(function(decryptedData){ - var result = new Uint8Array(decryptedData); - return !result.some(x => x); - }); + var jwkExpectedKey = await subtle.exportKey("jwk", expected); + if (expected.algorithm.name === "RSA-OAEP") { + ["d","p","q","dp","dq","qi","oth"].forEach(function(field){ delete jwkExpectedKey[field]; }); + } + jwkExpectedKey.key_ops = ["encrypt"]; + var expectedEncryptKey = await subtle.importKey("jwk", jwkExpectedKey, expected.algorithm, true, ["encrypt"]); + var encryptedData = await subtle.encrypt(cryptParams, expectedEncryptKey, new Uint8Array(32)); + var decryptedData = await subtle.decrypt(cryptParams, got, encryptedData); + var result = new Uint8Array(decryptedData); + return !result.some(x => x); } else if (signParams) { var verifyKey; - return subtle.exportKey("jwk",expected) - .then(function(jwkExpectedKey){ - if (expected.algorithm.name === "RSA-PSS" || expected.algorithm.name === "RSASSA-PKCS1-v1_5") { - ["d","p","q","dp","dq","qi","oth"].forEach(function(field){ delete jwkExpectedKey[field]; }); - } - if (expected.algorithm.name === "ECDSA" || expected.algorithm.name.startsWith("Ed")) { - delete jwkExpectedKey["d"]; - } - jwkExpectedKey.key_ops = ["verify"]; - return subtle.importKey("jwk", jwkExpectedKey, expected.algorithm, true, ["verify"]); - }).then(function(expectedVerifyKey){ - verifyKey = expectedVerifyKey; - return subtle.sign(signParams, got, new Uint8Array(32)); - }).then(function(signature){ - return subtle.verify(signParams, verifyKey, signature, new Uint8Array(32)); - }); + var jwkExpectedKey = await subtle.exportKey("jwk",expected); + if (expected.algorithm.name === "RSA-PSS" || expected.algorithm.name === "RSASSA-PKCS1-v1_5") { + ["d","p","q","dp","dq","qi","oth"].forEach(function(field){ delete jwkExpectedKey[field]; }); + } + if (expected.algorithm.name === "ECDSA" || expected.algorithm.name.startsWith("Ed")) { + delete jwkExpectedKey["d"]; + } + jwkExpectedKey.key_ops = ["verify"]; + var expectedVerifyKey = await subtle.importKey("jwk", jwkExpectedKey, expected.algorithm, true, ["verify"]); + verifyKey = expectedVerifyKey; + var signature = await subtle.sign(signParams, got, new Uint8Array(32)); + var result = await subtle.verify(signParams, verifyKey, signature, new Uint8Array(32)); + return result; } else if (wrapParams) { var aKeyToWrap, wrappedWithExpected; - return subtle.importKey("raw", new Uint8Array(16), "AES-CBC", true, ["encrypt"]) - .then(function(key){ - aKeyToWrap = key; - return subtle.wrapKey("raw", aKeyToWrap, expected, wrapParams); - }).then(function(wrapResult){ - wrappedWithExpected = Array.from((new Uint8Array(wrapResult)).values()); - return subtle.wrapKey("raw", aKeyToWrap, got, wrapParams); - }).then(function(wrapResult){ - var wrappedWithGot = Array.from((new Uint8Array(wrapResult)).values()); - return wrappedWithGot.every((x,i) => x === wrappedWithExpected[i]); - }); + var key = await subtle.importKey("raw",new Uint8Array(16), "AES-CBC", true, ["encrypt"]) + aKeyToWrap = key; + var wrapResult = await subtle.wrapKey("raw", aKeyToWrap, expected, wrapParams); + wrappedWithExpected = Array.from((new Uint8Array(wrapResult)).values()); + wrapResult = await subtle.wrapKey("raw", aKeyToWrap, got, wrapParams); + var wrappedWithGot = Array.from((new Uint8Array(wrapResult)).values()); + return wrappedWithGot.every((x,i) => x === wrappedWithExpected[i]); } else if (deriveParams) { var expectedDerivedBits; - return subtle.generateKey(expected.algorithm, true, ['deriveBits']).then(({ publicKey }) => { - deriveParams.public = publicKey; - return subtle.deriveBits(deriveParams, expected, 128) - }) - .then(function(result){ - expectedDerivedBits = Array.from((new Uint8Array(result)).values()); - return subtle.deriveBits(deriveParams, got, 128); - }).then(function(result){ - var gotDerivedBits = Array.from((new Uint8Array(result)).values()); - return gotDerivedBits.every((x,i) => x === expectedDerivedBits[i]); - }); + var key = await subtle.generateKey(expected.algorithm, true, ['deriveBits']); + deriveParams.public = key.publicKey; + var result = await subtle.deriveBits(deriveParams, expected, 128); + expectedDerivedBits = Array.from((new Uint8Array(result)).values()); + result = await subtle.deriveBits(deriveParams, got, 128); + var gotDerivedBits = Array.from((new Uint8Array(result)).values()); + return gotDerivedBits.every((x,i) => x === expectedDerivedBits[i]); } } // Raw AES encryption - function aes( k, p ) { - return subtle.encrypt({name: "AES-CBC", iv: new Uint8Array(16) }, k, p).then(function(ciphertext){return ciphertext.slice(0,16);}); + async function aes(k, p) { + const ciphertext = await subtle.encrypt({ name: "AES-CBC", iv: new Uint8Array(16) }, k, p); + return ciphertext.slice(0, 16); } // AES Key Wrap - function aeskw(key, data) { + async function aeskw(key, data) { if (data.byteLength % 8 !== 0) { throw new Error("AES Key Wrap data must be a multiple of 8 bytes in length"); } @@ -501,7 +504,7 @@ R.push(new Uint8Array(data.slice(i,i+8))); } - function aeskw_step(j, i, final, B) { + async function aeskw_step(j, i, final, B) { A.set(new Uint8Array(B.slice(0,8))); Av.setUint32(4,Av.getUint32(4) ^ (n*j+i+1)); R[i] = new Uint8Array(B.slice(8,16)); @@ -516,18 +519,16 @@ } } - var p = new Promise(function(resolve){ - A.set(R[0],8); - resolve(aes(key,A)); - }); + A.set(R[0], 8); + let B = await aes(key, A); for(var j=0;j<6;++j) { for(var i=0;i { start(c) { controller = c; }, - async pull() { + pull() { byobRequestDefined.push(controller.byobRequest !== null); const initialByobRequest = controller.byobRequest; - const transferredView = await transferArrayBufferView(controller.byobRequest.view); + const transferredView = transferArrayBufferView(controller.byobRequest.view); transferredView[0] = 0x01; controller.byobRequest.respondWithNewView(transferredView); @@ -2288,7 +2288,7 @@ promise_test(async t => { await pullCalledPromise; // Transfer the original BYOB request's buffer, and respond with a new view on that buffer - const transferredView = await transferArrayBufferView(controller.byobRequest.view); + const transferredView = transferArrayBufferView(controller.byobRequest.view); const newView = transferredView.subarray(0, 1); newView[0] = 42; @@ -2328,7 +2328,7 @@ promise_test(async t => { await pullCalledPromise; // Transfer the original BYOB request's buffer, and respond with an empty view on that buffer - const transferredView = await transferArrayBufferView(controller.byobRequest.view); + const transferredView = transferArrayBufferView(controller.byobRequest.view); const newView = transferredView.subarray(0, 0); controller.close(); diff --git a/test/fixtures/wpt/streams/readable-byte-streams/patched-global.any.js b/test/fixtures/wpt/streams/readable-byte-streams/patched-global.any.js new file mode 100644 index 00000000000000..ce2e9e9993ae57 --- /dev/null +++ b/test/fixtures/wpt/streams/readable-byte-streams/patched-global.any.js @@ -0,0 +1,54 @@ +// META: global=window,worker,shadowrealm +// META: script=../resources/test-utils.js +'use strict'; + +// Tests which patch the global environment are kept separate to avoid +// interfering with other tests. + +promise_test(async (t) => { + let controller; + const rs = new ReadableStream({ + type: 'bytes', + start(c) { + controller = c; + } + }); + const reader = rs.getReader({mode: 'byob'}); + + const length = 0x4000; + const buffer = new ArrayBuffer(length); + const bigArray = new BigUint64Array(buffer, length - 8, 1); + + const read1 = reader.read(new Uint8Array(new ArrayBuffer(0x100))); + const read2 = reader.read(bigArray); + + let flag = false; + Object.defineProperty(Object.prototype, 'then', { + get: t.step_func(() => { + if (!flag) { + flag = true; + assert_equals(controller.byobRequest, null, 'byobRequest should be null after filling both views'); + } + }), + configurable: true + }); + t.add_cleanup(() => { + delete Object.prototype.then; + }); + + controller.enqueue(new Uint8Array(0x110).fill(0x42)); + assert_true(flag, 'patched then() should be called'); + + // The first read() is filled entirely with 0x100 bytes + const result1 = await read1; + assert_false(result1.done, 'result1.done'); + assert_typed_array_equals(result1.value, new Uint8Array(0x100).fill(0x42), 'result1.value'); + + // The second read() is filled with the remaining 0x10 bytes + const result2 = await read2; + assert_false(result2.done, 'result2.done'); + assert_equals(result2.value.constructor, BigUint64Array, 'result2.value constructor'); + assert_equals(result2.value.byteOffset, length - 8, 'result2.value byteOffset'); + assert_equals(result2.value.length, 1, 'result2.value length'); + assert_array_equals([...result2.value], [0x42424242_42424242n], 'result2.value contents'); +}, 'Patched then() sees byobRequest after filling all pending pull-into descriptors'); diff --git a/test/fixtures/wpt/streams/readable-byte-streams/tee.any.js b/test/fixtures/wpt/streams/readable-byte-streams/tee.any.js index 7dd5ba3f3fb013..60d82b9cf6a1fd 100644 --- a/test/fixtures/wpt/streams/readable-byte-streams/tee.any.js +++ b/test/fixtures/wpt/streams/readable-byte-streams/tee.any.js @@ -934,3 +934,36 @@ promise_test(async () => { assert_typed_array_equals(result4.value, new Uint8Array([0]).subarray(0, 0), 'second chunk from branch2 should be correct'); }, 'ReadableStream teeing with byte source: respond() and close() while both branches are pulling'); + +promise_test(async t => { + let pullCount = 0; + const arrayBuffer = new Uint8Array([0x01, 0x02, 0x03]).buffer; + const enqueuedChunk = new Uint8Array(arrayBuffer, 2); + assert_equals(enqueuedChunk.length, 1); + assert_equals(enqueuedChunk.byteOffset, 2); + const rs = new ReadableStream({ + type: 'bytes', + pull(c) { + ++pullCount; + if (pullCount === 1) { + c.enqueue(enqueuedChunk); + } + } + }); + + const [branch1, branch2] = rs.tee(); + const reader1 = branch1.getReader(); + const reader2 = branch2.getReader(); + + const [result1, result2] = await Promise.all([reader1.read(), reader2.read()]); + assert_equals(result1.done, false, 'reader1 done'); + assert_equals(result2.done, false, 'reader2 done'); + + const view1 = result1.value; + const view2 = result2.value; + // The first stream has the transferred buffer, but the second stream has the + // cloned buffer. + const underlying = new Uint8Array([0x01, 0x02, 0x03]).buffer; + assert_typed_array_equals(view1, new Uint8Array(underlying, 2), 'reader1 value'); + assert_typed_array_equals(view2, new Uint8Array([0x03]), 'reader2 value'); +}, 'ReadableStream teeing with byte source: reading an array with a byte offset should clone correctly'); diff --git a/test/fixtures/wpt/streams/readable-streams/crashtests/from-cross-realm.https.html b/test/fixtures/wpt/streams/readable-streams/crashtests/from-cross-realm.https.html new file mode 100644 index 00000000000000..58a4371186ece7 --- /dev/null +++ b/test/fixtures/wpt/streams/readable-streams/crashtests/from-cross-realm.https.html @@ -0,0 +1,18 @@ + + + diff --git a/test/fixtures/wpt/streams/readable-streams/owning-type-video-frame.any.js b/test/fixtures/wpt/streams/readable-streams/owning-type-video-frame.any.js index b652f9c5fcb4b6..ec01fda0b3c737 100644 --- a/test/fixtures/wpt/streams/readable-streams/owning-type-video-frame.any.js +++ b/test/fixtures/wpt/streams/readable-streams/owning-type-video-frame.any.js @@ -1,4 +1,4 @@ -// META: global=window,worker,shadowrealm +// META: global=window,worker // META: script=../resources/test-utils.js // META: script=../resources/rs-utils.js 'use strict'; diff --git a/test/fixtures/wpt/streams/resources/rs-utils.js b/test/fixtures/wpt/streams/resources/rs-utils.js index f1a014275a2fbc..0f7742a5b3b190 100644 --- a/test/fixtures/wpt/streams/resources/rs-utils.js +++ b/test/fixtures/wpt/streams/resources/rs-utils.js @@ -1,5 +1,42 @@ 'use strict'; (function () { + // Fake setInterval-like functionality in environments that don't have it + class IntervalHandle { + constructor(callback, delayMs) { + this.callback = callback; + this.delayMs = delayMs; + this.cancelled = false; + Promise.resolve().then(() => this.check()); + } + + async check() { + while (true) { + await new Promise(resolve => step_timeout(resolve, this.delayMs)); + if (this.cancelled) { + return; + } + this.callback(); + } + } + + cancel() { + this.cancelled = true; + } + } + + let localSetInterval, localClearInterval; + if (typeof globalThis.setInterval !== "undefined" && + typeof globalThis.clearInterval !== "undefined") { + localSetInterval = globalThis.setInterval; + localClearInterval = globalThis.clearInterval; + } else { + localSetInterval = function setInterval(callback, delayMs) { + return new IntervalHandle(callback, delayMs); + } + localClearInterval = function clearInterval(handle) { + handle.cancel(); + } + } class RandomPushSource { constructor(toPush) { @@ -18,12 +55,12 @@ } if (!this.started) { - this._intervalHandle = setInterval(writeChunk, 2); + this._intervalHandle = localSetInterval(writeChunk, 2); this.started = true; } if (this.paused) { - this._intervalHandle = setInterval(writeChunk, 2); + this._intervalHandle = localSetInterval(writeChunk, 2); this.paused = false; } @@ -37,7 +74,7 @@ if (source.toPush > 0 && source.pushed > source.toPush) { if (source._intervalHandle) { - clearInterval(source._intervalHandle); + localClearInterval(source._intervalHandle); source._intervalHandle = undefined; } source.closed = true; @@ -55,7 +92,7 @@ if (this.started) { this.paused = true; - clearInterval(this._intervalHandle); + localClearInterval(this._intervalHandle); this._intervalHandle = undefined; } else { throw new Error('Can\'t pause reading an unstarted source.'); @@ -178,15 +215,7 @@ } function transferArrayBufferView(view) { - const noopByteStream = new ReadableStream({ - type: 'bytes', - pull(c) { - c.byobRequest.respond(c.byobRequest.view.byteLength); - c.close(); - } - }); - const reader = noopByteStream.getReader({ mode: 'byob' }); - return reader.read(view).then((result) => result.value); + return structuredClone(view, { transfer: [view.buffer] }); } self.RandomPushSource = RandomPushSource; diff --git a/test/fixtures/wpt/streams/transferable/transfer-with-messageport.window.js b/test/fixtures/wpt/streams/transferable/transfer-with-messageport.window.js index 37f8c9df169607..3bfe634a6e153d 100644 --- a/test/fixtures/wpt/streams/transferable/transfer-with-messageport.window.js +++ b/test/fixtures/wpt/streams/transferable/transfer-with-messageport.window.js @@ -105,7 +105,7 @@ async function transferMessagePortWith(constructor) { await transferMessagePortWithOrder3(new constructor()); } -async function advancedTransferMesagePortWith(constructor) { +async function advancedTransferMessagePortWith(constructor) { await transferMessagePortWithOrder4(new constructor()); await transferMessagePortWithOrder5(new constructor()); await transferMessagePortWithOrder6(new constructor()); @@ -166,7 +166,7 @@ async function mixedTransferMessagePortWithOrder3() { ); } -async function mixedTransferMesagePortWith() { +async function mixedTransferMessagePortWith() { await mixedTransferMessagePortWithOrder1(); await mixedTransferMessagePortWithOrder2(); await mixedTransferMessagePortWithOrder3(); @@ -185,19 +185,19 @@ promise_test(async t => { }, "Transferring a MessagePort with a TransformStream should set `.ports`"); promise_test(async t => { - await transferMessagePortWith(ReadableStream); + await advancedTransferMessagePortWith(ReadableStream); }, "Transferring a MessagePort with a ReadableStream should set `.ports`, advanced"); promise_test(async t => { - await transferMessagePortWith(WritableStream); + await advancedTransferMessagePortWith(WritableStream); }, "Transferring a MessagePort with a WritableStream should set `.ports`, advanced"); promise_test(async t => { - await transferMessagePortWith(TransformStream); + await advancedTransferMessagePortWith(TransformStream); }, "Transferring a MessagePort with a TransformStream should set `.ports`, advanced"); promise_test(async t => { - await mixedTransferMesagePortWith(); + await mixedTransferMessagePortWith(); }, "Transferring a MessagePort with multiple streams should set `.ports`"); test(() => { diff --git a/test/fixtures/wpt/url/META.yml b/test/fixtures/wpt/url/META.yml index 094b266b64b61b..415bd0f094c6b9 100644 --- a/test/fixtures/wpt/url/META.yml +++ b/test/fixtures/wpt/url/META.yml @@ -3,5 +3,4 @@ suggested_reviewers: - mikewest - domenic - annevk - - GPHemsley - TimothyGu diff --git a/test/fixtures/wpt/url/README.md b/test/fixtures/wpt/url/README.md index fa5e3b0dc72385..50227bc4b330ef 100644 --- a/test/fixtures/wpt/url/README.md +++ b/test/fixtures/wpt/url/README.md @@ -1,11 +1,13 @@ -## urltestdata.json +## urltestdata.json / urltestdata-javascript-only.json -`resources/urltestdata.json` contains URL parsing tests suitable for any URL parser implementation. +[`resources/urltestdata.json`](resources/urltestdata.json) contains URL parsing tests suitable for any URL parser implementation. +[`resources/urltestdata-javascript-only.json`](resources/urltestdata-javascript-only.json) contains URL parsing tests specifically meant +for JavaScript's `URL()` class as well as other languages accepting non-scalar-value strings. -It's used as a source of tests by `a-element.html`, `failure.html`, `url-constructor.any.js`, and -other test files in this directory. +These files are used as a source of tests by `a-element.html`, `failure.html`, `url-constructor.any.js`, +and other test files in this directory. -The format of `resources/urltestdata.json` is a JSON array of comments as strings and test cases as +Both files share the same format. They consist of a JSON array of comments as strings and test cases as objects. The keys for each test case are: * `input`: a string to be parsed as URL. diff --git a/test/fixtures/wpt/url/a-element-origin-xhtml.xhtml b/test/fixtures/wpt/url/a-element-origin-xhtml.xhtml index effcf04bee3fb0..e68e68dda2ad6c 100644 --- a/test/fixtures/wpt/url/a-element-origin-xhtml.xhtml +++ b/test/fixtures/wpt/url/a-element-origin-xhtml.xhtml @@ -12,4 +12,8 @@ - + diff --git a/test/fixtures/wpt/url/a-element-origin.html b/test/fixtures/wpt/url/a-element-origin.html index 9cc8e94cbed060..7015f853f01a1d 100644 --- a/test/fixtures/wpt/url/a-element-origin.html +++ b/test/fixtures/wpt/url/a-element-origin.html @@ -5,4 +5,8 @@
    - + diff --git a/test/fixtures/wpt/url/a-element-xhtml.xhtml b/test/fixtures/wpt/url/a-element-xhtml.xhtml index 05bec4ce4b2f1e..610481a7819d62 100644 --- a/test/fixtures/wpt/url/a-element-xhtml.xhtml +++ b/test/fixtures/wpt/url/a-element-xhtml.xhtml @@ -17,4 +17,8 @@ - + diff --git a/test/fixtures/wpt/url/a-element.html b/test/fixtures/wpt/url/a-element.html index 3428fa00574c4d..a7621d2ded76c4 100644 --- a/test/fixtures/wpt/url/a-element.html +++ b/test/fixtures/wpt/url/a-element.html @@ -10,7 +10,11 @@
    - + Link with embedded \n is parsed correctly diff --git a/test/fixtures/wpt/url/failure.html b/test/fixtures/wpt/url/failure.html index e61f462f97456f..d95b1d52d67237 100644 --- a/test/fixtures/wpt/url/failure.html +++ b/test/fixtures/wpt/url/failure.html @@ -6,7 +6,10 @@